summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2014-03-20 19:10:46 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2014-03-20 19:10:59 +0000
commite8facfd99e91cf5fefa4291a3ba0b6a0710eea09 (patch)
treedf73671fcd2421ade9cb711f1f67e4241c5493bc /usr/src
parent76436dc0854d1e18a76ccb58a9b496a0e93ae7c7 (diff)
downloadillumos-joyent-e8facfd99e91cf5fefa4291a3ba0b6a0710eea09.tar.gz
OS-2834 ship lx brand
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/Makefile.lint1
-rw-r--r--usr/src/Targetdirs7
-rw-r--r--usr/src/cmd/devfsadm/i386/Makefile3
-rw-r--r--usr/src/cmd/devfsadm/i386/lx_link_i386.c86
-rw-r--r--usr/src/cmd/zlogin/zlogin.c14
-rw-r--r--usr/src/cmd/zoneadm/svc-zones2
-rw-r--r--usr/src/common/brand/lx/lx_signum.c242
-rw-r--r--usr/src/common/brand/lx/lx_signum.h84
-rw-r--r--usr/src/head/regexp.h12
-rw-r--r--usr/src/lib/brand/Makefile3
-rw-r--r--usr/src/lib/brand/lx/Makefile56
-rw-r--r--usr/src/lib/brand/lx/Makefile.lx34
-rw-r--r--usr/src/lib/brand/lx/cmd/Makefile48
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_lockd.sh36
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_native.sh29
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_statd.sh36
-rw-r--r--usr/src/lib/brand/lx/cmd/lx_thunk.sh29
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile54
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile.com83
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/Makefile38
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers44
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c575
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/mapfile-vers58
-rw-r--r--usr/src/lib/brand/lx/librtld_db/i386/Makefile33
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile53
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile.com102
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clock.c116
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clone.c546
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/debug.c147
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/dir.c160
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fcntl.c387
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/file.c747
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fork.c65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/id.c269
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/ioctl.c2719
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/iovec.c241
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_brand.c1237
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c1026
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile-vers47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mem.c210
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/misc.c546
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/module.c90
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mount.c719
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/open.c183
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/pgrp.c157
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/poll_select.c215
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/priority.c89
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/ptrace.c2114
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/rlimit.c233
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sched.c610
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sendfile.c97
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/signal.c1714
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/socket.c1487
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/stat.c551
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/statfs.c309
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysctl.c138
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c893
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/time.c184
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/truncate.c63
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/wait.c288
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/Makefile56
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s377
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s61
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/offsets.in40
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h48
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h114
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h382
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h153
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h139
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h63
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h299
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h258
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h95
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h79
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h556
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h211
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h53
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h143
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_types.h108
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/Makefile52
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com68
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile35
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c479
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers51
-rw-r--r--usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile34
-rw-r--r--usr/src/lib/brand/lx/lx_support/Makefile54
-rw-r--r--usr/src/lib/brand/lx/lx_support/lx_support.c578
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/Makefile52
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/Makefile.com74
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/amd64/Makefile34
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c1123
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers58
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/i386/Makefile33
-rw-r--r--usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h56
-rw-r--r--usr/src/lib/brand/lx/netfiles/Makefile48
-rw-r--r--usr/src/lib/brand/lx/netfiles/etc_netconfig38
-rw-r--r--usr/src/lib/brand/lx/zone/Makefile67
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx.xml34
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx26.xml35
-rw-r--r--usr/src/lib/brand/lx/zone/config.xml94
-rw-r--r--usr/src/lib/brand/lx/zone/distros/Makefile50
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos35.distro66
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos36.distro66
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos37.distro65
-rw-r--r--usr/src/lib/brand/lx/zone/distros/centos38.distro79
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel35.distro98
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel36.distro97
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel37.distro96
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel38.distro109
-rw-r--r--usr/src/lib/brand/lx/zone/distros/rhel_centos_common1016
-rw-r--r--usr/src/lib/brand/lx/zone/lx_distro_install.ksh2772
-rw-r--r--usr/src/lib/brand/lx/zone/lx_init_zone.ksh686
-rw-r--r--usr/src/lib/brand/lx/zone/lx_install.ksh579
-rw-r--r--usr/src/lib/brand/lx/zone/platform.xml85
-rw-r--r--usr/src/pkg/manifests/SUNWlx.mf11
-rw-r--r--usr/src/pkg/manifests/system-zones-brand-lx.mf105
-rw-r--r--usr/src/uts/common/Makefile.files6
-rw-r--r--usr/src/uts/common/brand/lx/autofs/lx_autofs.c1569
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.c396
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/ldlinux.c297
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.c1996
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.c1156
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.conf27
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_brand.c943
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_misc.c362
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_pid.c369
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_syscall.c454
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_proc.h232
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prsubr.c516
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c374
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c3030
-rw-r--r--usr/src/uts/common/brand/lx/sys/ldlinux.h117
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_audio.h130
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs.h334
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h121
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_brand.h232
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_futex.h53
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_impl.h62
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ldt.h93
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_pid.h61
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ptm.h44
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_sched.h60
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_syscalls.h68
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_brk.c57
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_clone.c135
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_futex.c471
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getpid.c74
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_id.c297
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_kill.c253
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c121
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sched.c513
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c118
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_thread_area.c128
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.c6
-rw-r--r--usr/src/uts/common/brand/solaris10/s10_brand.c6
-rw-r--r--usr/src/uts/common/io/ptm.c47
-rw-r--r--usr/src/uts/common/os/brand.c2
-rw-r--r--usr/src/uts/common/os/pid.c12
-rw-r--r--usr/src/uts/common/os/streamio.c1
-rw-r--r--usr/src/uts/common/sys/ptms.h19
-rw-r--r--usr/src/uts/common/sys/termios.h18
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm.s30
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm_amd64.s42
-rw-r--r--usr/src/uts/i86pc/sys/apic.h2
-rw-r--r--usr/src/uts/intel/Makefile6
-rw-r--r--usr/src/uts/intel/Makefile.files25
-rw-r--r--usr/src/uts/intel/Makefile.intel8
-rw-r--r--usr/src/uts/intel/brand/lx/lx_brand_asm.s162
-rw-r--r--usr/src/uts/intel/genassym/Makefile85
-rw-r--r--usr/src/uts/intel/genassym/offsets.in43
-rw-r--r--usr/src/uts/intel/ia32/os/archdep.c15
-rw-r--r--usr/src/uts/intel/ia32/os/desctbls.c28
-rw-r--r--usr/src/uts/intel/ldlinux/Makefile103
-rw-r--r--usr/src/uts/intel/lx_afs/Makefile108
-rw-r--r--usr/src/uts/intel/lx_afs/Makefile.rules40
-rw-r--r--usr/src/uts/intel/lx_audio/Makefile100
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile107
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile.rules85
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile113
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile.rules38
-rw-r--r--usr/src/uts/intel/lx_ptm/Makefile91
-rw-r--r--usr/src/uts/intel/lx_systrace/Makefile80
-rw-r--r--usr/src/uts/intel/sys/machbrand.h9
-rw-r--r--usr/src/uts/intel/sys/segments.h2
188 files changed, 49502 insertions, 37 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 03850d711c..96faacb9e9 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -486,6 +486,7 @@ i386_SUBDIRS= \
cmd/biosdev \
cmd/rtc \
cmd/ucodeadm \
+ lib/brand/lx \
lib/cfgadm_plugins/sata \
lib/cfgadm_plugins/sbd \
lib/libfdisk
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index b81bfd4517..0374d13e5e 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -50,6 +50,9 @@ i386_DIRS= \
/boot/grub \
/boot/grub/bin \
/platform/i86pc \
+ /usr/lib/brand/lx \
+ /usr/lib/brand/lx/amd64 \
+ /usr/lib/brand/lx/distros \
/usr/lib/xen \
/usr/lib/xen/bin
@@ -594,6 +597,9 @@ SYM.DIRS= \
/usr/ucblib/32 \
/var/ld/32
+i386_SYM.DIRS64= \
+ /usr/lib/brand/lx/64
+
sparc_SYM.DIRS64=
SYM.DIRS64= \
@@ -711,6 +717,7 @@ $(BUILD64) $(ROOT)/lib/crypto/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/lib/secure/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/elfedit/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/brand/lx/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/sn1/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/sngl/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/solaris10/64:= LINKDEST=$(MACH64)
diff --git a/usr/src/cmd/devfsadm/i386/Makefile b/usr/src/cmd/devfsadm/i386/Makefile
index 1f14c93dad..75f2da3436 100644
--- a/usr/src/cmd/devfsadm/i386/Makefile
+++ b/usr/src/cmd/devfsadm/i386/Makefile
@@ -24,8 +24,11 @@
LINK_OBJS_i386 = \
misc_link_i386.o \
+ lx_link_i386.o \
xen_link.o
+lx_link_i386.o lx_link_i386.po lx_link_i386.ln := CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
xen_link.o xen_link.ln xen_link.po := CPPFLAGS += -I$(UTSBASE)/i86xpv
include ../Makefile.com
diff --git a/usr/src/cmd/devfsadm/i386/lx_link_i386.c b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
new file mode 100644
index 0000000000..855f4f7383
--- /dev/null
+++ b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
@@ -0,0 +1,86 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <devfsadm.h>
+#include <strings.h>
+#include <stdio.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_audio.h>
+
+static int lx_ptm(di_minor_t minor, di_node_t node);
+static int lx_audio(di_minor_t minor, di_node_t node);
+static int lx_systrace(di_minor_t minor, di_node_t node);
+
+static devfsadm_create_t lx_create_cbt[] = {
+ { "pseudo", "ddi_pseudo", LX_PTM_DRV,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_ptm },
+ { "pseudo", "ddi_pseudo", LX_AUDIO_DRV,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_audio },
+ { "pseudo", "ddi_pseudo", "lx_systrace",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_systrace },
+};
+
+DEVFSADM_CREATE_INIT_V0(lx_create_cbt);
+
+static int
+lx_ptm(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LX_PTM_MINOR_NODE, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/ptmx", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_audio(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LXA_MINORNAME_DEVCTL, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/audio_devctl", node, minor, 0);
+ if (strcmp(LXA_MINORNAME_DSP, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/dsp", node, minor, 0);
+ if (strcmp(LXA_MINORNAME_MIXER, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/mixer", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_systrace(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "dtrace/provider/%s", mname);
+ (void) devfsadm_mklink(path, node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
diff --git a/usr/src/cmd/zlogin/zlogin.c b/usr/src/cmd/zlogin/zlogin.c
index f8d2656ee1..cadbda4a0b 100644
--- a/usr/src/cmd/zlogin/zlogin.c
+++ b/usr/src/cmd/zlogin/zlogin.c
@@ -2233,8 +2233,18 @@ main(int argc, char **argv)
/*
* In failsafe mode, we don't use login(1), so don't try
* setting up a utmpx entry.
- */
- if (!failsafe)
+ *
+ * A branded zone may have very different utmpx semantics.
+ * At the moment, we only have two brand types:
+ * Solaris-like (native, sn1) and Linux. In the Solaris
+ * case, we know exactly how to do the necessary utmpx
+ * setup. Fortunately for us, the Linux /bin/login is
+ * prepared to deal with a non-initialized utmpx entry, so
+ * we can simply skip it. If future brands don't fall into
+ * either category, we'll have to add a per-brand utmpx
+ * setup hook.
+ */
+ if (!failsafe && (strcmp(zonebrand, "lx") != 0))
if (setup_utmpx(slaveshortname) == -1)
return (1);
diff --git a/usr/src/cmd/zoneadm/svc-zones b/usr/src/cmd/zoneadm/svc-zones
index 9d307835bd..30d54f5272 100644
--- a/usr/src/cmd/zoneadm/svc-zones
+++ b/usr/src/cmd/zoneadm/svc-zones
@@ -32,7 +32,7 @@
shutdown_zones()
{
zoneadm list -p | nawk -F: '{
- if ($2 != "global") {
+ if (($5 != "lx") && ($2 != "global")) {
print $2
}
}'
diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c
new file mode 100644
index 0000000000..5554750874
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.c
@@ -0,0 +1,242 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/signal.h>
+#include <lx_signum.h>
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Solaris and Linux.
+ *
+ * The simplest transformation that must be done when sending signals is to
+ * translate between Linux and Solaris signal numbers.
+ *
+ * These are the major signal number differences between Linux and Solaris:
+ *
+ * ====================================
+ * | Number | Linux | Solaris |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALRM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALRM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a Solaris signal, nor does every Solaris
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ *
+ * One mapping issue is that Linux supports 33 real time signals, with SIGRTMIN
+ * typically starting at or near 32 (SIGRTMIN) and proceeding to 64 (SIGRTMAX)
+ * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one or more of
+ * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as
+ * needed.) Conversely, Solaris actively uses signals 32-40 for other purposes
+ * and supports exactly 32 real time signals, in the range 41 (SIGRTMIN)
+ * to 72 (SIGRTMAX).
+ *
+ * At present, attempting to translate a Linux signal equal to 63
+ * will generate an error (we allow SIGRTMAX because a program
+ * should be able to send SIGRTMAX without getting an EINVAL, though obviously
+ * anything that loops through the signals from SIGRTMIN to SIGRTMAX will
+ * fail.)
+ *
+ * Similarly, attempting to translate a native Solaris signal in the range
+ * 32-40 will also generate an error as we don't want to support the receipt of
+ * those signals from the Solaris global zone.
+ */
+
+/*
+ * Linux to Solaris signal map
+ *
+ * Usage: solaris_signal = ltos_signo[lx_signal];
+ */
+const int
+ltos_signo[LX_NSIG] = {
+ 0,
+ SIGHUP,
+ SIGINT,
+ SIGQUIT,
+ SIGILL,
+ SIGTRAP,
+ SIGABRT,
+ SIGBUS,
+ SIGFPE,
+ SIGKILL,
+ SIGUSR1,
+ SIGSEGV,
+ SIGUSR2,
+ SIGPIPE,
+ SIGALRM,
+ SIGTERM,
+ SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */
+ SIGCHLD,
+ SIGCONT,
+ SIGSTOP,
+ SIGTSTP,
+ SIGTTIN,
+ SIGTTOU,
+ SIGURG,
+ SIGXCPU,
+ SIGXFSZ,
+ SIGVTALRM,
+ SIGPROF,
+ SIGWINCH,
+ SIGPOLL,
+ SIGPWR,
+ SIGSYS,
+ _SIGRTMIN, /* 32: Linux SIGRTMIN */
+ _SIGRTMIN + 1,
+ _SIGRTMIN + 2,
+ _SIGRTMIN + 3,
+ _SIGRTMIN + 4,
+ _SIGRTMIN + 5,
+ _SIGRTMIN + 6,
+ _SIGRTMIN + 7,
+ _SIGRTMIN + 8,
+ _SIGRTMIN + 9,
+ _SIGRTMIN + 10,
+ _SIGRTMIN + 11,
+ _SIGRTMIN + 12,
+ _SIGRTMIN + 13,
+ _SIGRTMIN + 14,
+ _SIGRTMIN + 15,
+ _SIGRTMIN + 16,
+ _SIGRTMIN + 17,
+ _SIGRTMIN + 18,
+ _SIGRTMIN + 19,
+ _SIGRTMIN + 20,
+ _SIGRTMIN + 21,
+ _SIGRTMIN + 22,
+ _SIGRTMIN + 23,
+ _SIGRTMIN + 24,
+ _SIGRTMIN + 25,
+ _SIGRTMIN + 26,
+ _SIGRTMIN + 27,
+ _SIGRTMIN + 28,
+ _SIGRTMIN + 29,
+ _SIGRTMIN + 30,
+ -1, /* 63: Linux SIGRTMIN + 31, or SIGRTMAX - 1 */
+ _SIGRTMAX, /* 64: Linux SIGRTMAX */
+};
+
+/*
+ * Solaris to Linux signal map
+ *
+ * Usage: lx_signal = stol_signo[solaris_signal];
+ */
+const int
+stol_signo[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: Solaris SIGWAITING */
+ -1, /* 33: Solaris SIGLWP */
+ -1, /* 34: Solaris SIGFREEZE */
+ -1, /* 35: Solaris SIGTHAW */
+ -1, /* 36: Solaris SIGCANCEL */
+ -1, /* 37: Solaris SIGLOST */
+ -1, /* 38: Solaris SIGXRES */
+ -1, /* 39: Solaris SIGJVM1 */
+ -1, /* 40: Solaris SIGJVM2 */
+ LX_SIGRTMIN, /* 41: Solaris _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMAX, /* 72: Solaris _SIGRTMAX */
+};
diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h
new file mode 100644
index 0000000000..1ec6fa09c9
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.h
@@ -0,0 +1,84 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_SIGNUM_H
+#define _LX_SIGNUM_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG_WORDS 2
+#define LX_NBPW 32
+#define LX_NSIG ((LX_NBPW * LX_NSIG_WORDS) + 1)
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX (LX_NSIG - 1) /* parenthesized: safe in any expression */
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGNUM_H */
diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h
index c8a58a1981..cceb6c7bed 100644
--- a/usr/src/head/regexp.h
+++ b/usr/src/head/regexp.h
@@ -422,12 +422,12 @@ register char *lp, *ep;
/*FALLTHRU*/
case CBRA:
- braslist[*ep++] = (char *)lp;
+ braslist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
case CKET:
- braelist[*ep++] = (char *)lp;
+ braelist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
@@ -505,8 +505,8 @@ register char *lp, *ep;
/*FALLTHRU*/
case CBACK:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
if (ecmp(bbeg, lp, ct)) {
lp += ct;
@@ -516,8 +516,8 @@ register char *lp, *ep;
/*FALLTHRU*/
case CBACK | STAR:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
curlp = lp;
while (ecmp(bbeg, lp, ct))
lp += ct;
diff --git a/usr/src/lib/brand/Makefile b/usr/src/lib/brand/Makefile
index db59df2efc..bd766f3f9d 100644
--- a/usr/src/lib/brand/Makefile
+++ b/usr/src/lib/brand/Makefile
@@ -30,6 +30,9 @@ include ../../Makefile.master
# Build everything in parallel; use .WAIT for dependencies
.PARALLEL:
+i386_SUBDIRS= lx
+i386_MSGSUBDIRS= lx
+
SUBDIRS= shared .WAIT sn1 sngl solaris10 ipkg labeled $($(MACH)_SUBDIRS)
MSGSUBDIRS= solaris10 shared $($(MACH)_MSGSUBDIRS)
diff --git a/usr/src/lib/brand/lx/Makefile b/usr/src/lib/brand/lx/Makefile
new file mode 100644
index 0000000000..7fafad20da
--- /dev/null
+++ b/usr/src/lib/brand/lx/Makefile
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+default: all
+
+include Makefile.lx
+
+# Build everything in parallel; use .WAIT for dependencies
+.PARALLEL:
+
+SUBDIRS= cmd librtld_db lx_support lx_brand lx_thunk netfiles zone \
+ .WAIT lx_nametoaddr
+MSGSUBDIRS= lx_brand lx_support zone
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint: $(SUBDIRS)
+
+_msg: $(MSGSUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/Makefile.lx b/usr/src/lib/brand/lx/Makefile.lx
new file mode 100644
index 0000000000..4db4679cef
--- /dev/null
+++ b/usr/src/lib/brand/lx/Makefile.lx
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# lib/brand/lx/Makefile.lx
+#
+# include global definitions
+
+BRAND= lx
+
+include $(SRC)/lib/brand/Makefile.brand
+
diff --git a/usr/src/lib/brand/lx/cmd/Makefile b/usr/src/lib/brand/lx/cmd/Makefile
new file mode 100644
index 0000000000..1519961954
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+PROGS = lx_lockd lx_native lx_statd lx_thunk
+
+include ../Makefile.lx
+
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(ROOTPROGS)
+
+.KEEP_STATE:
+
+lint:
+
+all: $(PROGS)
+
+install: all $(ROOTPROGS)
+
+clean:
+ $(RM) $(PROGS)
+
+clobber: clean
+ $(RM) $(ROOTPROGS)
diff --git a/usr/src/lib/brand/lx/cmd/lx_lockd.sh b/usr/src/lib/brand/lx/cmd/lx_lockd.sh
new file mode 100644
index 0000000000..cb60d19749
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_lockd.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Linker environment for lockd: preload lx_thunk.so.1 and bind symbols eagerly.
+LD_LIBRARY_PATH=/usr/lib/brand/lx
+LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1
+LD_BIND_NOW=1
+export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW
+# Replace this shell with lx_native, handing it the lockd command line.
+exec /native/usr/lib/brand/lx/lx_native \
+ /native/usr/lib/nfs/lockd -P -U 29 -G 29
diff --git a/usr/src/lib/brand/lx/cmd/lx_native.sh b/usr/src/lib/brand/lx/cmd/lx_native.sh
new file mode 100644
index 0000000000..8e8344a375
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_native.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+# Stub: this script performs no work and reports success.
+exit 0
diff --git a/usr/src/lib/brand/lx/cmd/lx_statd.sh b/usr/src/lib/brand/lx/cmd/lx_statd.sh
new file mode 100644
index 0000000000..998fd90af2
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_statd.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Linker environment for statd: preload lx_thunk.so.1 and bind symbols eagerly.
+LD_LIBRARY_PATH=/usr/lib/brand/lx
+LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1
+LD_BIND_NOW=1
+export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW
+# Replace this shell with lx_native, handing it the statd command line.
+exec /native/usr/lib/brand/lx/lx_native \
+ /native/usr/lib/nfs/statd -P -U 29 -G 29
diff --git a/usr/src/lib/brand/lx/cmd/lx_thunk.sh b/usr/src/lib/brand/lx/cmd/lx_thunk.sh
new file mode 100644
index 0000000000..4e1e6cbc03
--- /dev/null
+++ b/usr/src/lib/brand/lx/cmd/lx_thunk.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+exec /native/usr/lib/brand/lx/lx_thunk
diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile b/usr/src/lib/brand/lx/librtld_db/Makefile
new file mode 100644
index 0000000000..2fc0a818f6
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Driver makefile: every target is delegated to the per-architecture subdirs.
+default: all
+
+include $(SRC)/lib/Makefile.lib
+# Build in $(MACH) always; $(BUILD64) presumably comments out the 64-bit line.
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS= $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+# Conditional macros: each top-level target hands its own name down as TARGET.
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+# Recurse into each subdirectory; the FRC prerequisite forces this rule to run.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile.com b/usr/src/lib/brand/lx/librtld_db/Makefile.com
new file mode 100644
index 0000000000..202cc0fe7b
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile.com
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Definitions shared by the i386 and amd64 builds of the lx librtld_db plugin.
+LIBRARY = lx_librtld_db.a
+VERS = .1
+COBJS = lx_librtld_db.o
+OBJECTS = $(COBJS) $(COBJS64)
+
+include $(SRC)/lib/Makefile.lib
+include ../../Makefile.lx
+
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = $(SRC)/uts
+
+#
+# ATTENTION:
+# Librtld_db brand plugin libraries should NOT directly invoke any
+# libproc.so interfaces or be linked against libproc. If a librtld_db
+# brand plugin library uses libproc.so interfaces then it may break
+# any other librtld_db consumers (like mdb) that try to attach
+# to a branded process. The only safe interfaces that a librtld_db
+# brand plugin library can use to access a target process are the
+# proc_service(3PROC) APIs.
+#
+DYNFLAGS += $(VERSREF) -M../common/mapfile-vers
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lrtld_db
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx \
+ -I$(SRC)/cmd/sgs/librtld_db/common \
+ -I$(SRC)/cmd/sgs/include \
+ -I$(SRC)/cmd/sgs/include/$(MACH)
+
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+#
+# The top level Makefiles define TEXT_DOMAIN. But librtld_db.so.1
+# isn't internationalized and this library won't be either. The only
+# messages that this library can generate are messages used for debugging
+# the operation of the library itself.
+#
+DTEXTDOM =
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+# 64-bit ELF flavor: build the common source again with -D_ELF64 as <name>64.o.
+pics/%64.o: ../common/%.c
+ $(COMPILE.c) -D_ELF64 $(PICFLAGS) -o $@ $<
+ $(POST_PROCESS_O)
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/Makefile b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
new file mode 100644
index 0000000000..726e7ef6d3
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# Extra object for the 64-bit library: the common source built with -D_ELF64.
+COBJS64 = lx_librtld_db64.o
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+# Add the mapfile in this directory, which exports rtld_db_brand_ops64.
+DYNFLAGS += -Mmapfile-vers
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
new file mode 100644
index 0000000000..4893b02998
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+# Export the 64-bit brand ops symbol in the SUNWprivate_1.1 version.
+SUNWprivate_1.1 {
+ global:
+ rtld_db_brand_ops64;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
new file mode 100644
index 0000000000..50645b7780
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
@@ -0,0 +1,575 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/link.h>
+#include <libproc.h>
+#include <proc_service.h>
+#include <rtld_db.h>
+#include <synch.h>
+
+#include <sys/lx_brand.h>
+
+/*
+ * ATTENTION:
+ * Librtld_db brand plugin libraries should NOT directly invoke any
+ * libproc.so interfaces or be linked against libproc. If a librtld_db
+ * brand plugin library uses libproc.so interfaces then it may break
+ * any other librtld_db consumers (like mdb) that try to attach
+ * to a branded process. The only safe interfaces that a librtld_db
+ * brand plugin library can use to access a target process are the
+ * proc_service(3PROC) APIs.
+ */
+
+/*
+ * M_DATA comes from some streams header file but is also redefined in
+ * _rtld_db.h, so nuke the old streams definition here.
+ */
+#ifdef M_DATA
+#undef M_DATA
+#endif /* M_DATA */
+
+/*
+ * For 32-bit versions of this library, this file gets compiled once.
+ * For 64-bit versions of this library, this file gets compiled twice,
+ * once with _ELF64 defined and once without. The expectation is that
+ * the 64-bit version of the library can properly deal with both 32-bit
+ * and 64-bit elf files, hence in the 64-bit library there are two copies
+ * of all the interfaces in this file, one set named *32 and one named *64.
+ *
+ * This also means that we need to be careful when declaring local pointers
+ * that point to objects in another process's address space, since these
+ * pointers may not match the current process's pointer width. Basically,
+ * we should avoid using data types that change size between 32 and 64 bit
+ * modes like: long, void *, uintptr_t, caddr_t, psaddr_t, size_t, etc.
+ * Instead we should declare all pointers as uint32_t. Then when we
+ * are compiled to deal with 64-bit targets we'll re-define uint32_t
+ * to be a uint64_t.
+ *
+ * Finally, one last important note. All the 64-bit elf file code
+ * is never used and can't be tested. This is because we don't actually
+ * support 64-bit Linux processes yet. The reason that we have it here
+ * is because we want to support debugging 32-bit elf targets with the
+ * 64-bit version of this library, so we need to have a 64-bit version
+ * of this library. But a 64-bit version of this library is expected
+ * to provide debugging interfaces for both 32 and 64-bit elf targets.
+ * So we provide the 64-bit elf target interfaces, but they will never
+ * be invoked and are untested. If we ever add support for 64-bit elf
+ * Linux processes, we'll need to verify that this code works correctly
+ * for those targets.
+ */
+#ifdef _LP64
+#ifdef _ELF64
+#define lx_ldb_get_dyns32 lx_ldb_get_dyns64
+#define lx_ldb_init32 lx_ldb_init64
+#define lx_ldb_fini32 lx_ldb_fini64
+#define lx_ldb_loadobj_iter32 lx_ldb_loadobj_iter64
+#define lx_ldb_getauxval32 lx_ldb_getauxval64
+#define lx_elf_props32 lx_elf_props64
+#define _rd_get_dyns32 _rd_get_dyns64
+#define _rd_get_ehdr32 _rd_get_ehdr64
+#define uint32_t uint64_t
+#define Elf32_Dyn Elf64_Dyn
+#define Elf32_Ehdr Elf64_Ehdr
+#define Elf32_Phdr Elf64_Phdr
+#endif /* _ELF64 */
+#endif /* _LP64 */
+
+/* Included from usr/src/cmd/sgs/librtld_db/common */
+#include <_rtld_db.h>
+/* Helper state: allocated by lx_ldb_init32(), released by lx_ldb_fini32(). */
+typedef struct lx_rd {
+ rd_agent_t *lr_rap; /* agent handed to lx_ldb_init32() */
+ struct ps_prochandle *lr_php; /* proc handle pointer */
+ uint32_t lr_rdebug; /* address of lx r_debug */
+ uint32_t lr_exec; /* base address of executable */
+} lx_rd_t;
+/* Link map entry as read from the target; each field is a target address. */
+typedef struct lx_link_map {
+ uint32_t lxm_addr; /* Base address shared object is loaded at. */
+ uint32_t lxm_name; /* Absolute file name object was found in. */
+ uint32_t lxm_ld; /* Dynamic section of the shared object. */
+ uint32_t lxm_next; /* Chain of loaded objects. */
+} lx_link_map_t;
+/* The Linux linker's r_debug, read from the target at lx_rd_t.lr_rdebug. */
+typedef struct lx_r_debug {
+ int r_version; /* Version number for this protocol. */
+ uint32_t r_map; /* Head of the chain of loaded objects. */
+
+ /*
+ * This is the address of a function internal to the run-time linker,
+ * that will always be called when the linker begins to map in a
+ * library or unmap it, and again when the mapping change is complete.
+ * The debugger can set a breakpoint at this address if it wants to
+ * notice shared object mapping changes.
+ */
+ uint32_t r_brk;
+ r_state_e r_state; /* defined the same way between lx/solaris */
+ uint32_t r_ldbase; /* Base address the linker is loaded at. */
+} lx_r_debug_t;
+
+/*
+ * Return the value of the target's aux vector entry of the given type.
+ * (uint32_t)-1 is returned when the aux vector cannot be fetched or when
+ * it holds no entry of that type.
+ */
+static uint32_t
+lx_ldb_getauxval32(struct ps_prochandle *php, int type)
+{
+	const auxv_t	*av = NULL;
+
+	if (ps_pauxv(php, &av) != PS_OK)
+		return ((uint32_t)-1);
+
+	/* The vector is terminated by an AT_NULL entry. */
+	for (; av->a_type != AT_NULL; av++) {
+		if (av->a_type != type)
+			continue;
+		return ((uint32_t)(uintptr_t)av->a_un.a_ptr);
+	}
+
+	return ((uint32_t)-1);
+}
+
+/*
+ * Fetch the dynamic section of the object loaded at addr, undoing the
+ * address fix-ups that the Linux linker applies to it.
+ *
+ * Unlike our linker, the Linux linker rewrites certain entries of a
+ * library's in-memory dynamic section (the DT_HASH address, for example)
+ * from file-relative values into absolute addresses.  It does this for
+ * libraries only, not for executables.  Solaris tools expect these values
+ * to still be relative, so for objects that appear to have been loaded by
+ * the Linux linker we subtract the load address again in the copy of the
+ * dynamic entries that is handed back to the caller.
+ *
+ * The entries and their size are returned through dynpp and dynpp_sz.
+ * RD_ERR is returned only when the ELF header or the dynamic section
+ * cannot be read; any later doubt yields RD_OK with the entries untouched.
+ */
+static rd_err_e
+lx_ldb_get_dyns32(rd_helper_data_t rhd,
+    psaddr_t addr, void **dynpp, size_t *dynpp_sz)
+{
+	lx_rd_t		*lx_rd = (lx_rd_t *)rhd;
+	rd_agent_t	*rap = lx_rd->lr_rap;
+	Elf32_Ehdr	ehdr;
+	Elf32_Dyn	*dynp = NULL;
+	Elf32_Dyn	*strtab = NULL;
+	size_t		dynp_sz;
+	uint_t		ndyns;
+	int		i;
+
+	ps_plog("lx_ldb_get_dyns: invoked for object at 0x%p", addr);
+
+	/* Both the ELF header and the PT_DYNAMIC entries are required. */
+	if (_rd_get_ehdr32(rap, addr, &ehdr, NULL) != RD_OK) {
+		ps_plog("lx_ldb_get_dyns: _rd_get_ehdr() failed");
+		return (RD_ERR);
+	}
+	if (_rd_get_dyns32(rap, addr, &dynp, &dynp_sz) != RD_OK) {
+		ps_plog("lx_ldb_get_dyns: _rd_get_dyns() failed");
+		return (RD_ERR);
+	}
+
+	/*
+	 * The caller gets these entries no matter what happens below; from
+	 * this point on every exit reports success.
+	 */
+	ndyns = dynp_sz / sizeof (Elf32_Dyn);
+	*dynpp_sz = dynp_sz;
+	*dynpp = dynp;
+
+	/* Only shared objects need to be examined any further. */
+	if (ehdr.e_type != ET_DYN) {
+		ps_plog("lx_ldb_get_dyns: done: not a shared object");
+		return (RD_OK);
+	}
+
+	/*
+	 * Decide whether this is a linux object or a solaris object before
+	 * touching anything.  The string table entry is what we judge by,
+	 * so find the first one.
+	 */
+	for (i = 0; i < ndyns; i++) {
+		if (dynp[i].d_tag == DT_STRTAB) {
+			strtab = &dynp[i];
+			break;
+		}
+	}
+	if (strtab == NULL) {
+		ps_plog("lx_ldb_get_dyns: "
+		    "failed to find string tab in the dynamic section");
+		return (RD_OK);
+	}
+
+	/*
+	 * A strtab value below the load address, or below the file offset
+	 * of the section headers, is taken to be an offset rather than an
+	 * address.  The test is a heuristic, not a proof.
+	 */
+	if ((strtab->d_un.d_ptr < addr) ||
+	    (strtab->d_un.d_ptr < ehdr.e_shoff)) {
+		ps_plog("lx_ldb_get_dyns: "
+		    "doesn't appear to be an lx object");
+		return (RD_OK);
+	}
+
+	/* Looks like a linux object: make its addresses relative again. */
+	ps_plog("lx_ldb_get_dyns: "
+	    "patching up lx object dynamic section addresses");
+	for (i = 0; i < ndyns; i++) {
+		Elf32_Dyn	*dp = &dynp[i];
+
+		if (dp->d_tag != DT_PLTGOT && dp->d_tag != DT_HASH &&
+		    dp->d_tag != DT_STRTAB && dp->d_tag != DT_SYMTAB &&
+		    dp->d_tag != DT_RELA && dp->d_tag != DT_REL &&
+		    dp->d_tag != DT_DEBUG && dp->d_tag != DT_JMPREL &&
+		    dp->d_tag != DT_VERSYM)
+			continue;
+
+		/* Values at or below the base are left as they are. */
+		if (dp->d_un.d_val > addr)
+			dp->d_un.d_ptr -= addr;
+	}
+
+	return (RD_OK);
+}
+
+/*
+ * Release the helper state that lx_ldb_init32() allocated.
+ */
+static void
+lx_ldb_fini32(rd_helper_data_t rhd)
+{
+	ps_plog("lx_ldb_fini: cleaning up lx helper");
+	free((lx_rd_t *)rhd);
+}
+
+/*
+ * Create the helper state for a target process and locate the Linux
+ * linker's r_debug structure in it.
+ *
+ * The linux linker has an r_debug structure somewhere in its data section
+ * that contains the address of the head of the link map list, and it writes
+ * the address of that structure to the DT_DEBUG entry of the executable's
+ * dynamic section.  To get there we take the address of the executable's
+ * program headers from the AT_SUN_BRAND_LX_PHDR aux vector entry, work back
+ * to the ELF header, read all of the program headers, find PT_DYNAMIC, and
+ * then scan the dynamic entries for DT_DEBUG.
+ *
+ * Returns the helper data on success and NULL on any failure.  Every
+ * failure path after the initial allocation goes through the single
+ * cleanup label at the bottom, so nothing allocated here is leaked.
+ */
+static rd_helper_data_t
+lx_ldb_init32(rd_agent_t *rap, struct ps_prochandle *php)
+{
+	lx_rd_t		*lx_rd;
+	uint32_t	addr, phdr_addr, dyn_addr;
+	Elf32_Dyn	*dyn = NULL;
+	Elf32_Phdr	phdr, *ph, *phdrs = NULL;
+	Elf32_Ehdr	ehdr;
+	int		i, dyn_count;
+
+	/* calloc() leaves lr_rdebug at 0, which the DT_DEBUG check needs. */
+	lx_rd = calloc(sizeof (lx_rd_t), 1);
+	if (lx_rd == NULL) {
+		ps_plog("lx_ldb_init: cannot allocate memory");
+		return (NULL);
+	}
+	lx_rd->lr_rap = rap;
+	lx_rd->lr_php = php;
+
+	phdr_addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_PHDR);
+	if (phdr_addr == (uint32_t)-1) {
+		ps_plog("lx_ldb_init: no LX_PHDR found in aux vector");
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: found LX_PHDR auxv phdr at: 0x%p",
+	    phdr_addr);
+
+	if (ps_pread(php, phdr_addr, &phdr, sizeof (phdr)) != PS_OK) {
+		ps_plog("lx_ldb_init: couldn't read phdr at 0x%p",
+		    phdr_addr);
+		goto fail;
+	}
+
+	/* The ELF header should be before the program header in memory */
+	lx_rd->lr_exec = addr = phdr_addr - phdr.p_offset;
+	if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+		ps_plog("lx_ldb_init: couldn't read ehdr at 0x%p",
+		    lx_rd->lr_exec);
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: read ehdr at: 0x%p", addr);
+
+	if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) {
+		ps_plog("lx_ldb_init: couldn't alloc phdrs memory");
+		goto fail;
+	}
+
+	if (ps_pread(php, phdr_addr, phdrs, ehdr.e_phnum * ehdr.e_phentsize) !=
+	    PS_OK) {
+		ps_plog("lx_ldb_init: couldn't read phdrs at 0x%p",
+		    phdr_addr);
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: read %d phdrs at: 0x%p",
+	    ehdr.e_phnum, phdr_addr);
+
+	/* Step by e_phentsize, which need not equal sizeof (Elf32_Phdr). */
+	for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+	    /*LINTED */
+	    ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+		if (ph->p_type == PT_DYNAMIC)
+			break;
+	}
+	if (i == ehdr.e_phnum) {
+		ps_plog("lx_ldb_init: no PT_DYNAMIC in executable");
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: found PT_DYNAMIC phdr[%d] at: 0x%p",
+	    i, (phdr_addr + ((char *)ph - (char *)phdrs)));
+
+	if ((dyn = malloc(ph->p_filesz)) == NULL) {
+		ps_plog("lx_ldb_init: couldn't alloc for PT_DYNAMIC");
+		goto fail;
+	}
+
+	dyn_addr = addr + ph->p_offset;
+	dyn_count = ph->p_filesz / sizeof (Elf32_Dyn);
+	if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+		ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p",
+		    dyn_addr);
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: read %d dynamic headers at: 0x%p",
+	    dyn_count, dyn_addr);
+
+	for (i = 0; i < dyn_count; i++) {
+		if (dyn[i].d_tag == DT_DEBUG) {
+			lx_rd->lr_rdebug = dyn[i].d_un.d_ptr;
+			break;
+		}
+	}
+
+	if (lx_rd->lr_rdebug == 0) {
+		ps_plog("lx_ldb_init: no DT_DEBUG found in exe");
+		goto fail;
+	}
+	ps_plog("lx_ldb_init: found DT_DEBUG: 0x%p", lx_rd->lr_rdebug);
+
+	/* The scratch copies are no longer needed; only lx_rd survives. */
+	free(dyn);
+	free(phdrs);
+	return ((rd_helper_data_t)lx_rd);
+
+fail:
+	/* free(NULL) is a no-op, so buffers not yet allocated are fine. */
+	free(dyn);
+	free(phdrs);
+	free(lx_rd);
+	return (NULL);
+}
+
+/*
+ * Given the address of an ELF object in the target, return the size of
+ * the object as computed from its PT_LOAD program headers.  If a PT_LOAD
+ * segment that is both readable and writable is found, its address
+ * (rebased by addr for ET_DYN objects and rounded down to the segment
+ * alignment) is stored in *data_addr; otherwise *data_addr is not touched.
+ *
+ * Returns 0 if the ELF header or program headers cannot be read, or if
+ * memory for the program headers cannot be allocated.
+ */
+static size_t
+lx_elf_props32(struct ps_prochandle *php, uint32_t addr, psaddr_t *data_addr)
+{
+	Elf32_Ehdr	ehdr;
+	Elf32_Phdr	*phdrs, *ph;
+	int		i;
+	uint32_t	min = (uint32_t)-1;
+	uint32_t	max = 0;
+	size_t		sz = 0;
+
+	if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+		ps_plog("lx_elf_props: Couldn't read ELF header at 0x%p",
+		    addr);
+		return (0);
+	}
+
+	if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL)
+		return (0);
+
+	if (ps_pread(php, addr + ehdr.e_phoff, phdrs, ehdr.e_phnum *
+	    ehdr.e_phentsize) != PS_OK) {
+		ps_plog("lx_elf_props: Couldn't read program headers at 0x%p",
+		    addr + ehdr.e_phoff);
+		/* Don't leak the header buffer on this error path. */
+		free(phdrs);
+		return (0);
+	}
+
+	/* Step by e_phentsize, which need not equal sizeof (Elf32_Phdr). */
+	for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+	    /*LINTED */
+	    ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+
+		if (ph->p_type != PT_LOAD)
+			continue;
+
+		/* A readable and writable segment is taken to be the data. */
+		if ((ph->p_flags & (PF_W | PF_R)) == (PF_W | PF_R)) {
+			*data_addr = ph->p_vaddr;
+			if (ehdr.e_type == ET_DYN)
+				*data_addr += addr;
+			if (*data_addr & (ph->p_align - 1))
+				*data_addr = *data_addr & (~(ph->p_align -1));
+		}
+
+		if (ph->p_vaddr < min)
+			min = ph->p_vaddr;
+
+		/*
+		 * The size runs from the lowest segment seen so far to the
+		 * end of the highest one, rounded up to its alignment.
+		 */
+		if (ph->p_vaddr > max) {
+			max = ph->p_vaddr;
+			sz = ph->p_memsz + max - min;
+			if (sz & (ph->p_align - 1))
+				sz = (sz & (~(ph->p_align - 1))) + ph->p_align;
+		}
+	}
+
+	free(phdrs);
+	return (sz);
+}
+
+/*
+ * Report every object on the Linux link map to the client callback: first
+ * a synthesized entry for the executable, then one entry for each of the
+ * remaining link map entries.  Iteration stops as soon as the callback
+ * returns 0 or the target cannot be read.
+ *
+ * NOTE(review): the return values mix ps_err_e codes with RD_OK, and a
+ * callback returning 0 yields PS_ERR for the executable but rc (PS_OK at
+ * that point) for later objects.  This is preserved as found; confirm
+ * what librtld_db expects from this entry point.
+ */
+static int
+lx_ldb_loadobj_iter32(rd_helper_data_t rhd, rl_iter_f *cb, void *client_data)
+{
+	lx_rd_t			*lx_rd = (lx_rd_t *)rhd;
+	struct ps_prochandle	*php = lx_rd->lr_php;
+	lx_r_debug_t		r_debug;
+	lx_link_map_t		map;
+	uint32_t		p = 0;
+	int			rc;
+	rd_loadobj_t		exec;
+
+	if ((rc = ps_pread(php, (psaddr_t)lx_rd->lr_rdebug, &r_debug,
+	    sizeof (r_debug))) != PS_OK) {
+		ps_plog("lx_ldb_loadobj_iter: "
+		    "Couldn't read linux r_debug at 0x%p", lx_rd->lr_rdebug);
+		return (rc);
+	}
+
+	p = r_debug.r_map;
+
+	/*
+	 * The first item on the link map list is for the executable, but it
+	 * doesn't give us any useful information about it. We need to
+	 * synthesize a rd_loadobj_t for the client.
+	 *
+	 * Linux doesn't give us the executable name, so we'll get it from
+	 * the AT_EXECNAME entry instead.
+	 */
+	if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != PS_OK) {
+		ps_plog("lx_ldb_loadobj_iter: "
+		    "Couldn't read linux link map at 0x%p", p);
+		return (rc);
+	}
+
+	bzero(&exec, sizeof (exec));
+	exec.rl_base = lx_rd->lr_exec;
+	exec.rl_dynamic = map.lxm_ld;
+	exec.rl_nameaddr = lx_ldb_getauxval32(php, AT_SUN_EXECNAME);
+	exec.rl_lmident = LM_ID_BASE;
+
+	exec.rl_bend = exec.rl_base +
+	    lx_elf_props32(php, lx_rd->lr_exec, &exec.rl_data_base);
+
+	if ((*cb)(&exec, client_data) == 0) {
+		ps_plog("lx_ldb_loadobj_iter: "
+		    "client callb failed for executable");
+		return (PS_ERR);
+	}
+
+	/* Link map addresses are integers here, so 0 ends the chain. */
+	for (p = map.lxm_next; p != 0; p = map.lxm_next) {
+		rd_loadobj_t obj;
+
+		if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) !=
+		    PS_OK) {
+			ps_plog("lx_ldb_loadobj_iter: "
+			    "Couldn't read lk map at %p", p);
+			return (rc);
+		}
+
+		/*
+		 * The linux link map has less information than the Solaris one.
+		 * We need to go fetch the missing information from the ELF
+		 * headers.
+		 *
+		 * Zero the entry first so that any field not filled in below
+		 * (including rl_data_base when no data segment is found)
+		 * never reaches the callback uninitialized.
+		 */
+		bzero(&obj, sizeof (obj));
+		obj.rl_nameaddr = (psaddr_t)map.lxm_name;
+		obj.rl_base = map.lxm_addr;
+		obj.rl_refnameaddr = (psaddr_t)map.lxm_name;
+		obj.rl_plt_base = 0;
+		obj.rl_plt_size = 0;
+		obj.rl_lmident = LM_ID_BASE;
+
+		/*
+		 * Ugh - we have to walk the ELF stuff, find the PT_LOAD
+		 * sections, and calculate the end of the file's mappings
+		 * ourselves.
+		 */
+
+		obj.rl_bend = map.lxm_addr +
+		    lx_elf_props32(php, map.lxm_addr, &obj.rl_data_base);
+		obj.rl_padstart = obj.rl_base;
+		obj.rl_padend = obj.rl_bend;
+		obj.rl_dynamic = map.lxm_ld;
+		obj.rl_tlsmodid = 0;
+
+		ps_plog("lx_ldb_loadobj_iter: 0x%p to 0x%p",
+		    obj.rl_base, obj.rl_bend);
+
+		if ((*cb)(&obj, client_data) == 0) {
+			/*
+			 * lxm_name is an address in the target process, not
+			 * a string in ours, so it must never be given to %s.
+			 */
+			ps_plog("lx_ldb_loadobj_iter: "
+			    "Client callback failed (name at 0x%p)",
+			    (void *)(uintptr_t)map.lxm_name);
+			return (rc);
+		}
+	}
+	return (RD_OK);
+}
+
+/*
+ * Librtld_db plugin linkage struct.
+ *
+ * When we get loaded by librtld_db, it will look for the symbol below
+ * to find our plugin entry points.
+ *
+ * The *32 names used here are renamed to their *64 counterparts when this
+ * file is compiled with both _LP64 and _ELF64 defined (see the #defines
+ * near the top of this file).  RTLD_DB_BRAND_OPS is not defined in this
+ * file; presumably it expands to the rtld_db_brand_ops32/64 names that
+ * the mapfiles export -- TODO confirm.
+ */
+rd_helper_ops_t RTLD_DB_BRAND_OPS = {
+ LM_ID_BRAND,
+ lx_ldb_init32,
+ lx_ldb_fini32,
+ lx_ldb_loadobj_iter32,
+ lx_ldb_get_dyns32
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
new file mode 100644
index 0000000000..5e328d6075
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+# Export only the 32-bit brand ops symbol; every other symbol is local.
+{
+ global:
+ rtld_db_brand_ops32;
+ local:
+ *;
+};
+
+# Externally defined symbols: ps_* routines, presumably supplied by the parent.
+{
+ global:
+ ps_pauxv = NODIRECT PARENT;
+ ps_pdmodel = NODIRECT PARENT;
+ ps_pglobal_lookup = NODIRECT PARENT;
+ ps_pglobal_sym = NODIRECT PARENT;
+ ps_plog = NODIRECT PARENT;
+ ps_pread = NODIRECT PARENT;
+ ps_pwrite = NODIRECT PARENT;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/i386/Makefile b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
new file mode 100644
index 0000000000..b5f780c072
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+# Pull in the definitions shared by every ISA build of this library.
+include ../Makefile.com
+
+# NOTE(review): presumably consumed by the clobber target of the included
+# makefiles so that the installed copy of the library is removed as well.
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB)
+
+# Installing requires a full build followed by the $(ROOTLIBS) targets.
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile b/usr/src/lib/brand/lx/lx_brand/Makefile
new file mode 100644
index 0000000000..de4fa338a0
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile
@@ -0,0 +1,53 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Driver makefile: it builds nothing itself and simply re-runs the requested
+# target inside the machine-specific subdirectory.
+include ../../../Makefile.lib
+
+default: all
+
+# The only subdirectory visited is the one named after the build machine.
+SUBDIRS= $(MACH)
+
+LINT_SUBDIRS= $(MACH)
+
+# Conditional macro definitions: TARGET takes the name of whichever
+# top-level target is being built and is handed to the sub-make below.
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber _msg: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+# Enter the subdirectory, print its path, and run $(TARGET) there.  The FRC
+# dependency has no rule body and no file, so this recipe runs on every
+# invocation.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com
new file mode 100644
index 0000000000..1b58e78ba0
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com
@@ -0,0 +1,102 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Directory holding the lx sources named by CMNOBJS below.
+LX_CMN = $(SRC)/common/brand/lx
+
+LIBRARY = lx_brand.a
+VERS = .1
+
+# C objects whose sources live in ../common.
+COBJS = clock.o \
+ clone.o \
+ debug.o \
+ dir.o \
+ file.o \
+ fcntl.o \
+ fork.o \
+ id.o \
+ ioctl.o \
+ iovec.o \
+ lx_brand.o \
+ lx_thunk_server.o \
+ mem.o \
+ misc.o \
+ module.o \
+ mount.o \
+ open.o \
+ pgrp.o \
+ poll_select.o \
+ priority.o \
+ ptrace.o \
+ rlimit.o \
+ sched.o \
+ sendfile.o \
+ signal.o \
+ socket.o \
+ stat.o \
+ statfs.o \
+ sysctl.o \
+ sysv_ipc.o \
+ time.o \
+ truncate.o \
+ wait.o
+
+# CMNOBJS are built from $(LX_CMN); ASOBJS are built from assembly sources
+# in $(ISASRCDIR) (see the pattern rules at the end of this file).
+CMNOBJS = lx_signum.o
+ASOBJS = lx_handler.o lx_runexe.o lx_crt.o
+OBJECTS = $(CMNOBJS) $(COBJS) $(ASOBJS)
+
+include ../../Makefile.lx
+include ../../../../Makefile.lib
+
+# Map every object name back to the source file it is compiled from.
+CSRCS = $(COBJS:%o=../common/%c) $(CMNOBJS:%o=$(LX_CMN)/%c)
+ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s)
+SRCS = $(CSRCS) $(ASSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+# Only the shared object is built.
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lsocket -lmapmalloc -lproc -lrtld_db
+# Link options handed to ld: -e names _start as the entry point, -I names
+# /native/lib/ld.so.1 as the interpreter, and -M applies ../common/mapfile.
+DYNFLAGS += -Wl,-e_start -Wl,-I/native/lib/ld.so.1 -M../common/mapfile
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+# Note: plain '=' here replaces any previously defined ASFLAGS value.
+ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../ \
+ -I$(UTSBASE)/common/brand/lx
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../../Makefile.targ
+
+# Objects assembled from the sources in $(ISASRCDIR).
+pics/%.o: $(ISASRCDIR)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
+
+# Objects compiled from the sources in $(LX_CMN).
+pics/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clock.c b/usr/src/lib/brand/lx/lx_brand/common/clock.c
new file mode 100644
index 0000000000..3880bcbd5c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clock.c
@@ -0,0 +1,116 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <string.h>
+#include <time.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Linux uses different values for its clock identifiers, so we have to do
+ * basic translations between the two. Thankfully, both Linux and Solaris
+ * implement the same POSIX SUSv3 clock types, so the semantics should be
+ * identical.
+ *
+ * ltos_clock[] is indexed by the Linux clock id and yields the native
+ * clock id handed to the clock_*() routines.
+ */
+
+static int ltos_clock[] = {
+ CLOCK_REALTIME,
+ CLOCK_MONOTONIC,
+ CLOCK_PROCESS_CPUTIME_ID,
+ CLOCK_THREAD_CPUTIME_ID
+};
+
+/* Number of entries in ltos_clock[]; valid indices are 0..LX_CLOCK_MAX-1. */
+#define LX_CLOCK_MAX (sizeof (ltos_clock) / sizeof (ltos_clock[0]))
+
+/*
+ * Emulate the Linux clock_gettime(2) call.
+ *
+ * clock is the Linux clock id; tp is the caller-supplied address that
+ * receives the time.  Returns 0 on success, -EINVAL for an unknown clock
+ * id, -errno if the native clock_gettime() fails, or -EFAULT if the result
+ * cannot be copied out to tp.
+ */
+int
+lx_clock_gettime(int clock, struct timespec *tp)
+{
+	struct timespec ts;
+
+	/*
+	 * LX_CLOCK_MAX is the number of table entries, so it is itself one
+	 * past the last valid index.
+	 */
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (-EINVAL);
+
+	if (clock_gettime(ltos_clock[clock], &ts) < 0)
+		return (-errno);
+
+	return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0);
+}
+
+/*
+ * Emulate the Linux clock_settime(2) call.
+ *
+ * clock is the Linux clock id; tp is the caller-supplied address of the
+ * new time.  Returns 0 on success, -EINVAL for an unknown clock id,
+ * -EFAULT if tp cannot be read, or -errno if the native clock_settime()
+ * fails.
+ */
+int
+lx_clock_settime(int clock, struct timespec *tp)
+{
+	struct timespec ts;
+
+	/*
+	 * LX_CLOCK_MAX is the number of table entries, so it is itself one
+	 * past the last valid index.
+	 */
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (-EINVAL);
+
+	if (uucopy(tp, &ts, sizeof (struct timespec)) < 0)
+		return (-EFAULT);
+
+	return ((clock_settime(ltos_clock[clock], &ts) < 0) ? -errno : 0);
+}
+
+/*
+ * Emulate the Linux clock_getres(2) call.
+ *
+ * clock is the Linux clock id; tp is the caller-supplied address that
+ * receives the resolution.  Returns 0 on success, -EINVAL for an unknown
+ * clock id, -errno if the native clock_getres() fails, or -EFAULT if the
+ * result cannot be copied out to tp.
+ */
+int
+lx_clock_getres(int clock, struct timespec *tp)
+{
+	struct timespec ts;
+
+	/*
+	 * LX_CLOCK_MAX is the number of table entries, so it is itself one
+	 * past the last valid index.
+	 */
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (-EINVAL);
+
+	if (clock_getres(ltos_clock[clock], &ts) < 0)
+		return (-errno);
+
+	return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0);
+}
+
+/*
+ * Emulate the Linux clock_nanosleep(2) call.
+ *
+ * clock is the Linux clock id, flags is passed through unchanged, rqtp is
+ * the caller-supplied address of the requested time and rmtp (which may be
+ * NULL) receives the remaining time.
+ *
+ * Returns 0 on success, -EINVAL for an unknown clock id, -EFAULT if rqtp
+ * cannot be read or rmtp cannot be written, or the negated error number
+ * reported by the native clock_nanosleep().
+ */
+int
+lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
+    struct timespec *rmtp)
+{
+	/*
+	 * rmt is zeroed so that we never copy uninitialized stack contents
+	 * out to the caller should clock_nanosleep() leave it untouched.
+	 */
+	struct timespec rqt, rmt = { 0, 0 };
+	int err;
+
+	/*
+	 * LX_CLOCK_MAX is the number of table entries, so it is itself one
+	 * past the last valid index.
+	 */
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (-EINVAL);
+
+	if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0)
+		return (-EFAULT);
+
+	/*
+	 * The TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux.
+	 *
+	 * Unlike the other clock_*() routines, clock_nanosleep() reports
+	 * failure by returning the error number itself rather than -1 with
+	 * errno set.  For anything other than an interrupted sleep we are
+	 * done; for EINTR we fall through so that the remaining time can be
+	 * handed back to the caller before the error is returned.
+	 */
+	err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt);
+	if (err != 0 && err != EINTR)
+		return (-err);
+
+	/*
+	 * Only copy values to rmtp if the timer is TIMER_RELTIME and rmtp is
+	 * non-NULL.
+	 */
+	if (((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) &&
+	    (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0))
+		return (-EFAULT);
+
+	return (-err);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
new file mode 100644
index 0000000000..f271616f49
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -0,0 +1,546 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <ucontext.h>
+#include <thread.h>
+#include <strings.h>
+#include <libintl.h>
+#include <sys/regset.h>
+#include <sys/syscall.h>
+#include <sys/inttypes.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/segments.h>
+#include <signal.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_thread.h>
+
+/*
+ * Bits of the flags argument handed to lx_clone().  LX_CSIGNAL masks the
+ * low byte, which lx_clone() looks up in ltos_signo[] to obtain the
+ * signal-on-exit; the remaining values are individual flag bits.
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_VFORK 0x00004000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+/* Flag combinations tested by lx_clone(). */
+#define SHARED_AS \
+ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND)
+#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK)
+#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH)
+
+/* A fork shares none of the SHARED_AS resources; a vfork sets VM and VFORK. */
+#define IS_FORK(f) (((f) & SHARED_AS) == 0)
+#define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK)
+
+/* Values stored in lxtsd_exit to record which exit call was made. */
+#define LX_EXIT 1
+#define LX_EXIT_GROUP 2
+
+/*
+ * This is dicey. This seems to be an internal glibc structure, and not
+ * part of any external interface. Thus, it is subject to change without
+ * notice. FWIW, clone(2) itself seems to be an internal (or at least
+ * unstable) interface, since strace(1) shows it differently than the man
+ * page.
+ *
+ * lx_clone() receives a pointer to one of these as its fourth argument and
+ * passes it through to the kernel untouched.
+ * NOTE(review): the field layout looks like Linux's struct user_desc --
+ * confirm.
+ */
+struct lx_desc
+{
+ uint32_t entry_number;
+ uint32_t base_addr;
+ uint32_t limit;
+ uint32_t seg_32bit:1;
+ uint32_t contents:2;
+ uint32_t read_exec_only:1;
+ uint32_t limit_in_pages:1;
+ uint32_t seg_not_present:1;
+ uint32_t useable:1;
+ uint32_t empty:25;
+};
+
+/*
+ * Everything a newly created thread needs in order to finish the clone.
+ * lx_clone() fills this in (on the new thread's stack) and hands it to
+ * clone_start() as the thread argument.
+ */
+struct clone_state {
+ void *c_retaddr; /* instr after clone()'s int80 */
+ int c_flags; /* flags to clone(2) */
+ int c_sig; /* signal to send on thread exit */
+ void *c_stk; /* %esp of new thread */
+ void *c_ptidp; /* ptidp argument to clone(2) */
+ struct lx_desc *c_ldtinfo; /* thread-specific segment */
+ void *c_ctidp; /* ctidp argument to clone(2) */
+ uintptr_t c_gs; /* Linux's %gs */
+ sigset_t c_sigmask; /* signal mask */
+ lx_affmask_t c_affmask; /* CPU affinity mask */
+ volatile int *c_clone_res; /* pid/error returned to cloner */
+};
+
+extern void lx_setup_clone(uintptr_t, void *, void *);
+
+/*
+ * Counter incremented when we vfork(2) ourselves, and decremented when the
+ * vfork(2)ed child exit(2)s or exec(2)s.
+ */
+static int is_vforked = 0;
+
+/*
+ * Common implementation of lx_exit() and lx_group_exit().
+ *
+ * status is the exit status supplied by the Linux application, exit_type
+ * is LX_EXIT or LX_EXIT_GROUP, and name is the syscall name used in
+ * diagnostics.  This function is not expected to return: it either calls
+ * _exit() (vfork(2)ed child) or switches to the exit context stored in the
+ * thread's lx_tsd.
+ */
+static int
+clone_exit_common(int status, int exit_type, const char *name)
+{
+	int ret;
+	lx_tsd_t *lx_tsd;
+
+	/*
+	 * If we are a vfork(2)ed child, we need to exit as quickly and
+	 * cleanly as possible to avoid corrupting our parent.
+	 */
+	if (is_vforked != 0) {
+		is_vforked--;
+		_exit(status);
+	}
+
+	if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
+		lx_err_fatal(gettext(
+		    "%s: unable to read thread-specific data: %s"),
+		    name, strerror(ret));
+
+	assert(lx_tsd != 0);
+
+	/* Record how we are exiting for the code that resumes the context. */
+	lx_tsd->lxtsd_exit = exit_type;
+	lx_tsd->lxtsd_exit_status = status;
+
+	/*
+	 * Block all signals in the exit context to avoid taking any signals
+	 * (to the degree possible) while exiting.
+	 */
+	(void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask);
+
+	/*
+	 * This thread is exiting.  Restore the state of the thread to
+	 * what it was before we started running linux code.
+	 */
+	(void) setcontext(&lx_tsd->lxtsd_exit_context);
+
+	/*
+	 * If we returned from the setcontext(2), something is very wrong.
+	 */
+	lx_err_fatal(gettext("%s: unable to set exit context: %s"),
+	    name, strerror(errno));
+
+	/*NOTREACHED*/
+	return (0);
+}
+
+/*
+ * Emulate the Linux exit(2) call: p1 is the exit status.  Only the calling
+ * thread is marked as exiting (LX_EXIT).
+ */
+int
+lx_exit(uintptr_t p1)
+{
+	return (clone_exit_common((int)p1, LX_EXIT, "exit"));
+}
+
+/*
+ * Emulate the Linux exit_group(2) call: p1 is the exit status.  The exit
+ * is recorded as LX_EXIT_GROUP.
+ */
+int
+lx_group_exit(uintptr_t p1)
+{
+	return (clone_exit_common((int)p1, LX_EXIT_GROUP, "group_exit"));
+}
+
+/*
+ * Start routine of a thread created by lx_clone().
+ *
+ * arg points at the struct clone_state prepared by lx_clone().  The
+ * routine completes the clone in the kernel, restores the affinity mask,
+ * installs the thread-specific data, reports the result to the parent
+ * through cs->c_clone_res and then jumps into the Linux code by way of
+ * lx_setup_clone().  Control only comes back here, via the context saved
+ * with getcontext(), when the Linux thread later exits.
+ */
+static void *
+clone_start(void *arg)
+{
+	int rval;
+	int err;
+	struct clone_state *cs = (struct clone_state *)arg;
+	lx_tsd_t lx_tsd;
+
+	/*
+	 * Let the kernel finish setting up all the needed state for this
+	 * new thread.
+	 *
+	 * We already created the thread using the thr_create(3C) library
+	 * call, so most of the work required to emulate lx_clone(2) has
+	 * been done by the time we get to this point.  Instead of creating
+	 * a new brandsys(2) subcommand to perform the last few bits of
+	 * bookkeeping, we just use the lx_clone() slot in the syscall
+	 * table.
+	 */
+	lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
+	lx_debug("\tLX_SYS_clone(0x%x, 0x%p, 0x%p, 0x%p, 0x%p)",
+	    cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp);
+
+	rval = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_clone,
+	    cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp,
+	    NULL);
+
+	/*
+	 * At this point the parent is waiting for cs->c_clone_res to go
+	 * non-zero to indicate the thread has been cloned.  The value set
+	 * in cs->c_clone_res will be used for the return value from
+	 * clone().
+	 */
+	if (rval < 0) {
+		*(cs->c_clone_res) = -errno;
+		lx_debug("\tkernel clone failed, errno %d\n", errno);
+		return (NULL);
+	}
+
+	if (lx_sched_setaffinity(0, sizeof (cs->c_affmask),
+	    (uintptr_t)&cs->c_affmask) != 0) {
+		*(cs->c_clone_res) = -errno;
+
+		lx_err_fatal(gettext(
+		    "Unable to set affinity mask in child thread: %s"),
+		    strerror(errno));
+	}
+
+	/* Initialize the thread specific data for this thread. */
+	bzero(&lx_tsd, sizeof (lx_tsd));
+	lx_tsd.lxtsd_gs = cs->c_gs;
+
+	/*
+	 * Use the address of the stack-allocated lx_tsd as the
+	 * per-thread storage area to cache various values for later
+	 * use.
+	 *
+	 * This address is only used by this thread, so there is no
+	 * danger of other threads using this storage area, nor of it
+	 * being accessed once this stack frame has been freed.
+	 *
+	 * thr_setspecific(3C) reports failure through its return value
+	 * rather than errno, so that is the value we propagate and print.
+	 */
+	if ((err = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0) {
+		*(cs->c_clone_res) = -err;
+		lx_err_fatal(
+		    gettext("Unable to set thread-specific ptr for clone: %s"),
+		    strerror(err));
+	}
+
+	/*
+	 * Save the current context of this thread.
+	 *
+	 * We'll restore this context when this thread attempts to exit.
+	 */
+	if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) {
+		*(cs->c_clone_res) = -errno;
+
+		lx_err_fatal(gettext(
+		    "Unable to initialize thread-specific exit context: %s"),
+		    strerror(errno));
+	}
+
+	/*
+	 * Do the final stack twiddling, reset %gs, and return to the
+	 * clone(2) path.
+	 *
+	 * lxtsd_exit is still zero the first time through; it is non-zero
+	 * when we arrive here again by way of the saved exit context.
+	 */
+	if (lx_tsd.lxtsd_exit == 0) {
+		if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) {
+			*(cs->c_clone_res) = -errno;
+
+			lx_err_fatal(gettext(
+			    "Unable to release held signals for child "
+			    "thread: %s"), strerror(errno));
+		}
+
+		/*
+		 * Let the parent know that the clone has (effectively) been
+		 * completed.
+		 */
+		*(cs->c_clone_res) = rval;
+
+		lx_setup_clone(cs->c_gs, cs->c_retaddr, cs->c_stk);
+
+		/* lx_setup_clone() should never return. */
+		assert(0);
+	}
+
+	/*
+	 * We are here because the Linux application called the exit() or
+	 * exit_group() system call.  In turn the brand library did a
+	 * setcontext() to jump to the thread context state saved in
+	 * getcontext(), above.
+	 */
+	if (lx_tsd.lxtsd_exit == LX_EXIT)
+		thr_exit((void *)lx_tsd.lxtsd_exit_status);
+	else
+		exit(lx_tsd.lxtsd_exit_status);
+
+	assert(0);
+	/*NOTREACHED*/
+}
+
+/*
+ * Emulate the Linux clone(2) call.
+ *
+ * p1 is the flags word (clone flags plus the exit signal in the low byte),
+ * p2 the child stack, p3 the parent tid pointer, p4 the thread-specific
+ * segment descriptor and p5 the child tid pointer.
+ *
+ * Fork-like and vfork-like requests are serviced with fork1()/vfork();
+ * requests sharing all of VM, FS, FILES and SIGHAND are serviced by
+ * creating a thread that runs clone_start().  Any other combination is
+ * rejected.
+ *
+ * Returns the new pid/tid in the parent, 0 in a forked child, or a negated
+ * error number on failure.
+ */
+int
+lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5)
+{
+	struct clone_state *cs;
+	int flags = (int)p1;
+	void *cldstk = (void *)p2;
+	void *ptidp = (void *)p3;
+	struct lx_desc *ldtinfo = (void *)p4;
+	void *ctidp = (void *)p5;
+	thread_t tid;
+	volatile int clone_res;
+	int sig;
+	int rval;
+	int pid;
+	lx_regs_t *rp;
+	sigset_t sigmask;
+
+	if (flags & LX_CLONE_SETTLS) {
+		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
+		    "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
+	} else {
+		lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
+		    flags, cldstk, ptidp);
+	}
+
+	/*
+	 * Only supported for pid 0 on Linux
+	 */
+	if (flags & LX_CLONE_PID)
+		return (-EINVAL);
+
+	/*
+	 * CLONE_THREAD requires CLONE_SIGHAND.
+	 *
+	 * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
+	 * in kernel 2.4 and prior.
+	 * In kernel 2.6 CLONE_DETACHED was dropped completely, so we no
+	 * longer have this requirement.
+	 */
+
+	if (flags & CLONE_TD) {
+		if (!(flags & LX_CLONE_SIGHAND))
+			return (-EINVAL);
+		if ((lx_get_kern_version() <= LX_KERN_2_4) &&
+		    (flags & CLONE_TD) != CLONE_TD)
+			return (-EINVAL);
+	}
+
+	rp = lx_syscall_regs();
+
+	/* test if pointer passed by user are writable */
+	if (flags & LX_CLONE_PARENT_SETTID) {
+		if (uucopy(ptidp, &pid, sizeof (int)) != 0)
+			return (-EFAULT);
+		if (uucopy(&pid, ptidp, sizeof (int)) != 0)
+			return (-EFAULT);
+	}
+	if (flags & LX_CLONE_CHILD_SETTID) {
+		if (uucopy(ctidp, &pid, sizeof (int)) != 0)
+			return (-EFAULT);
+		if (uucopy(&pid, ctidp, sizeof (int)) != 0)
+			return (-EFAULT);
+	}
+
+	/* See if this is a fork() operation or a thr_create(). */
+	if (IS_FORK(flags) || IS_VFORK(flags)) {
+		if (flags & LX_CLONE_PARENT) {
+			lx_unsupported(gettext(
+			    "clone(2) only supports CLONE_PARENT "
+			    "for threads.\n"));
+			return (-ENOTSUP);
+		}
+
+		if (flags & LX_CLONE_PTRACE)
+			lx_ptrace_fork();
+
+		if (flags & LX_CLONE_VFORK) {
+			/*
+			 * The counter is raised before vfork() so that the
+			 * child sees it, and dropped again on the parent
+			 * (or failure) side.
+			 */
+			is_vforked++;
+			rval = vfork();
+			if (rval != 0)
+				is_vforked--;
+		} else {
+			rval = fork1();
+			if (rval == 0 && lx_is_rpm)
+				(void) sleep(lx_rpm_delay);
+		}
+
+		/*
+		 * Since we've already forked, we can't do much if uucopy fails,
+		 * so we just ignore failure.  Failure is unlikely since we've
+		 * tested the memory before we did the fork.
+		 */
+		if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
+			(void) uucopy(&rval, ptidp, sizeof (int));
+		}
+
+		if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
+			/*
+			 * lx_getpid should not fail, and if it does, there's
+			 * not much we can do about it since we've already
+			 * forked, so on failure, we just don't copy the
+			 * memory.
+			 */
+			pid = lx_getpid();
+			if (pid >= 0)
+				(void) uucopy(&pid, ctidp, sizeof (int));
+		}
+
+		/* Parent just returns */
+		if (rval != 0)
+			return ((rval < 0) ? -errno : rval);
+
+		/*
+		 * If provided, the child needs its new stack set up.
+		 */
+		if (cldstk)
+			lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk);
+
+		return (0);
+	}
+
+	/*
+	 * We have very restricted support.... only exactly these flags are
+	 * supported
+	 */
+	if (((flags & SHARED_AS) != SHARED_AS)) {
+		lx_unsupported(gettext(
+		    "clone(2) requires that all or none of CLONE_VM "
+		    "CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n"));
+		return (-ENOTSUP);
+	}
+
+	if (cldstk == NULL) {
+		lx_unsupported(gettext(
+		    "clone(2) requires the caller to allocate the "
+		    "child's stack.\n"));
+		return (-ENOTSUP);
+	}
+
+	/*
+	 * If we want a signal-on-exit, ensure that the signal is valid.
+	 *
+	 * On failure report the signal number the caller asked for; the
+	 * translated value is always -1 at that point.
+	 *
+	 * NOTE(review): the index can be as large as LX_CSIGNAL (255);
+	 * confirm that ltos_signo[] has that many entries.
+	 */
+	if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
+		lx_unsupported(gettext(
+		    "clone(2) passed unsupported signal: %d"),
+		    flags & LX_CSIGNAL);
+		return (-ENOTSUP);
+	}
+
+	/*
+	 * To avoid malloc() here, we steal a part of the new thread's
+	 * stack to store all the info that thread might need for
+	 * initialization.  We also make it 64-bit aligned for good
+	 * measure.
+	 */
+	cs = (struct clone_state *)
+	    ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8));
+	cs->c_flags = flags;
+	cs->c_sig = sig;
+	cs->c_stk = cldstk;
+	cs->c_ptidp = ptidp;
+	cs->c_ldtinfo = ldtinfo;
+	cs->c_ctidp = ctidp;
+	cs->c_clone_res = &clone_res;
+	cs->c_gs = rp->lxr_gs;
+
+	if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
+	    (uintptr_t)&cs->c_affmask) == -1)
+		lx_err_fatal(gettext(
+		    "Unable to get affinity mask for parent thread: %s"),
+		    strerror(errno));
+
+	/*
+	 * We want the new thread to return directly to the return site for
+	 * the system call.
+	 */
+	cs->c_retaddr = (void *)rp->lxr_eip;
+	clone_res = 0;
+
+	(void) sigfillset(&sigmask);
+
+	/*
+	 * Block all signals because the thread we create won't be able to
+	 * properly handle them until it's fully set up.
+	 */
+	if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) {
+		lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
+		return (-errno);
+	}
+
+	rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
+
+	/*
+	 * Release any pending signals
+	 */
+	(void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL);
+
+	/*
+	 * thr_create(3C) reports failure by returning a (positive) error
+	 * number.  Negate it so the caller cannot mistake it for the id of
+	 * a newly created thread.
+	 */
+	if (rval != 0)
+		return (-rval);
+
+	/*
+	 * Wait for the child to be created and have its tid assigned;
+	 * clone_start() stores the result (or a negated error) through
+	 * cs->c_clone_res.
+	 */
+	while (clone_res == 0)
+		;
+
+	rval = clone_res;
+
+	return (rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/debug.c b/usr/src/lib/brand/lx/lx_brand/common/debug.c
new file mode 100644
index 0000000000..dccdcbb419
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/debug.c
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <thread.h>
+#include <unistd.h>
+
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+
+/*
+ * internal debugging state
+ *
+ * lx_debug_path is the file lx_debug() opens for each message;
+ * lx_debug_path_buf is the private copy that lx_debug_init() points it at.
+ */
+static char *lx_debug_path = NULL; /* debug output file path */
+static char lx_debug_path_buf[MAXPATHLEN];
+
+/*
+ * Direct debugging output to /dev/tty and log that fact.
+ *
+ * NOTE(review): this does not itself set lx_debug_enabled, and lx_debug()
+ * returns immediately while that flag is zero -- presumably the caller is
+ * expected to have set it; confirm.
+ */
+void
+lx_debug_enable(void)
+{
+ /* send all debugging output to /dev/tty */
+ lx_debug_path = "/dev/tty";
+ lx_debug("lx_debug: debugging output enabled: %s", lx_debug_path);
+}
+
+/*
+ * Set up debugging from the environment.
+ *
+ * Nothing happens unless LX_DEBUG is present in the environment.  When it
+ * is, debugging is switched on and output goes to the file named by
+ * LX_DEBUG_FILE, or to /dev/tty when that variable is absent.
+ */
+void
+lx_debug_init(void)
+{
+	char *path;
+
+	if (getenv("LX_DEBUG") == NULL)
+		return;
+
+	/*
+	 * No locking is needed around this flag: callers may consult it to
+	 * decide whether to do extra work before calling lx_debug(), and
+	 * should debugging be turned off in the meantime lx_debug() simply
+	 * notices and emits nothing.
+	 */
+	lx_debug_enabled = 1;
+
+	/* use the requested log file, falling back to /dev/tty */
+	path = getenv("LX_DEBUG_FILE");
+	if (path == NULL)
+		path = "/dev/tty";
+
+	/* keep a private copy of the path rather than the caller's string */
+	(void) strlcpy(lx_debug_path_buf, path, sizeof (lx_debug_path_buf));
+	lx_debug_path = lx_debug_path_buf;
+
+	lx_debug("lx_debug: debugging output ENABLED to path: \"%s\"",
+	    lx_debug_path);
+}
+
+/*
+ * Emit one printf-style debugging message to the debug output file.
+ *
+ * The message is prefixed with "pid/tid: " and terminated with a newline
+ * if it does not already end in one.  Nothing is written when debugging is
+ * disabled, or when the message cannot be formatted or the output file
+ * cannot be opened.  The caller's errno is preserved on every path taken
+ * once debugging is known to be enabled.
+ */
+void
+lx_debug(const char *msg, ...)
+{
+	va_list ap;
+	char buf[LX_MSG_MAXLEN + 1];
+	int rv, fd, n;
+	int errno_backup;
+
+	if (lx_debug_enabled == 0)
+		return;
+
+	errno_backup = errno;
+
+	/* prefix the message with pid/tid */
+	if ((n = snprintf(buf, sizeof (buf), "%u/%u: ",
+	    getpid(), thr_self())) == -1) {
+		errno = errno_backup;
+		return;
+	}
+
+	/* format the message */
+	va_start(ap, msg);
+	rv = vsnprintf(&buf[n], sizeof (buf) - n, msg, ap);
+	va_end(ap);
+	if (rv == -1) {
+		errno = errno_backup;
+		return;
+	}
+
+	/* add a newline if there isn't one already */
+	if ((buf[strlen(buf) - 1] != '\n') &&
+	    (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf))) {
+		errno = errno_backup;
+		return;
+	}
+
+	/*
+	 * Open the debugging output file.  Note that we don't protect
+	 * ourselves against exec or fork1 here.  If an mt process were
+	 * to exec/fork1 while we're doing this they'd end up with an
+	 * extra open descriptor in their fd space.  Ah well, it shouldn't
+	 * really matter.
+	 */
+	if ((fd = open(lx_debug_path,
+	    O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666)) == -1) {
+		/* a failed open() must not leak its errno to our caller */
+		errno = errno_backup;
+		return;
+	}
+	(void) fchmod(fd, 0666);
+
+	/* we retry in case of EINTR */
+	do {
+		rv = write(fd, buf, strlen(buf));
+	} while ((rv == -1) && (errno == EINTR));
+	(void) fsync(fd);
+
+	(void) close(fd);
+	errno = errno_backup;
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/dir.c b/usr/src/lib/brand/lx/lx_brand/common/dir.c
new file mode 100644
index 0000000000..1c0a5aaf8f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/dir.c
@@ -0,0 +1,160 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+
+/* Size of the d_name[] arrays in the structures below. */
+#define LX_NAMEMAX 256
+
+/* Directory entry handed back to the caller by lx_readdir(). */
+struct lx_dirent {
+ long d_ino; /* not l_ino_t */
+ long d_off;
+ ushort_t d_reclen;
+ char d_name[LX_NAMEMAX];
+};
+
+/* Directory entry produced by lx_getdents64(). */
+struct lx_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ ushort_t d_reclen;
+ uchar_t d_type;
+ char d_name[LX_NAMEMAX];
+};
+
+/*
+ * Record length of an lx_dirent64 whose name is namelen bytes long: the
+ * fixed header, the name and one extra byte for its terminator, rounded up
+ * to a multiple of 8.
+ */
+#define LX_RECLEN(namelen) \
+ ((offsetof(struct lx_dirent64, d_name) + 1 + (namelen) + 7) & ~7)
+
+/*
+ * Read in one dirent structure from fd into dirp.
+ * p3 (count) is ignored.
+ *
+ * p1 is the file descriptor and p2 the caller-supplied struct lx_dirent.
+ * Returns 1 when an entry was read, 0 at the end of the directory, or
+ * -errno if getdents() or the copy-out fails.
+ */
+/*ARGSUSED*/
+int
+lx_readdir(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	struct lx_dirent *dirp = (struct lx_dirent *)p2;
+	uint_t count = sizeof (struct lx_dirent);
+	int rc = 0;
+	struct lx_dirent _ld;
+	struct dirent *sd = (struct dirent *)&_ld;
+
+	/*
+	 * Start from a zeroed buffer.  getdents() returns 0 at the end of
+	 * the directory, and unless it also stores an entry in the buffer
+	 * the d_reclen test below would read -- and the uucopy() would hand
+	 * to the caller -- uninitialized stack contents.
+	 */
+	(void) memset(&_ld, 0, sizeof (_ld));
+
+	/*
+	 * The return value from getdents is not applicable, as
+	 * it might have squeezed more than one dirent in the buffer
+	 * we provided.
+	 *
+	 * getdents() will deal with the case of dirp == NULL
+	 */
+	if ((rc = getdents(fd, sd, count)) < 0)
+		return (-errno);
+
+	/*
+	 * Set rc 1 (pass), or 0 (end of directory).
+	 */
+	rc = (sd->d_reclen == 0) ? 0 : 1;
+
+	if (uucopy(sd, dirp, count) != 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * Read in dirent64 structures from p1 (fd) into p2 (buffer).
+ * p3 (count) is the size of the memory area.
+ *
+ * Returns the number of bytes stored in the caller's buffer, -EINVAL if
+ * the buffer is too small to hold one struct lx_dirent64 (or its size does
+ * not fit in an int), -ENOMEM if the scratch buffers cannot be allocated,
+ * -errno if getdents() fails, or -EFAULT if the result cannot be copied
+ * out.
+ */
+int
+lx_getdents64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (uint_t)p1;
+	void *buf = (void *)p2;
+	void *sbuf, *lbuf;
+	int lbufsz = (uint_t)p3;
+	int sbufsz;
+	int namelen;
+	struct dirent *sd;
+	struct lx_dirent64 *ld;
+	int bytes, rc;
+
+	/*
+	 * lbufsz is signed while sizeof yields an unsigned value; comparing
+	 * the two directly would convert a negative size into a huge
+	 * unsigned one and let it through.  Reject negative sizes
+	 * explicitly and compare like with like.
+	 */
+	if (lbufsz < 0 || lbufsz < (int)sizeof (struct lx_dirent64))
+		return (-EINVAL);
+
+	/*
+	 * The Linux dirent64 is bigger than the Solaris dirent64.  To
+	 * avoid inadvertently consuming more of the directory than we can
+	 * pass back to the Linux app, we hand the kernel a smaller buffer
+	 * than the app handed us.
+	 */
+	sbufsz = (lbufsz / 32) * 24;
+
+	sbuf = SAFE_ALLOCA(sbufsz);
+	lbuf = SAFE_ALLOCA(lbufsz);
+	if (sbuf == NULL || lbuf == NULL)
+		return (-ENOMEM);
+
+	if ((bytes = getdents(fd, sbuf, sbufsz)) < 0)
+		return (-errno);
+
+	/* munge the Solaris buffer to a linux buffer. */
+	sd = (struct dirent *)sbuf;
+	ld = (struct lx_dirent64 *)lbuf;
+	rc = 0;
+	while (bytes > 0) {
+		/* names that do not fit in d_name[] are truncated */
+		namelen = strlen(sd->d_name);
+		if (namelen >= LX_NAMEMAX)
+			namelen = LX_NAMEMAX - 1;
+		ld->d_ino = (uint64_t)sd->d_ino;
+		ld->d_off = (int64_t)sd->d_off;
+		ld->d_type = 0;
+
+		(void) strncpy(ld->d_name, sd->d_name, namelen);
+		ld->d_name[namelen] = 0;
+		ld->d_reclen = (ushort_t)LX_RECLEN(namelen);
+
+		bytes -= (int)sd->d_reclen;
+		rc += (int)ld->d_reclen;
+
+		/* step both cursors by the length of their own record */
+		sd = (struct dirent *)(void *)((caddr_t)sd + sd->d_reclen);
+		ld = (struct lx_dirent64 *)(void *)((caddr_t)ld + ld->d_reclen);
+	}
+
+	/* now copy the lbuf to the userland buffer */
+	assert(rc <= lbufsz);
+	if (uucopy(lbuf, buf, rc) != 0)
+		return (-EFAULT);
+
+	return (rc);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fcntl.c b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
new file mode 100644
index 0000000000..995a3b5e7b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
@@ -0,0 +1,387 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/filio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stropts.h>
+#include <libintl.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/lx_fcntl.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+
+static int lx_fcntl_com(int fd, int cmd, ulong_t arg);
+static void ltos_flock(struct lx_flock *l, struct flock *s);
+static void stol_flock(struct flock *s, struct lx_flock *l);
+static void ltos_flock64(struct lx_flock64 *l, struct flock64 *s);
+static void stol_flock64(struct flock64 *s, struct lx_flock64 *l);
+static short ltos_type(short l_type);
+static short stol_type(short l_type);
+static int lx_fcntl_getfl(int fd);
+static int lx_fcntl_setfl(int fd, ulong_t arg);
+
+/*
+ * dup2(): duplicate descriptor p1 onto descriptor number p2 by way of the
+ * native F_DUP2FD fcntl command.  Returns the fcntl result, or a negated
+ * errno on failure.
+ */
+int
+lx_dup2(uintptr_t p1, uintptr_t p2)
+{
+	int ret = fcntl((int)p1, F_DUP2FD, (int)p2);
+
+	if (ret == -1)
+		return (-errno);
+
+	return (ret);
+}
+
+/*
+ * fcntl(): entry point for the non-64-bit interface.
+ *
+ * p1 is the file descriptor, p2 the Linux command and p3 its argument.
+ * For the record-lock commands p3 points to a struct lx_flock; it is
+ * copied in, translated to a native struct flock and passed on to
+ * lx_fcntl_com().
+ *
+ * The caller's structure is only ever touched through uucopy(), so a bad
+ * pointer produces an errno rather than a fault inside the brand library.
+ * It is written back only after a successful LX_F_GETLK, the one command
+ * that reports lock state; a failed call or a set-lock command leaves the
+ * caller's structure untouched.
+ *
+ * Returns the result of the command or a negated errno.
+ */
+int
+lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	int cmd = (int)p2;
+	ulong_t arg = (ulong_t)p3;
+	struct lx_flock lxflk;
+	struct flock fl;
+	int rc;
+
+	/*
+	 * The 64-bit fcntl commands must go through fcntl64().
+	 */
+	if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLK64 ||
+	    cmd == LX_F_SETLKW64)
+		return (-EINVAL);
+
+	if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
+	    cmd == LX_F_GETLEASE) {
+		lx_unsupported(gettext("%s(): unsupported command: %d"),
+		    "fcntl", cmd);
+		return (-ENOTSUP);
+	}
+
+	if (cmd == LX_F_GETLK || cmd == LX_F_SETLK ||
+	    cmd == LX_F_SETLKW) {
+		if (uucopy((void *)p3, (void *)&lxflk,
+		    sizeof (struct lx_flock)) != 0)
+			return (-errno);
+		ltos_flock(&lxflk, &fl);
+		arg = (ulong_t)&fl;
+	}
+
+	rc = lx_fcntl_com(fd, cmd, arg);
+
+	/*
+	 * Report the lock description back to the caller.  lxflk still
+	 * holds the caller's original bytes, so only the translated fields
+	 * change.
+	 */
+	if (cmd == LX_F_GETLK && rc >= 0) {
+		stol_flock(&fl, &lxflk);
+		if (uucopy((void *)&lxflk, (void *)p3,
+		    sizeof (struct lx_flock)) != 0)
+			return (-errno);
+	}
+
+	return (rc);
+}
+
+/*
+ * fcntl64(): like lx_fcntl(), but additionally accepts the 64-bit
+ * record-lock commands, whose argument is a struct lx_flock64.
+ *
+ * p1 is the file descriptor, p2 the Linux command and p3 its argument.
+ * Lock structures are copied in and out with uucopy() so that a bad
+ * pointer produces an errno rather than a fault inside the brand library,
+ * and are written back only after a successful LX_F_GETLK/LX_F_GETLK64,
+ * the commands that report lock state.
+ *
+ * Returns the result of the command or a negated errno.
+ */
+int
+lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	int cmd = (int)p2;
+	struct lx_flock lxflk;
+	struct lx_flock64 lxflk64;
+	struct flock fl;
+	struct flock64 fl64;
+	int rc;
+
+	if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE ||
+	    cmd == LX_F_GETLEASE) {
+		lx_unsupported(gettext("%s(): unsupported command: %d"),
+		    "fcntl64", cmd);
+		return (-ENOTSUP);
+	}
+
+	if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || cmd == LX_F_SETLKW) {
+		if (uucopy((void *)p3, (void *)&lxflk,
+		    sizeof (struct lx_flock)) != 0)
+			return (-errno);
+		ltos_flock(&lxflk, &fl);
+		rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl);
+		if (cmd == LX_F_GETLK && rc >= 0) {
+			stol_flock(&fl, &lxflk);
+			if (uucopy((void *)&lxflk, (void *)p3,
+			    sizeof (struct lx_flock)) != 0)
+				return (-errno);
+		}
+	} else if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLKW64 ||
+	    cmd == LX_F_SETLK64) {
+		if (uucopy((void *)p3, (void *)&lxflk64,
+		    sizeof (struct lx_flock64)) != 0)
+			return (-errno);
+		ltos_flock64(&lxflk64, &fl64);
+		rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl64);
+		if (cmd == LX_F_GETLK64 && rc >= 0) {
+			stol_flock64(&fl64, &lxflk64);
+			if (uucopy((void *)&lxflk64, (void *)p3,
+			    sizeof (struct lx_flock64)) != 0)
+				return (-errno);
+		}
+	} else {
+		rc = lx_fcntl_com(fd, cmd, (ulong_t)p3);
+	}
+
+	return (rc);
+}
+
+/*
+ * Common back end for lx_fcntl() and lx_fcntl64(): carry out the Linux
+ * fcntl command "cmd" on "fd" with argument "arg", which the caller has
+ * already translated to native form where necessary.
+ *
+ * Returns the result of the command, -EINVAL for a command that is not
+ * handled here, or a negated errno when the underlying call returns -1.
+ */
+static int
+lx_fcntl_com(int fd, int cmd, ulong_t arg)
+{
+	int scmd = -1;		/* native command, if a plain pass-through */
+	int rc = 0;
+
+	switch (cmd) {
+	/* Commands that need more than a one-to-one command mapping. */
+	case LX_F_GETFD:
+		rc = fcntl(fd, F_GETFD, 0);
+		break;
+	case LX_F_GETFL:
+		rc = lx_fcntl_getfl(fd);
+		break;
+	case LX_F_SETFL:
+		rc = lx_fcntl_setfl(fd, arg);
+		break;
+
+	/* Commands that map directly onto a native command. */
+	case LX_F_DUPFD:
+		scmd = F_DUPFD;
+		break;
+	case LX_F_SETFD:
+		scmd = F_SETFD;
+		break;
+	case LX_F_GETLK:
+		scmd = F_GETLK;
+		break;
+	case LX_F_SETLK:
+		scmd = F_SETLK;
+		break;
+	case LX_F_SETLKW:
+		scmd = F_SETLKW;
+		break;
+	case LX_F_GETLK64:
+		scmd = F_GETLK64;
+		break;
+	case LX_F_SETLK64:
+		scmd = F_SETLK64;
+		break;
+	case LX_F_SETLKW64:
+		scmd = F_SETLKW64;
+		break;
+	case LX_F_SETOWN:
+		scmd = F_SETOWN;
+		break;
+	case LX_F_GETOWN:
+		scmd = F_GETOWN;
+		break;
+
+	default:
+		return (-EINVAL);
+	}
+
+	if (scmd != -1)
+		rc = fcntl(fd, scmd, arg);
+
+	if (rc == -1)
+		return (-errno);
+
+	return (rc);
+}
+
+
+#define LTOS_FLOCK(l, s) \
+{ \
+ s->l_type = ltos_type(l->l_type); \
+ s->l_whence = l->l_whence; \
+ s->l_start = l->l_start; \
+ s->l_len = l->l_len; \
+ s->l_sysid = 0; /* not defined in linux */ \
+ s->l_pid = (pid_t)l->l_pid; \
+}
+
+#define STOL_FLOCK(s, l) \
+{ \
+ l->l_type = stol_type(s->l_type); \
+ l->l_whence = s->l_whence; \
+ l->l_start = s->l_start; \
+ l->l_len = s->l_len; \
+ l->l_pid = (int)s->l_pid; \
+}
+
+static void
+ltos_flock(struct lx_flock *l, struct flock *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock(struct flock *s, struct lx_flock *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+static void
+ltos_flock64(struct lx_flock64 *l, struct flock64 *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock64(struct flock64 *s, struct lx_flock64 *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+/*
+ * Translate a Linux lock type to the native lock type.  Returns -1 for a
+ * value that is not one of the three Linux lock types.
+ */
+static short
+ltos_type(short l_type)
+{
+	if (l_type == LX_F_RDLCK)
+		return (F_RDLCK);
+	if (l_type == LX_F_WRLCK)
+		return (F_WRLCK);
+	if (l_type == LX_F_UNLCK)
+		return (F_UNLCK);
+
+	return (-1);
+}
+
+/*
+ * Translate a native lock type to the Linux lock type.  Any value other
+ * than the three native lock types yields 0.
+ */
+static short
+stol_type(short l_type)
+{
+	if (l_type == F_RDLCK)
+		return (LX_F_RDLCK);
+	if (l_type == F_WRLCK)
+		return (LX_F_WRLCK);
+	if (l_type == F_UNLCK)
+		return (LX_F_UNLCK);
+
+	/* can't ever happen */
+	return (0);
+}
+
+/*
+ * F_GETFL: fetch the native file status flags for "fd" and translate them
+ * to their Linux equivalents.
+ *
+ * Returns the Linux flag word, or a negated errno if the flags cannot be
+ * read (for instance because "fd" is not an open descriptor).  Without
+ * that check the -1 failure value would be decoded as if it were a flag
+ * word and handed back to the caller as a positive result.
+ */
+int
+lx_fcntl_getfl(int fd)
+{
+	int retval;
+	int rc;
+
+	if ((retval = fcntl(fd, F_GETFL, 0)) == -1)
+		return (-errno);
+
+	if ((retval & O_ACCMODE) == O_RDONLY)
+		rc = LX_O_RDONLY;
+	else if ((retval & O_ACCMODE) == O_WRONLY)
+		rc = LX_O_WRONLY;
+	else
+		rc = LX_O_RDWR;
+	/* O_NDELAY != O_NONBLOCK, so we need to check for both */
+	if (retval & O_NDELAY)
+		rc |= LX_O_NDELAY;
+	if (retval & O_NONBLOCK)
+		rc |= LX_O_NONBLOCK;
+	if (retval & O_APPEND)
+		rc |= LX_O_APPEND;
+	if (retval & O_SYNC)
+		rc |= LX_O_SYNC;
+	if (retval & O_LARGEFILE)
+		rc |= LX_O_LARGEFILE;
+	if (retval & FASYNC)
+		rc |= LX_O_ASYNC;
+
+	return (rc);
+}
+
+/*
+ * F_SETFL: translate the Linux file status flags in "arg" to their native
+ * equivalents and apply them to "fd".  Returns 0 on success or a negated
+ * errno.
+ */
+int
+lx_fcntl_setfl(int fd, ulong_t arg)
+{
+	int sflags = 0;
+
+	/* LX_O_NDELAY == LX_O_NONBLOCK, so we only check for one */
+	sflags |= (arg & LX_O_NDELAY) ? O_NONBLOCK : 0;
+	sflags |= (arg & LX_O_APPEND) ? O_APPEND : 0;
+	sflags |= (arg & LX_O_SYNC) ? O_SYNC : 0;
+	sflags |= (arg & LX_O_LARGEFILE) ? O_LARGEFILE : 0;
+	sflags |= (arg & LX_O_ASYNC) ? FASYNC : 0;
+
+	if (fcntl(fd, F_SETFL, sflags) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * flock() applies or removes an advisory lock on the file associated
+ * with the file descriptor p1, implemented here with a whole-file fcntl()
+ * record lock.
+ *
+ * Stolen verbatim from usr/src/ucblib/libucb/port/sys/flock.c
+ *
+ * The operation (p2) is one of LX_LOCK_SH, LX_LOCK_EX or LX_LOCK_UN,
+ * optionally combined with LX_LOCK_NB.
+ */
+int
+lx_flock(uintptr_t p1, uintptr_t p2)
+{
+	int fd = (int)p1;
+	int op = (int)p2;
+	/* A non-blocking request uses F_SETLK, anything else F_SETLKW. */
+	int cmd = (op & LX_LOCK_NB) ? F_SETLK : F_SETLKW;
+	struct flock fl;
+	int ret;
+
+	op &= ~LX_LOCK_NB;
+
+	if (op == LX_LOCK_UN)
+		fl.l_type = F_UNLCK;
+	else if (op == LX_LOCK_SH)
+		fl.l_type = F_RDLCK;
+	else if (op == LX_LOCK_EX)
+		fl.l_type = F_WRLCK;
+	else
+		return (-EINVAL);
+
+	fl.l_whence = 0;
+	fl.l_start = 0;
+	fl.l_len = 0;
+
+	ret = fcntl(fd, cmd, &fl);
+	if (ret != -1)
+		return (ret);
+
+	/* EACCES from the lock attempt is reported as EWOULDBLOCK. */
+	return ((errno == EACCES) ? -EWOULDBLOCK : -errno);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/file.c b/usr/src/lib/brand/lx/lx_brand/common/file.c
new file mode 100644
index 0000000000..eaa5349b6e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/file.c
@@ -0,0 +1,747 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <fcntl.h>
+#include <string.h>
+#include <utime.h>
+#include <atomic.h>
+
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_fcntl.h>
+
+/*
+ * Decide whether a failed operation on the path at p1 should be treated
+ * as a success while in install mode.
+ *
+ * The "dev" RPM package wants to modify /dev/pts, but /dev/pts is a lofs
+ * mounted copy of /native/dev/pts, so that won't work.  Instead, if we're
+ * trying to modify /dev/pts from install mode, just act as if it
+ * succeeded.
+ *
+ * Returns 0 if the path is exactly "/dev/pts".  Otherwise errno is put
+ * back to the value it had on entry and its negation is returned.  If the
+ * path cannot be copied in, the negated errno from that failure is
+ * returned instead.
+ */
+static int
+install_checkpath(uintptr_t p1)
+{
+	int entry_errno = errno;
+	char path[MAXPATHLEN];
+
+	if (uucopystr((void *)p1, path, MAXPATHLEN) == -1)
+		return (-errno);
+
+	if (strcmp(path, "/dev/pts") != 0) {
+		errno = entry_errno;
+		return (-entry_errno);
+	}
+
+	return (0);
+}
+
+/*
+ * Convert linux LX_AT_* flags to solaris AT_* flags, while verifying that
+ * only flags named in "allow" have been passed.  Checking against "allow"
+ * is also what lets EACCESS and REMOVEDIR be told apart, since on linux
+ * they have the same value.
+ *
+ * Returns the translated flags, or -EINVAL if "lflag" contains anything
+ * that was not both recognized and allowed.
+ */
+int
+ltos_at_flag(int lflag, int allow)
+{
+	/*
+	 * Each entry names a linux flag, the bit in "allow" that enables
+	 * it, and the flag to report back.  Entries are tried in order.
+	 * Solaris doesn't have a _FOLLOW flag right now, so the linux value
+	 * doubles as a fake native one.
+	 */
+	static const struct {
+		int	lx_flag;
+		int	allow_bit;
+		int	s_flag;
+	} map[] = {
+		{ LX_AT_EACCESS, AT_EACCESS, AT_EACCESS },
+		{ LX_AT_REMOVEDIR, AT_REMOVEDIR, AT_REMOVEDIR },
+		{ LX_AT_SYMLINK_NOFOLLOW, AT_SYMLINK_NOFOLLOW,
+		    AT_SYMLINK_NOFOLLOW },
+		{ LX_AT_SYMLINK_FOLLOW, LX_AT_SYMLINK_FOLLOW,
+		    LX_AT_SYMLINK_FOLLOW }
+	};
+	int sflag = 0;
+	uint_t i;
+
+	for (i = 0; i < sizeof (map) / sizeof (map[0]); i++) {
+		if ((lflag & map[i].lx_flag) && (allow & map[i].allow_bit)) {
+			lflag &= ~map[i].lx_flag;
+			sflag |= map[i].s_flag;
+		}
+	}
+
+	/* anything still set was either unknown or not allowed */
+	if (lflag != 0)
+		return (-EINVAL);
+
+	return (sflag);
+}
+
+
+/*
+ * Miscellaneous file-related system calls.
+ */
+
+/*
+ * Linux creates half-duplex unnamed pipes and Solaris creates full-duplex
+ * pipes.  Thus, to get the correct semantics, our simple pipe() system
+ * call actually needs to create a named pipe, do three opens, a close, and
+ * an unlink.  This is woefully expensive.  If performance becomes a real
+ * issue, we can implement a half-duplex pipe() in the brand module.
+ *
+ * p1 points to the two-element int array that receives the read and write
+ * descriptors.  Returns 0 on success or a negated errno.
+ */
+#define	PIPENAMESZ	32 /* enough room for /tmp/.pipe.<pid>.<num> */
+
+int
+lx_pipe(uintptr_t p1)
+{
+	static uint32_t pipecnt = 0;
+	char pipename[PIPENAMESZ];
+	int fds[2];		/* [0] read side, [1] write side */
+	int keepfd;		/* temporary read/write descriptor */
+	int err = 0;
+	int cnt;
+
+	fds[0] = -1;
+	fds[1] = -1;
+
+	/*
+	 * Construct a name for the named pipe: /tmp/.pipe.<pid>.<++cnt>
+	 */
+	cnt = atomic_inc_32_nv(&pipecnt);
+
+	(void) snprintf(pipename, PIPENAMESZ, "/tmp/.pipe.%d.%d",
+	    getpid(), cnt);
+
+	if (mkfifo(pipename, 0600))
+		return (-errno);
+
+	/*
+	 * The pipe is opened for both reading and writing first so that
+	 * neither the read-only nor the write-only open blocks.  Once both
+	 * one-way descriptors exist they are copied to the app's address
+	 * space.
+	 */
+	keepfd = open(pipename, O_RDWR);
+	if (keepfd < 0) {
+		err = errno;
+	} else if ((fds[0] = open(pipename, O_RDONLY)) < 0) {
+		err = errno;
+	} else if ((fds[1] = open(pipename, O_WRONLY)) < 0) {
+		err = errno;
+	} else if (uucopy(fds, (void *)p1, 2 * sizeof (int))) {
+		err = errno;
+	}
+
+	if (keepfd >= 0)
+		(void) close(keepfd);
+	(void) unlink(pipename);
+
+	/* On any failure, give back whatever was opened. */
+	if (err != 0) {
+		if (fds[0] >= 0)
+			(void) close(fds[0]);
+		if (fds[1] >= 0)
+			(void) close(fds[1]);
+	}
+
+	return (-err);
+}
+
+/*
+ * link(): on Linux, even root cannot create a link to a directory, so an
+ * explicit check is made before calling the native link().  Returns 0,
+ * -EPERM if the source (p1) is a directory, or a negated errno.
+ */
+int
+lx_link(uintptr_t p1, uintptr_t p2)
+{
+	char *from = (char *)p1;
+	char *to = (char *)p2;
+	struct stat64 sb;
+
+	if (stat64(from, &sb) == 0 && S_ISDIR(sb.st_mode))
+		return (-EPERM);
+
+	if (link(from, to) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * unlink(): on Linux, an unlink of a directory returns EISDIR, not EPERM,
+ * so directories are detected up front.  Returns 0 or a negated errno.
+ */
+int
+lx_unlink(uintptr_t p)
+{
+	char *pathname = (char *)p;
+	struct stat64 sb;
+
+	if (lstat64(pathname, &sb) == 0 && S_ISDIR(sb.st_mode))
+		return (-EISDIR);
+
+	if (unlink(pathname) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * unlinkat(): ext1 is the directory descriptor, p1 the path and p2 the
+ * Linux flags, of which only the remove-directory flag is accepted.
+ * Without that flag the call behaves like lx_unlink() and refuses
+ * directories with EISDIR.  Returns 0 or a negated errno.
+ */
+int
+lx_unlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2)
+{
+	int atfd = (int)ext1;
+	char *pathname = (char *)p1;
+	int flag;
+	struct stat64 sb;
+
+	if (atfd == LX_AT_FDCWD)
+		atfd = AT_FDCWD;
+
+	flag = ltos_at_flag((int)p2, AT_REMOVEDIR);
+	if (flag < 0)
+		return (-EINVAL);
+
+	/* Behave like unlink() unless asked to remove a directory. */
+	if ((flag & AT_REMOVEDIR) == 0 &&
+	    fstatat64(atfd, pathname, &sb, AT_SYMLINK_NOFOLLOW) == 0 &&
+	    S_ISDIR(sb.st_mode))
+		return (-EISDIR);
+
+	if (unlinkat(atfd, pathname, flag) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * fsync() and fdatasync() - On Solaris, these calls translate into a
+ * common fsync() syscall with a different parameter, so we layer on top
+ * of the librt functions instead.
+ *
+ * Character devices and FIFOs are rejected with EINVAL before the native
+ * call is made.  Returns 0 or a negated errno.
+ */
+int
+lx_fsync(uintptr_t fd)
+{
+	int fildes = (int)fd;
+	struct stat64 sb;
+
+	if (fstat64(fildes, &sb) == 0) {
+		if (S_ISCHR(sb.st_mode) || S_ISFIFO(sb.st_mode))
+			return (-EINVAL);
+	}
+
+	if (fsync(fildes) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * fdatasync(): as lx_fsync(), character devices and FIFOs are rejected
+ * with EINVAL before the native call is made.  Returns 0 or a negated
+ * errno.
+ */
+int
+lx_fdatasync(uintptr_t fd)
+{
+	int fildes = (int)fd;
+	struct stat64 sb;
+
+	if (fstat64(fildes, &sb) == 0) {
+		if (S_ISCHR(sb.st_mode) || S_ISFIFO(sb.st_mode))
+			return (-EINVAL);
+	}
+
+	if (fdatasync(fildes) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux, unlike Solaris, ALWAYS resets the setuid and setgid bits on a
+ * chown/fchown regardless of whether it was done by root or not. Therefore,
+ * we must do extra work after each chown/fchown call to emulate this behavior.
+ */
+#define SETUGID (S_ISUID | S_ISGID)
+
+/*
+ * [lf]chown16() - Translate the uid/gid and pass onto the real functions.
+ *
+ * p1 is the path, p2 the 16-bit uid and p3 the 16-bit gid.  After a
+ * successful chown() the setuid bit is cleared, as is the setgid bit when
+ * the file is group-executable; failure of that follow-up is ignored.
+ * Returns 0 or a negated errno.
+ */
+int
+lx_chown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char *filename = (char *)p1;
+	struct stat64 statbuf;
+
+	/* The uid is narrowed through the uid typedef, the gid via the gid. */
+	if (chown(filename, LX_UID16_TO_UID32((lx_uid16_t)p2),
+	    LX_GID16_TO_GID32((lx_gid16_t)p3)))
+		return (-errno);
+
+	if (stat64(filename, &statbuf) == 0) {
+		statbuf.st_mode &= ~S_ISUID;
+		if (statbuf.st_mode & S_IXGRP)
+			statbuf.st_mode &= ~S_ISGID;
+		(void) chmod(filename, (statbuf.st_mode & MODEMASK));
+	}
+
+	return (0);
+}
+
+/*
+ * fchown16(): p1 is the file descriptor, p2 the 16-bit uid and p3 the
+ * 16-bit gid.  After a successful fchown() the setuid bit is cleared, as
+ * is the setgid bit when the file is group-executable; failure of that
+ * follow-up is ignored.  Returns 0 or a negated errno.
+ */
+int
+lx_fchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	struct stat64 statbuf;
+
+	/* The uid is narrowed through the uid typedef, the gid via the gid. */
+	if (fchown(fd, LX_UID16_TO_UID32((lx_uid16_t)p2),
+	    LX_GID16_TO_GID32((lx_gid16_t)p3)))
+		return (-errno);
+
+	if (fstat64(fd, &statbuf) == 0) {
+		statbuf.st_mode &= ~S_ISUID;
+		if (statbuf.st_mode & S_IXGRP)
+			statbuf.st_mode &= ~S_ISGID;
+		(void) fchmod(fd, (statbuf.st_mode & MODEMASK));
+	}
+
+	return (0);
+}
+
+/*
+ * lchown16(): p1 is the path, p2 the 16-bit uid and p3 the 16-bit gid.
+ * The ids are widened and handed to lchown().  Returns 0 or a negated
+ * errno.
+ */
+int
+lx_lchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	/* The uid is narrowed through the uid typedef, the gid via the gid. */
+	return (lchown((char *)p1, LX_UID16_TO_UID32((lx_uid16_t)p2),
+	    LX_GID16_TO_GID32((lx_gid16_t)p3)) ? -errno : 0);
+}
+
+/*
+ * chown(): p1 is the path, p2 the uid and p3 the gid.  After a
+ * successful chown() the setuid bit is cleared, as is the setgid bit when
+ * the file is group-executable; failure of that follow-up is ignored.
+ * Returns 0 or a negated errno.
+ */
+int
+lx_chown(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char *filename = (char *)p1;
+	struct stat64 sb;
+	mode_t mode;
+
+	if (chown(filename, (uid_t)p2, (gid_t)p3) < 0) {
+		/*
+		 * In install mode a failed chown() is reported as a
+		 * success if the file didn't actually exist or if we're
+		 * trying to modify /dev/pts.
+		 */
+		if (lx_install != 0 &&
+		    (errno == ENOENT || install_checkpath(p1) == 0))
+			return (0);
+
+		return (-errno);
+	}
+
+	if (stat64(filename, &sb) != 0)
+		return (0);
+
+	mode = sb.st_mode & ~S_ISUID;
+	if (mode & S_IXGRP)
+		mode &= ~S_ISGID;
+	(void) chmod(filename, (mode & MODEMASK));
+
+	return (0);
+}
+
+/*
+ * fchown(): p1 is the file descriptor, p2 the uid and p3 the gid.  After
+ * a successful fchown() the setuid bit is cleared, as is the setgid bit
+ * when the file is group-executable; failure of that follow-up is
+ * ignored.  Returns 0 or a negated errno.
+ */
+int
+lx_fchown(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	struct stat64 sb;
+	mode_t mode;
+
+	if (fchown(fd, (uid_t)p2, (gid_t)p3) != 0)
+		return (-errno);
+
+	if (fstat64(fd, &sb) != 0)
+		return (0);
+
+	mode = sb.st_mode & ~S_ISUID;
+	if (mode & S_IXGRP)
+		mode &= ~S_ISGID;
+	(void) fchmod(fd, (mode & MODEMASK));
+
+	return (0);
+}
+
+/*
+ * chmod(): p1 is the path and p2 the new mode.  Returns 0 or a negated
+ * errno.
+ */
+int
+lx_chmod(uintptr_t p1, uintptr_t p2)
+{
+	if (chmod((const char *)p1, (mode_t)p2) >= 0)
+		return (0);
+
+	/*
+	 * In install mode a failed chmod() is reported as a success if the
+	 * file didn't actually exist or if we're trying to modify /dev/pts.
+	 */
+	if (lx_install != 0 &&
+	    (errno == ENOENT || install_checkpath(p1) == 0))
+		return (0);
+
+	return (-errno);
+}
+
+/*
+ * utime(): p1 is the path and p2 a pointer to the struct utimbuf holding
+ * the new times.  Returns 0 or a negated errno.
+ */
+int
+lx_utime(uintptr_t p1, uintptr_t p2)
+{
+	if (utime((const char *)p1, (const struct utimbuf *)p2) >= 0)
+		return (0);
+
+	/*
+	 * In install mode a failed utime() is reported as a success if the
+	 * file didn't actually exist or if we're trying to modify /dev/pts.
+	 */
+	if (lx_install != 0 &&
+	    (errno == ENOENT || install_checkpath(p1) == 0))
+		return (0);
+
+	return (-errno);
+}
+
+/*
+ * llseek() - The Linux implementation takes an additional parameter, which
+ * is the resulting position in the file.
+ *
+ * p1 is the file descriptor; the 64-bit offset is assembled from p2 and p3
+ * by LX_32TO64(); p4 points to where the resulting position is stored and
+ * p5 is the whence value.
+ *
+ * The result is stored with uucopy() so that a bad p4 produces an errno
+ * instead of a fault inside the brand library.  Returns 0 or a negated
+ * errno.
+ */
+int
+lx_llseek(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5)
+{
+	offset_t ret;
+
+	/* SEEK_DATA and SEEK_HOLE are only valid in Solaris */
+	if ((int)p5 > SEEK_END)
+		return (-EINVAL);
+
+	if ((ret = llseek((int)p1, LX_32TO64(p3, p2), p5)) < 0)
+		return (-errno);
+
+	if (uucopy(&ret, (void *)p4, sizeof (offset_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * seek() - When the resultant file offset cannot be represented in 32
+ * bits, Linux performs the seek but Solaris doesn't, though both set
+ * EOVERFLOW.  We call llseek() and then check to see if we need to return
+ * EOVERFLOW.
+ *
+ * p1 is the file descriptor, p2 the offset and p3 the whence value.
+ * Returns the new offset or a negated errno.
+ */
+int
+lx_lseek(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	offset_t offset = (offset_t)(off_t)(p2);	/* sign extend */
+	offset_t pos;
+	off_t pos32;
+
+	/* SEEK_DATA and SEEK_HOLE are only valid in Solaris */
+	if ((int)p3 > SEEK_END)
+		return (-EINVAL);
+
+	pos = llseek((int)p1, offset, p3);
+	if (pos < 0)
+		return (-errno);
+
+	/* The seek has happened; report whether the result fits in off_t. */
+	pos32 = (off_t)pos;
+	if ((offset_t)pos32 != pos)
+		return (-EOVERFLOW);
+
+	return (pos32);
+}
+
+/*
+ * sync(): neither Solaris nor Linux actually returns anything to the
+ * caller, but glibc expects to see SOME value returned, so placate it and
+ * always return 0.
+ */
+int
+lx_sync(void)
+{
+	int rv = 0;
+
+	sync();
+
+	return (rv);
+}
+
+/*
+ * rmdir(): remove the directory named by p1.  An EEXIST failure from the
+ * native call is reported as ENOTEMPTY.  Returns 0 or a negated errno.
+ */
+int
+lx_rmdir(uintptr_t p1)
+{
+	if (rmdir((char *)p1) >= 0)
+		return (0);
+
+	if (errno == EEXIST)
+		return (-ENOTEMPTY);
+
+	return (-errno);
+}
+
+/*
+ * sysfs(): exactly the same as Solaris' sysfs(2), except Linux numbers
+ * their fs indices starting at 0, and Solaris starts at 1.
+ *
+ * Linux actually doesn't have #defines for the option values; their
+ * sysfs(2) man page literally defines the "option" field as being 1, 2 or
+ * 3, corresponding to Solaris' GETFSIND, GETFSTYP and GETNFSTYP,
+ * respectively.  Any other option yields -EINVAL.
+ */
+int
+lx_sysfs(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int option = (int)p1;
+	int res;
+
+	if (option == 1) {
+		/* name -> index, shifted down to the Linux numbering */
+		res = sysfs(GETFSIND, (const char *)p2);
+		return ((res < 0) ? -errno : res - 1);
+	}
+
+	if (option == 2) {
+		/* index -> name, shifted up to the Solaris numbering */
+		res = sysfs(GETFSTYP, (int)p2 + 1, (char *)p3);
+		return ((res < 0) ? -errno : 0);
+	}
+
+	if (option == 3) {
+		res = sysfs(GETNFSTYP);
+		return ((res < 0) ? -errno : res);
+	}
+
+	return (-EINVAL);
+}
+
+/*
+ * faccessat(): p1 is the directory descriptor, p2 the path, p3 the access
+ * mode and p4 the Linux flags, of which only the effective-id flag is
+ * accepted.  Returns 0 or a negated errno.
+ */
+int
+lx_faccessat(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+	int atfd = (int)p1;
+	char *path = (char *)p2;
+	int mode = (mode_t)p3;
+	int sflag;
+
+	if (atfd == LX_AT_FDCWD)
+		atfd = AT_FDCWD;
+
+	sflag = ltos_at_flag((int)p4, AT_EACCESS);
+	if (sflag < 0)
+		return (-EINVAL);
+
+	if (faccessat(atfd, path, mode, sflag) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * futimesat(): p1 is the directory descriptor, p2 the path and p3 a
+ * pointer to the struct timeval array holding the new times.  Returns 0
+ * or a negated errno.
+ */
+int
+lx_futimesat(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int atfd = ((int)p1 == LX_AT_FDCWD) ? AT_FDCWD : (int)p1;
+
+	if (futimesat(atfd, (char *)p2, (struct timeval *)p3) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+
+/*
+ * Constructs a path string in outbuf from the path of fd and the path
+ * string pointed to by "p1".  This is required for emulating *at() system
+ * calls.
+ * Example:
+ *    If the path of fd is "/foo/bar" and path is "etc" the string returned
+ *    is "/foo/bar/etc", if the fd is a file fd then it fails with ENOTDIR.
+ *    If path is absolute, or fd is LX_AT_FDCWD, then no modifications are
+ *    made to it when copied.
+ *
+ * Returns 0 on success or a negated errno.
+ */
+static int
+getpathat(int fd, uintptr_t p1, char *outbuf, size_t outbuf_size)
+{
+	char relpath[MAXPATHLEN];
+	char dirbuf[MAXPATHLEN];
+	char *dirpath;
+	struct stat64 sb;
+	int len;
+
+	if (uucopystr((void *)p1, relpath, MAXPATHLEN) == -1)
+		return (-errno);
+
+	/* Nothing to prepend: hand the caller's path back unchanged. */
+	if (relpath[0] == '/' || fd == LX_AT_FDCWD) {
+		(void) strlcpy(outbuf, relpath, outbuf_size);
+		return (0);
+	}
+
+	dirpath = lx_fd_to_path(fd, dirbuf, sizeof (dirbuf));
+	if (dirpath == NULL)
+		return (-EBADF);
+
+	if (fstat64(fd, &sb) < 0)
+		return (-EBADF);
+
+	if (!S_ISDIR(sb.st_mode))
+		return (-ENOTDIR);
+
+	len = snprintf(outbuf, outbuf_size, "%s/%s", dirpath, relpath);
+	if (len > (outbuf_size-1))
+		return (-ENAMETOOLONG);
+
+	return (0);
+}
+
+/*
+ * mkdirat(): p1 is the directory descriptor, p2 the path and p3 the mode.
+ * The path is resolved with getpathat() and handed to mkdir().  Returns 0
+ * or a negated errno.
+ */
+int
+lx_mkdirat(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char pathbuf[MAXPATHLEN];
+	int err;
+
+	err = getpathat((int)p1, p2, pathbuf, sizeof (pathbuf));
+	if (err < 0)
+		return (err);
+
+	if (mkdir(pathbuf, (mode_t)p3) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * mknodat(): ext1 is the directory descriptor and p1 the path; p2 and p3
+ * are passed through to lx_mknod() unchanged once the path has been
+ * resolved with getpathat().
+ */
+int
+lx_mknodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char pathbuf[MAXPATHLEN];
+	int err;
+
+	err = getpathat((int)ext1, p1, pathbuf, sizeof (pathbuf));
+	if (err < 0)
+		return (err);
+
+	return (lx_mknod((uintptr_t)pathbuf, p2, p3));
+}
+
+/*
+ * symlinkat(): p1 is the link contents, ext1 the directory descriptor and
+ * p2 the path of the link to create.  The link path is resolved with
+ * getpathat() and handed to symlink().  Returns 0 or a negated errno.
+ */
+int
+lx_symlinkat(uintptr_t p1, uintptr_t ext1, uintptr_t p2)
+{
+	char linkpath[MAXPATHLEN];
+	int err;
+
+	err = getpathat((int)ext1, p2, linkpath, sizeof (linkpath));
+	if (err < 0)
+		return (err);
+
+	if (symlink((char *)p1, linkpath) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * linkat(): ext1/p1 are the directory descriptor and path of the existing
+ * file, ext2/p2 those of the new link, and p3 the flags.  Both paths are
+ * resolved with getpathat() and handed to lx_link().
+ *
+ * The flag specifies whether the hardlink will point to a symlink or not;
+ * on solaris the default behaviour of link() is to dereference a symlink
+ * and there is no obvious way to trigger the other behaviour.  So for now
+ * we just ignore this flag and act like link().
+ */
+int
+lx_linkat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2,
+    uintptr_t p3)
+{
+	char frompath[MAXPATHLEN];
+	char topath[MAXPATHLEN];
+	int err;
+
+	/* LINTED [set but not used in function] */
+	int flag = p3;
+
+	if (flag != p3)
+		return (flag); /* workaround */
+
+	if ((err = getpathat((int)ext1, p1, frompath,
+	    sizeof (frompath))) < 0)
+		return (err);
+
+	if ((err = getpathat((int)ext2, p2, topath, sizeof (topath))) < 0)
+		return (err);
+
+	return (lx_link((uintptr_t)frompath, (uintptr_t)topath));
+}
+
+/*
+ * readlinkat(): ext1 is the directory descriptor, p1 the path, p2 the
+ * buffer that receives the link contents and p3 its size.  The path is
+ * resolved with getpathat() and handed to readlink().  Returns the
+ * readlink() result or a negated errno.
+ */
+int
+lx_readlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char pathbuf[MAXPATHLEN];
+	int len;
+
+	len = getpathat((int)ext1, p1, pathbuf, sizeof (pathbuf));
+	if (len < 0)
+		return (len);
+
+	len = readlink(pathbuf, (char *)p2, (size_t)p3);
+
+	return ((len < 0) ? -errno : len);
+}
+
+/*
+ * fchownat(): ext1 is the directory descriptor, p1 the path, p2 the uid,
+ * p3 the gid and p4 the Linux flags, of which only the no-follow flag is
+ * accepted.  With that flag the resolved path goes to lchown(); otherwise
+ * it goes to lx_chown().  Returns 0 or a negated errno.
+ */
+int
+lx_fchownat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4)
+{
+	char pathbuf[MAXPATHLEN];
+	int sflag;
+	int err;
+
+	sflag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW);
+	if (sflag < 0)
+		return (-EINVAL);
+
+	err = getpathat((int)ext1, p1, pathbuf, sizeof (pathbuf));
+	if (err < 0)
+		return (err);
+
+	if ((sflag & AT_SYMLINK_NOFOLLOW) == 0)
+		return (lx_chown((uintptr_t)pathbuf, p2, p3));
+
+	if (lchown(pathbuf, (uid_t)p2, (gid_t)p3) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * fchmodat(): ext1 is the directory descriptor, p1 the path, p2 the mode
+ * and p3 the flags.  The path is resolved with getpathat() and handed to
+ * lx_chmod().
+ *
+ * It seems that at least some versions of glibc do not set or clear the
+ * flags arg, so checking them will result in random behaviour; they are
+ * therefore not interpreted.
+ */
+int
+lx_fchmodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char pathbuf[MAXPATHLEN];
+	int err;
+
+	/* LINTED [set but not used in function] */
+	int flag = p3;
+
+	if (flag != p3)
+		return (flag); /* workaround */
+
+	if ((err = getpathat((int)ext1, p1, pathbuf, sizeof (pathbuf))) < 0)
+		return (err);
+
+	return (lx_chmod((uintptr_t)pathbuf, p2));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c
new file mode 100644
index 0000000000..7e75efaa39
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/lx_misc.h>
+
+/*
+ * fork() and vfork()
+ *
+ * These cannot be pass thru system calls because we need libc to do its
+ * own initialization or else bad things will happen (i.e. ending up with a
+ * bad schedctl page).  On Linux, there is no such thing as forkall(), so
+ * we use fork1() here.
+ *
+ * When lx_is_rpm is set, the child sleeps for lx_rpm_delay seconds before
+ * returning.  Returns the fork1() result or a negated errno.
+ */
+int
+lx_fork(void)
+{
+	int pid = fork1();
+
+	if (pid == -1)
+		return (-errno);
+
+	if (pid == 0 && lx_is_rpm)
+		(void) sleep(lx_rpm_delay);
+
+	return (pid);
+}
+
+/*
+ * For vfork(), we have a serious problem because the child is not allowed
+ * to return from the current frame because it will corrupt the parent's
+ * stack.  Since the semantics of vfork() are rather ill-defined (other
+ * than "it's faster than fork"), we should theoretically be safe by
+ * falling back to fork1().
+ *
+ * Returns the fork1() result or a negated errno.
+ */
+int
+lx_vfork(void)
+{
+	int pid = fork1();
+
+	if (pid == -1)
+		return (-errno);
+
+	return (pid);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/id.c b/usr/src/lib/brand/lx/lx_brand/common/id.c
new file mode 100644
index 0000000000..a9987cea52
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/id.c
@@ -0,0 +1,269 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+#include <sys/ucred.h>
+#include <sys/syscall.h>
+#include <alloca.h>
+#include <errno.h>
+#include <ucred.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/lx_misc.h>
+
+/*
+ * setuid16(): widen the 16-bit uid and pass it to setuid().  Returns 0 or
+ * a negated errno.
+ */
+int
+lx_setuid16(uintptr_t uid)
+{
+	if (setuid(LX_UID16_TO_UID32((lx_uid16_t)uid)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* getuid16(): the real uid, narrowed to its 16-bit form. */
+int
+lx_getuid16(void)
+{
+	int uid16 = (int)LX_UID32_TO_UID16(getuid());
+
+	return (uid16);
+}
+
+/*
+ * setgid16(): widen the 16-bit gid and pass it to setgid().  Returns 0 or
+ * a negated errno.
+ */
+int
+lx_setgid16(uintptr_t gid)
+{
+	if (setgid(LX_GID16_TO_GID32((lx_gid16_t)gid)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* getgid16(): the real gid, narrowed to its 16-bit form. */
+int
+lx_getgid16(void)
+{
+	int gid16 = (int)LX_GID32_TO_GID16(getgid());
+
+	return (gid16);
+}
+
+/* geteuid16(): the effective uid, narrowed to its 16-bit form. */
+int
+lx_geteuid16(void)
+{
+	int euid16 = (int)LX_UID32_TO_UID16(geteuid());
+
+	return (euid16);
+}
+
+/* getegid16(): the effective gid, narrowed to its 16-bit form. */
+int
+lx_getegid16(void)
+{
+	int egid16 = (int)LX_GID32_TO_GID16(getegid());
+
+	return (egid16);
+}
+
+/* geteuid(): the effective uid, as reported by the native geteuid(). */
+int
+lx_geteuid(void)
+{
+	int euid = (int)geteuid();
+
+	return (euid);
+}
+
+/* getegid(): the effective gid, as reported by the native getegid(). */
+int
+lx_getegid(void)
+{
+	int egid = (int)getegid();
+
+	return (egid);
+}
+
+/*
+ * getresuid(): store the real, effective and saved uids of the calling
+ * process through the pointers ruid, euid and suid.  Returns 0 or a
+ * negated errno.
+ */
+int
+lx_getresuid(uintptr_t ruid, uintptr_t euid, uintptr_t suid)
+{
+	lx_uid_t lx_ruid, lx_euid, lx_suid;
+	ucred_t *cr;
+	size_t sz;
+
+	/*
+	 * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+	 * because ucred_get() calls malloc(3C), which the brand library
+	 * cannot use.  Because we allocate the space with SAFE_ALLOCA(),
+	 * there's no need to free it when we're done.
+	 */
+	sz = ucred_size();
+	cr = (ucred_t *)SAFE_ALLOCA(sz);
+	if (cr == NULL)
+		return (-ENOMEM);
+
+	if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+		return (-errno);
+
+	/* Pull out the three ids, giving up at the first failure. */
+	lx_ruid = (lx_uid_t)ucred_getruid(cr);
+	if (lx_ruid == (lx_uid_t)-1)
+		return (-errno);
+
+	lx_euid = (lx_uid_t)ucred_geteuid(cr);
+	if (lx_euid == (lx_uid_t)-1)
+		return (-errno);
+
+	lx_suid = (lx_uid_t)ucred_getsuid(cr);
+	if (lx_suid == (lx_uid_t)-1)
+		return (-errno);
+
+	if (uucopy(&lx_ruid, (void *)ruid, sizeof (lx_uid_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_euid, (void *)euid, sizeof (lx_uid_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_suid, (void *)suid, sizeof (lx_uid_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * getresuid16(): as lx_getresuid(), but the three ids are narrowed to
+ * their 16-bit form before being stored through ruid16, euid16 and
+ * suid16.  Returns 0 or a negated errno.
+ */
+int
+lx_getresuid16(uintptr_t ruid16, uintptr_t euid16, uintptr_t suid16)
+{
+	lx_uid_t lx_ruid, lx_euid, lx_suid;
+	lx_uid16_t lx_ruid16, lx_euid16, lx_suid16;
+	int rv;
+
+	rv = lx_getresuid((uintptr_t)&lx_ruid, (uintptr_t)&lx_euid,
+	    (uintptr_t)&lx_suid);
+	if (rv != 0)
+		return (rv);
+
+	lx_ruid16 = LX_UID32_TO_UID16(lx_ruid);
+	lx_euid16 = LX_UID32_TO_UID16(lx_euid);
+	lx_suid16 = LX_UID32_TO_UID16(lx_suid);
+
+	if (uucopy(&lx_ruid16, (void *)ruid16, sizeof (lx_uid16_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_euid16, (void *)euid16, sizeof (lx_uid16_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_suid16, (void *)suid16, sizeof (lx_uid16_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * getresgid(): store the real, effective and saved gids of the calling
+ * process through the pointers rgid, egid and sgid.  Returns 0 or a
+ * negated errno.
+ */
+int
+lx_getresgid(uintptr_t rgid, uintptr_t egid, uintptr_t sgid)
+{
+	lx_gid_t lx_rgid, lx_egid, lx_sgid;
+	ucred_t *cr;
+	size_t sz;
+
+	/*
+	 * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+	 * because ucred_get() calls malloc(3C), which the brand library
+	 * cannot use.  Because we allocate the space with SAFE_ALLOCA(),
+	 * there's no need to free it when we're done.
+	 */
+	sz = ucred_size();
+	cr = (ucred_t *)SAFE_ALLOCA(sz);
+	if (cr == NULL)
+		return (-ENOMEM);
+
+	if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+		return (-errno);
+
+	/* Pull out the three ids, giving up at the first failure. */
+	lx_rgid = (lx_gid_t)ucred_getrgid(cr);
+	if (lx_rgid == (lx_gid_t)-1)
+		return (-errno);
+
+	lx_egid = (lx_gid_t)ucred_getegid(cr);
+	if (lx_egid == (lx_gid_t)-1)
+		return (-errno);
+
+	lx_sgid = (lx_gid_t)ucred_getsgid(cr);
+	if (lx_sgid == (lx_gid_t)-1)
+		return (-errno);
+
+	if (uucopy(&lx_rgid, (void *)rgid, sizeof (lx_gid_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_egid, (void *)egid, sizeof (lx_gid_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_sgid, (void *)sgid, sizeof (lx_gid_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * getresgid16(): as lx_getresgid(), but the three ids are narrowed to
+ * their 16-bit form before being stored through rgid16, egid16 and
+ * sgid16.  Returns 0 or a negated errno.
+ */
+int
+lx_getresgid16(uintptr_t rgid16, uintptr_t egid16, uintptr_t sgid16)
+{
+	lx_gid_t lx_rgid, lx_egid, lx_sgid;
+	lx_gid16_t lx_rgid16, lx_egid16, lx_sgid16;
+	int rv;
+
+	if ((rv = lx_getresgid((uintptr_t)&lx_rgid, (uintptr_t)&lx_egid,
+	    (uintptr_t)&lx_sgid)) != 0)
+		return (rv);
+
+	/* These are gids, so narrow them with the gid conversion. */
+	lx_rgid16 = LX_GID32_TO_GID16(lx_rgid);
+	lx_egid16 = LX_GID32_TO_GID16(lx_egid);
+	lx_sgid16 = LX_GID32_TO_GID16(lx_sgid);
+
+	if (uucopy(&lx_rgid16, (void *)rgid16, sizeof (lx_gid16_t)) != 0)
+		return (-errno);
+
+	if (uucopy(&lx_egid16, (void *)egid16, sizeof (lx_gid16_t)) != 0)
+		return (-errno);
+
+	return ((uucopy(&lx_sgid16, (void *)sgid16, sizeof (lx_gid16_t)) != 0)
+	    ? -errno : 0);
+}
+
+/*
+ * setreuid16(): widen the 16-bit real and effective uids and pass them to
+ * setreuid().  Returns 0 or a negated errno.
+ */
+int
+lx_setreuid16(uintptr_t ruid, uintptr_t euid)
+{
+	if (setreuid(LX_UID16_TO_UID32((lx_uid16_t)ruid),
+	    LX_UID16_TO_UID32((lx_uid16_t)euid)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * setregid16(): widen the 16-bit real and effective gids and pass them to
+ * setregid().  Returns 0 or a negated errno.
+ */
+int
+lx_setregid16(uintptr_t rgid, uintptr_t egid)
+{
+	/* These are gids, so widen them with the gid conversion. */
+	return ((setregid(LX_GID16_TO_GID32((lx_gid16_t)rgid),
+	    LX_GID16_TO_GID32((lx_gid16_t)egid))) ? -errno : 0);
+}
+
+/*
+ * The lx brand cannot support the setfs[ug]id16/setfs[ug]id calls as that
+ * would require significant rework of Solaris' privilege mechanisms, so
+ * instead return the current effective [ug]id.
+ *
+ * In Linux, fsids track effective IDs, so returning the effective IDs
+ * works as a substitute; returning the current value also denotes failure
+ * of the call if the caller had specified something different.  We don't
+ * need to worry about setting error codes because the Linux calls don't
+ * set any.
+ */
+/*ARGSUSED*/
+int
+lx_setfsuid16(uintptr_t fsuid16)
+{
+	int cur = lx_geteuid16();
+
+	return (cur);
+}
+
+/*
+ * setfsgid16(): the requested id is ignored; the current effective gid in
+ * its 16-bit form is returned instead.
+ */
+/*ARGSUSED*/
+int
+lx_setfsgid16(uintptr_t fsgid16)
+{
+	int cur = lx_getegid16();
+
+	return (cur);
+}
+
+/*
+ * setfsuid(): the requested id is ignored; the current effective uid is
+ * returned instead.
+ */
+/*ARGSUSED*/
+int
+lx_setfsuid(uintptr_t fsuid)
+{
+	int cur = geteuid();
+
+	return (cur);
+}
+
+/*
+ * setfsgid(): the requested id is ignored; the current effective gid is
+ * returned instead.
+ */
+/*ARGSUSED*/
+int
+lx_setfsgid(uintptr_t fsgid)
+{
+	int cur = getegid();
+
+	return (cur);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c
new file mode 100644
index 0000000000..2f2b022ed5
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c
@@ -0,0 +1,2719 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <thread.h>
+#include <errno.h>
+#include <libintl.h>
+#include <sys/bitmap.h>
+#include <sys/lx_autofs.h>
+#include <sys/modctl.h>
+#include <sys/filio.h>
+#include <sys/termios.h>
+#include <sys/termio.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <sys/ptms.h>
+#include <sys/ldlinux.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_socket.h>
+#include <sys/syscall.h>
+#include <sys/brand.h>
+#include <sys/lx_audio.h>
+#include <sys/lx_ioctl.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/ptyvar.h>
+#include <sys/audio.h>
+#include <sys/mixer.h>
+
+/* Define _KERNEL to get the devt manipulation macros. */
+#define _KERNEL
+#include <sys/sysmacros.h>
+#undef _KERNEL
+
+/* Maximum number of modules on a stream that we can handle. */
+#define MAX_STRMODS 10
+
+/* Maximum buffer size for debugging messages. */
+#define MSGBUF 1024
+
+/*
+ * Structure used to define an ioctl translator.  A table of these is
+ * scanned until an entry with a NULL ict_func is reached.
+ */
+typedef struct ioc_cmd_translator {
+ int ict_lx_cmd; /* command number compared with the caller's cmd */
+ char *ict_lx_cmd_str; /* name of ict_lx_cmd, used in debug output */
+ int ict_cmd; /* command value passed to ict_func */
+ char *ict_cmd_str; /* name of ict_cmd, passed to ict_func */
+ int (*ict_func)(int fd, struct stat *stat,
+ int cmd, char *cmd_str, intptr_t arg);
+} ioc_cmd_translator_t;
+
+/*
+ * Structures used to associate a group of ioctl translators with
+ * a specific device.
+ */
+typedef struct ioc_dev_translator {
+ char *idt_driver; /* driver name handed to modctl(MODGETMAJBIND) */
+ major_t idt_major; /* filled in by lx_ioctl_init(); -1 on failure */
+
+ /* Array of command translators. */
+ ioc_cmd_translator_t *idt_cmds;
+} ioc_dev_translator_t;
+
+/*
+ * Structures used to associate a group of ioctl translators with
+ * a specific filesystem.
+ */
+typedef struct ioc_fs_translator {
+ char *ift_filesystem; /* compared against stat.st_fstype */
+
+ /* Array of command translators. */
+ ioc_cmd_translator_t *ift_cmds;
+} ioc_fs_translator_t;
+
+/*
+ * Structure used to define the error code returned for an unsupported
+ * ioctl.  The table is scanned until iet_lx_cmd_str is NULL.
+ */
+typedef struct ioc_errno_translator {
+ int iet_lx_cmd; /* command number compared with the caller's cmd */
+ char *iet_lx_cmd_str; /* name of iet_lx_cmd, used in debug output */
+ int iet_errno; /* errno value reported for this command */
+} ioc_errno_translator_t;
+
+/* Structure used to convert oss format flags into Solaris options. */
+typedef struct oss_fmt_translator {
+ int oft_oss_fmt;
+ int oft_encoding;
+ int oft_precision;
+} oss_fmt_translator_t;
+
+/* Translator forward declarations. */
+static oss_fmt_translator_t oft_table[];
+static ioc_cmd_translator_t ioc_translators_file[];
+static ioc_cmd_translator_t ioc_translators_fifo[];
+static ioc_cmd_translator_t ioc_translators_sock[];
+static ioc_dev_translator_t ioc_translator_ptm;
+static ioc_dev_translator_t *ioc_translators_dev[];
+static ioc_fs_translator_t *ioc_translators_fs[];
+static ioc_errno_translator_t ioc_translators_errno[];
+
+/*
+ * Interface name table: a singly linked list pairing each native
+ * interface name with the name presented to Linux applications.
+ * ifname_from_linux() and ifname_from_solaris() hold ifname_mtx while
+ * walking it.
+ */
+typedef struct ifname_map {
+ char im_linux[IFNAMSIZ]; /* name shown to Linux callers */
+ char im_solaris[IFNAMSIZ]; /* native interface name */
+ struct ifname_map *im_next; /* next entry, NULL at the tail */
+} ifname_map_t;
+
+static ifname_map_t *ifname_map;
+static mutex_t ifname_mtx;
+
+/*
+ * Macros and structures to help convert integers to the string
+ * values that they represent (for displaying in debug output).
+ * I2S_ENTRY(x) builds an entry from a constant and its spelling;
+ * I2S_END is the { 0, NULL } terminator.
+ */
+#define I2S_ENTRY(x) { x, #x },
+#define I2S_END { 0, NULL }
+
+typedef struct int2str {
+ int i2s_int; /* numeric value */
+ char *i2s_str; /* spelling of the value; NULL ends a table */
+} int2str_t;
+
+static int2str_t st_mode_strings[] = { /* names of the S_IFMT file types */
+ I2S_ENTRY(S_IFIFO)
+ I2S_ENTRY(S_IFCHR)
+ I2S_ENTRY(S_IFDIR)
+ I2S_ENTRY(S_IFBLK)
+ I2S_ENTRY(S_IFREG)
+ I2S_ENTRY(S_IFLNK)
+ I2S_ENTRY(S_IFSOCK)
+ I2S_ENTRY(S_IFDOOR)
+ I2S_ENTRY(S_IFPORT)
+ I2S_END
+};
+
+static int2str_t oss_fmt_str[] = { /* names of the LX_OSS_AFMT_* values */
+ I2S_ENTRY(LX_OSS_AFMT_QUERY)
+ I2S_ENTRY(LX_OSS_AFMT_MU_LAW)
+ I2S_ENTRY(LX_OSS_AFMT_A_LAW)
+ I2S_ENTRY(LX_OSS_AFMT_IMA_ADPCM)
+ I2S_ENTRY(LX_OSS_AFMT_U8)
+ I2S_ENTRY(LX_OSS_AFMT_S16_LE)
+ I2S_ENTRY(LX_OSS_AFMT_S16_BE)
+ I2S_ENTRY(LX_OSS_AFMT_S8)
+ I2S_ENTRY(LX_OSS_AFMT_U16_LE)
+ I2S_ENTRY(LX_OSS_AFMT_U16_BE)
+ I2S_ENTRY(LX_OSS_AFMT_MPEG)
+ I2S_END
+};
+
+/*
+ * Emit debugging details about an ioctl request: the caller's message,
+ * the command number and name, the fd and its path and, when "stat" is
+ * non-NULL, the file type plus (for character and block devices) the
+ * devt and, if it can be looked up, the driver name.
+ *
+ * Nothing is printed when lx_debug_enabled is 0.  errno is saved on
+ * entry and restored before the normal return so that callers can still
+ * report the error that led them here.
+ */
+static void
+lx_ioctl_msg(int fd, int cmd, char *lx_cmd_str, struct stat *stat, char *msg)
+{
+	int errno_backup = errno;
+	char *path, path_buf[MAXPATHLEN];
+
+	assert(msg != NULL);
+
+	if (lx_debug_enabled == 0)
+		return;
+
+	path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+	if (path == NULL)
+		path = "?";
+
+	if (lx_cmd_str == NULL)
+		lx_cmd_str = "?";
+
+	/* Display the initial error message and extended ioctl information. */
+	lx_debug("\t%s", msg);
+	lx_debug("\tlx_ioctl(): cmd = 0x%x - %s, fd = %d - %s",
+	    cmd, lx_cmd_str, fd, path);
+
+	/* Display information about the target file, if it's available. */
+	if (stat != NULL) {
+		major_t fd_major = getmajor(stat->st_rdev);
+		minor_t fd_minor = getminor(stat->st_rdev);
+		int fd_mode = stat->st_mode & S_IFMT;
+		char *fd_mode_str = "unknown";
+		char buf[LX_MSG_MAXLEN];
+		size_t len;
+		int i;
+
+		/* Translate the file type bits into a string. */
+		for (i = 0; st_mode_strings[i].i2s_str != NULL; i++) {
+			if (fd_mode != st_mode_strings[i].i2s_int)
+				continue;
+			fd_mode_str = st_mode_strings[i].i2s_str;
+			break;
+		}
+
+		(void) snprintf(buf, sizeof (buf),
+		    "\tlx_ioctl(): mode = %s", fd_mode_str);
+
+		if ((fd_mode == S_IFCHR) || (fd_mode == S_IFBLK)) {
+			/* A character buffer, not an array of pointers. */
+			char fd_driver[MODMAXNAMELEN + 1];
+
+			/* This is a device so display the devt. */
+			len = strlen(buf);
+			(void) snprintf(buf + len, sizeof (buf) - len,
+			    "; rdev = [%d, %d]", fd_major, fd_minor);
+
+			/*
+			 * Try to display the driver's name.  The name is
+			 * only appended when the lookup succeeded;
+			 * otherwise fd_driver holds no valid string.
+			 */
+			if (modctl(MODGETNAME,
+			    fd_driver, sizeof (fd_driver), &fd_major) == 0) {
+				len = strlen(buf);
+				(void) snprintf(buf + len, sizeof (buf) - len,
+				    "; driver = %s", fd_driver);
+			}
+		}
+
+		/* buf is data, never a format string. */
+		lx_debug("%s", buf);
+	}
+
+	/* Restore errno. */
+	errno = errno_backup;
+}
+
+/*
+ * Report whether the module named LDLINUX_MOD is pushed on the stream
+ * behind fd.  Returns 1 if it is, 0 if it is not, and -errno when either
+ * I_LIST ioctl fails.
+ */
+static int
+ldlinux_check(int fd)
+{
+	struct str_mlist modules[MAX_STRMODS];
+	struct str_list list;
+	int i, idx;
+
+	/* First ask how many modules are on the stream. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, I_LIST, "I_LIST");
+	i = ioctl(fd, I_LIST, (struct str_list *)NULL);
+	if (i < 0) {
+		lx_debug("\tldlinux_check(): unable to count stream modules");
+		return (-errno);
+	}
+
+	/* The local array only has room for MAX_STRMODS names. */
+	assert(i <= MAX_STRMODS);
+
+	/* Then fetch the module names themselves. */
+	list.sl_modlist = modules;
+	list.sl_nmods = i;
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, I_LIST, "I_LIST");
+	if (ioctl(fd, I_LIST, &list) < 0) {
+		lx_debug("\tldlinux_check(): unable to list stream modules");
+		return (-errno);
+	}
+
+	for (idx = 0; idx < list.sl_nmods; idx++) {
+		if (strcmp(list.sl_modlist[idx].l_name, LDLINUX_MOD) == 0)
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Wrap "cmd" and its argument buffer in a struct strioctl and issue it
+ * through I_STR.  Returns 0 on success and -1 on failure (errno is left
+ * as set by ioctl()).
+ */
+static int
+ioctl_istr(int fd, int cmd, char *cmd_str, void *arg, int arg_len)
+{
+	struct strioctl req;
+
+	req.ic_cmd = cmd;
+	req.ic_timout = 0;
+	req.ic_len = arg_len;
+	req.ic_dp = arg;
+
+	lx_debug("\tioctl_istr(%d, 0x%x - %s, ...)", fd, cmd, cmd_str);
+
+	return ((ioctl(fd, I_STR, &req) < 0) ? -1 : 0);
+}
+
+/*
+ * Append a mapping for the native interface "if_name" unless the table
+ * already holds one.
+ *
+ * IFF_LOOPBACK interfaces are presented as "lo", then "lo:1", "lo:2"...
+ * IFF_BROADCAST interfaces are presented as "eth0", "eth1"...
+ * Anything else keeps its native name.
+ *
+ * Returns 0 on success (including "already present") and -1 if the new
+ * entry cannot be allocated.  The caller locks the name table.
+ */
+static int
+ifname_add(char *if_name, int if_flags)
+{
+	static int eth_index = 0;
+	static int lo_index = 0;
+	ifname_map_t **link = &ifname_map;
+	ifname_map_t *entry;
+
+	/* Walk to the tail, bailing out if the name is already known. */
+	while (*link != NULL) {
+		if (strncmp((*link)->im_solaris, if_name, IFNAMSIZ) == 0)
+			return (0);
+		link = &(*link)->im_next;
+	}
+
+	entry = calloc(1, sizeof (ifname_map_t));
+	if (entry == NULL)
+		return (-1);
+	*link = entry;
+
+	(void) strlcpy(entry->im_solaris, if_name, IFNAMSIZ);
+
+	if (if_flags & IFF_LOOPBACK) {
+		/* Loopback */
+		if (lo_index != 0) {
+			(void) snprintf(entry->im_linux, IFNAMSIZ,
+			    "lo:%d", lo_index);
+		} else {
+			(void) strlcpy(entry->im_linux, "lo", IFNAMSIZ);
+		}
+		lo_index++;
+	} else if (if_flags & IFF_BROADCAST) {
+		/* Assume ether if it has a broadcast address */
+		(void) snprintf(entry->im_linux, IFNAMSIZ,
+		    "eth%d", eth_index);
+		eth_index++;
+	} else {
+		/* Do not translate unknown interfaces */
+		(void) strlcpy(entry->im_linux, if_name, IFNAMSIZ);
+	}
+
+	lx_debug("map interface %s -> %s", if_name, entry->im_linux);
+
+	return (0);
+}
+
+/*
+ * qsort() comparison callback: orders two struct ifreq entries by
+ * interface name, comparing at most IFNAMSIZ bytes.
+ */
+static int
+ifname_cmp(const void *p1, const void *p2)
+{
+	const struct ifreq *lhs = p1;
+	const struct ifreq *rhs = p2;
+
+	return (strncmp(lhs->ifr_name, rhs->ifr_name, IFNAMSIZ));
+}
+
+/*
+ * (Re-)scan all interfaces and add them to the name table.
+ *
+ * The interface count comes from SIOCGIFNUM, the list from SIOCGIFCONF
+ * and the flags from SIOCGIFFLAGS; the list is sorted by name and each
+ * entry is passed to ifname_add().  Only the entries SIOCGIFCONF actually
+ * returned are processed, so unused zero-filled slots never become
+ * empty-name mappings.
+ *
+ * Returns 0 on success (including when there are no interfaces) and -1
+ * on failure.  Caller locks the name table.
+ */
+static int
+ifname_scan(void)
+{
+	struct ifconf conf;
+	int i, fd, ifcount, returned;
+
+	conf.ifc_buf = NULL;
+
+	if ((fd = socket(PF_INET, SOCK_DGRAM, 0)) < 0)
+		goto fail;
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFNUM, "SIOCGIFNUM");
+	if (ioctl(fd, SIOCGIFNUM, &ifcount) < 0) {
+		lx_debug("\tifname_scan(): unable to get number of interfaces");
+		goto fail;
+	}
+
+	/*
+	 * Nothing to map.  Handle this here rather than depending on what
+	 * calloc() returns for a zero-element request.
+	 */
+	if (ifcount <= 0) {
+		(void) close(fd);
+		return (0);
+	}
+
+	conf.ifc_len = ifcount * sizeof (struct ifreq);
+	if ((conf.ifc_buf = calloc(ifcount, sizeof (struct ifreq))) == NULL)
+		goto fail;
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF");
+	if (ioctl(fd, SIOCGIFCONF, &conf) < 0) {
+		lx_debug("\tifname_scan(): unable to get interfaces");
+		goto fail;
+	}
+
+	/*
+	 * ifc_len now holds the number of bytes that were filled in, which
+	 * may be fewer than we made room for if an interface went away
+	 * between the two ioctls.
+	 */
+	returned = conf.ifc_len / (int)sizeof (struct ifreq);
+	if (returned < ifcount)
+		ifcount = (returned > 0) ? returned : 0;
+
+	/* Get the interface flags */
+	for (i = 0; i < ifcount; i++) {
+		lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+		    fd, SIOCGIFFLAGS, "SIOCGIFFLAGS");
+		if (ioctl(fd, SIOCGIFFLAGS, &conf.ifc_req[i]) < 0) {
+			conf.ifc_req[i].ifr_flags = 0;
+			lx_debug("\tifname_scan(): unable to get flags for %s",
+			    conf.ifc_req[i].ifr_name);
+		}
+	}
+
+	/*
+	 * Sort the interfaces by name to preserve the order
+	 * across reboots of this zone.  Note that the order of
+	 * interface names won't be consistent across network
+	 * configuration changes.  i.e. if network interfaces
+	 * are added or removed from a zone (either dynamically
+	 * or statically) the network interface name to physical
+	 * network interface mappings that linux apps see may
+	 * change.
+	 */
+	qsort(conf.ifc_req, ifcount, sizeof (struct ifreq), ifname_cmp);
+
+	/* Add to the name table */
+	for (i = 0; i < ifcount; i++)
+		if (ifname_add(conf.ifc_req[i].ifr_name,
+		    conf.ifc_req[i].ifr_flags) != 0)
+			goto fail;
+
+	(void) close(fd);
+	free(conf.ifc_buf);
+
+	return (0);
+
+fail:
+	if (fd >= 0)
+		(void) close(fd);
+	if (conf.ifc_buf != NULL)
+		free(conf.ifc_buf);
+
+	return (-1);
+}
+
+/*
+ * Replace the Linux interface name in "name" (a buffer of at least
+ * IFNAMSIZ bytes) with the matching native name.  If the name is not in
+ * the table the interfaces are rescanned once and the lookup repeated.
+ * Returns 0 on success and -1 if no mapping exists.
+ */
+static int
+ifname_from_linux(char *name)
+{
+	ifname_map_t *hit = NULL;
+	int pass;
+
+	(void) mutex_lock(&ifname_mtx);
+
+	for (pass = 0; pass < 2; pass++) {
+		for (hit = ifname_map; hit != NULL; hit = hit->im_next) {
+			if (strncmp(hit->im_linux, name, IFNAMSIZ) == 0)
+				break;
+		}
+		if (hit != NULL)
+			break;
+
+		/* First miss: refresh the table; give up if that fails. */
+		if (pass == 0 && ifname_scan() != 0)
+			break;
+	}
+
+	(void) mutex_unlock(&ifname_mtx);
+
+	if (hit == NULL)
+		return (-1);
+
+	(void) strlcpy(name, hit->im_solaris, IFNAMSIZ);
+	return (0);
+}
+
+/*
+ * Replace the native interface name in "name" (a buffer of at least
+ * IFNAMSIZ bytes) with the matching Linux name.  If the name is not in
+ * the table the interfaces are rescanned once and the lookup repeated.
+ * Returns 0 on success and -1 if no mapping exists.
+ */
+static int
+ifname_from_solaris(char *name)
+{
+	ifname_map_t *hit = NULL;
+	int pass;
+
+	(void) mutex_lock(&ifname_mtx);
+
+	for (pass = 0; pass < 2; pass++) {
+		for (hit = ifname_map; hit != NULL; hit = hit->im_next) {
+			if (strncmp(hit->im_solaris, name, IFNAMSIZ) == 0)
+				break;
+		}
+		if (hit != NULL)
+			break;
+
+		/* First miss: refresh the table; give up if that fails. */
+		if (pass == 0 && ifname_scan() != 0)
+			break;
+	}
+
+	(void) mutex_unlock(&ifname_mtx);
+
+	if (hit == NULL)
+		return (-1);
+
+	(void) strlcpy(name, hit->im_linux, IFNAMSIZ);
+	return (0);
+}
+
+/*
+ * Called to initialize the ioctl translation subsystem: resolves the
+ * major number of every device translator and builds the interface name
+ * table.  Failures are reported through lx_err() but are not fatal; the
+ * function always returns 0.
+ */
+int
+lx_ioctl_init()
+{
+	ioc_dev_translator_t **idtp;
+
+	/* Figure out the major numbers for our devices translators. */
+	for (idtp = ioc_translators_dev; *idtp != NULL; idtp++) {
+		ioc_dev_translator_t *idt = *idtp;
+
+		if (modctl(MODGETMAJBIND, idt->idt_driver,
+		    strlen(idt->idt_driver) + 1, &idt->idt_major) == 0)
+			continue;
+
+		/* Lookup failed: complain and mark the translator unusable. */
+		lx_err(gettext("%s%s) failed: %s\n"),
+		    "lx_ioctl_init(): modctl(MODGETMAJBIND, ",
+		    idt->idt_driver, strerror(errno));
+		lx_err(gettext("%s: %s translator disabled for: %s\n"),
+		    "lx_ioctl_init()", "ioctl", idt->idt_driver);
+		idt->idt_major = (major_t)-1;
+	}
+
+	/* Create the interface name table */
+	if (ifname_scan() != 0)
+		lx_err("lx_ioctl_init(): ifname_scan() failed\n");
+
+	return (0);
+}
+
+/*
+ * Scan a translator table (terminated by an entry whose ict_func is
+ * NULL) for the entry whose ict_lx_cmd equals "cmd".  Returns that entry
+ * or NULL if there is none.
+ */
+static ioc_cmd_translator_t *
+lx_ioctl_find_ict_cmd(ioc_cmd_translator_t *ict, int cmd)
+{
+	assert(ict != NULL);
+
+	for (; (ict != NULL) && (ict->ict_func != NULL); ict++) {
+		if (ict->ict_lx_cmd == cmd)
+			return (ict);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Restrict a translator result to what Linux ioctl(2) is documented to
+ * return: non-negative values, -EBADF, -EFAULT and -ENOTTY pass through
+ * unchanged, every other negative value becomes -EINVAL.
+ */
+static int
+lx_ioctl_filter_ret(int ret)
+{
+	if (ret >= 0)
+		return (ret);
+
+	switch (ret) {
+	case -EBADF:
+	case -EFAULT:
+	case -ENOTTY:
+		return (ret);
+	default:
+		return (-EINVAL);
+	}
+}
+
+/*
+ * Main entry point for the ioctl translator.
+ *
+ * p1 is the file descriptor, p2 the command and p3 the argument.  A
+ * command translator is chosen from the file type (regular file, socket,
+ * fifo, or the character device's major number) and, failing that, from
+ * the filesystem type.  If none handles the command the errno
+ * translators are consulted, and finally the ioctl is reported as
+ * unsupported.  The result is the translator's return value filtered
+ * through lx_ioctl_filter_ret(), or a negative errno.
+ */
+int
+lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	int cmd = (int)p2;
+	intptr_t arg = (uintptr_t)p3;
+	struct stat stat;
+	ioc_cmd_translator_t *ict = NULL;
+	ioc_errno_translator_t *iet;
+	mode_t fmt;
+	int i;
+
+	if (fstat(fd, &stat) != 0) {
+		lx_ioctl_msg(fd, cmd, NULL, NULL,
+		    "lx_ioctl(): fstat() failed");
+
+		/*
+		 * Linux ioctl(2) is only documented to return EBADF, EFAULT,
+		 * EINVAL or ENOTTY.
+		 *
+		 * EINVAL is documented to be "Request or argp is not valid",
+		 * so it's reasonable to force any errno that's not EBADF,
+		 * EFAULT or ENOTTY to be EINVAL.
+		 */
+		switch (errno) {
+		case EBADF:
+		case EFAULT:
+		case ENOTTY:
+			break;
+		default:
+			errno = EINVAL;
+			break;
+		}
+
+		return (-errno);
+	}
+
+	/* Pick the translator group that matches the file type. */
+	fmt = stat.st_mode & S_IFMT;
+	if (fmt == S_IFREG) {
+		ict = ioc_translators_file;
+	} else if (fmt == S_IFSOCK) {
+		ict = ioc_translators_sock;
+	} else if (fmt == S_IFIFO) {
+		ict = ioc_translators_fifo;
+	} else if (fmt == S_IFCHR) {
+		major_t fd_major = getmajor(stat.st_rdev);
+
+		/* Is there a device translator for this major number? */
+		for (i = 0; ioc_translators_dev[i] != NULL; i++) {
+			if (ioc_translators_dev[i]->idt_major == fd_major) {
+				ict = ioc_translators_dev[i]->idt_cmds;
+				break;
+			}
+		}
+	}
+
+	/* Does the selected group translate this specific command? */
+	if (ict != NULL)
+		ict = lx_ioctl_find_ict_cmd(ict, cmd);
+
+	if (ict != NULL) {
+		lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat,
+		    "lx_ioctl(): emulating ioctl");
+
+		return (lx_ioctl_filter_ret(ict->ict_func(fd, &stat,
+		    ict->ict_cmd, ict->ict_cmd_str, arg)));
+	}
+
+	/*
+	 * No file or device translator handled the command, so look for
+	 * a translator group tied to the filesystem type instead.
+	 */
+	for (i = 0; ioc_translators_fs[i] != NULL; i++) {
+		if (strcmp(stat.st_fstype,
+		    ioc_translators_fs[i]->ift_filesystem) == 0) {
+			ict = ioc_translators_fs[i]->ift_cmds;
+			break;
+		}
+	}
+
+	if (ict != NULL)
+		ict = lx_ioctl_find_ict_cmd(ict, cmd);
+
+	if (ict != NULL) {
+		lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat,
+		    "lx_ioctl(): emulating ioctl");
+
+		return (lx_ioctl_filter_ret(ict->ict_func(fd, &stat,
+		    ict->ict_cmd, ict->ict_cmd_str, arg)));
+	}
+
+	/*
+	 * No translator for this ioctl was found.
+	 * Check if there is an errno translator.
+	 */
+	for (iet = ioc_translators_errno; iet->iet_lx_cmd_str != NULL; iet++) {
+		if (iet->iet_lx_cmd != cmd)
+			continue;
+
+		/* We found an errno translator for this ioctl. */
+		lx_ioctl_msg(fd, cmd, iet->iet_lx_cmd_str, &stat,
+		    "lx_ioctl(): emulating errno");
+
+		return (lx_ioctl_filter_ret(-iet->iet_errno));
+	}
+
+	lx_ioctl_msg(fd, cmd, NULL, &stat,
+	    "lx_ioctl(): unsupported linux ioctl");
+	lx_unsupported(gettext("lx_ioctl(): unsupported linux ioctl (%d)"),
+	    cmd);
+	return (-EINVAL);
+}
+
+
+/*
+ * Ioctl translator functions.
+ */
+/*
+ * Used by translators that want to explicitly return EINVAL for an
+ * ioctl(2) instead of having the translation framework do it implicitly.
+ * This allows us to indicate which unsupported ioctl(2)s should not
+ * trigger a SIGSYS when running in LX_STRICT mode.
+ */
+/* ARGSUSED */
+static int
+ict_einval(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+ return (-EINVAL); /* fixed result; every argument is ignored */
+}
+
+/*
+ * Pass-through translator: issues "cmd" with the caller's argument
+ * unchanged and returns the ioctl result, or -errno on failure.
+ */
+static int
+/*ARGSUSED*/
+ict_pass(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	int rv;
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, cmd, cmd_str);
+
+	if ((rv = ioctl(fd, cmd, arg)) < 0)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * LX_TCSBRKP translator: the caller's argument is ignored and the native
+ * TCSBRK ioctl is issued with a pointer to a zero-valued int.  Returns
+ * the ioctl result, or -errno on failure.
+ */
+static int
+/*ARGSUSED*/
+ict_tcsbrkp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	int dur = 0;
+	int rv;
+
+	assert(cmd == LX_TCSBRKP);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TCSBRK, "TCSBRK");
+
+	if ((rv = ioctl(fd, TCSBRK, (intptr_t)&dur)) < 0)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * SIOCATMARK translator.  The request is only forwarded for SOCK_STREAM
+ * sockets; any other socket type (or a failed SO_TYPE query) yields
+ * -EINVAL.  The int at "arg" is copied in, passed to the native ioctl
+ * and copied back out.  Returns 0 on success or a negative errno.
+ */
+static int
+/*ARGSUSED*/
+ict_sioifoob(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	int *reqp = (int *)arg;
+	int req;
+	int sock_type;
+	int optlen = sizeof (sock_type);
+
+	assert(cmd == SIOCATMARK);
+
+	if (uucopy(reqp, &req, sizeof (req)) != 0)
+		return (-errno);
+
+	/*
+	 * Linux expects a SIOCATMARK of a UDP socket to return EINVAL, while
+	 * Solaris allows it.
+	 */
+	if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &sock_type, &optlen) < 0) {
+		lx_debug("ict_siofmark: getsockopt failed, errno %d", errno);
+		return (-EINVAL);
+	}
+
+	if (optlen != sizeof (sock_type) || sock_type != SOCK_STREAM)
+		return (-EINVAL);
+
+	if (ioctl(fd, cmd, &req) < 0)
+		return (-errno);
+
+	return ((uucopy(&req, reqp, sizeof (req)) != 0) ? -errno : 0);
+}
+
+/*
+ * Translator for the interface ioctls that take a struct ifreq.  The
+ * request is copied in, its interface name mapped from the Linux name to
+ * the native one, the native ioctl issued, the name mapped back and the
+ * result copied out.  Returns 0 on success; -EINVAL if a name cannot be
+ * mapped; otherwise -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_sioifreq(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct ifreq *reqp = (struct ifreq *)arg;
+	struct ifreq req;
+
+	assert(cmd == SIOCGIFFLAGS || cmd == SIOCSIFFLAGS ||
+	    cmd == SIOCGIFADDR || cmd == SIOCSIFADDR ||
+	    cmd == SIOCGIFDSTADDR || cmd == SIOCSIFDSTADDR ||
+	    cmd == SIOCGIFBRDADDR || cmd == SIOCSIFBRDADDR ||
+	    cmd == SIOCGIFNETMASK || cmd == SIOCSIFNETMASK ||
+	    cmd == SIOCGIFMETRIC || cmd == SIOCSIFMETRIC ||
+	    cmd == SIOCGIFMTU || cmd == SIOCSIFMTU);
+
+	/* Fetch the caller's request. */
+	if (uucopy(reqp, &req, sizeof (req)) != 0)
+		return (-errno);
+
+	/* The native ioctl needs the native interface name. */
+	if (ifname_from_linux(req.ifr_name) < 0)
+		return (-EINVAL);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, %.14s",
+	    fd, cmd, cmd_str, req.ifr_name);
+
+	if (ioctl(fd, cmd, &req) < 0)
+		return (-errno);
+
+	/* Hand the Linux name back to the caller. */
+	if (ifname_from_solaris(req.ifr_name) < 0)
+		return (-EINVAL);
+
+	return ((uucopy(&req, reqp, sizeof (req)) != 0) ? -errno : 0);
+}
+
+/*
+ * LX_SIOCGIFCONF translator.
+ *
+ * - A NULL ifc_buf is a size query: ifc_len is set to the number of
+ *   interfaces (from SIOCGIFNUM) times sizeof (struct ifreq) and the
+ *   ifconf is copied back out.
+ * - A non-NULL buffer with ifc_len == 0 has no room for any entry, so
+ *   the ifconf is returned untouched rather than letting the native
+ *   ioctl write into a buffer the caller declared to be empty.
+ * - Otherwise the native SIOCGIFCONF fills the caller's buffer and every
+ *   returned interface name is mapped to its Linux name.
+ *
+ * Returns 0 on success, -EINVAL if a name cannot be mapped, otherwise
+ * -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_siocgifconf(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct ifconf conf, *confp = (struct ifconf *)arg;
+	int i, ifcount;
+
+	assert(cmd == LX_SIOCGIFCONF);
+
+	/* Copy in the data. */
+	if (uucopy(confp, &conf, sizeof (conf)) != 0)
+		return (-errno);
+
+	if (conf.ifc_buf == NULL) {
+		/* They want to know how much space the full list needs. */
+		lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+		    fd, SIOCGIFNUM, "SIOCGIFNUM");
+		if (ioctl(fd, SIOCGIFNUM, (intptr_t)&ifcount) < 0)
+			return (-errno);
+		conf.ifc_len = ifcount * sizeof (struct ifreq);
+
+		/* Copy out the data. */
+		if (uucopy(&conf, confp, sizeof (conf)) != 0)
+			return (-errno);
+		return (0);
+	}
+
+	if (conf.ifc_len == 0) {
+		/* No room for any entry: report an empty list. */
+		if (uucopy(&conf, confp, sizeof (conf)) != 0)
+			return (-errno);
+		return (0);
+	}
+
+	/* Get interface configuration list. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF");
+	if (ioctl(fd, SIOCGIFCONF, &conf) < 0)
+		return (-errno);
+
+	/* Rename interfaces to linux */
+	for (i = 0; i < conf.ifc_len / sizeof (struct ifreq); i++)
+		if (ifname_from_solaris(conf.ifc_req[i].ifr_name) < 0)
+			return (-EINVAL);
+
+	/* Copy out the data */
+	if (uucopy(&conf, confp, sizeof (conf)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Translator for LX_SIOCGIFHWADDR / LX_SIOCSIFHWADDR.
+ *
+ * Setting the hardware address is not supported and yields -EINVAL once
+ * the request has been copied in successfully.  For a get, interfaces
+ * named "lo" or "lo:*" report a zeroed address of family
+ * LX_ARPHRD_LOOPBACK; for any other interface the address comes from
+ * SIOCGIFADDR followed by SIOCGARP, with family LX_ARPHRD_ETHER when the
+ * Linux name starts with "eth" and LX_ARPHRD_VOID otherwise.
+ *
+ * Returns 0 on success, -EINVAL if a name cannot be mapped, otherwise
+ * -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_siocifhwaddr(int fd, struct stat *stat, int cmd, char *cmd_str,
+    intptr_t arg)
+{
+	struct ifreq *reqp = (struct ifreq *)arg;
+	struct ifreq req;
+	struct arpreq arpreq;
+	int is_loopback;
+
+	assert(cmd == LX_SIOCGIFHWADDR || cmd == LX_SIOCSIFHWADDR);
+
+	/* Copy in the data */
+	if (uucopy(reqp, &req, sizeof (struct ifreq)) != 0)
+		return (-errno);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, lx %.14s)",
+	    fd, cmd,
+	    (cmd == LX_SIOCGIFHWADDR) ? "SIOCGIFHWADDR" : "SIOCSIFHWADDR",
+	    req.ifr_name);
+
+	/*
+	 * SIOCSIFHWADDR is rejected only after the copy-in above, so a
+	 * bad pointer is still reported as EFAULT.
+	 */
+	if (cmd == LX_SIOCSIFHWADDR) {
+		lx_unsupported(gettext(
+		    "lx_ioctl(): unsupported linux ioctl: %s"),
+		    "SIOCSIFHWADDR");
+		return (-EINVAL);
+	}
+
+	is_loopback = (strcmp(req.ifr_name, "lo") == 0) ||
+	    (strncmp(req.ifr_name, "lo:", 3) == 0);
+
+	if (is_loopback) {
+		/* Abuse ifr_addr for linux ifr_hwaddr */
+		bzero(&req.ifr_addr, sizeof (struct sockaddr));
+		req.ifr_addr.sa_family = LX_ARPHRD_LOOPBACK;
+	} else {
+		if (ifname_from_linux(req.ifr_name) < 0)
+			return (-EINVAL);
+
+		lx_debug("\tioctl(%d, 0x%x - %s, %.14s)",
+		    fd, SIOCGIFADDR, "SIOCGIFADDR", req.ifr_name);
+
+		if (ioctl(fd, SIOCGIFADDR, &req) < 0)
+			return (-errno);
+
+		/* Look the interface's own address up in the ARP table. */
+		bcopy(&req.ifr_addr, &arpreq.arp_pa, sizeof (struct sockaddr));
+
+		lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+		    fd, SIOCGARP, "SIOCGARP");
+
+		if (ioctl(fd, SIOCGARP, &arpreq) < 0)
+			return (-errno);
+
+		if (ifname_from_solaris(req.ifr_name) < 0)
+			return (-EINVAL);
+
+		/* Abuse ifr_addr for linux ifr_hwaddr */
+		bcopy(&arpreq.arp_ha, &req.ifr_addr, sizeof (struct sockaddr));
+		req.ifr_addr.sa_family =
+		    (strncmp(req.ifr_name, "eth", 3) == 0) ?
+		    LX_ARPHRD_ETHER : LX_ARPHRD_VOID;
+	}
+
+	/* Copy out the data */
+	if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Convert a Linux termios into a native termios.  The flag words are
+ * copied as they are; the control characters are moved from their LX_V*
+ * slots to the native V* slots.  Depending on ICANON either VEOF/VEOL or
+ * VMIN/VTIME are filled in.
+ */
+static void
+l2s_termios(struct lx_termios *l_tios, struct termios *s_tios)
+{
+	/* Mode-independent control characters, in assignment order. */
+	static const struct {
+		int native;
+		int lx;
+	} cc_map[] = {
+		{ VEOL2,	LX_VEOL2 },
+		{ VERASE,	LX_VERASE },
+		{ VKILL,	LX_VKILL },
+		{ VREPRINT,	LX_VREPRINT },
+		{ VLNEXT,	LX_VLNEXT },
+		{ VWERASE,	LX_VWERASE },
+		{ VINTR,	LX_VINTR },
+		{ VQUIT,	LX_VQUIT },
+		{ VSWTCH,	LX_VSWTC },
+		{ VSTART,	LX_VSTART },
+		{ VSTOP,	LX_VSTOP },
+		{ VSUSP,	LX_VSUSP },
+		{ VDISCARD,	LX_VDISCARD }
+	};
+	size_t k;
+
+	assert((l_tios != NULL) && (s_tios != NULL));
+
+	bzero(s_tios, sizeof (*s_tios));
+
+	s_tios->c_iflag = l_tios->c_iflag;
+	s_tios->c_oflag = l_tios->c_oflag;
+	s_tios->c_cflag = l_tios->c_cflag;
+	s_tios->c_lflag = l_tios->c_lflag;
+
+	if (s_tios->c_lflag & ICANON) {
+		s_tios->c_cc[VEOF] = l_tios->c_cc[LX_VEOF];
+		s_tios->c_cc[VEOL] = l_tios->c_cc[LX_VEOL];
+	} else {
+		s_tios->c_cc[VMIN] = l_tios->c_cc[LX_VMIN];
+		s_tios->c_cc[VTIME] = l_tios->c_cc[LX_VTIME];
+	}
+
+	for (k = 0; k < sizeof (cc_map) / sizeof (cc_map[0]); k++)
+		s_tios->c_cc[cc_map[k].native] = l_tios->c_cc[cc_map[k].lx];
+}
+
+/*
+ * Convert a Linux termio into a native termio.  The flag words are
+ * copied as they are; the control characters are moved from their LX_V*
+ * slots to the native V* slots.  Depending on ICANON either VEOF or
+ * VMIN/VTIME are filled in.
+ */
+static void
+l2s_termio(struct lx_termio *l_tio, struct termio *s_tio)
+{
+	/* Mode-independent control characters, in assignment order. */
+	static const struct {
+		int native;
+		int lx;
+	} cc_map[] = {
+		{ VINTR,	LX_VINTR },
+		{ VQUIT,	LX_VQUIT },
+		{ VERASE,	LX_VERASE },
+		{ VKILL,	LX_VKILL },
+		{ VSWTCH,	LX_VSWTC }
+	};
+	size_t k;
+
+	assert((l_tio != NULL) && (s_tio != NULL));
+
+	bzero(s_tio, sizeof (*s_tio));
+
+	s_tio->c_iflag = l_tio->c_iflag;
+	s_tio->c_oflag = l_tio->c_oflag;
+	s_tio->c_cflag = l_tio->c_cflag;
+	s_tio->c_lflag = l_tio->c_lflag;
+
+	if (s_tio->c_lflag & ICANON) {
+		s_tio->c_cc[VEOF] = l_tio->c_cc[LX_VEOF];
+	} else {
+		s_tio->c_cc[VMIN] = l_tio->c_cc[LX_VMIN];
+		s_tio->c_cc[VTIME] = l_tio->c_cc[LX_VTIME];
+	}
+
+	for (k = 0; k < sizeof (cc_map) / sizeof (cc_map[0]); k++)
+		s_tio->c_cc[cc_map[k].native] = l_tio->c_cc[cc_map[k].lx];
+}
+
+/*
+ * Fill a struct lx_cc with the EOF, EOL, MIN and TIME control characters
+ * of a Linux termios; the rest of the structure is zeroed.
+ */
+static void
+termios2lx_cc(struct lx_termios *l_tios, struct lx_cc *lio)
+{
+	assert((l_tios != NULL) && (lio != NULL));
+
+	bzero(lio, sizeof (*lio));
+
+	lio->vmin = l_tios->c_cc[LX_VMIN];
+	lio->vtime = l_tios->c_cc[LX_VTIME];
+	lio->veof = l_tios->c_cc[LX_VEOF];
+	lio->veol = l_tios->c_cc[LX_VEOL];
+}
+
+/*
+ * Fill a struct lx_cc with the EOF, MIN and TIME control characters of a
+ * Linux termio.  veol is set to 0 and the rest of the structure is
+ * zeroed.
+ */
+static void
+termio2lx_cc(struct lx_termio *l_tio, struct lx_cc *lio)
+{
+	assert((l_tio != NULL) && (lio != NULL));
+
+	bzero(lio, sizeof (*lio));
+
+	lio->vmin = l_tio->c_cc[LX_VMIN];
+	lio->vtime = l_tio->c_cc[LX_VTIME];
+	lio->veof = l_tio->c_cc[LX_VEOF];
+	lio->veol = 0;
+}
+
+/*
+ * Convert a native termios into a Linux termios.  The flag words are
+ * copied as they are; the control characters are moved from the native
+ * V* slots to their LX_V* slots.  Depending on ICANON either EOF/EOL or
+ * MIN/TIME are filled in.
+ */
+static void
+s2l_termios(struct termios *s_tios, struct lx_termios *l_tios)
+{
+	/* Mode-independent control characters, in assignment order. */
+	static const struct {
+		int lx;
+		int native;
+	} cc_map[] = {
+		{ LX_VEOL2,	VEOL2 },
+		{ LX_VERASE,	VERASE },
+		{ LX_VKILL,	VKILL },
+		{ LX_VREPRINT,	VREPRINT },
+		{ LX_VLNEXT,	VLNEXT },
+		{ LX_VWERASE,	VWERASE },
+		{ LX_VINTR,	VINTR },
+		{ LX_VQUIT,	VQUIT },
+		{ LX_VSWTC,	VSWTCH },
+		{ LX_VSTART,	VSTART },
+		{ LX_VSTOP,	VSTOP },
+		{ LX_VSUSP,	VSUSP },
+		{ LX_VDISCARD,	VDISCARD }
+	};
+	size_t k;
+
+	assert((s_tios != NULL) && (l_tios != NULL));
+
+	bzero(l_tios, sizeof (*l_tios));
+
+	l_tios->c_iflag = s_tios->c_iflag;
+	l_tios->c_oflag = s_tios->c_oflag;
+	l_tios->c_cflag = s_tios->c_cflag;
+	l_tios->c_lflag = s_tios->c_lflag;
+
+	if (s_tios->c_lflag & ICANON) {
+		l_tios->c_cc[LX_VEOF] = s_tios->c_cc[VEOF];
+		l_tios->c_cc[LX_VEOL] = s_tios->c_cc[VEOL];
+	} else {
+		l_tios->c_cc[LX_VMIN] = s_tios->c_cc[VMIN];
+		l_tios->c_cc[LX_VTIME] = s_tios->c_cc[VTIME];
+	}
+
+	for (k = 0; k < sizeof (cc_map) / sizeof (cc_map[0]); k++)
+		l_tios->c_cc[cc_map[k].lx] = s_tios->c_cc[cc_map[k].native];
+}
+
+/*
+ * Convert a native termio into a Linux termio.  The flag words are
+ * copied as they are; the control characters are moved from the native
+ * V* slots to their LX_V* slots.  Depending on ICANON either EOF or
+ * MIN/TIME are filled in.
+ */
+static void
+s2l_termio(struct termio *s_tio, struct lx_termio *l_tio)
+{
+	/* Mode-independent control characters, in assignment order. */
+	static const struct {
+		int lx;
+		int native;
+	} cc_map[] = {
+		{ LX_VINTR,	VINTR },
+		{ LX_VQUIT,	VQUIT },
+		{ LX_VERASE,	VERASE },
+		{ LX_VKILL,	VKILL },
+		{ LX_VSWTC,	VSWTCH }
+	};
+	size_t k;
+
+	assert((s_tio != NULL) && (l_tio != NULL));
+
+	bzero(l_tio, sizeof (*l_tio));
+
+	l_tio->c_iflag = s_tio->c_iflag;
+	l_tio->c_oflag = s_tio->c_oflag;
+	l_tio->c_cflag = s_tio->c_cflag;
+	l_tio->c_lflag = s_tio->c_lflag;
+
+	if (s_tio->c_lflag & ICANON) {
+		l_tio->c_cc[LX_VEOF] = s_tio->c_cc[VEOF];
+	} else {
+		l_tio->c_cc[LX_VMIN] = s_tio->c_cc[VMIN];
+		l_tio->c_cc[LX_VTIME] = s_tio->c_cc[VTIME];
+	}
+
+	for (k = 0; k < sizeof (cc_map) / sizeof (cc_map[0]); k++)
+		l_tio->c_cc[cc_map[k].lx] = s_tio->c_cc[cc_map[k].native];
+}
+
+/*
+ * Translator for TCSETS, TCSETSW and TCSETSF.  The Linux termios is
+ * copied in; if the ldlinux module is on the stream its EOF/EOL/MIN/TIME
+ * values are pushed down with TIOCSETLD; then the termios is converted
+ * and handed to the native ioctl.  Returns the ioctl result, the
+ * ldlinux_check() error, or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tcsets(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct lx_termios *l_tiosp = (struct lx_termios *)arg;
+	struct lx_termios l_tios;
+	struct termios s_tios;
+	struct lx_cc lio;
+	int ldlinux, ret;
+
+	assert(cmd == TCSETS || cmd == TCSETSW || cmd == TCSETSF);
+
+	/* Copy in the data. */
+	if (uucopy(l_tiosp, &l_tios, sizeof (l_tios)) != 0)
+		return (-errno);
+
+	/*
+	 * The TIOCSETLD/TIOCGETLD ioctls are only supported by the
+	 * ldlinux strmod.  So make sure the module exists on the
+	 * target stream before we invoke the ioctl.
+	 */
+	ldlinux = ldlinux_check(fd);
+	if (ldlinux < 0)
+		return (ldlinux);
+
+	if (ldlinux == 1) {
+		termios2lx_cc(&l_tios, &lio);
+		if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD",
+		    &lio, sizeof (lio)) < 0)
+			return (-errno);
+	}
+
+	l2s_termios(&l_tios, &s_tios);
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, cmd, cmd_str);
+
+	ret = ioctl(fd, cmd, (intptr_t)&s_tios);
+	if (ret < 0)
+		return (-errno);
+
+	return (ret);
+}
+
+/*
+ * Translator for TCSETA, TCSETAW and TCSETAF.  The Linux termio is
+ * copied in; if the ldlinux module is on the stream its EOF/MIN/TIME
+ * values are pushed down with TIOCSETLD; then the termio is converted
+ * and handed to the native ioctl.  Returns the ioctl result, the
+ * ldlinux_check() error, or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tcseta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct lx_termio *l_tiop = (struct lx_termio *)arg;
+	struct lx_termio l_tio;
+	struct termio s_tio;
+	struct lx_cc lio;
+	int ldlinux, ret;
+
+	assert(cmd == TCSETA || cmd == TCSETAW || cmd == TCSETAF);
+
+	/* Copy in the data. */
+	if (uucopy(l_tiop, &l_tio, sizeof (l_tio)) != 0)
+		return (-errno);
+
+	/*
+	 * The TIOCSETLD/TIOCGETLD ioctls are only supported by the
+	 * ldlinux strmod.  So make sure the module exists on the
+	 * target stream before we invoke the ioctl.
+	 */
+	ldlinux = ldlinux_check(fd);
+	if (ldlinux < 0)
+		return (ldlinux);
+
+	if (ldlinux == 1) {
+		termio2lx_cc(&l_tio, &lio);
+		if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD",
+		    &lio, sizeof (lio)) < 0)
+			return (-errno);
+	}
+
+	l2s_termio(&l_tio, &s_tio);
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, cmd, cmd_str);
+
+	ret = ioctl(fd, cmd, (intptr_t)&s_tio);
+	if (ret < 0)
+		return (-errno);
+
+	return (ret);
+}
+
+/*
+ * LX_TIOCGPGRP translator.
+ *
+ * The Solaris TIOCGPGRP ioctl does not have exactly the same semantics
+ * as the Linux one.  To mimic Linux we first do the check that the
+ * Solaris tcgetpgrp() normally performs: the terminal's session
+ * (TIOCGSID) must be the caller's own session, otherwise -ENOTTY is
+ * returned.  Only then is the native TIOCGPGRP issued with the caller's
+ * argument.  Returns the ioctl result or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tiocgpgrp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	pid_t tty_session, own_session;
+	int ret;
+
+	assert(cmd == LX_TIOCGPGRP);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TIOCGSID, "TIOCGSID");
+	if (ioctl(fd, TIOCGSID, (intptr_t)&tty_session) < 0)
+		return (-errno);
+
+	own_session = getsid(0);
+	if (own_session < 0)
+		return (-errno);
+
+	if (tty_session != own_session)
+		return (-ENOTTY);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TIOCGPGRP, "TIOCGPGRP");
+	ret = ioctl(fd, TIOCGPGRP, arg);
+	if (ret < 0)
+		return (-errno);
+
+	return (ret);
+}
+
+/*
+ * LX_TIOCSPTLCK translator: issues UNLKPT through I_STR, ignoring the
+ * caller's argument.  Returns 0 on success or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_sptlock(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	assert(cmd == LX_TIOCSPTLCK);
+
+	/*
+	 * Linux expects 0 or -1 from this call, whereas Solaris can
+	 * return a positive number on success, so success is always
+	 * reported as 0.
+	 */
+	return ((ioctl_istr(fd, UNLKPT, "UNLKPT", NULL, 0) < 0) ? -errno : 0);
+}
+
+/*
+ * LX_TIOCGPTN translator: derives the pty number from the device's
+ * st_rdev, assigns ownership of the slave to the caller's real uid/gid
+ * with OWNERPT and copies the pty number out to the int at "arg".
+ * Returns 0 on success, -EACCES if OWNERPT fails, or -errno if the
+ * copy-out fails.
+ */
+static int
+/*ARGSUSED*/
+ict_gptn(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	int *ptynop = (int *)arg;
+	int ptyno;
+	pt_own_t owner;
+
+	assert(cmd == LX_TIOCGPTN);
+	assert(getmajor(stat->st_rdev) == ioc_translator_ptm.idt_major);
+
+	/* This operation is only valid for the lx_ptm device. */
+	ptyno = LX_PTM_DEV_TO_PTS(stat->st_rdev);
+
+	/*
+	 * grantpt() cannot be used here: it assumes the fd it is given
+	 * is a ptm node, whereas ours is an lx_ptm node, and it relies
+	 * on naming services to get the current process group name.
+	 * Hence the OWNERPT ioctl is invoked directly.
+	 */
+	owner.pto_ruid = getuid();
+	owner.pto_rgid = getgid();
+	if (ioctl_istr(fd, OWNERPT, "OWNERPT", &owner, sizeof (owner)) != 0)
+		return (-EACCES);
+
+	/* Copy out the data. */
+	return ((uucopy(&ptyno, ptynop, sizeof (ptyno)) != 0) ? -errno : 0);
+}
+
+/*
+ * LX_TIOCGWINSZ translator.  The native TIOCGWINSZ is tried first with
+ * the caller's argument; if it fails with EINVAL a zeroed winsize is
+ * copied out instead and success is reported.  Returns 0 on success or
+ * -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tiocgwinsz(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct winsize *winsizep = (struct winsize *)arg;
+	struct winsize blank;
+
+	assert(cmd == LX_TIOCGWINSZ);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, TIOCGWINSZ, "TIOCGWINSZ");
+	if (ioctl(fd, TIOCGWINSZ, arg) < 0) {
+		if (errno != EINVAL)
+			return (-errno);
+
+		/* EINVAL: hand back an all-zero window size. */
+		bzero(&blank, sizeof (blank));
+		if (uucopy(&blank, winsizep, sizeof (blank)) != 0)
+			return (-errno);
+	}
+
+	return (0);
+}
+
+/*
+ * LX_TCGETS translator (emulated flavour): fetch a native termios with
+ * the B_TTYMODES brand call, convert it with s2l_termios() and copy the
+ * Linux-format structure out to arg.  Returns 0 or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tcgets_emulate(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	struct termios native;
+	struct lx_termios converted;
+
+	assert(cmd == LX_TCGETS);
+
+	if (syscall(SYS_brand, B_TTYMODES, &native) < 0)
+		return (-errno);
+
+	/* Now munge the data to how Linux wants it. */
+	s2l_termios(&native, &converted);
+
+	return ((uucopy(&converted, (struct lx_termios *)arg,
+	    sizeof (converted)) != 0) ? -errno : 0);
+}
+
+/*
+ * LX_TCGETS translator (native flavour): read the termios with the
+ * native TCGETS ioctl, convert it with s2l_termios(), and, when
+ * ldlinux_check() reports a non-zero result, overlay the VEOF, VEOL,
+ * VMIN and VTIME control characters obtained through TIOCGETLD.  The
+ * Linux-format result is copied out to arg.  Returns 0, the negative
+ * value from ldlinux_check(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tcgets_native(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	struct termios native;
+	struct lx_termios converted;
+	struct lx_cc cc;
+	int have_ldlinux;
+
+	assert(cmd == LX_TCGETS);
+
+	have_ldlinux = ldlinux_check(fd);
+	if (have_ldlinux < 0)
+		return (have_ldlinux);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TCGETS, "TCGETS");
+	if (ioctl(fd, TCGETS, (intptr_t)&native) < 0)
+		return (-errno);
+
+	/* Now munge the data to how Linux wants it. */
+	s2l_termios(&native, &converted);
+
+	/*
+	 * Only the ldlinux strmod understands TIOCSETLD/TIOCGETLD, so
+	 * the ioctl is issued only when that module is on the stream.
+	 */
+	if (have_ldlinux != 0) {
+		if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD",
+		    &cc, sizeof (cc)) < 0)
+			return (-errno);
+
+		converted.c_cc[LX_VEOF] = cc.veof;
+		converted.c_cc[LX_VEOL] = cc.veol;
+		converted.c_cc[LX_VMIN] = cc.vmin;
+		converted.c_cc[LX_VTIME] = cc.vtime;
+	}
+
+	/* Copy out the data. */
+	return ((uucopy(&converted, (struct lx_termios *)arg,
+	    sizeof (converted)) != 0) ? -errno : 0);
+}
+
+/*
+ * LX_TCGETA translator: read the termio with the native TCGETA ioctl,
+ * convert it with s2l_termio(), and, when ldlinux_check() reports a
+ * non-zero result, overlay the VEOF, VMIN and VTIME control characters
+ * obtained through TIOCGETLD.  The Linux-format result is copied out
+ * to arg.  Returns 0, the negative value from ldlinux_check(), or
+ * -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tcgeta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	struct termio native;
+	struct lx_termio converted;
+	struct lx_cc cc;
+	int have_ldlinux;
+
+	assert(cmd == LX_TCGETA);
+
+	have_ldlinux = ldlinux_check(fd);
+	if (have_ldlinux < 0)
+		return (have_ldlinux);
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TCGETA, "TCGETA");
+	if (ioctl(fd, TCGETA, (intptr_t)&native) < 0)
+		return (-errno);
+
+	/* Now munge the data to how Linux wants it. */
+	s2l_termio(&native, &converted);
+
+	/*
+	 * Only the ldlinux strmod understands TIOCSETLD/TIOCGETLD, so
+	 * the ioctl is issued only when that module is on the stream.
+	 */
+	if (have_ldlinux != 0) {
+		if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD",
+		    &cc, sizeof (cc)) < 0)
+			return (-errno);
+
+		converted.c_cc[LX_VEOF] = cc.veof;
+		converted.c_cc[LX_VMIN] = cc.vmin;
+		converted.c_cc[LX_VTIME] = cc.vtime;
+	}
+
+	/* Copy out the data. */
+	return ((uucopy(&converted, (struct lx_termio *)arg,
+	    sizeof (converted)) != 0) ? -errno : 0);
+}
+
+/*
+ * TIOCSCTTY translator: make fd the controlling terminal of the calling
+ * process.  Succeeds immediately when TIOCGSID shows the tty already
+ * belongs to the caller's session; otherwise calls setpgrp() if the
+ * caller is not its own session leader and then issues the native
+ * TIOCSCTTY.  Returns 0 or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_tiocsctty(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg)
+{
+	pid_t my_sid, tty_sid;
+
+	my_sid = getsid(0);
+	if (my_sid < 0)
+		return (-errno);
+
+	/* Check if this fd is already our ctty. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TIOCGSID, "TIOCGSID");
+	if ((ioctl(fd, TIOCGSID, (intptr_t)&tty_sid) >= 0) &&
+	    (my_sid == tty_sid))
+		return (0);
+
+	/*
+	 * TIOCSCTTY fails unless the caller is a session leader, so
+	 * become one first if necessary.
+	 */
+	if (getpid() != my_sid)
+		(void) setpgrp();
+
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, TIOCSCTTY, "TIOCSCTTY");
+	return ((ioctl(fd, TIOCSCTTY, 0) < 0) ? -errno : 0);
+}
+
+/*
+ * /dev/dsp ioctl translators and support
+ */
+/*
+ * Check whether fd refers to the dsp minor node of the audio device.
+ * Returns 0 if it does and -EINVAL if the minor number cannot be
+ * queried or is not LXA_MINORNUM_DSP.
+ */
+static int
+i_is_dsp_dev(int fd)
+{
+	int minor_num;
+
+	/*
+	 * The device is a cloning device, so the driver itself has to
+	 * be asked which kind of minor node this is.
+	 */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM");
+	if ((ioctl(fd, LXA_IOC_GETMINORNUM, &minor_num) < 0) ||
+	    (minor_num != LXA_MINORNUM_DSP))
+		return (-EINVAL);
+
+	return (0);
+}
+
+/*
+ * SNDCTL_DSP_RESET translator: there is nothing to actually reset on
+ * Solaris, so the only work is verifying that fd is a dsp audio device.
+ * Returns 0 for a dsp device, otherwise the error from i_is_dsp_dev().
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_reset(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	/* Ioctl is only supported on dsp audio devices. */
+	return (i_is_dsp_dev(fd));
+}
+
+/*
+ * Render an OSS format mask as text for debug output.  Walks the
+ * oss_fmt_str table (terminated by a NULL i2s_str) and appends to buf
+ * the name of every entry whose value equals mask or shares a bit with
+ * it, separating names with " | ".  buf always ends up NUL-terminated
+ * and is never written beyond buf_size bytes (strlcat semantics).
+ */
+static void
+i_oss_fmt_str(char *buf, int buf_size, uint_t mask)
+{
+	const char *sep = "";
+	int idx;
+
+	assert(buf != NULL);
+
+	buf[0] = '\0';
+	for (idx = 0; oss_fmt_str[idx].i2s_str != NULL; idx++) {
+		if ((oss_fmt_str[idx].i2s_int == mask) ||
+		    ((oss_fmt_str[idx].i2s_int & mask) != 0)) {
+			/* The separator is empty ahead of the first name. */
+			(void) strlcat(buf, sep, buf_size);
+			(void) strlcat(buf, oss_fmt_str[idx].i2s_str,
+			    buf_size);
+			sep = " | ";
+		}
+	}
+}
+
+/*
+ * SNDCTL_DSP_GETFMTS translator: build the mask of Linux OSS sample
+ * formats the device accepts and copy it out to the uint_t that arg
+ * points at.  Each entry of oft_table is probed by sending its
+ * encoding/precision to the device with AUDIO_SETINFO (for the record
+ * side, the play side, or both, depending on how fd was opened); the
+ * formats whose request succeeds are reported.
+ *
+ * NOTE(review): the probe is a real AUDIO_SETINFO, so it presumably
+ * leaves the device configured with the last format that was accepted
+ * -- confirm against the lx_audio driver.
+ *
+ * Returns 0, -EINVAL if fd is not a dsp device or its access mode
+ * cannot be read, or -errno if the copy-out fails.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_getfmts(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	audio_info_t probe;
+	char fmt_names[MSGBUF];
+	uint_t supported = 0;
+	int idx, amode, err, do_record, do_play;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_GETFMTS);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/* We need to know the access mode for the file. */
+	amode = fcntl(fd, F_GETFL);
+	if (amode < 0)
+		return (-EINVAL);
+	amode &= O_ACCMODE;
+	assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR));
+
+	do_record = ((amode == O_RDONLY) || (amode == O_RDWR));
+	do_play = ((amode == O_WRONLY) || (amode == O_RDWR));
+
+	/* Test to see what Linux oss formats the target device supports. */
+	for (idx = 0; oft_table[idx].oft_oss_fmt != 0; idx++) {
+		/* Start every probe from a fresh request. */
+		AUDIO_INITINFO(&probe);
+
+		/* Translate a Linux oss format into Solaris settings. */
+		if (do_record) {
+			probe.record.encoding = oft_table[idx].oft_encoding;
+			probe.record.precision = oft_table[idx].oft_precision;
+		}
+		if (do_play) {
+			probe.play.encoding = oft_table[idx].oft_encoding;
+			probe.play.precision = oft_table[idx].oft_precision;
+		}
+
+		/* Send the request; success means the format is supported. */
+		lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+		    fd, AUDIO_SETINFO, "AUDIO_SETINFO");
+		if (ioctl(fd, AUDIO_SETINFO, &probe) >= 0)
+			supported |= oft_table[idx].oft_oss_fmt;
+	}
+
+	if (lx_debug_enabled != 0) {
+		i_oss_fmt_str(fmt_names, sizeof (fmt_names), supported);
+		lx_debug("\toss formats supported = 0x%x (%s)",
+		    supported, fmt_names);
+	}
+
+	return ((uucopy(&supported, (uint_t *)arg,
+	    sizeof (supported)) != 0) ? -errno : 0);
+}
+
+/*
+ * SNDCTL_DSP_SETFMT translator: read the requested OSS format mask from
+ * arg, require that exactly one known format bit is set, and apply the
+ * matching encoding/precision from oft_table to the record side, the
+ * play side, or both (depending on how fd was opened) via
+ * AUDIO_SETINFO.
+ *
+ * Returns 0 on success; -EINVAL for a non-dsp device, a mask of 0 or
+ * all-ones, more than one bit set, an unknown format, or an unreadable
+ * access mode; otherwise -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_setfmts(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	audio_info_t req;
+	char fmt_names[MSGBUF];
+	uint_t wanted;
+	int idx, amode, err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_SETFMTS);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	if (uucopy((uint_t *)arg, &wanted, sizeof (wanted)) != 0)
+		return (-errno);
+
+	if (lx_debug_enabled != 0) {
+		i_oss_fmt_str(fmt_names, sizeof (fmt_names), wanted);
+		lx_debug("\toss formats request = 0x%x (%s)",
+		    wanted, fmt_names);
+	}
+
+	if ((wanted == 0) || (wanted == (uint_t)-1)) {
+		lx_debug("\tXXX: possible oss formats query?");
+		return (-EINVAL);
+	}
+
+	/* Check if multiple format bits were specified. */
+	if (!BIT_ONLYONESET(wanted))
+		return (-EINVAL);
+
+	/* Decode the oss format request into a native format. */
+	idx = 0;
+	while ((oft_table[idx].oft_oss_fmt != 0) &&
+	    (oft_table[idx].oft_oss_fmt != wanted))
+		idx++;
+	if (oft_table[idx].oft_oss_fmt == 0)
+		return (-EINVAL);
+
+	/* We need to know the access mode for the file. */
+	amode = fcntl(fd, F_GETFL);
+	if (amode < 0)
+		return (-EINVAL);
+	amode &= O_ACCMODE;
+	assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR));
+
+	/* Initialize the mode request. */
+	AUDIO_INITINFO(&req);
+
+	/* Translate the Linux oss request into a Solaris request. */
+	if ((amode == O_RDONLY) || (amode == O_RDWR)) {
+		req.record.encoding = oft_table[idx].oft_encoding;
+		req.record.precision = oft_table[idx].oft_precision;
+	}
+	if ((amode == O_WRONLY) || (amode == O_RDWR)) {
+		req.play.encoding = oft_table[idx].oft_encoding;
+		req.play.precision = oft_table[idx].oft_precision;
+	}
+
+	/* Send the request. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, AUDIO_SETINFO, "AUDIO_SETINFO");
+	if (ioctl(fd, AUDIO_SETINFO, &req) < 0)
+		return (-errno);
+	return (0);
+}
+
+/*
+ * SNDCTL_DSP_CHANNELS / SNDCTL_DSP_STEREO translator: read the request
+ * from arg, validate it, and set the channel count on the record side,
+ * the play side, or both (depending on how fd was opened) via
+ * AUDIO_SETINFO.
+ *
+ * For CHANNELS the request must be 1 or 2 and 0 is returned on success.
+ * For STEREO the request must be 0 (mono) or 1 (stereo) and that same
+ * value is returned on success.  Failures yield -EINVAL or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_channels(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	audio_info_t req;
+	uint_t nchan;
+	int amode, err;
+
+	assert((cmd == LX_OSS_SNDCTL_DSP_CHANNELS) ||
+	    (cmd == LX_OSS_SNDCTL_DSP_STEREO));
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	if (uucopy((uint_t *)arg, &nchan, sizeof (nchan)) != 0)
+		return (-errno);
+
+	lx_debug("\toss %s request = 0x%x (%u)",
+	    (cmd == LX_OSS_SNDCTL_DSP_CHANNELS) ? "channel" : "stereo",
+	    nchan, nchan);
+
+	if (nchan == (uint_t)-1) {
+		lx_debug("\tXXX: possible channel/stereo query?");
+		return (-EINVAL);
+	}
+
+	if (cmd == LX_OSS_SNDCTL_DSP_STEREO) {
+		/*
+		 * There doesn't seem to be any documentation for
+		 * SNDCTL_DSP_STEREO.  Source that uses (or used) this
+		 * ioctl suggests its functionality has been subsumed by
+		 * SNDCTL_DSP_CHANNELS, and that the only arguments ever
+		 * passed to the SNDCTL_DSP_STEREO ioctl are the boolean
+		 * values '0' and '1'.  Hence we start out strict and
+		 * only support those values.
+		 *
+		 * Some online forum discussions about this ioctl
+		 * seemed to indicate that in case of success it
+		 * returns the "stereo" setting (ie, either
+		 * '0' for mono or '1' for stereo).
+		 */
+		if (nchan > 1) {
+			lx_debug("\tinvalid stereo request");
+			return (-EINVAL);
+		}
+		/* Turn the boolean into a channel count. */
+		nchan += 1;
+	} else if ((nchan < 1) || (nchan > 2)) {
+		/* Limit the system to one or two channels. */
+		lx_debug("\tinvalid channel request");
+		return (-EINVAL);
+	}
+
+	/* We need to know the access mode for the file. */
+	amode = fcntl(fd, F_GETFL);
+	if (amode < 0)
+		return (-EINVAL);
+	amode &= O_ACCMODE;
+	assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR));
+
+	/* Initialize the channel request. */
+	AUDIO_INITINFO(&req);
+
+	/* Translate the Linux oss request into a Solaris request. */
+	if ((amode == O_RDONLY) || (amode == O_RDWR))
+		req.record.channels = nchan;
+	if ((amode == O_WRONLY) || (amode == O_RDWR))
+		req.play.channels = nchan;
+
+	/* Send the request. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, AUDIO_SETINFO, "AUDIO_SETINFO");
+	if (ioctl(fd, AUDIO_SETINFO, &req) < 0)
+		return (-errno);
+
+	/* STEREO hands the boolean setting back; CHANNELS returns 0. */
+	return ((cmd == LX_OSS_SNDCTL_DSP_STEREO) ? (nchan - 1) : 0);
+}
+
+/*
+ * SNDCTL_DSP_SPEED translator: read the requested sample rate from arg
+ * and apply it to the record side, the play side, or both (depending on
+ * how fd was opened) via AUDIO_SETINFO.  A request of (uint_t)-1 is
+ * rejected with -EINVAL.  Returns 0, -EINVAL or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_speed(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	audio_info_t req;
+	uint_t rate;
+	int amode, err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_SPEED);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	if (uucopy((uint_t *)arg, &rate, sizeof (rate)) != 0)
+		return (-errno);
+
+	lx_debug("\toss speed request = 0x%x (%u)", rate, rate);
+
+	if (rate == (uint_t)-1) {
+		lx_debug("\tXXX: possible oss speed query?");
+		return (-EINVAL);
+	}
+
+	/* We need to know the access mode for the file. */
+	amode = fcntl(fd, F_GETFL);
+	if (amode < 0)
+		return (-EINVAL);
+	amode &= O_ACCMODE;
+	assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR));
+
+	/* Initialize the speed request. */
+	AUDIO_INITINFO(&req);
+
+	/* Translate the Linux oss request into a Solaris request. */
+	if ((amode == O_RDONLY) || (amode == O_RDWR))
+		req.record.sample_rate = rate;
+	if ((amode == O_WRONLY) || (amode == O_RDWR))
+		req.play.sample_rate = rate;
+
+	/* Send the request. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, AUDIO_SETINFO, "AUDIO_SETINFO");
+	if (ioctl(fd, AUDIO_SETINFO, &req) < 0)
+		return (-errno);
+	return (0);
+}
+
+/*
+ * SNDCTL_DSP_GETBLKSIZE translator: fetch the fragment info from the
+ * driver with LXA_IOC_GET_FRAG_INFO and copy the fragment size out to
+ * the uint_t that arg points at.  Returns 0, the error from
+ * i_is_dsp_dev(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_getblksize(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_frag_info_t frag;
+	uint_t size;
+	int err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_GETBLKSIZE);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/* Query the current fragment count and size. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO");
+	if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &frag) < 0)
+		return (-errno);
+
+	size = frag.lxa_fi_size;
+
+	return ((uucopy(&size, (uint_t *)arg, sizeof (size)) != 0) ?
+	    -errno : 0);
+}
+
+/*
+ * SNDCTL_DSP_GETOSPACE / SNDCTL_DSP_GETISPACE translator: fill in an
+ * lx_oss_audio_buf_info_t from the driver's fragment info and copy it
+ * out to arg.  fragstotal and fragsize come from LXA_IOC_GET_FRAG_INFO;
+ * fragments is always reported as 1 and bytes as one fragment's worth.
+ * Returns 0, the error from i_is_dsp_dev(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_getspace(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lx_oss_audio_buf_info_t info;
+	lxa_frag_info_t frag;
+	int err;
+
+	assert((cmd == LX_OSS_SNDCTL_DSP_GETOSPACE) ||
+	    (cmd == LX_OSS_SNDCTL_DSP_GETISPACE));
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/* Query the current fragment count and size. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO");
+	if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &frag) < 0)
+		return (-errno);
+
+	/* Return the current fragment count and size. */
+	info.fragstotal = frag.lxa_fi_cnt;
+	info.fragsize = frag.lxa_fi_size;
+
+	/*
+	 * Applications are (untruthfully) told that one fragment can
+	 * always be transferred without blocking.
+	 */
+	info.fragments = 1;
+	info.bytes = info.fragsize;
+
+	if (cmd == LX_OSS_SNDCTL_DSP_GETOSPACE)
+		lx_debug("\toss get output space result = ");
+	if (cmd == LX_OSS_SNDCTL_DSP_GETISPACE)
+		lx_debug("\toss get input space result = ");
+
+	lx_debug("\t\tbytes = 0x%x (%u), fragments = 0x%x (%u)",
+	    info.bytes, info.bytes, info.fragments, info.fragments);
+	lx_debug("\t\tfragtotal = 0x%x (%u), fragsize = 0x%x (%u)",
+	    info.fragstotal, info.fragstotal,
+	    info.fragsize, info.fragsize);
+
+	return ((uucopy(&info, (lx_oss_audio_buf_info_t *)arg,
+	    sizeof (info)) != 0) ? -errno : 0);
+}
+
+/*
+ * SNDCTL_DSP_SETFRAGMENT translator: decode the packed fragment request
+ * read from arg, validate it, expand the power-of-two encodings and
+ * pass the resulting size/count to the driver with
+ * LXA_IOC_SET_FRAG_INFO.
+ *
+ * Returns 0 on success, -EINVAL if fd is not a dsp device or the
+ * request is out of range, otherwise -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_setfragment(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_frag_info_t fi;
+	uint_t *fraginfop = (uint_t *)arg;
+	uint_t fraginfo, frag_size, frag_cnt;
+	int err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_SETFRAGMENT);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	if ((err = i_is_dsp_dev(fd)) != 0)
+		return (err);
+
+	if (uucopy(fraginfop, &fraginfo, sizeof (fraginfo)) != 0)
+		return (-errno);
+
+	/*
+	 * The argument to this ioctl is a 32-bit integer of the
+	 * format 0x MMMM SSSS where:
+	 *	SSSS - requests a fragment size of 2^SSSS
+	 *	MMMM - requests a maximum fragment count of 2^MMMM
+	 * if MMMM is 0x7fff then the application is requesting
+	 * no limits on the number of fragments.
+	 */
+
+	frag_size = fraginfo & 0xffff;
+	frag_cnt = fraginfo >> 16;
+
+	lx_debug("\toss fragment request: "
+	    "power size = 0x%x (%u), power cnt = 0x%x (%u)",
+	    frag_size, frag_size, frag_cnt, frag_cnt);
+
+	/* Limit the supported fragment size from 2^4 to 2^31. */
+	if ((frag_size < 4) || (frag_size > 31))
+		return (-EINVAL);
+
+	/* Limit the number of fragments from 2^1 to 2^32. */
+	if (((frag_cnt < 1) || (frag_cnt > 32)) && (frag_cnt != 0x7fff))
+		return (-EINVAL);
+
+	/*
+	 * Expand the fragment values.  The shifts are done on an
+	 * unsigned 1 because the exponent may be as large as 31, and
+	 * shifting a signed int into its sign bit is undefined.
+	 */
+	frag_size = 1U << frag_size;
+	if ((frag_cnt == 32) || (frag_cnt == 0x7fff)) {
+		/* 2^32 does not fit; treat it like "no limit". */
+		frag_cnt = UINT_MAX;
+	} else {
+		frag_cnt = 1U << frag_cnt;
+	}
+
+	lx_debug("\toss fragment request: "
+	    "translated size = 0x%x (%u), translated cnt = 0x%x (%u)",
+	    frag_size, frag_size, frag_cnt, frag_cnt);
+
+	fi.lxa_fi_size = frag_size;
+	fi.lxa_fi_cnt = frag_cnt;
+
+	/* Set the current fragment count and size. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_SET_FRAG_INFO, "LXA_IOC_SET_FRAG_INFO");
+	return ((ioctl(fd, LXA_IOC_SET_FRAG_INFO, &fi) < 0) ? -errno : 0);
+}
+
+/*
+ * SNDCTL_DSP_GETCAPS translator: report a fixed capability mask of
+ * LX_OSS_DSP_CAP_MMAP | LX_OSS_DSP_CAP_TRIGGER by copying it out to the
+ * uint_t that arg points at.  Returns 0, the error from i_is_dsp_dev(),
+ * or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_getcaps(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	uint_t cap_mask;
+	int err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_GETCAPS);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Report that we support mmap access;
+	 * this is where things start to get fun.
+	 */
+	cap_mask = LX_OSS_DSP_CAP_MMAP | LX_OSS_DSP_CAP_TRIGGER;
+
+	return ((uucopy(&cap_mask, (uint_t *)arg, sizeof (cap_mask)) != 0) ?
+	    -errno : 0);
+}
+
+/*
+ * SNDCTL_DSP_SETTRIGGER translator: read the trigger value from arg,
+ * accept only LX_OSS_PCM_DISABLE_OUTPUT or LX_OSS_PCM_ENABLE_OUTPUT,
+ * and forward it to the driver with LXA_IOC_MMAP_OUTPUT.  Returns 0,
+ * -EINVAL for a non-dsp device or unsupported trigger, or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_settrigger(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	uint_t trig;
+	int err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_SETTRIGGER);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	if (uucopy((uint_t *)arg, &trig, sizeof (trig)) != 0)
+		return (-errno);
+
+	lx_debug("\toss set trigger request = 0x%x (%u)",
+	    trig, trig);
+
+	/* We only support two types of trigger requests. */
+	if (!((trig == LX_OSS_PCM_DISABLE_OUTPUT) ||
+	    (trig == LX_OSS_PCM_ENABLE_OUTPUT)))
+		return (-EINVAL);
+
+	/*
+	 * Triggers are only supported on devices open for write
+	 * access, but that is not checked here since the driver will
+	 * verify it for us.
+	 */
+
+	/* Send the trigger command to the audio device. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_MMAP_OUTPUT, "LXA_IOC_MMAP_OUTPUT");
+	if (ioctl(fd, LXA_IOC_MMAP_OUTPUT, &trig) < 0)
+		return (-errno);
+	return (0);
+}
+
+/*
+ * SNDCTL_DSP_GETOPTR translator: build an lx_oss_count_info_t and copy
+ * it out to arg.
+ *
+ *	bytes	- play.samples + record.samples from AUDIO_GETINFO
+ *	blocks	- change in "bytes" since the previous call, divided by
+ *		  the driver's current fragment size
+ *	ptr	- value from LXA_IOC_MMAP_PTR, or 0 if that ioctl fails
+ *
+ * Returns 0, the error from i_is_dsp_dev(), or -errno.
+ *
+ * NOTE(review): the previous byte count lives in a single function
+ * static, so it is shared by every fd (and thread) that issues this
+ * ioctl -- confirm that is acceptable.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_getoptr(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	static uint_t bytes = 0;
+	lx_oss_count_info_t ci;
+	lxa_frag_info_t fi;
+	audio_info_t ai;
+	int ptr, err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_GETOPTR);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	if ((err = i_is_dsp_dev(fd)) != 0)
+		return (err);
+
+	/* Query the current fragment size. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO");
+	if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0)
+		return (-errno);
+
+	/* Figure out how many samples have been played. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, AUDIO_GETINFO, "AUDIO_GETINFO");
+	if (ioctl(fd, AUDIO_GETINFO, &ai) < 0)
+		return (-errno);
+	ci.bytes = ai.play.samples + ai.record.samples;
+
+	/*
+	 * Figure out how many fragments of audio have gone out since
+	 * the last call to this ioctl.  A fragment size of zero would
+	 * make the division trap, so report no completed fragments in
+	 * that case instead.
+	 */
+	if (fi.lxa_fi_size != 0)
+		ci.blocks = (ci.bytes - bytes) / fi.lxa_fi_size;
+	else
+		ci.blocks = 0;
+	bytes = ci.bytes;
+
+	/* Figure out the current fragment offset for mmap audio output. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_MMAP_PTR, "LXA_IOC_MMAP_PTR");
+	if (ioctl(fd, LXA_IOC_MMAP_PTR, &ptr) < 0) {
+		/*
+		 * We really should return an error here, but some
+		 * applications (*cough* *cough* flash) expect this
+		 * ioctl to work even if they haven't mmaped the
+		 * device.
+		 */
+		ci.ptr = 0;
+	} else {
+		ci.ptr = ptr;
+	}
+
+	lx_debug("\toss get output ptr result = ");
+	lx_debug("\t\t"
+	    "bytes = 0x%x (%u), blocks = 0x%x (%u), ptr = 0x%x (%u)",
+	    ci.bytes, ci.bytes, ci.blocks, ci.blocks, ci.ptr, ci.ptr);
+
+	if (uucopy(&ci, (void *)arg, sizeof (ci)) != 0)
+		return (-errno);
+	return (0);
+}
+
+/*
+ * SNDCTL_DSP_SYNC translator: drain pending output with AUDIO_DRAIN.
+ * A device opened read-only is treated as a no-op and reports success
+ * without issuing the drain.  Returns 0, -EINVAL for a non-dsp device
+ * or unreadable access mode, or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_sndctl_dsp_sync(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	int amode, err;
+
+	assert(cmd == LX_OSS_SNDCTL_DSP_SYNC);
+
+	/* Ioctl is only supported on dsp audio devices. */
+	err = i_is_dsp_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/* We need to know the access mode for the file. */
+	amode = fcntl(fd, F_GETFL);
+	if (amode < 0)
+		return (-EINVAL);
+	amode &= O_ACCMODE;
+	assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR));
+
+	/*
+	 * On Linux a sync on a record-only device succeeds right away.
+	 * The Solaris counterpart, AUDIO_DRAIN, fails with EINVAL when
+	 * issued to a record-only device, so only devices with an
+	 * output side are drained.
+	 */
+	if (amode != O_RDONLY) {
+		/* Drain any pending output. */
+		lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+		    fd, AUDIO_DRAIN, "AUDIO_DRAIN");
+		if (ioctl(fd, AUDIO_DRAIN, NULL) < 0)
+			return (-errno);
+	}
+
+	return (0);
+}
+
+/*
+ * /dev/mixer ioctl translators and support
+ *
+ * There are some interesting things to take note of for supporting
+ * /dev/mixer ioctls.
+ *
+ * 1) We report support for the following mixer resources:
+ * VOLUME, PCM, MIC
+ *
+ * 2) We assume the following number of channels for each mixer resource:
+ * VOLUME: 2 channels
+ * PCM: 2 channels
+ * MIC: 1 channel
+ *
+ * 3) OSS sets the gain on each channel independently but on Solaris
+ * there is only one gain value and a balance value. So we need
+ * to do some translation back and forth.
+ *
+ * 4) OSS assumes direct access to hardware but Solaris provides
+ * virtualized audio device access (where everyone who opens /dev/audio
+ * gets a virtualized audio channel stream, all of which are merged
+ * together by a software mixer before reaching the hardware). Hence
+ * mapping OSS mixer resources to Solaris mixer resources takes some
+ * work. VOLUME and MIC resources are mapped to the actual underlying
+ * audio hardware resources. PCM resources are mapped to the virtual
+ * audio channel output level. This mapping becomes more complicated
+ * if there are no open audio output channels. In this case the
+ * lx_audio device caches the PCM channel settings for us and applies
+ * them to any new audio output channels that get opened. (This
+ * is the reason that we don't use AUDIO_SETINFO ioctls directly
+ * but instead the lx_audio driver's custom LXA_IOC_MIXER_SET_*
+ * and LXA_IOC_MIXER_GET_* ioctls.) For more information see
+ * the comments in lx_audio.c.
+ */
+/*
+ * Check whether fd refers to the mixer minor node of the audio device.
+ * Returns 0 if it does and -EINVAL if the minor number cannot be
+ * queried or is not LXA_MINORNUM_MIXER.
+ */
+static int
+i_is_mixer_dev(int fd)
+{
+	int minor_num;
+
+	/*
+	 * The device is a cloning device, so the driver itself has to
+	 * be asked which kind of minor node this is.
+	 */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM");
+	if ((ioctl(fd, LXA_IOC_GETMINORNUM, &minor_num) < 0) ||
+	    (minor_num != LXA_MINORNUM_MIXER))
+		return (-EINVAL);
+
+	return (0);
+}
+
+/*
+ * Convert a native gain/balance pair (*ml) into a two-channel OSS mixer
+ * value stored in *val.  With the balance at AUDIO_MID_BALANCE both
+ * channels get the gain; otherwise one channel gets the gain and the
+ * other is scaled down in proportion to how far the balance sits from
+ * the corresponding extreme.  Always returns 0.
+ */
+static int
+i_oss_mixer_ml_to_val(lxa_mixer_levels_t *ml, uint_t *val)
+{
+	int span, lvl1, lvl2;
+
+	/* Easy case: centred balance, both channels have the same level. */
+	if (ml->lxa_ml_balance == AUDIO_MID_BALANCE) {
+		*val = LX_OSS_MIXER_ENC2(
+		    LX_OSS_S2L_GAIN(ml->lxa_ml_gain),
+		    LX_OSS_S2L_GAIN(ml->lxa_ml_gain));
+		assert(LX_OSS_MIXER_2CH_OK(*val));
+		return (0);
+	}
+
+	/* Decode the balance/gain into two separate levels. */
+	if (ml->lxa_ml_balance < AUDIO_MID_BALANCE) {
+		/* First level is the full gain; scale the second. */
+		lvl1 = ml->lxa_ml_gain;
+
+		span = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE;
+		lvl2 = ml->lxa_ml_balance;
+		lvl2 = (lvl1 * lvl2) / span;
+	} else {
+		assert(ml->lxa_ml_balance > AUDIO_MID_BALANCE);
+
+		/* Second level is the full gain; scale the first. */
+		lvl2 = ml->lxa_ml_gain;
+
+		span = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE;
+		lvl1 = AUDIO_RIGHT_BALANCE - ml->lxa_ml_balance;
+		lvl1 = (lvl2 * lvl1) / span;
+	}
+
+	*val = LX_OSS_MIXER_ENC2(LX_OSS_S2L_GAIN(lvl1),
+	    LX_OSS_S2L_GAIN(lvl2));
+	return (0);
+}
+
+/*
+ * Convert a two-channel OSS mixer value (val) into a native gain/balance
+ * pair stored in *ml. ml_old supplies the current levels; only its
+ * balance is used, and only when both requested levels are zero.
+ *
+ * Returns -EINVAL if val fails LX_OSS_MIXER_2CH_OK(), otherwise 0.
+ */
+static int
+i_oss_mixer_val_to_ml(uint_t val, lxa_mixer_levels_t *ml_old,
+ lxa_mixer_levels_t *ml)
+{
+ int range, val1, val2;
+
+ if (!LX_OSS_MIXER_2CH_OK(val))
+ return (-EINVAL);
+
+ /* Split the packed value into its two per-channel levels. */
+ val1 = LX_OSS_MIXER_DEC1(val);
+ val2 = LX_OSS_MIXER_DEC2(val);
+
+ /*
+ * Deal with the easy case.
+ * Both channels have the same non-zero level.
+ */
+ if ((val1 != 0) && (val1 == val2)) {
+ ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1);
+ ml->lxa_ml_balance = AUDIO_MID_BALANCE;
+ return (0);
+ }
+
+ /* If both levels are zero, preserve the current balance setting. */
+ if ((val1 == 0) && (val2 == 0)) {
+ ml->lxa_ml_gain = 0;
+ ml->lxa_ml_balance = ml_old->lxa_ml_balance;
+ return (0);
+ }
+
+ /*
+ * First set the gain to match the highest channel value volume.
+ * Then use the balance to simulate lower volume on the second
+ * channel.
+ */
+ if (val1 > val2) {
+ ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1);
+
+ /*
+ * Balance starts from 0 and moves towards the midpoint in
+ * proportion to val2/val1.
+ * NOTE(review): the literal 0 presumably stands for
+ * AUDIO_LEFT_BALANCE -- confirm.
+ */
+ range = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE;
+ ml->lxa_ml_balance = 0;
+ ml->lxa_ml_balance += ((val2 * range) / val1);
+ } else {
+ assert(val1 < val2);
+
+ ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val2);
+
+ /*
+ * Balance starts from AUDIO_RIGHT_BALANCE and moves back
+ * towards the midpoint in proportion to val1/val2.
+ */
+ range = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE;
+ ml->lxa_ml_balance = AUDIO_RIGHT_BALANCE;
+ ml->lxa_ml_balance -= ((val1 * range) / val2);
+ }
+
+ return (0);
+}
+
+/*
+ * SOUND_MIXER_READ_VOLUME / SOUND_MIXER_READ_PCM translator: fetch the
+ * current gain/balance from the lx_audio driver (LXA_IOC_MIXER_GET_VOL
+ * or LXA_IOC_MIXER_GET_PCM), convert it to a two-channel OSS mixer
+ * value and copy that out to the uint_t that arg points at.
+ *
+ * Returns 0, -EINVAL if fd is not a mixer device, or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_mixer_read_volume(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_mixer_levels_t ml;
+	uint_t *valp = (uint_t *)arg;
+	uint_t val;
+	char *cmd_txt;
+	int err, cmd_new;
+
+	assert((cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) ||
+	    (cmd == LX_OSS_SOUND_MIXER_READ_PCM));
+
+	/* Ioctl is only supported on mixer audio devices. */
+	if ((err = i_is_mixer_dev(fd)) != 0)
+		return (err);
+
+	/*
+	 * Pick the driver ioctl.  An if/else is used so that cmd_new and
+	 * cmd_txt are initialized on every path, even when the assert
+	 * above is compiled out.
+	 */
+	if (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) {
+		cmd_new = LXA_IOC_MIXER_GET_VOL;
+		cmd_txt = "LXA_IOC_MIXER_GET_VOL";
+	} else {
+		cmd_new = LXA_IOC_MIXER_GET_PCM;
+		cmd_txt = "LXA_IOC_MIXER_GET_PCM";
+	}
+
+	/* Attempt to get the device output gain. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt);
+	if (ioctl(fd, cmd_new, &ml) < 0)
+		return (-errno);
+
+	lx_debug("\tlx_audio mixer results, "
+	    "gain = 0x%x (%u), balance = 0x%x (%u)",
+	    ml.lxa_ml_gain, ml.lxa_ml_gain,
+	    ml.lxa_ml_balance, ml.lxa_ml_balance);
+
+	assert(LXA_MIXER_LEVELS_OK(&ml));
+
+	/* Translate the mixer levels struct to an OSS mixer value. */
+	if ((err = i_oss_mixer_ml_to_val(&ml, &val)) != 0)
+		return (err);
+	assert(LX_OSS_MIXER_2CH_OK(val));
+
+	lx_debug("\toss get mixer %s result = 0x%x (%u)",
+	    (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) ? "volume" : "pcm",
+	    val, val);
+
+	if (uucopy(&val, valp, sizeof (val)) != 0)
+		return (-errno);
+	return (0);
+}
+
+/*
+ * SOUND_MIXER_WRITE_VOLUME / SOUND_MIXER_WRITE_PCM translator: read the
+ * two-channel OSS mixer value from arg, fetch the current levels from
+ * the lx_audio driver (needed to preserve the balance when both
+ * channels are set to zero), convert the request to a gain/balance pair
+ * and hand it to the driver (LXA_IOC_MIXER_SET_VOL or
+ * LXA_IOC_MIXER_SET_PCM).
+ *
+ * Returns 0, -EINVAL if fd is not a mixer device or the value is
+ * rejected by i_oss_mixer_val_to_ml(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_mixer_write_volume(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_mixer_levels_t ml, ml_old;
+	uint_t *valp = (uint_t *)arg;
+	uint_t val;
+	char *cmd_txt, *get_txt;
+	int err, cmd_new, cmd_get;
+
+	assert((cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) ||
+	    (cmd == LX_OSS_SOUND_MIXER_WRITE_PCM));
+
+	/* Ioctl is only supported on mixer audio devices. */
+	if ((err = i_is_mixer_dev(fd)) != 0)
+		return (err);
+
+	if (uucopy(valp, &val, sizeof (val)) != 0)
+		return (-errno);
+
+	/*
+	 * Pick the driver get/set ioctl pair.  An if/else is used so
+	 * that the command variables (and, below, ml_old) are
+	 * initialized on every path, even when the assert above is
+	 * compiled out.
+	 */
+	if (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) {
+		cmd_get = LXA_IOC_MIXER_GET_VOL;
+		get_txt = "LXA_IOC_MIXER_GET_VOL";
+		cmd_new = LXA_IOC_MIXER_SET_VOL;
+		cmd_txt = "LXA_IOC_MIXER_SET_VOL";
+	} else {
+		cmd_get = LXA_IOC_MIXER_GET_PCM;
+		get_txt = "LXA_IOC_MIXER_GET_PCM";
+		cmd_new = LXA_IOC_MIXER_SET_PCM;
+		cmd_txt = "LXA_IOC_MIXER_SET_PCM";
+	}
+
+	/* Attempt to get the device output gain. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_get, get_txt);
+	if (ioctl(fd, cmd_get, &ml_old) < 0)
+		return (-errno);
+
+	lx_debug("\toss set mixer %s request = 0x%x (%u)",
+	    (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) ? "volume" : "pcm",
+	    val, val);
+
+	/* Translate an OSS mixer value to mixer levels. */
+	if ((err = i_oss_mixer_val_to_ml(val, &ml_old, &ml)) != 0)
+		return (err);
+	assert(LXA_MIXER_LEVELS_OK(&ml));
+
+	lx_debug("\tlx_audio mixer request, "
+	    "gain = 0x%x (%u), balance = 0x%x (%u)",
+	    ml.lxa_ml_gain, ml.lxa_ml_gain,
+	    ml.lxa_ml_balance, ml.lxa_ml_balance);
+
+	/* Attempt to set the device output gain. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt);
+	if (ioctl(fd, cmd_new, &ml) < 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * SOUND_MIXER_READ_MIC / SOUND_MIXER_READ_IGAIN translator: fetch the
+ * input gain from the lx_audio driver with LXA_IOC_MIXER_GET_MIC and
+ * copy it out to arg as an OSS mixer value carrying that gain in both
+ * channels.  Returns 0, the error from i_is_mixer_dev(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_mixer_read_mic(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_mixer_levels_t levels;
+	uint_t oss_val;
+	int err;
+
+	assert((cmd == LX_OSS_SOUND_MIXER_READ_MIC) ||
+	    (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN));
+
+	/* Ioctl is only supported on mixer audio devices. */
+	err = i_is_mixer_dev(fd);
+	if (err != 0)
+		return (err);
+
+	/* Attempt to get the device input gain. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_MIXER_GET_MIC, "LXA_IOC_MIXER_GET_MIC");
+	if (ioctl(fd, LXA_IOC_MIXER_GET_MIC, &levels) < 0)
+		return (-errno);
+
+	/* Report the mixer as having two channels. */
+	oss_val = LX_OSS_MIXER_ENC2(
+	    LX_OSS_S2L_GAIN(levels.lxa_ml_gain),
+	    LX_OSS_S2L_GAIN(levels.lxa_ml_gain));
+
+	if (cmd == LX_OSS_SOUND_MIXER_READ_MIC)
+		lx_debug("\toss get mixer mic result = 0x%x (%u)",
+		    oss_val, oss_val);
+	if (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN)
+		lx_debug("\toss get mixer igain result = 0x%x (%u)",
+		    oss_val, oss_val);
+
+	return ((uucopy(&oss_val, (uint_t *)arg, sizeof (oss_val)) != 0) ?
+	    -errno : 0);
+}
+
+/*
+ * SOUND_MIXER_WRITE_MIC / SOUND_MIXER_WRITE_IGAIN translator: read the
+ * OSS mixer value from arg, take only its first channel level, and set
+ * it as the input gain (with a centred balance) through
+ * LXA_IOC_MIXER_SET_MIC.  Returns 0, the error from i_is_mixer_dev(),
+ * or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_mixer_write_mic(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	lxa_mixer_levels_t levels;
+	uint_t oss_val;
+	int err;
+
+	assert((cmd == LX_OSS_SOUND_MIXER_WRITE_MIC) ||
+	    (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN));
+
+	/* Ioctl is only supported on mixer audio devices. */
+	err = i_is_mixer_dev(fd);
+	if (err != 0)
+		return (err);
+
+	if (uucopy((uint_t *)arg, &oss_val, sizeof (oss_val)) != 0)
+		return (-errno);
+
+	if (cmd == LX_OSS_SOUND_MIXER_WRITE_MIC)
+		lx_debug("\toss set mixer mic request = 0x%x (%u)",
+		    oss_val, oss_val);
+	if (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN)
+		lx_debug("\toss set mixer igain request = 0x%x (%u)",
+		    oss_val, oss_val);
+
+	/* The mic only supports one channel. */
+	oss_val = LX_OSS_MIXER_DEC1(oss_val);
+
+	levels.lxa_ml_balance = AUDIO_MID_BALANCE;
+	levels.lxa_ml_gain = LX_OSS_L2S_GAIN(oss_val);
+
+	/* Attempt to set the device input gain. */
+	lx_debug("\tioctl(%d, 0x%x - %s, ...)",
+	    fd, LXA_IOC_MIXER_SET_MIC, "LXA_IOC_MIXER_SET_MIC");
+	return ((ioctl(fd, LXA_IOC_MIXER_SET_MIC, &levels) < 0) ? -errno : 0);
+}
+
+/*
+ * SOUND_MIXER_READ_{DEVMASK,STEREODEVS,RECMASK,RECSRC} translator: copy
+ * out to arg a fixed bitmap of mixer channels, chosen by cmd:
+ *
+ *	DEVMASK		PCM | MIC | VOLUME
+ *	STEREODEVS	PCM | VOLUME
+ *	RECMASK, RECSRC	MIC
+ *
+ * Returns 0, the error from i_is_mixer_dev(), or -errno.
+ */
+static int
+/*ARGSUSED*/
+ict_oss_mixer_read_devs(int fd, struct stat *stat,
+    int cmd, char *cmd_str, intptr_t arg)
+{
+	const uint_t pcm_bit = (1 << LX_OSS_SM_PCM);
+	const uint_t mic_bit = (1 << LX_OSS_SM_MIC);
+	const uint_t vol_bit = (1 << LX_OSS_SM_VOLUME);
+	uint_t bitmap = 0;
+	int err;
+
+	/* Bitmap of all the mixer channels we supposedly support. */
+	if (cmd == LX_OSS_SOUND_MIXER_READ_DEVMASK)
+		bitmap = (pcm_bit | mic_bit | vol_bit);
+
+	/* Bitmap of the stereo mixer channels we supposedly support. */
+	if (cmd == LX_OSS_SOUND_MIXER_READ_STEREODEVS)
+		bitmap = (pcm_bit | vol_bit);
+
+	/* Bitmap of the mixer input channels we supposedly support. */
+	if ((cmd == LX_OSS_SOUND_MIXER_READ_RECMASK) ||
+	    (cmd == LX_OSS_SOUND_MIXER_READ_RECSRC))
+		bitmap = mic_bit;
+
+	assert(bitmap != 0);
+
+	/* Ioctl is only supported on mixer audio devices. */
+	err = i_is_mixer_dev(fd);
+	if (err != 0)
+		return (err);
+
+	return ((uucopy(&bitmap, (uint_t *)arg, sizeof (bitmap)) != 0) ?
+	    -errno : 0);
+}
+
+/*
+ * Audio ioctl conversion support structures.
+ */
+/*
+ * Linux OSS sample format -> native encoding/precision table.
+ * Each row is { OSS format bit, native encoding, precision }; the
+ * format translators read these as oft_oss_fmt, oft_encoding and
+ * oft_precision (presumably in that declaration order -- confirm against
+ * the oss_fmt_translator_t definition). The all-zero row terminates the
+ * table: lookups stop at the first entry whose oft_oss_fmt is 0.
+ */
+static oss_fmt_translator_t oft_table[] = {
+ { LX_OSS_AFMT_MU_LAW, AUDIO_ENCODING_ULAW, 8 },
+ { LX_OSS_AFMT_A_LAW, AUDIO_ENCODING_ALAW, 8 },
+ { LX_OSS_AFMT_S8, AUDIO_ENCODING_LINEAR, 8 },
+ { LX_OSS_AFMT_U8, AUDIO_ENCODING_LINEAR8, 8 },
+ { LX_OSS_AFMT_S16_NE, AUDIO_ENCODING_LINEAR, 16 },
+ { 0, 0, 0 }
+};
+
+/*
+ * Ioctl translator definitions.
+ */
+
+/*
+ * Defines to help with creating ioctl translators.
+ *
+ * IOC_CMD_TRANSLATOR_NONE - Ioctl has the same semantics and argument
+ * values on Solaris and Linux but may have different command values.
+ * (Macro assumes the symbolic Linux name assigned to the ioctl command
+ * value is the same as the Solaris symbol but pre-pended with an "LX_")
+ *
+ * IOC_CMD_TRANSLATOR_PASS - Ioctl is a Linux specific ioctl and should
+ * be passed through unmodified.
+ *
+ * IOC_CMD_TRANSLATOR_FILTER - Ioctl has the same command name on
+ * Solaris and Linux and needs a translation function that is common to
+ * more than one ioctl. (Macro assumes the symbolic Linux name assigned
+ * to the ioctl command value is the same as the Solaris symbol but
+ * pre-pended with an "LX_")
+ *
+ * IOC_CMD_TRANSLATOR_CUSTOM - Ioctl needs special handling via a
+ * translation function.
+ */
+/*
+ * Each macro below expands to one five-field initializer: a command value
+ * and its name as a string, a second command value and name string (the
+ * unprefixed symbol for _NONE and _FILTER, the same symbol again for _PASS
+ * and _CUSTOM), and the handler function. Every macro except _END supplies
+ * its own trailing comma, so table entries are written without separators.
+ */
+#define IOC_CMD_TRANSLATOR_NONE(ioc_cmd_sym) \
+ { (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym, \
+ ioc_cmd_sym, #ioc_cmd_sym, ict_pass },
+
+#define IOC_CMD_TRANSLATOR_PASS(ioc_cmd_sym) \
+ { (int)ioc_cmd_sym, #ioc_cmd_sym, \
+ ioc_cmd_sym, #ioc_cmd_sym, ict_pass },
+
+#define IOC_CMD_TRANSLATOR_FILTER(ioc_cmd_sym, ioct_handler) \
+ { (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym, \
+ ioc_cmd_sym, #ioc_cmd_sym, ioct_handler },
+
+#define IOC_CMD_TRANSLATOR_CUSTOM(ioc_cmd_sym, ioct_handler) \
+ { (int)ioc_cmd_sym, #ioc_cmd_sym, \
+ (int)ioc_cmd_sym, #ioc_cmd_sym, ioct_handler },
+
+/* All-zero entry that closes a translator table. */
+#define IOC_CMD_TRANSLATOR_END \
+ { 0, NULL, 0, NULL, NULL }
+
+/* All files will need to support these ioctls. */
+#define IOC_CMD_TRANSLATORS_ALL \
+ IOC_CMD_TRANSLATOR_NONE(FIONREAD) \
+ IOC_CMD_TRANSLATOR_NONE(FIONBIO)
+
+/* Any files supporting streams semantics will need these ioctls. */
+#define IOC_CMD_TRANSLATORS_STREAMS \
+ IOC_CMD_TRANSLATOR_NONE(TCXONC) \
+ IOC_CMD_TRANSLATOR_NONE(TCFLSH) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCEXCL) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCNXCL) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCSPGRP) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCSTI) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCSWINSZ) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCMBIS) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCMBIC) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCMSET) \
+ IOC_CMD_TRANSLATOR_NONE(TIOCSETD) \
+ IOC_CMD_TRANSLATOR_NONE(FIOASYNC) \
+ IOC_CMD_TRANSLATOR_NONE(FIOSETOWN) \
+ IOC_CMD_TRANSLATOR_NONE(TCSBRK) \
+ \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETS, ict_tcsets) \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETSW, ict_tcsets) \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETSF, ict_tcsets) \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETA, ict_tcseta) \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETAW, ict_tcseta) \
+ IOC_CMD_TRANSLATOR_FILTER(TCSETAF, ict_tcseta) \
+ \
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCSBRKP, ict_tcsbrkp)
+
+
+/*
+ * Translators for non-device files.
+ */
+static ioc_cmd_translator_t ioc_translators_file[] = {
+ IOC_CMD_TRANSLATORS_ALL
+ IOC_CMD_TRANSLATOR_END
+};
+
+static ioc_cmd_translator_t ioc_translators_fifo[] = {
+ IOC_CMD_TRANSLATORS_ALL
+ IOC_CMD_TRANSLATORS_STREAMS
+ IOC_CMD_TRANSLATOR_END
+};
+
+static ioc_cmd_translator_t ioc_translators_sock[] = {
+ IOC_CMD_TRANSLATORS_ALL
+
+ IOC_CMD_TRANSLATOR_NONE(FIOASYNC)
+ IOC_CMD_TRANSLATOR_NONE(FIOGETOWN)
+ IOC_CMD_TRANSLATOR_NONE(FIOSETOWN)
+ IOC_CMD_TRANSLATOR_NONE(SIOCSPGRP)
+ IOC_CMD_TRANSLATOR_NONE(SIOCGPGRP)
+
+ IOC_CMD_TRANSLATOR_FILTER(SIOCATMARK, ict_sioifoob)
+
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFFLAGS, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFFLAGS, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFDSTADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFDSTADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFBRDADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFBRDADDR, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFNETMASK, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFNETMASK, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMETRIC, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMETRIC, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMTU, ict_sioifreq)
+ IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMTU, ict_sioifreq)
+
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFCONF, ict_siocgifconf)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFHWADDR, ict_siocifhwaddr)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCSIFHWADDR, ict_siocifhwaddr)
+
+ IOC_CMD_TRANSLATOR_END
+};
+
+/*
+ * Translators for devices.
+ */
+static ioc_cmd_translator_t ioc_cmd_translators_ptm[] = {
+ IOC_CMD_TRANSLATORS_ALL
+ IOC_CMD_TRANSLATORS_STREAMS
+
+ IOC_CMD_TRANSLATOR_NONE(TIOCPKT)
+
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP, ict_tiocgpgrp)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSPTLCK, ict_sptlock)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPTN, ict_gptn)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_emulate)
+
+ IOC_CMD_TRANSLATOR_END
+};
+static ioc_dev_translator_t ioc_translator_ptm = {
+ LX_PTM_DRV, /* idt_driver */
+ 0, /* idt_major */
+ ioc_cmd_translators_ptm
+};
+
+static ioc_cmd_translator_t ioc_cmd_translators_pts[] = {
+ IOC_CMD_TRANSLATORS_ALL
+ IOC_CMD_TRANSLATORS_STREAMS
+
+ IOC_CMD_TRANSLATOR_NONE(TIOCGETD)
+ IOC_CMD_TRANSLATOR_NONE(TIOCGSID)
+ IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY)
+
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP, ict_tiocgpgrp)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_native)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA, ict_tcgeta)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY, ict_tiocsctty)
+
+ IOC_CMD_TRANSLATOR_END
+};
+static ioc_dev_translator_t ioc_translator_pts = {
+ "pts", /* idt_driver */
+ 0, /* idt_major */
+ ioc_cmd_translators_pts
+};
+
+static ioc_dev_translator_t ioc_translator_sy = {
+ "sy", /* idt_driver */
+ 0, /* idt_major */
+
+ /*
+ * /dev/tty (which is implemented via the "sy" driver) is basically
+ * a layered driver that passes on requests to the ctty for the
+ * current process. Since ctty's are currently always implemented
+ * via the pts driver, we should make sure to support all the
+ * same ioctls on the sy driver as we do on the pts driver.
+ */
+ ioc_cmd_translators_pts
+};
+
+static ioc_cmd_translator_t ioc_cmd_translators_zcons[] = {
+ IOC_CMD_TRANSLATORS_ALL
+ IOC_CMD_TRANSLATORS_STREAMS
+
+ IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY)
+
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_native)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA, ict_tcgeta)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY, ict_tiocsctty)
+
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCLINUX, ict_einval)
+
+ IOC_CMD_TRANSLATOR_END
+};
+static ioc_dev_translator_t ioc_translator_zcons = {
+ "zcons", /* idt_driver */
+ 0, /* idt_major */
+ ioc_cmd_translators_zcons
+};
+
+static ioc_cmd_translator_t ioc_cmd_translators_lx_audio[] = {
+ IOC_CMD_TRANSLATORS_ALL
+
+ /* /dev/dsp ioctls */
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_RESET,
+ ict_oss_sndctl_dsp_reset)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETFMTS,
+ ict_oss_sndctl_dsp_getfmts)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFMTS,
+ ict_oss_sndctl_dsp_setfmts)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_CHANNELS,
+ ict_oss_sndctl_dsp_channels)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_STEREO,
+ ict_oss_sndctl_dsp_channels)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SPEED,
+ ict_oss_sndctl_dsp_speed)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETBLKSIZE,
+ ict_oss_sndctl_dsp_getblksize)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SYNC,
+ ict_oss_sndctl_dsp_sync)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFRAGMENT,
+ ict_oss_sndctl_dsp_setfragment)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOSPACE,
+ ict_oss_sndctl_dsp_getspace)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETCAPS,
+ ict_oss_sndctl_dsp_getcaps)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETTRIGGER,
+ ict_oss_sndctl_dsp_settrigger)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOPTR,
+ ict_oss_sndctl_dsp_getoptr)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETISPACE,
+ ict_oss_sndctl_dsp_getspace)
+
+ /* /dev/mixer level ioctls */
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_VOLUME,
+ ict_oss_mixer_read_volume)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_PCM,
+ ict_oss_mixer_read_volume)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_MIC,
+ ict_oss_mixer_read_mic)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_IGAIN,
+ ict_oss_mixer_read_mic)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_VOLUME,
+ ict_oss_mixer_write_volume)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_PCM,
+ ict_oss_mixer_write_volume)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_MIC,
+ ict_oss_mixer_write_mic)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_IGAIN,
+ ict_oss_mixer_write_mic)
+
+ /* /dev/mixer capability ioctls */
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_STEREODEVS,
+ ict_oss_mixer_read_devs)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_DEVMASK,
+ ict_oss_mixer_read_devs)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECMASK,
+ ict_oss_mixer_read_devs)
+ IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECSRC,
+ ict_oss_mixer_read_devs)
+
+ IOC_CMD_TRANSLATOR_END
+};
+static ioc_dev_translator_t ioc_translator_lx_audio = {
+ "lx_audio", /* idt_driver */
+ 0, /* idt_major */
+ ioc_cmd_translators_lx_audio
+};
+
+/*
+ * An array of all the device translators.
+ */
+static ioc_dev_translator_t *ioc_translators_dev[] = {
+ &ioc_translator_lx_audio,
+ &ioc_translator_ptm,
+ &ioc_translator_pts,
+ &ioc_translator_sy,
+ &ioc_translator_zcons,
+ NULL
+};
+
+/*
+ * Translators for filesystems.
+ */
+static ioc_cmd_translator_t ioc_cmd_translators_autofs[] = {
+ IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_READY)
+ IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_FAIL)
+ IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_CATATONIC)
+ IOC_CMD_TRANSLATOR_END
+};
+
+static ioc_fs_translator_t ioc_translator_autofs = {
+ LX_AUTOFS_NAME, /* ift_filesystem */
+ ioc_cmd_translators_autofs
+};
+
+/*
+ * An array of all the filesystem translators.
+ */
+static ioc_fs_translator_t *ioc_translators_fs[] = {
+ &ioc_translator_autofs,
+ NULL
+};
+
+/*
+ * Ioctl error translator definitions.
+ */
+/*
+ * IOC_ERRNO_TRANSLATOR(sym, err) expands to one initializer holding the
+ * value of LX_<sym>, the string "LX_<sym>", and the errno value err. The
+ * macro supplies its own trailing comma.
+ */
+#define IOC_ERRNO_TRANSLATOR(iet_cmd_sym, iet_errno) \
+ { (int)LX_##iet_cmd_sym, "LX_" #iet_cmd_sym, iet_errno },
+
+/* All-zero entry that closes the table. */
+#define IOC_ERRNO_TRANSLATOR_END \
+ { 0, NULL, 0 }
+
+/*
+ * Terminal-related Linux ioctls, each paired with ENOTTY.
+ * NOTE(review): presumably the errno reported when one of these ioctls is
+ * issued on a file with no translator for it; confirm against the lookup
+ * code, which is not visible here.
+ */
+static ioc_errno_translator_t ioc_translators_errno[] = {
+ IOC_ERRNO_TRANSLATOR(TCGETS, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TCSETS, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TCSBRK, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TCXONC, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TCFLSH, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TIOCGPGRP, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TIOCSPGRP, ENOTTY)
+ IOC_ERRNO_TRANSLATOR(TIOCGWINSZ, ENOTTY)
+ IOC_ERRNO_TRANSLATOR_END
+};
+
+/*
+ * Emulate vhangup(): callers whose effective uid is not 0 get -EPERM;
+ * otherwise the native vhangup() is invoked and 0 is returned.
+ */
+int
+lx_vhangup(void)
+{
+	if (geteuid() == 0) {
+		vhangup();
+		return (0);
+	}
+
+	return (-EPERM);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/iovec.c b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
new file mode 100644
index 0000000000..49af88d22e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
@@ -0,0 +1,241 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/uio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <alloca.h>
+#include <string.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+
+/*
+ * Return 1 if fd refers to a directory and 0 otherwise.  A descriptor that
+ * cannot be stat'ed is reported as "not a directory".
+ */
+static int
+lx_is_directory(int fd)
+{
+	struct stat64 st;
+
+	if (fstat64(fd, &st) < 0)
+		return (0);
+
+	return (S_ISDIR(st.st_mode) ? 1 : 0);
+}
+
+/*
+ * Emulate read(): p1 is the descriptor, p2 the buffer address and p3 the
+ * byte count.  Directories are rejected with -EISDIR before any read is
+ * attempted.  Returns the byte count from read(), or the negated errno.
+ */
+int
+lx_read(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	ssize_t nread;
+
+	if (lx_is_directory(fd))
+		return (-EISDIR);
+
+	nread = read(fd, (void *)p2, (size_t)p3);
+
+	return ((nread < 0) ? -errno : nread);
+}
+
+/*
+ * Emulate pread64(): p1 is the descriptor, p2 the buffer address, p3 the
+ * byte count, and p4/p5 the low and high halves of the file offset.
+ * Directories are rejected with -EISDIR.  Returns the byte count from
+ * pread64(), or the negated errno.
+ */
+int
+lx_pread64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
+{
+	int fd = (int)p1;
+	off64_t offset;
+	ssize_t nread;
+
+	if (lx_is_directory(fd))
+		return (-EISDIR);
+
+	/* Reassemble the offset from its two halves. */
+	offset = (off64_t)LX_32TO64(p4, p5);
+	nread = pread64(fd, (void *)p2, (size_t)p3, offset);
+
+	return ((nread < 0) ? -errno : nread);
+}
+
+/*
+ * On Linux, the pwrite(2) system call behaves identically to Solaris except
+ * in the case of the file being opened with O_APPEND. In that case Linux's
+ * pwrite(2) ignores the offset parameter and instead appends the data to the
+ * file without modifying the current seek pointer.
+ */
+int
+lx_pwrite64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+	uintptr_t p5)
+{
+	int fd = (int)p1;
+	void *buf = (void *)p2;
+	size_t nbyte = (size_t)p3;
+	off64_t offset;
+	ssize_t nwritten;
+	int flags;
+
+	flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0)
+		return (-errno);
+
+	if (flags & O_APPEND) {
+		struct stat64 sb;
+
+		/*
+		 * Append mode: ignore the caller's offset and write at the
+		 * current end of file instead.
+		 */
+		if (fstat64(fd, &sb) != 0)
+			return (-errno);
+		offset = sb.st_size;
+	} else {
+		/* p4 carries the low half of the offset, p5 the high half. */
+		offset = (off64_t)LX_32TO64(p4, p5);
+	}
+
+	nwritten = pwrite64(fd, buf, nbyte, offset);
+	if (nwritten < 0)
+		return (-errno);
+
+	return (nwritten);
+}
+
+/*
+ * Implementation of Linux readv() and writev() system calls.
+ *
+ * The Linux system calls differ from the Solaris system calls in a few key
+ * areas:
+ *
+ * - On Solaris, the maximum number of I/O vectors that can be passed to readv()
+ * or writev() is IOV_MAX (16). Linux has a much larger restriction (1024).
+ *
+ * - Passing 0 as a vector count is an error on Solaris, but on Linux results
+ * in a return value of 0. Even though the man page says the opposite.
+ *
+ * - If the Nth vector results in an error, Solaris will return an error code
+ * for the entire operation. Linux only returns an error if there has been
+ * no data transferred yet. Otherwise, it returns the number of bytes
+ * transferred up until that point.
+ *
+ * In order to accommodate these differences, we implement these functions as a
+ * series of ordinary read() or write() calls.
+ */
+
+#define LX_IOV_MAX 1024 /* Also called MAX_IOVEC */
+
+/*
+ * Copy count iovec entries from the user address iovp into the caller's
+ * buffer iov and validate the lengths.
+ *
+ * Returns 0 on success, the negated errno if the copy fails, or -EINVAL if
+ * any single length, or the running sum of the lengths, does not fit in a
+ * non-negative ssize_t.
+ */
+static int
+lx_iovec_copy_and_check(const struct iovec *iovp, struct iovec *iov, int count)
+{
+	int i;
+	size_t total = 0;
+
+	if (uucopy(iovp, (void *)iov, count * sizeof (struct iovec)) != 0)
+		return (-errno);
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * Check each length before adding it, and accumulate in an
+		 * unsigned type, so that neither an unsigned iov_len nor a
+		 * signed overflow of the sum can slip past the test.  Both
+		 * operands are known to be at most the largest ssize_t when
+		 * they are added, so the unsigned sum cannot wrap.
+		 */
+		if ((ssize_t)iov[i].iov_len < 0)
+			return (-EINVAL);
+
+		total += (size_t)iov[i].iov_len;
+		if ((ssize_t)total < 0)
+			return (-EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Emulate Linux readv() as a sequence of read() calls, one per vector.
+ *
+ * p1 is the descriptor, p2 the user address of the iovec array and p3 the
+ * number of vectors.  Returns the total number of bytes read, or:
+ *   0        if the vector count is zero
+ *   -EINVAL  if the count is negative or larger than LX_IOV_MAX, or the
+ *            vector is rejected by lx_iovec_copy_and_check()
+ *   -EISDIR  if fd refers to a directory
+ *   -ENOMEM  if SAFE_ALLOCA() cannot provide room for a copy of the vector
+ *   -errno   if a read() fails before any data has been transferred
+ *
+ * A failure after some data has been transferred returns the byte count so
+ * far, and a short read (including end of file) ends the transfer.
+ */
+int
+lx_readv(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	const struct iovec *iovp = (const struct iovec *)p2;
+	int count = (int)p3;
+	struct iovec *iov;
+	ssize_t total = 0, ret;
+	int i;
+
+	if (count == 0)
+		return (0);
+
+	if (count < 0 || count > LX_IOV_MAX)
+		return (-EINVAL);
+
+	if (lx_is_directory(fd))
+		return (-EISDIR);
+
+	iov = SAFE_ALLOCA(count * sizeof (struct iovec));
+	if (iov == NULL)
+		return (-ENOMEM);
+	if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0)
+		return (ret);
+
+	for (i = 0; i < count; i++) {
+		ret = read(fd, iov[i].iov_base, iov[i].iov_len);
+
+		if (ret < 0) {
+			if (total > 0)
+				return (total);
+			return (-errno);
+		}
+
+		total += ret;
+
+		/*
+		 * A short read means the source has nothing more to give
+		 * right now.  A single readv() would return at this point;
+		 * issuing another read() could block for data the caller
+		 * never asked us to wait for.
+		 */
+		if ((size_t)ret < (size_t)iov[i].iov_len)
+			break;
+	}
+
+	return (total);
+}
+
+/*
+ * Emulate Linux writev() as a sequence of write() calls, one per vector.
+ *
+ * p1 is the descriptor, p2 the user address of the iovec array and p3 the
+ * number of vectors.  Returns the total number of bytes written, or:
+ *   0        if the vector count is zero
+ *   -EINVAL  if the count is negative or larger than LX_IOV_MAX, or the
+ *            vector is rejected by lx_iovec_copy_and_check()
+ *   -ENOMEM  if SAFE_ALLOCA() cannot provide room for a copy of the vector
+ *   -errno   if a write() fails before any data has been transferred
+ *
+ * A failure after some data has been transferred returns the byte count so
+ * far, and a short write ends the transfer.
+ */
+int
+lx_writev(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	const struct iovec *iovp = (const struct iovec *)p2;
+	int count = (int)p3;
+	struct iovec *iov;
+	ssize_t total = 0, ret;
+	int i;
+
+	if (count == 0)
+		return (0);
+
+	if (count < 0 || count > LX_IOV_MAX)
+		return (-EINVAL);
+
+	iov = SAFE_ALLOCA(count * sizeof (struct iovec));
+	if (iov == NULL)
+		return (-ENOMEM);
+	if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0)
+		return (ret);
+
+	for (i = 0; i < count; i++) {
+		ret = write(fd, iov[i].iov_base, iov[i].iov_len);
+
+		if (ret < 0) {
+			if (total > 0)
+				return (total);
+			return (-errno);
+		}
+
+		total += ret;
+
+		/*
+		 * A short write means the destination accepted only part of
+		 * this vector.  Report what was written rather than pushing
+		 * later vectors out behind the gap.
+		 */
+		if ((size_t)ret < (size_t)iov[i].iov_len)
+			break;
+	}
+
+	return (total);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
new file mode 100644
index 0000000000..41d69c9a09
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -0,0 +1,1237 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#include <sys/inttypes.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/systm.h>
+#include <sys/auxv.h>
+#include <sys/frame.h>
+#include <sys/brand.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <errno.h>
+#include <syslog.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <synch.h>
+#include <libelf.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <utime.h>
+#include <dirent.h>
+#include <ucontext.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_statfs.h>
+#include <sys/lx_ioctl.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/lx_thunk_server.h>
+
+/*
+ * Map solaris errno to the linux equivalent.
+ *
+ * The table is indexed by the Solaris errno value and yields the Linux
+ * errno value; the trailing comments give the index of the last entry on
+ * that row.  Many indexes yield 22 (Linux EINVAL).
+ *
+ * Two entries differ from the table as first written:
+ *   index 46 (Solaris ENOLCK) yields 37 (Linux ENOLCK); it used to yield
+ *            47, which is Linux EL3RST.
+ *   index 60 (Solaris ENOSTR) yields 60 (Linux ENOSTR); it used to yield
+ *            61, which is Linux ENODATA.
+ */
+static int stol_errno[] = {
+	0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
+	10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+	20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+	30,  31,  32,  33,  34,  42,  43,  44,  45,  46,
+	47,  48,  49,  50,  51,  35,  37,  22,  38,  22,	/* 49 */
+	52,  53,  54,  55,  56,  57,  58,  59,  22,  22,
+	60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
+	70,  71,  22,  22,  72,  22,  22,  74,  36,  75,
+	76,  77,  78,  79,  80,  81,  82,  83,  84,  38,
+	40,  85,  86,  39,  87,  88,  89,  90,  91,  92,	/* 99 */
+	22,  22,  22,  22,  22,  22,  22,  22,  22,  22,
+	22,  22,  22,  22,  22,  22,  22,  22,  22,  22,
+	93,  94,  95,  96,  97,  98,  99,  100, 101, 102,
+	103, 104, 105, 106, 107, 22,  22,  22,  22,  22,
+	22,  22,  22,  108, 109, 110, 111, 112, 113, 114,	/* 149 */
+	115, 116
+};
+
+char lx_release[128];
+
+/*
+ * Map a linux locale ending string to the solaris equivalent.
+ */
+struct lx_locale_ending {
+ const char *linux_end; /* linux ending string */
+ const char *solaris_end; /* to transform with this string */
+ int le_size; /* linux ending string length */
+ int se_size; /* solaris ending string length */
+};
+
+#define l2s_locale(lname, sname) \
+ {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1}
+
+#define MAXLOCALENAMELEN 30
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+/*
+ * This flag is part of the registration with the in-kernel brand module. It's
+ * used in lx_handler() to determine if we should go back into the kernel after
+ * a system call in case the kernel needs to perform some post-syscall work
+ * like tracing for example.
+ */
+int lx_traceflag;
+
+/*
+ * Reason codes for system calls that have no handler. lx_emulate() uses the
+ * value stored in an entry's sy_flags as an index into nosys_msgs[] when the
+ * entry's sy_callc is NULL, so these values must match the order of the
+ * strings in nosys_msgs[] (index 0 is the message for a code of 0).
+ */
+#define NOSYS_NULL 1
+#define NOSYS_NO_EQUIV 2
+#define NOSYS_KERNEL 3
+#define NOSYS_UNDOC 4
+#define NOSYS_OBSOLETE 5
+
+/*
+ * SYS_PASSTHRU denotes a system call we can just call on behalf of the
+ * branded process without having to translate the arguments.
+ *
+ * The restriction on this is that the call in question MUST return -1 to
+ * denote an error.
+ *
+ * NOTE(review): SYS_PASSTHRU has the same value (5) as NOSYS_OBSOLETE.
+ * lx_emulate() only tests for SYS_PASSTHRU on entries that have a handler
+ * and only indexes nosys_msgs[] on entries that do not, so the two do not
+ * collide there; confirm no other user of sy_flags depends on telling them
+ * apart.
+ */
+#define SYS_PASSTHRU 5
+
+/* Explanations printed for unimplemented system calls, indexed by code. */
+static char *nosys_msgs[] = {
+ "Either not yet done, or we haven't come up with an excuse",
+ "No such Linux system call",
+ "No equivalent Solaris functionality",
+ "Reads/modifies Linux kernel state",
+ "Undocumented and/or rarely used system call",
+ "Unsupported, obsolete system call"
+};
+
+/*
+ * One entry per Linux system call number, as consumed by lx_emulate().
+ */
+struct lx_sysent {
+ char *sy_name; /* name used in debug and error messages */
+ int (*sy_callc)(); /* handler, called with six args; NULL if none */
+ char sy_flags; /* SYS_PASSTHRU, EBP_HAS_ARG6 bit, or NOSYS_* code */
+ char sy_narg; /* argument count; 6 selects the in-memory form */
+};
+
+static struct lx_sysent sysents[LX_NSYSCALLS + 1];
+
+static uintptr_t stack_bottom;
+
+int lx_install = 0; /* install mode enabled if non-zero */
+boolean_t lx_is_rpm = B_FALSE;
+int lx_rpm_delay = 1;
+int lx_strict = 0; /* "strict" mode enabled if non-zero */
+int lx_verbose = 0; /* verbose mode enabled if non-zero */
+int lx_debug_enabled = 0; /* debugging output enabled if non-zero */
+
+pid_t zoneinit_pid; /* zone init PID */
+
+thread_key_t lx_tsd_key;
+
+/*
+ * Copy n bytes from src to dst with bcopy() and return 0. No checking of
+ * either address is performed, so a bad pointer faults instead of producing
+ * an error return.
+ */
+int
+uucopy_unsafe(const void *src, void *dst, size_t n)
+{
+ bcopy(src, dst, n);
+ return (0);
+}
+
+/*
+ * Copy the string at src into dst with strncpy() semantics: at most n bytes
+ * are written, and dst is not NUL-terminated when the source string is n
+ * bytes or longer.  No checking of either address is performed.  Always
+ * returns 0.
+ */
+int
+uucopystr_unsafe(const void *src, void *dst, size_t n)
+{
+	/* strncpy() takes the destination first, then the source. */
+	(void) strncpy((char *)dst, (const char *)src, n);
+	return (0);
+}
+
+/*
+ * Format a printf-style message into a fixed-size buffer and deliver it.
+ *
+ * fd is the descriptor the text is written to.  When fd is 2 the message is
+ * also sent to syslog at LOG_ERR, and is only written to the descriptor if
+ * lx_verbose is set.  When lx_debug_enabled is set the message is also sent
+ * to the debug output.  Text that does not fit in LX_MSG_MAXLEN bytes is
+ * truncated; nothing is delivered if formatting fails.
+ */
+static void
+i_lx_msg(int fd, char *msg, va_list ap)
+{
+	int i;
+	char buf[LX_MSG_MAXLEN];
+
+	/* LINTED [possible expansion issues] */
+	i = vsnprintf(buf, sizeof (buf), msg, ap);
+	buf[LX_MSG_MAXLEN - 1] = '\0';
+	if (i == -1)
+		return;
+
+	/*
+	 * If debugging is enabled, send this message to debug output.  The
+	 * text is already formatted, so it is passed as an argument rather
+	 * than as the format string; otherwise any '%' left in it would be
+	 * interpreted a second time.
+	 */
+	if (lx_debug_enabled != 0)
+		lx_debug("%s", buf);
+
+	/*
+	 * If we are trying to print to stderr, we also want to send the
+	 * message to syslog.
+	 */
+	if (fd == 2) {
+		syslog(LOG_ERR, "%s", buf);
+
+		/*
+		 * We let the user choose whether or not to see these
+		 * messages on the console.
+		 */
+		if (lx_verbose == 0)
+			return;
+	}
+
+	/* we retry in case of EINTR */
+	do {
+		i = write(fd, buf, strlen(buf));
+	} while ((i == -1) && (errno == EINTR));
+}
+
+/*
+ * Report a printf-style error message by passing it to i_lx_msg() with the
+ * stderr descriptor. msg must not be NULL.
+ */
+/*PRINTFLIKE1*/
+void
+lx_err(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*
+ * This is just a non-zero exit value which also isn't one that would allow
+ * us to easily detect if a branded process exited because of a recursive
+ * fatal error.
+ */
+#define LX_ERR_FATAL 42
+
+/*
+ * Our own custom version of abort(), this routine will be used in place
+ * of the one located in libc. The primary difference is that this version
+ * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the
+ * SIGABRT sent causes us to dump core and is not caught by a user program.
+ *
+ * A second call made while an abort is already in progress exits with
+ * LX_ERR_FATAL instead.  This function does not return.
+ */
+void
+abort(void)
+{
+	static int aborting = 0;
+
+	struct sigaction sa;
+	sigset_t sigmask;
+
+	/* watch out for recursive calls to this function */
+	if (aborting != 0)
+		exit(LX_ERR_FATAL);
+
+	aborting = 1;
+
+	/*
+	 * Block all signals here to avoid taking any signals while exiting
+	 * in an effort to avoid any strange user interaction with our death.
+	 */
+	(void) sigfillset(&sigmask);
+	(void) sigprocmask(SIG_BLOCK, &sigmask, NULL);
+
+	/*
+	 * Our own version of abort(3C) that we know will never call
+	 * a user-installed SIGABRT handler first.  We WANT to die.
+	 *
+	 * Do this by resetting the handler to SIG_DFL, and releasing any
+	 * held SIGABRTs.
+	 *
+	 * If no SIGABRTs are pending, send ourselves one.
+	 *
+	 * The while loop is a bit of overkill, but abort(3C) does it to
+	 * assure it never returns so we will as well.
+	 */
+	(void) sigemptyset(&sa.sa_mask);
+	/*
+	 * sa_flags does not include SA_SIGINFO, so the disposition is taken
+	 * from sa_handler; SIG_DFL is a value for that member, not for
+	 * sa_sigaction.
+	 */
+	sa.sa_handler = SIG_DFL;
+	sa.sa_flags = 0;
+
+	for (;;) {
+		(void) sigaction(SIGABRT, &sa, NULL);
+		(void) sigrelse(SIGABRT);
+		(void) thr_kill(thr_self(), SIGABRT);
+	}
+
+	/*NOTREACHED*/
+}
+
+/*
+ * Emit a printf-style informational message by passing it to i_lx_msg()
+ * with the stdout descriptor. msg must not be NULL.
+ */
+/*PRINTFLIKE1*/
+void
+lx_msg(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+ va_start(ap, msg);
+ i_lx_msg(STDOUT_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*
+ * Report a printf-style error message through i_lx_msg() with the stderr
+ * descriptor, then call abort(). msg must not be NULL.
+ */
+/*PRINTFLIKE1*/
+void
+lx_err_fatal(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+ abort();
+}
+
+/*
+ * Decide whether sz bytes can be taken from the stack with alloca().
+ * Returns 1 when the request fits and 0 when it does not.
+ */
+int
+lx_check_alloca(size_t sz)
+{
+	/* The address of our own argument stands in for the stack pointer. */
+	uintptr_t cur = (uintptr_t)&sz;
+	uintptr_t lowest = cur - sz;
+
+	/* The subtraction wrapped, or the request was for zero bytes. */
+	if (lowest >= cur)
+		return (0);
+
+	/* The allocation would reach below the recorded stack bottom. */
+	if (lowest < stack_bottom)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Report use of an unsupported feature: the printf-style message goes to
+ * i_lx_msg() with the stderr descriptor, and if lx_strict is set the
+ * process then sends itself SIGSYS. msg must not be NULL.
+ */
+/*PRINTFLIKE1*/
+void
+lx_unsupported(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ /* send the msg to the error stream */
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+
+ /*
+ * If the user doesn't trust the application to responsibly
+ * handle ENOTSUP, we kill the application.
+ */
+ if (lx_strict)
+ (void) kill(getpid(), SIGSYS);
+}
+
+extern void lx_runexe(void *argv, int32_t entry);
+int lx_init(int argc, char *argv[], char *envp[]);
+
+/*
+ * Gather the six argument slots for the system call entry s from the saved
+ * registers rp into args[].  Returns 0 on success, or a negated Linux errno
+ * if the in-memory argument block cannot be copied.
+ */
+static int
+lx_emulate_args(lx_regs_t *rp, struct lx_sysent *s, uintptr_t *args)
+{
+	/*
+	 * Usual case: the arguments are in registers.  That covers every
+	 * call that does not take six arguments, plus the six-argument calls
+	 * flagged EBP_HAS_ARG6, whose sixth argument is in %ebp.
+	 */
+	if (s->sy_narg != 6 || (s->sy_flags & EBP_HAS_ARG6)) {
+		args[0] = rp->lxr_ebx;
+		args[1] = rp->lxr_ecx;
+		args[2] = rp->lxr_edx;
+		args[3] = rp->lxr_esi;
+		args[4] = rp->lxr_edi;
+		args[5] = rp->lxr_ebp;
+		return (0);
+	}
+
+	/*
+	 * Otherwise libc has stashed all six arguments in memory at the
+	 * address contained in %ebx.
+	 */
+	if (uucopy((void *)rp->lxr_ebx, args, 6 * sizeof (args[0])) != 0)
+		return (-stol_errno[errno]);
+
+	return (0);
+}
+
+/*
+ * Dispatch one emulated Linux system call described by the register set rp.
+ *
+ * The system call number is read from lxr_eax and used to index sysents[];
+ * the arguments are gathered by lx_emulate_args(). The handler's return
+ * value, with Solaris error numbers translated through stol_errno[], is
+ * written back to lxr_eax. When lx_traceflag is set, the kernel brand module
+ * is called before (B_SYSENTRY) and after (B_SYSRETURN) the handler runs.
+ */
+void
+lx_emulate(lx_regs_t *rp)
+{
+ struct lx_sysent *s;
+ uintptr_t args[6];
+ uintptr_t gs = rp->lxr_gs & 0xffff; /* %gs is only 16 bits */
+ int syscall_num, ret;
+
+ syscall_num = rp->lxr_eax;
+
+ /*
+ * lx_brand_int80_callback() ensures that the syscall_num is sane;
+ * Use it as is.
+ */
+ assert(syscall_num >= 0);
+ assert(syscall_num < (sizeof (sysents) / sizeof (sysents[0])));
+ s = &sysents[syscall_num];
+
+ if ((ret = lx_emulate_args(rp, s, args)) != 0)
+ goto out;
+
+ /*
+ * If the tracing flag is enabled we call into the brand-specific
+ * kernel module to handle the tracing activity (DTrace or ptrace).
+ * It would be tempting to perform DTrace activity in the brand
+ * module's syscall trap callback, rather than having to return
+ * to the kernel here, but -- since argument encoding can vary
+ * according to the specific system call -- that would require
+ * replicating the knowledge of argument decoding in the kernel
+ * module as well as here in the brand library.
+ */
+ if (lx_traceflag != 0) {
+ /*
+ * Part of the ptrace "interface" is that on syscall entry
+ * %eax should be reported as -ENOSYS while the orig_eax
+ * field of the user structure needs to contain the actual
+ * system call number. If we end up stopping here, the
+ * controlling process will dig the lx_regs_t structure out of
+ * our stack.
+ */
+ rp->lxr_orig_eax = syscall_num;
+ rp->lxr_eax = -stol_errno[ENOSYS];
+
+ (void) syscall(SYS_brand, B_SYSENTRY, syscall_num, args);
+
+ /*
+ * The external tracer may have modified the arguments to this
+ * system call. Refresh the argument cache to account for this.
+ */
+ if ((ret = lx_emulate_args(rp, s, args)) != 0)
+ goto out;
+ }
+
+ /* No handler: sy_flags holds the reason code that indexes nosys_msgs. */
+ if (s->sy_callc == NULL) {
+ lx_unsupported(gettext("unimplemented syscall #%d (%s): %s\n"),
+ syscall_num, s->sy_name, nosys_msgs[(int)s->sy_flags]);
+ ret = -stol_errno[ENOTSUP];
+ goto out;
+ }
+
+ if (lx_debug_enabled != 0) {
+ const char *fmt = NULL;
+
+ switch (s->sy_narg) {
+ case 0:
+ fmt = "calling %s()";
+ break;
+ case 1:
+ fmt = "calling %s(0x%p)";
+ break;
+ case 2:
+ fmt = "calling %s(0x%p, 0x%p)";
+ break;
+ case 3:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p)";
+ break;
+ case 4:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ case 5:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ case 6:
+ fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)";
+ break;
+ }
+
+ /*
+ * NOTE(review): fmt is still NULL here if sy_narg is outside
+ * 0..6; confirm the table never holds such a value.
+ */
+ lx_debug(fmt, s->sy_name, args[0], args[1], args[2], args[3],
+ args[4], args[5]);
+ }
+
+ if (gs != LWPGS_SEL) {
+ lx_tsd_t *lx_tsd;
+
+ /*
+ * While a %gs of 0 is technically legal (as long as the
+ * application never dereferences memory using %gs), Solaris
+ * has its own ideas as to how a zero %gs should be handled in
+ * _update_sregs(), such that any 32-bit user process with a
+ * %gs of zero running on a system with a 64-bit kernel will
+ * have its %gs hidden base register stomped on on return from
+ * a system call, leaving an incorrect base address in place
+ * until the next time %gs is actually reloaded (forcing a
+ * reload of the base address from the appropriate descriptor
+ * table.)
+ *
+ * Of course the kernel will once again stomp on THAT base
+ * address when returning from a system call, resulting in an
+ * application segmentation fault.
+ *
+ * To avoid this situation, disallow a save of a zero %gs
+ * here in order to try and capture any Linux process that
+ * attempts to make a syscall with a zero %gs installed.
+ */
+ assert(gs != 0);
+
+ if ((ret = thr_getspecific(lx_tsd_key,
+ (void **)&lx_tsd)) != 0)
+ lx_err_fatal(gettext(
+ "%s: unable to read thread-specific data: %s"),
+ "lx_emulate", strerror(ret));
+
+ assert(lx_tsd != 0);
+
+ lx_tsd->lxtsd_gs = gs;
+
+ lx_debug("lx_emulate(): gsp 0x%p, saved gs: 0x%x", lx_tsd, gs);
+ }
+
+ if (s->sy_flags == SYS_PASSTHRU)
+ lx_debug("\tCalling Solaris %s()", s->sy_name);
+
+ ret = s->sy_callc(args[0], args[1], args[2], args[3], args[4], args[5]);
+
+ if (ret > -65536 && ret < 65536)
+ lx_debug("\t= %d", ret);
+ else
+ lx_debug("\t= 0x%x", ret);
+
+ /* Pass-through calls report failure as -1 with the cause in errno. */
+ if ((s->sy_flags == SYS_PASSTHRU) && (ret == -1)) {
+ ret = -stol_errno[errno];
+ } else {
+ /*
+ * If the return value is between -4096 and 0 we assume it's an
+ * error, so we translate the Solaris error number into the
+ * Linux equivalent.
+ */
+ if (ret < 0 && ret > -4096) {
+ if (-ret >=
+ sizeof (stol_errno) / sizeof (stol_errno[0])) {
+ lx_debug("Invalid return value from emulated "
+ "syscall %d (%s): %d\n",
+ syscall_num, s->sy_name, ret);
+ assert(0);
+ }
+
+ ret = -stol_errno[-ret];
+ }
+ }
+
+out:
+ /*
+ * %eax holds the return code from the system call.
+ */
+ rp->lxr_eax = ret;
+
+ /*
+ * If the trace flag is set, bounce into the kernel to let it do
+ * any necessary tracing (DTrace or ptrace).
+ */
+ if (lx_traceflag != 0) {
+ rp->lxr_orig_eax = syscall_num;
+ (void) syscall(SYS_brand, B_SYSRETURN, syscall_num, ret);
+ }
+}
+
+/*
+ * Tear down a stdio stream without giving up its file descriptor: the
+ * descriptor is duplicated, the stream is closed, and the duplicate is then
+ * moved back onto the original descriptor number.
+ */
+static void
+lx_close_fh(FILE *file)
+{
+	int orig_fd, saved_fd;
+
+	/* Nothing to do without a stream, a descriptor, or a spare copy. */
+	if (file == NULL || (orig_fd = fileno(file)) < 0 ||
+	    (saved_fd = dup(orig_fd)) == -1)
+		return;
+
+	(void) fclose(file);
+	(void) dup2(saved_fd, orig_fd);
+	(void) close(saved_fd);
+}
+
+
+extern int set_l10n_alternate_root(char *path);
+
+/*
+ * Initialize the lx brand emulation library for this process and hand
+ * control to the Linux executable.
+ *
+ * In order, this routine:
+ *   - looks up the native PID of the zone's init process;
+ *   - if we are init and TERM is unset, sets TERM and re-execs;
+ *   - detaches libc stdio from the process's file descriptors;
+ *   - reads the LX_RELEASE, LX_STRICT, LX_INSTALL and LX_VERBOSE
+ *     environment variables;
+ *   - registers the syscall handler tables with the lx brand module and
+ *     fetches the ELF data for the Linux executable from the kernel;
+ *   - initializes the ioctl, stat and statfs translators;
+ *   - rewrites the relevant aux vector entries with the kernel-supplied
+ *     ELF data;
+ *   - initializes the thunk server, signal handling and thread-specific
+ *     data;
+ *   - saves an exit context and calls lx_runexe().
+ *
+ * lx_runexe() does not return.  Execution only continues past the
+ * getcontext() call a second time when the saved context is resumed at
+ * exit, at which point lxtsd_exit selects between thr_exit() and exit().
+ *
+ * Any failure during initialization is fatal (lx_err_fatal()).
+ */
+/*ARGSUSED*/
+int
+lx_init(int argc, char *argv[], char *envp[])
+{
+	char		*r;
+	auxv_t		*ap;
+	int		*p, err;
+	lx_elf_data_t	edp;
+	lx_brand_registration_t reg;
+	static lx_tsd_t	lx_tsd;
+
+	/*
+	 * Look up the PID that serves as init for this zone.
+	 * lx_lpid_to_spid() reports failure as a negated errno value, so it
+	 * must be negated again before being handed to strerror().
+	 */
+	if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
+		lx_err_fatal(gettext(
+		    "Unable to find PID for zone init process: %s"),
+		    strerror(-err));
+
+	/*
+	 * Ubuntu init will fail if its TERM environment variable is not set
+	 * so if we are running init, and TERM is not set, we set term and
+	 * reexec so that the new environment variable is propagated to the
+	 * linux application stack.
+	 */
+	if ((getpid() == zoneinit_pid) && (getenv("TERM") == NULL)) {
+		if (setenv("TERM", "vt100", 1) < 0 || execv(argv[0], argv) < 0)
+			lx_err_fatal(gettext("failed to set TERM"));
+	}
+
+	stack_bottom = 2 * sysconf(_SC_PAGESIZE);
+
+	/*
+	 * We need to shutdown all libc stdio.  libc stdio normally goes to
+	 * file descriptors, but since we're actually part of a linux
+	 * process we don't own these file descriptors and we can't make
+	 * any assumptions about their state.
+	 */
+	lx_close_fh(stdin);
+	lx_close_fh(stdout);
+	lx_close_fh(stderr);
+
+	lx_debug_init();
+
+	/*
+	 * An LX_RELEASE setting in the environment overrides the release
+	 * string that would otherwise be derived from the kernel version.
+	 */
+	r = getenv("LX_RELEASE");
+	if (r == NULL) {
+		if (lx_get_kern_version() == LX_KERN_2_6)
+			(void) strlcpy(lx_release, LX_UNAME_RELEASE_2_6,
+			    sizeof (lx_release));
+		else
+			(void) strlcpy(lx_release, LX_UNAME_RELEASE_2_4,
+			    sizeof (lx_release));
+	} else {
+		/* Bound the copy by the real buffer size, as above. */
+		(void) strlcpy(lx_release, r, sizeof (lx_release));
+	}
+
+	lx_debug("lx_release: %s\n", lx_release);
+
+	/*
+	 * Should we kill an application that attempts an unimplemented
+	 * system call?
+	 */
+	if (getenv("LX_STRICT") != NULL) {
+		lx_strict = 1;
+		lx_debug("STRICT mode enabled.\n");
+	}
+
+	/*
+	 * Are we in install mode?
+	 */
+	if (getenv("LX_INSTALL") != NULL) {
+		lx_install = 1;
+		lx_debug("INSTALL mode enabled.\n");
+	}
+
+	/*
+	 * Should we attempt to send messages to the screen?
+	 */
+	if (getenv("LX_VERBOSE") != NULL) {
+		lx_verbose = 1;
+		lx_debug("VERBOSE mode enabled.\n");
+	}
+
+	lx_debug("executing linux process: %s", argv[0]);
+	lx_debug("branding myself and setting handler to 0x%p",
+	    (void *)lx_handler_table);
+
+	/*
+	 * The version of rpm that ships with CentOS/RHEL 3.x has a race
+	 * condition in it.  If it creates a child process to run a
+	 * post-install script, and that child process completes too
+	 * quickly, it will disappear before the parent notices.  This
+	 * causes the parent to hang forever waiting for the already dead
+	 * child to die.  I'm sure there's a Lazarus joke buried in here
+	 * somewhere.
+	 *
+	 * Anyway, as a workaround, we make every child of an 'rpm' process
+	 * sleep for 1 second, giving the parent a chance to enter its
+	 * wait-for-the-child-to-die loop.  That may be the hackiest trick
+	 * in all of our Linux emulation code - and that's saying
+	 * something.
+	 */
+	if (strcmp("rpm", basename(argv[0])) == 0)
+		lx_is_rpm = B_TRUE;
+
+	reg.lxbr_version = LX_VERSION;
+	reg.lxbr_handler = (void *)&lx_handler_table;
+	reg.lxbr_tracehandler = (void *)&lx_handler_trace_table;
+	reg.lxbr_traceflag = &lx_traceflag;
+
+	/*
+	 * Register the address of the user-space handler with the lx
+	 * brand module.
+	 */
+	if (syscall(SYS_brand, B_REGISTER, &reg))
+		lx_err_fatal(gettext("failed to brand the process"));
+
+	/*
+	 * Download data about the lx executable from the kernel.
+	 */
+	if (syscall(SYS_brand, B_ELFDATA, (void *)&edp))
+		lx_err_fatal(gettext(
+		    "failed to get required ELF data from the kernel"));
+
+	if (lx_ioctl_init() != 0)
+		lx_err_fatal(gettext("failed to setup the %s translator"),
+		    "ioctl");
+
+	if (lx_stat_init() != 0)
+		lx_err_fatal(gettext("failed to setup the %s translator"),
+		    "stat");
+
+	if (lx_statfs_init() != 0)
+		lx_err_fatal(gettext("failed to setup the %s translator"),
+		    "statfs");
+
+	/*
+	 * Find the aux vector on the stack.  The environment pointers are
+	 * walked as int-sized words here, so the terminator is the first
+	 * zero word.
+	 */
+	p = (int *)envp;
+	while (*p != 0)
+		p++;
+	/*
+	 * p is now pointing at the 0 word after the environ pointers.  After
+	 * that is the aux vectors.
+	 */
+	p++;
+	for (ap = (auxv_t *)p; ap->a_type != 0; ap++) {
+		/*
+		 * Overwrite the entries describing the executable with the
+		 * values the kernel reported for the Linux binary.
+		 */
+		switch (ap->a_type) {
+		case AT_BASE:
+			ap->a_un.a_val = edp.ed_base;
+			break;
+		case AT_ENTRY:
+			ap->a_un.a_val = edp.ed_entry;
+			break;
+		case AT_PHDR:
+			ap->a_un.a_val = edp.ed_phdr;
+			break;
+		case AT_PHENT:
+			ap->a_un.a_val = edp.ed_phent;
+			break;
+		case AT_PHNUM:
+			ap->a_un.a_val = edp.ed_phnum;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Do any thunk server initialization. */
+	lxt_server_init(argc, argv);
+
+	/* Setup signal handler information. */
+	if (lx_siginit())
+		lx_err_fatal(gettext(
+		    "failed to initialize lx signals for the branded process"));
+
+	/* Setup thread-specific data area for managing linux threads. */
+	if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0)
+		lx_err_fatal(
+		    gettext("%s failed: %s"), "thr_keycreate(lx_tsd_key)",
+		    strerror(err));
+
+	lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key);
+
+	/* Initialize the thread specific data for this thread. */
+	bzero(&lx_tsd, sizeof (lx_tsd));
+	lx_tsd.lxtsd_gs = LWPGS_SEL;
+
+	if ((err = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0)
+		lx_err_fatal(gettext(
+		    "Unable to initialize thread-specific data: %s"),
+		    strerror(err));
+
+	/*
+	 * Save the current context of this thread.
+	 * We'll restore this context when this thread attempts to exit.
+	 */
+	if (getcontext(&lx_tsd.lxtsd_exit_context) != 0)
+		lx_err_fatal(gettext(
+		    "Unable to initialize thread-specific exit context: %s"),
+		    strerror(errno));
+
+	/*
+	 * lxtsd_exit is still zero (from the bzero() above) the first time
+	 * through; it is non-zero only when the saved context is resumed.
+	 */
+	if (lx_tsd.lxtsd_exit == 0) {
+		lx_runexe(argv, edp.ed_ldentry);
+		/* lx_runexe() never returns. */
+		assert(0);
+	}
+
+	/*
+	 * We are here because the Linux application called the exit() or
+	 * exit_group() system call.  In turn the brand library did a
+	 * setcontext() to jump to the thread context state we saved above.
+	 */
+	if (lx_tsd.lxtsd_exit == 1)
+		thr_exit((void *)lx_tsd.lxtsd_exit_status);
+	else
+		exit(lx_tsd.lxtsd_exit_status);
+
+	assert(0);
+
+	/*NOTREACHED*/
+	return (0);
+}
+
+/*
+ * Walk back through the stack until we find the lx_emulate() frame.
+ *
+ * Starting from the current frame pointer, saved frame pointers are
+ * followed until a frame whose saved return address is lx_emulate_done is
+ * found.  The value returned is the word that sits immediately after that
+ * frame's saved frame pointer and saved return address, interpreted as an
+ * lx_regs_t pointer (presumably the register-set argument passed to
+ * lx_emulate() -- confirm against the caller).
+ *
+ * Running off the top of the stack without finding that frame trips an
+ * assertion rather than dereferencing a NULL frame pointer.
+ */
+lx_regs_t *
+lx_syscall_regs(void)
+{
+	/* LINTED - alignment */
+	struct frame *fr = (struct frame *)_getfp();
+
+	while (fr->fr_savpc != (uintptr_t)&lx_emulate_done) {
+		fr = (struct frame *)fr->fr_savfp;
+		/* A NULL link or a zero return address means no more frames. */
+		assert(fr != NULL);
+		assert(fr->fr_savpc != 0);
+	}
+
+	return ((lx_regs_t *)((uintptr_t *)fr)[2]);
+}
+
+/*
+ * Translate a Linux pid into the native (pid, lwpid) pair.
+ *
+ * A Linux pid of 0 refers to the caller itself, so the current process
+ * and thread ids are used.  Any other value is resolved by the lx brand
+ * module.  The results are copied out with uucopy(), so a bad output
+ * pointer yields an error instead of a fault.
+ *
+ * Returns 0 on success or a negated errno value on failure; -ESRCH is
+ * returned when the brand module reports a pid of -1.
+ */
+int
+lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp)
+{
+	pid_t native_pid;
+	lwpid_t native_lwp;
+
+	if (lpid != 0) {
+		if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &native_pid,
+		    &native_lwp) < 0)
+			return (-errno);
+
+		/*
+		 * If the returned pid is -1, that indicates we tried to
+		 * look up the PID for init, but that process no longer
+		 * exists.
+		 */
+		if (native_pid == -1)
+			return (-ESRCH);
+	} else {
+		native_pid = getpid();
+		native_lwp = thr_self();
+	}
+
+	/* Copy out in the same order as before; stop at the first failure. */
+	if (uucopy(&native_pid, spid, sizeof (pid_t)) != 0 ||
+	    uucopy(&native_lwp, slwp, sizeof (lwpid_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Translate a Linux pid into the native pid only.
+ *
+ * Convenience wrapper around lx_lpid_to_spair() for callers that have no
+ * use for the lwp id; the return value is passed through unchanged.
+ */
+int
+lx_lpid_to_spid(pid_t lpid, pid_t *spid)
+{
+	lwpid_t unused_lwp;
+	int rv;
+
+	rv = lx_lpid_to_spair(lpid, spid, &unused_lwp);
+
+	return (rv);
+}
+
+/*
+ * Resolve an open file descriptor to a path name.
+ *
+ * The path is obtained by reading the symbolic link
+ * /native/proc/<pid>/path/<fd> for the current process.
+ *
+ *	fd		descriptor to look up
+ *	buf		caller-supplied buffer that receives the path
+ *	buf_size	size of buf in bytes, including room for the
+ *			terminating NUL
+ *
+ * Returns buf, NUL-terminated, on success.  Returns NULL if fd is
+ * negative, if buf cannot hold even the terminating NUL, or if the link
+ * cannot be read.  A result that does not fit is truncated to
+ * buf_size - 1 bytes.
+ */
+char *
+lx_fd_to_path(int fd, char *buf, int buf_size)
+{
+	char path_proc[MAXPATHLEN];
+	pid_t pid;
+	int n;
+
+	assert((buf != NULL) && (buf_size >= 0));
+
+	/*
+	 * With buf_size == 0 the "buf_size - 1" passed to readlink() below
+	 * would wrap to a huge size_t and the terminator would be written
+	 * outside the buffer, so refuse that case up front.
+	 */
+	if (fd < 0 || buf_size < 1)
+		return (NULL);
+
+	if ((pid = getpid()) == -1)
+		return (NULL);
+
+	(void) snprintf(path_proc, sizeof (path_proc),
+	    "/native/proc/%d/path/%d", pid, fd);
+
+	/* readlink() does not NUL-terminate; leave one byte for that. */
+	if ((n = readlink(path_proc, buf, buf_size - 1)) == -1)
+		return (NULL);
+	buf[n] = '\0';
+
+	return (buf);
+}
+
+/*
+ * Generate a translation routine, lx_<name>(), that forwards a Linux
+ * system call to the lx kernel module instead of emulating it in user
+ * space.
+ *
+ * The generated function logs the re-vectoring, issues the brand system
+ * call with sub-command B_EMULATE_SYSCALL + <num> and the six raw
+ * arguments, and converts a -1 result into a negated errno value.  Any
+ * other result is returned unchanged.
+ */
+#define	IN_KERNEL_SYSCALL(name, num)					\
+int									\
+lx_##name(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,	\
+    uintptr_t p5, uintptr_t p6)						\
+{									\
+	int rv;								\
+									\
+	lx_debug("\tsyscall %d re-vectoring to lx kernel module "	\
+	    "for " #name "()", num);					\
+	rv = syscall(SYS_brand, B_EMULATE_SYSCALL + num, p1, p2,	\
+	    p3, p4, p5, p6);						\
+	if (rv == -1)							\
+		return (-errno);					\
+	return (rv);							\
+}
+
+/* System calls that are handled by the lx kernel module. */
+IN_KERNEL_SYSCALL(kill, 37)
+IN_KERNEL_SYSCALL(brk, 45)
+IN_KERNEL_SYSCALL(ustat, 62)
+IN_KERNEL_SYSCALL(getppid, 64)
+IN_KERNEL_SYSCALL(sysinfo, 116)
+IN_KERNEL_SYSCALL(modify_ldt, 123)
+IN_KERNEL_SYSCALL(adjtimex, 124)
+IN_KERNEL_SYSCALL(setresuid16, 164)
+IN_KERNEL_SYSCALL(setresgid16, 170)
+IN_KERNEL_SYSCALL(setresuid, 208)
+IN_KERNEL_SYSCALL(setresgid, 210)
+IN_KERNEL_SYSCALL(gettid, 224)
+IN_KERNEL_SYSCALL(tkill, 238)
+IN_KERNEL_SYSCALL(futex, 240)
+IN_KERNEL_SYSCALL(set_thread_area, 243)
+IN_KERNEL_SYSCALL(get_thread_area, 244)
+IN_KERNEL_SYSCALL(set_tid_address, 258)
+
+/*
+ * Linux system call dispatch table.
+ *
+ * The position of an entry in this array is its Linux system call number
+ * (repeated in the trailing comment on each line).  Each entry gives, in
+ * order: the system call name, the routine that implements it (NULL when
+ * the call is not implemented), a flags word (SYS_PASSTHRU, EBP_HAS_ARG6
+ * or one of the NOSYS_* reasons), and what appears to be the number of
+ * arguments the call takes.
+ *
+ * setreuid and setregid each take two arguments, matching their
+ * setreuid16/setregid16 counterparts.
+ */
+static struct lx_sysent sysents[] = {
+	{"nosys", NULL, NOSYS_NULL, 0}, /* 0 */
+	{"exit", lx_exit, 0, 1}, /* 1 */
+	{"fork", lx_fork, 0, 0}, /* 2 */
+	{"read", lx_read, 0, 3}, /* 3 */
+	{"write", write, SYS_PASSTHRU, 3}, /* 4 */
+	{"open", lx_open, 0, 3}, /* 5 */
+	{"close", close, SYS_PASSTHRU, 1}, /* 6 */
+	{"waitpid", lx_waitpid, 0, 3}, /* 7 */
+	{"creat", creat, SYS_PASSTHRU, 2}, /* 8 */
+	{"link", lx_link, 0, 2}, /* 9 */
+	{"unlink", lx_unlink, 0, 1}, /* 10 */
+	{"execve", lx_execve, 0, 3}, /* 11 */
+	{"chdir", chdir, SYS_PASSTHRU, 1}, /* 12 */
+	{"time", lx_time, 0, 1}, /* 13 */
+	{"mknod", lx_mknod, 0, 3}, /* 14 */
+	{"chmod", lx_chmod, 0, 2}, /* 15 */
+	{"lchown16", lx_lchown16, 0, 3}, /* 16 */
+	{"break", NULL, NOSYS_OBSOLETE, 0}, /* 17 */
+	{"stat", NULL, NOSYS_OBSOLETE, 0}, /* 18 */
+	{"lseek", lx_lseek, 0, 3}, /* 19 */
+	{"getpid", lx_getpid, 0, 0}, /* 20 */
+	{"mount", lx_mount, 0, 5}, /* 21 */
+	{"umount", lx_umount, 0, 1}, /* 22 */
+	{"setuid16", lx_setuid16, 0, 1}, /* 23 */
+	{"getuid16", lx_getuid16, 0, 0}, /* 24 */
+	{"stime", stime, SYS_PASSTHRU, 1}, /* 25 */
+	{"ptrace", lx_ptrace, 0, 4}, /* 26 */
+	{"alarm", (int (*)())alarm, SYS_PASSTHRU, 1}, /* 27 */
+	{"fstat", NULL, NOSYS_OBSOLETE, 0}, /* 28 */
+	{"pause", pause, SYS_PASSTHRU, 0}, /* 29 */
+	{"utime", lx_utime, 0, 2}, /* 30 */
+	{"stty", NULL, NOSYS_OBSOLETE, 0}, /* 31 */
+	{"gtty", NULL, NOSYS_OBSOLETE, 0}, /* 32 */
+	{"access", access, SYS_PASSTHRU, 2}, /* 33 */
+	{"nice", nice, SYS_PASSTHRU, 1}, /* 34 */
+	{"ftime", NULL, NOSYS_OBSOLETE, 0}, /* 35 */
+	{"sync", lx_sync, 0, 0}, /* 36 */
+	{"kill", lx_kill, 0, 2}, /* 37 */
+	{"rename", lx_rename, 0, 2}, /* 38 */
+	{"mkdir", mkdir, SYS_PASSTHRU, 2}, /* 39 */
+	{"rmdir", lx_rmdir, 0, 1}, /* 40 */
+	{"dup", dup, SYS_PASSTHRU, 1}, /* 41 */
+	{"pipe", lx_pipe, 0, 1}, /* 42 */
+	{"times", lx_times, 0, 1}, /* 43 */
+	{"prof", NULL, NOSYS_OBSOLETE, 0}, /* 44 */
+	{"brk", lx_brk, 0, 1}, /* 45 */
+	{"setgid16", lx_setgid16, 0, 1}, /* 46 */
+	{"getgid16", lx_getgid16, 0, 0}, /* 47 */
+	{"signal", lx_signal, 0, 2}, /* 48 */
+	{"geteuid16", lx_geteuid16, 0, 0}, /* 49 */
+	{"getegid16", lx_getegid16, 0, 0}, /* 50 */
+	{"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 51 */
+	{"umount2", lx_umount2, 0, 2}, /* 52 */
+	{"lock", NULL, NOSYS_OBSOLETE, 0}, /* 53 */
+	{"ioctl", lx_ioctl, 0, 3}, /* 54 */
+	{"fcntl", lx_fcntl, 0, 3}, /* 55 */
+	{"mpx", NULL, NOSYS_OBSOLETE, 0}, /* 56 */
+	{"setpgid", lx_setpgid, 0, 2}, /* 57 */
+	{"ulimit", NULL, NOSYS_OBSOLETE, 0}, /* 58 */
+	{"olduname", NULL, NOSYS_OBSOLETE, 0}, /* 59 */
+	{"umask", (int (*)())umask, SYS_PASSTHRU, 1}, /* 60 */
+	{"chroot", chroot, SYS_PASSTHRU, 1}, /* 61 */
+	{"ustat", lx_ustat, 0, 2}, /* 62 */
+	{"dup2", lx_dup2, 0, 2}, /* 63 */
+	{"getppid", lx_getppid, 0, 0}, /* 64 */
+	{"getpgrp", lx_getpgrp, 0, 0}, /* 65 */
+	{"setsid", lx_setsid, 0, 0}, /* 66 */
+	{"sigaction", lx_sigaction, 0, 3}, /* 67 */
+	{"sgetmask", NULL, NOSYS_OBSOLETE, 0}, /* 68 */
+	{"ssetmask", NULL, NOSYS_OBSOLETE, 0}, /* 69 */
+	{"setreuid16", lx_setreuid16, 0, 2}, /* 70 */
+	{"setregid16", lx_setregid16, 0, 2}, /* 71 */
+	{"sigsuspend", lx_sigsuspend, 0, 1}, /* 72 */
+	{"sigpending", lx_sigpending, 0, 1}, /* 73 */
+	{"sethostname", lx_sethostname, 0, 2}, /* 74 */
+	{"setrlimit", lx_setrlimit, 0, 2}, /* 75 */
+	{"getrlimit", lx_oldgetrlimit, 0, 2}, /* 76 */
+	{"getrusage", lx_getrusage, 0, 2}, /* 77 */
+	{"gettimeofday", lx_gettimeofday, 0, 2}, /* 78 */
+	{"settimeofday", lx_settimeofday, 0, 2}, /* 79 */
+	{"getgroups16", lx_getgroups16, 0, 2}, /* 80 */
+	{"setgroups16", lx_setgroups16, 0, 2}, /* 81 */
+	{"select", NULL, NOSYS_OBSOLETE, 0}, /* 82 */
+	{"symlink", symlink, SYS_PASSTHRU, 2}, /* 83 */
+	{"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */
+	{"readlink", readlink, SYS_PASSTHRU, 3}, /* 85 */
+	{"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */
+	{"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */
+	{"reboot", lx_reboot, 0, 4}, /* 88 */
+	{"readdir", lx_readdir, 0, 3}, /* 89 */
+	{"mmap", lx_mmap, 0, 6}, /* 90 */
+	{"munmap", munmap, SYS_PASSTHRU, 2}, /* 91 */
+	{"truncate", lx_truncate, 0, 2}, /* 92 */
+	{"ftruncate", lx_ftruncate, 0, 2}, /* 93 */
+	{"fchmod", fchmod, SYS_PASSTHRU, 2}, /* 94 */
+	{"fchown16", lx_fchown16, 0, 3}, /* 95 */
+	{"getpriority", lx_getpriority, 0, 2}, /* 96 */
+	{"setpriority", lx_setpriority, 0, 3}, /* 97 */
+	{"profil", NULL, NOSYS_NO_EQUIV, 0}, /* 98 */
+	{"statfs", lx_statfs, 0, 2}, /* 99 */
+	{"fstatfs", lx_fstatfs, 0, 2}, /* 100 */
+	{"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 101 */
+	{"socketcall", lx_socketcall, 0, 2}, /* 102 */
+	{"syslog", NULL, NOSYS_KERNEL, 0}, /* 103 */
+	{"setitimer", lx_setitimer, 0, 3}, /* 104 */
+	{"getitimer", getitimer, SYS_PASSTHRU, 2}, /* 105 */
+	{"stat", lx_stat, 0, 2}, /* 106 */
+	{"lstat", lx_lstat, 0, 2}, /* 107 */
+	{"fstat", lx_fstat, 0, 2}, /* 108 */
+	{"uname", NULL, NOSYS_OBSOLETE, 0}, /* 109 */
+	{"oldiopl", NULL, NOSYS_NO_EQUIV, 0}, /* 110 */
+	{"vhangup", lx_vhangup, 0, 0}, /* 111 */
+	{"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */
+	{"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */
+	{"wait4", lx_wait4, 0, 4}, /* 114 */
+	{"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */
+	{"sysinfo", lx_sysinfo, 0, 1}, /* 116 */
+	{"ipc", lx_ipc, 0, 5}, /* 117 */
+	{"fsync", lx_fsync, 0, 1}, /* 118 */
+	{"sigreturn", lx_sigreturn, 0, 1}, /* 119 */
+	{"clone", lx_clone, 0, 5}, /* 120 */
+	{"setdomainname", lx_setdomainname, 0, 2}, /* 121 */
+	{"uname", lx_uname, 0, 1}, /* 122 */
+	{"modify_ldt", lx_modify_ldt, 0, 3}, /* 123 */
+	{"adjtimex", lx_adjtimex, 0, 1}, /* 124 */
+	{"mprotect", lx_mprotect, 0, 3}, /* 125 */
+	{"sigprocmask", lx_sigprocmask, 0, 3}, /* 126 */
+	{"create_module", NULL, NOSYS_KERNEL, 0}, /* 127 */
+	{"init_module", NULL, NOSYS_KERNEL, 0}, /* 128 */
+	{"delete_module", NULL, NOSYS_KERNEL, 0}, /* 129 */
+	{"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 130 */
+	{"quotactl", NULL, NOSYS_KERNEL, 0}, /* 131 */
+	{"getpgid", lx_getpgid, 0, 1}, /* 132 */
+	{"fchdir", fchdir, SYS_PASSTHRU, 1}, /* 133 */
+	{"bdflush", NULL, NOSYS_KERNEL, 0}, /* 134 */
+	{"sysfs", lx_sysfs, 0, 3}, /* 135 */
+	{"personality", lx_personality, 0, 1}, /* 136 */
+	{"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 137 */
+	{"setfsuid16", lx_setfsuid16, 0, 1}, /* 138 */
+	{"setfsgid16", lx_setfsgid16, 0, 1}, /* 139 */
+	{"llseek", lx_llseek, 0, 5}, /* 140 */
+	{"getdents", getdents, SYS_PASSTHRU, 3}, /* 141 */
+	{"select", lx_select, 0, 5}, /* 142 */
+	{"flock", lx_flock, 0, 2}, /* 143 */
+	{"msync", lx_msync, 0, 3}, /* 144 */
+	{"readv", lx_readv, 0, 3}, /* 145 */
+	{"writev", lx_writev, 0, 3}, /* 146 */
+	{"getsid", lx_getsid, 0, 1}, /* 147 */
+	{"fdatasync", lx_fdatasync, 0, 1}, /* 148 */
+	{"sysctl", lx_sysctl, 0, 1}, /* 149 */
+	{"mlock", lx_mlock, 0, 2}, /* 150 */
+	{"munlock", lx_munlock, 0, 2}, /* 151 */
+	{"mlockall", lx_mlockall, 0, 1}, /* 152 */
+	{"munlockall", lx_munlockall, 0, 0}, /* 153 */
+	{"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */
+	{"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */
+	{"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */
+	{"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */
+	{"sched_yield", (int (*)())yield, SYS_PASSTHRU, 0}, /* 158 */
+	{"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */
+	{"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */
+	{"sched_rr_get_interval", lx_sched_rr_get_interval, 0, 2}, /* 161 */
+	{"nanosleep", nanosleep, SYS_PASSTHRU, 2}, /* 162 */
+	{"mremap", NULL, NOSYS_NO_EQUIV, 0}, /* 163 */
+	{"setresuid16", lx_setresuid16, 0, 3}, /* 164 */
+	{"getresuid16", lx_getresuid16, 0, 3}, /* 165 */
+	{"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */
+	{"query_module", lx_query_module, NOSYS_KERNEL, 5}, /* 167 */
+	{"poll", lx_poll, 0, 3}, /* 168 */
+	{"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */
+	{"setresgid16", lx_setresgid16, 0, 3}, /* 170 */
+	{"getresgid16", lx_getresgid16, 0, 3}, /* 171 */
+	{"prctl", NULL, NOSYS_UNDOC, 0}, /* 172 */
+	{"rt_sigreturn", lx_rt_sigreturn, 0, 0}, /* 173 */
+	{"rt_sigaction", lx_rt_sigaction, 0, 4}, /* 174 */
+	{"rt_sigprocmask", lx_rt_sigprocmask, 0, 4}, /* 175 */
+	{"rt_sigpending", lx_rt_sigpending, 0, 2}, /* 176 */
+	{"rt_sigtimedwait", lx_rt_sigtimedwait, 0, 4}, /* 177 */
+	{"sigqueueinfo", NULL, NOSYS_UNDOC, 0}, /* 178 */
+	{"rt_sigsuspend", lx_rt_sigsuspend, 0, 2}, /* 179 */
+	{"pread64", lx_pread64, 0, 5}, /* 180 */
+	{"pwrite64", lx_pwrite64, 0, 5}, /* 181 */
+	{"chown16", lx_chown16, 0, 3}, /* 182 */
+	{"getcwd", lx_getcwd, 0, 2}, /* 183 */
+	{"capget", NULL, NOSYS_NO_EQUIV, 0}, /* 184 */
+	{"capset", NULL, NOSYS_NO_EQUIV, 0}, /* 185 */
+	{"sigaltstack", lx_sigaltstack, 0, 2}, /* 186 */
+	{"sendfile", lx_sendfile, 0, 4}, /* 187 */
+	{"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 188 */
+	{"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 189 */
+	{"vfork", lx_vfork, 0, 0}, /* 190 */
+	{"getrlimit", lx_getrlimit, 0, 2}, /* 191 */
+	{"mmap2", lx_mmap2, EBP_HAS_ARG6, 6}, /* 192 */
+	{"truncate64", lx_truncate64, 0, 3}, /* 193 */
+	{"ftruncate64", lx_ftruncate64, 0, 3}, /* 194 */
+	{"stat64", lx_stat64, 0, 2}, /* 195 */
+	{"lstat64", lx_lstat64, 0, 2}, /* 196 */
+	{"fstat64", lx_fstat64, 0, 2}, /* 197 */
+	{"lchown", lchown, SYS_PASSTHRU, 3}, /* 198 */
+	{"getuid", (int (*)())getuid, SYS_PASSTHRU, 0}, /* 199 */
+	{"getgid", (int (*)())getgid, SYS_PASSTHRU, 0}, /* 200 */
+	{"geteuid", lx_geteuid, 0, 0}, /* 201 */
+	{"getegid", lx_getegid, 0, 0}, /* 202 */
+	{"setreuid", setreuid, SYS_PASSTHRU, 2}, /* 203 */
+	{"setregid", setregid, SYS_PASSTHRU, 2}, /* 204 */
+	{"getgroups", getgroups, SYS_PASSTHRU, 2}, /* 205 */
+	{"setgroups", lx_setgroups, 0, 2}, /* 206 */
+	{"fchown", lx_fchown, 0, 3}, /* 207 */
+	{"setresuid", lx_setresuid, 0, 3}, /* 208 */
+	{"getresuid", lx_getresuid, 0, 3}, /* 209 */
+	{"setresgid", lx_setresgid, 0, 3}, /* 210 */
+	{"getresgid", lx_getresgid, 0, 3}, /* 211 */
+	{"chown", lx_chown, 0, 3}, /* 212 */
+	{"setuid", setuid, SYS_PASSTHRU, 1}, /* 213 */
+	{"setgid", setgid, SYS_PASSTHRU, 1}, /* 214 */
+	{"setfsuid", lx_setfsuid, 0, 1}, /* 215 */
+	{"setfsgid", lx_setfsgid, 0, 1}, /* 216 */
+	{"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 217 */
+	{"mincore", mincore, SYS_PASSTHRU, 3}, /* 218 */
+	{"madvise", lx_madvise, 0, 3}, /* 219 */
+	{"getdents64", lx_getdents64, 0, 3}, /* 220 */
+	{"fcntl64", lx_fcntl64, 0, 3}, /* 221 */
+	{"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 222 */
+	{"security", NULL, NOSYS_NO_EQUIV, 0}, /* 223 */
+	{"gettid", lx_gettid, 0, 0}, /* 224 */
+	{"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 225 */
+	{"setxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 226 */
+	{"lsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 227 */
+	{"fsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 228 */
+	{"getxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 229 */
+	{"lgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 230 */
+	{"fgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 231 */
+	{"listxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 232 */
+	{"llistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 233 */
+	{"flistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 234 */
+	{"removexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 235 */
+	{"lremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 236 */
+	{"fremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 237 */
+	{"tkill", lx_tkill, 0, 2}, /* 238 */
+	{"sendfile64", lx_sendfile64, 0, 4}, /* 239 */
+	{"futex", lx_futex, EBP_HAS_ARG6, 6}, /* 240 */
+	{"sched_setaffinity", lx_sched_setaffinity, 0, 3}, /* 241 */
+	{"sched_getaffinity", lx_sched_getaffinity, 0, 3}, /* 242 */
+	{"set_thread_area", lx_set_thread_area, 0, 1}, /* 243 */
+	{"get_thread_area", lx_get_thread_area, 0, 1}, /* 244 */
+	{"io_setup", NULL, NOSYS_NO_EQUIV, 0}, /* 245 */
+	{"io_destroy", NULL, NOSYS_NO_EQUIV, 0}, /* 246 */
+	{"io_getevents", NULL, NOSYS_NO_EQUIV, 0}, /* 247 */
+	{"io_submit", NULL, NOSYS_NO_EQUIV, 0}, /* 248 */
+	{"io_cancel", NULL, NOSYS_NO_EQUIV, 0}, /* 249 */
+	{"fadvise64", NULL, NOSYS_UNDOC, 0}, /* 250 */
+	{"nosys", NULL, 0, 0}, /* 251 */
+	{"group_exit", lx_group_exit, 0, 1}, /* 252 */
+	{"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 253 */
+	{"epoll_create", NULL, NOSYS_NO_EQUIV, 0}, /* 254 */
+	{"epoll_ctl", NULL, NOSYS_NO_EQUIV, 0}, /* 255 */
+	{"epoll_wait", NULL, NOSYS_NO_EQUIV, 0}, /* 256 */
+	{"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 257 */
+	{"set_tid_address", lx_set_tid_address, 0, 1}, /* 258 */
+	{"timer_create", NULL, NOSYS_UNDOC, 0}, /* 259 */
+	{"timer_settime", NULL, NOSYS_UNDOC, 0}, /* 260 */
+	{"timer_gettime", NULL, NOSYS_UNDOC, 0}, /* 261 */
+	{"timer_getoverrun", NULL, NOSYS_UNDOC, 0}, /* 262 */
+	{"timer_delete", NULL, NOSYS_UNDOC, 0}, /* 263 */
+	{"clock_settime", lx_clock_settime, 0, 2}, /* 264 */
+	{"clock_gettime", lx_clock_gettime, 0, 2}, /* 265 */
+	{"clock_getres", lx_clock_getres, 0, 2}, /* 266 */
+	{"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */
+	{"statfs64", lx_statfs64, 0, 2}, /* 268 */
+	{"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */
+	{"tgkill", lx_tgkill, 0, 3}, /* 270 */
+
+	/* The following system calls only exist in kernel 2.6 and greater */
+	{"utimes", utimes, SYS_PASSTHRU, 2}, /* 271 */
+	{"fadvise64_64", NULL, NOSYS_NULL, 0}, /* 272 */
+	{"vserver", NULL, NOSYS_NULL, 0}, /* 273 */
+	{"mbind", NULL, NOSYS_NULL, 0}, /* 274 */
+	{"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */
+	{"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */
+	{"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */
+	{"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */
+	{"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */
+	{"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */
+	{"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */
+	{"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */
+	{"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */
+	{"waitid", lx_waitid, 0, 4}, /* 284 */
+	{"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */
+	{"add_key", NULL, NOSYS_NULL, 0}, /* 286 */
+	{"request_key", NULL, NOSYS_NULL, 0}, /* 287 */
+	{"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */
+	{"ioprio_set", NULL, NOSYS_NULL, 0}, /* 289 */
+	{"ioprio_get", NULL, NOSYS_NULL, 0}, /* 290 */
+	{"inotify_init", NULL, NOSYS_NULL, 0}, /* 291 */
+	{"inotify_add_watch", NULL, NOSYS_NULL, 0}, /* 292 */
+	{"inotify_rm_watch", NULL, NOSYS_NULL, 0}, /* 293 */
+	{"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */
+	{"openat", lx_openat, 0, 4}, /* 295 */
+	{"mkdirat", lx_mkdirat, 0, 3}, /* 296 */
+	{"mknodat", lx_mknodat, 0, 4}, /* 297 */
+	{"fchownat", lx_fchownat, 0, 5}, /* 298 */
+	{"futimesat", lx_futimesat, 0, 3}, /* 299 */
+	{"fstatat64", lx_fstatat64, 0, 4}, /* 300 */
+	{"unlinkat", lx_unlinkat, 0, 3}, /* 301 */
+	{"renameat", lx_renameat, 0, 4}, /* 302 */
+	{"linkat", lx_linkat, 0, 5}, /* 303 */
+	{"symlinkat", lx_symlinkat, 0, 3}, /* 304 */
+	{"readlinkat", lx_readlinkat, 0, 4}, /* 305 */
+	{"fchmodat", lx_fchmodat, 0, 4}, /* 306 */
+	{"faccessat", lx_faccessat, 0, 4}, /* 307 */
+	{"pselect6", NULL, NOSYS_NULL, 0}, /* 308 */
+	{"ppoll", NULL, NOSYS_NULL, 0}, /* 309 */
+	{"unshare", NULL, NOSYS_NULL, 0}, /* 310 */
+	{"set_robust_list", NULL, NOSYS_NULL, 0}, /* 311 */
+	{"get_robust_list", NULL, NOSYS_NULL, 0}, /* 312 */
+	{"splice", NULL, NOSYS_NULL, 0}, /* 313 */
+	{"sync_file_range", NULL, NOSYS_NULL, 0}, /* 314 */
+	{"tee", NULL, NOSYS_NULL, 0}, /* 315 */
+	{"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */
+	{"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c
new file mode 100644
index 0000000000..e547762378
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c
@@ -0,0 +1,1026 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The BrandZ Linux thunking server.
+ *
+ * The interfaces defined in this file form the server side of a bridge
+ * to allow native solaris processes to access Linux services. Currently
+ * the Linux services that are made accessible by these interfaces
+ * are:
+ * - Linux host <-> address naming services
+ * - Linux service <-> port naming services
+ * - Linux syslog
+ *
+ * Access to all these services is provided through a doors server.
+ * Currently the only client of these interfaces and the process that
+ * initially starts up the doors server is lx_thunk.so.
+ *
+ * lx_thunk.so is a native solaris library that is loaded into native
+ * solaris processes that need to run inside a Linux zone and have access
+ * to Linux services. When lx_thunk.so receives a request that requires
+ * accessing Linux services it creates a "thunk server" process by
+ * forking and executing the following shell script (which runs as
+ * a native /bin/sh Linux process):
+ * /native/usr/lib/brand/lx/lx_thunk
+ *
+ * The first and only thing this shell script attempts to do is re-exec
+ * itself. The brand library will detect when this script attempts to
+ * re-exec itself and take control of the process. The exec() system
+ * call made by the Linux shell will never return.
+ *
+ * At this point the process becomes a "thunk server" process.
+ * The first thing it does is a bunch of initialization:
+ *
+ * - Sanity check that a file descriptor based communication mechanism
+ * needed to talk to the parent process is correctly initialized.
+ *
+ * - Verify that two predetermined file descriptors are FIFOs.
+ * These FIFOs will be used to establish communications with
+ * the client program that spawned us and which will be sending
+ * us requests.
+ *
+ * - Use existing debugging libraries (libproc.so, librtld_db.so,
+ * and the BrandZ lx plug-in to librtld_db.so) and /native/proc to
+ * walk the Linux link maps in our own address space to determine
+ * the address of the Linux dlsym() function.
+ *
+ * - Use the native Linux dlsym() function to look up other symbols
+ * (for both functions and variables) that we will need access
+ * to service thunking requests.
+ *
+ * - Create a doors server and notify the parent process that we
+ * are ready to service requests.
+ *
+ * - Enter a service loop and wait for requests.
+ *
+ * At this point the lx_thunk process is ready to service door
+ * based requests. When a door service request is received the
+ * following happens inside the lx_thunk process:
+ *
+ * - The doors server function is invoked on a new solaris thread
+ * that the kernel injects into the lx_thunk process. We sanity
+ * check the incoming request, place it on a service queue, and
+ * wait for notification that the request has been completed.
+ *
+ * - A Linux thread takes this request off the service queue
+ * and dispatches it to a service function that will:
+ * - Decode the request.
+ * - Handle the request by invoking native Linux interfaces.
+ * - Encode the results for the request.
+ *
+ * - The Linux thread then notifies the requesting doors server
+ * thread that the request has been completed and goes to sleep
+ * until it receives another request.
+ *
+ * - the solaris door server thread returns the results of the
+ * operation to the caller.
+ *
+ * Notes:
+ *
+ * - The service request hand off operation from the solaris doors thread to
+ * the "Linux thread" is required because only "Linux threads" can call
+ * into Linux code. In this context a "Linux thread" is a thread that
+ * is either the initial thread of a Linux process or a thread that was
+ * created by calling the Linux version of thread_create(). The reason
+ * for this restriction is that any thread that invokes Linux code needs
+ * to have been initialized in the Linux threading libraries and have
+ * things like Linux thread local storage properly setup.
+ *
+ * But under solaris all door server threads are created and destroyed
+ * dynamically. This means that when a doors server function is invoked,
+ * it is invoked via a thread that hasn't been initialized in the Linux
+ * environment and therefore can't call directly into Linux code.
+ *
+ * - Currently when a thunk server process is starting up, it communicates
+ * with its parent via two FIFOs. These FIFOs are setup by the
+ * lx_thunk.so library. After creating the FIFOs and starting the lx_thunk
+ * server, lx_thunk.so writes the name of the file that the door should
+ * be attached to to the first pipe. The lx_thunk server reads in this
+ * value, initializes the server, fattach()s it to the file requested by
+ * lx_thunk.so and does a write to the second FIFO to let lx_thunk.so
+ * know that the server is ready to take requests.
+ *
+ * This negotiation could be simplified to use only one FIFO.
+ * lx_thunk.so would attempt to read from the FIFO and the lx_thunk
+ * server process could send the new door server file descriptor
+ * to this process via an I_SENDFD ioctl (see streamio.7I).
+ *
+ * - The lx_thunk server process will exit when the client process
+ * that it's handling requests for exits. (ie, when there are no
+ * more open file handles to the doors server.)
+ */
+
+#include <assert.h>
+#include <door.h>
+#include <errno.h>
+#include <libproc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_thread.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/varargs.h>
+#include <thread.h>
+#include <unistd.h>
+
+/*
+ * Generic interfaces used for looking up and calling Linux functions.
+ *
+ * lx_handle_dlsym_t is the opaque handle returned by lx_call_init(); in
+ * this file it carries the address of the Linux dlsym() function.
+ * lx_handle_sym_t is the opaque handle returned by lx_call_dlsym() for an
+ * individual Linux symbol. Both point at structures that are never
+ * defined, so they can only be passed around, cast, or compared to NULL.
+ *
+ * lx_callN() invokes the Linux function behind a handle with N arguments,
+ * each passed as a uintptr_t, and returns the callee's return value as a
+ * uintptr_t.
+ */
+typedef struct __lx_handle_dlsym *lx_handle_dlsym_t;
+typedef struct __lx_handle_sym *lx_handle_sym_t;
+
+uintptr_t lx_call0(lx_handle_sym_t);
+uintptr_t lx_call1(lx_handle_sym_t, uintptr_t);
+uintptr_t lx_call2(lx_handle_sym_t, uintptr_t, uintptr_t);
+uintptr_t lx_call3(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call4(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+uintptr_t lx_call5(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+uintptr_t lx_call6(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call7(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+uintptr_t lx_call8(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+/*
+ * Flag indicating if this process is destined to become a thunking
+ * server process.
+ *
+ * Set to 1 by lxt_server_init() when the process arguments identify the
+ * thunk server script; tested by lxt_server_exec_check() and
+ * lxt_server_pid().
+ */
+static int lxt_server_processes = 0;
+
+/*
+ * Linux function call defines and handles.
+ *
+ * lxh_init holds the handle returned by lx_call_init() (the Linux dlsym()
+ * entry point); it is assigned in lxt_server_enter().
+ */
+static lx_handle_dlsym_t lxh_init = NULL;
+
+/*
+ * Indices into lxt_handles[]. Each value must equal the position of the
+ * matching entry in the table; lxt_server_enter() asserts this while it
+ * resolves the symbols.
+ */
+#define LXTH_GETHOSTBYNAME_R 0
+#define LXTH_GETHOSTBYADDR_R 1
+#define LXTH_GETSERVBYNAME_R 2
+#define LXTH_GETSERVBYPORT_R 3
+#define LXTH_OPENLOG 4
+#define LXTH_SYSLOG 5
+#define LXTH_CLOSELOG 6
+#define LXTH_PROGNAME 7
+
+/*
+ * Table of the Linux symbols this server needs. lxth_handle starts out
+ * NULL and is filled in by lxt_server_enter() via lx_call_dlsym(). The
+ * table is terminated by an entry whose lxth_name is NULL. Note that
+ * "__progname" is a variable rather than a function: its handle is only
+ * used as an address for uucopy() in lxt_server_syslog().
+ */
+static struct lxt_handles {
+ int lxth_index;
+ char *lxth_name;
+ lx_handle_sym_t lxth_handle;
+} lxt_handles[] = {
+ { LXTH_GETHOSTBYNAME_R, "gethostbyname_r", NULL },
+ { LXTH_GETHOSTBYADDR_R, "gethostbyaddr_r", NULL },
+ { LXTH_GETSERVBYNAME_R, "getservbyname_r", NULL },
+ { LXTH_GETSERVBYPORT_R, "getservbyport_r", NULL },
+ { LXTH_OPENLOG, "openlog", NULL },
+ { LXTH_SYSLOG, "syslog", NULL },
+ { LXTH_CLOSELOG, "closelog", NULL },
+ { LXTH_PROGNAME, "__progname", NULL },
+ { -1, NULL, NULL },
+};
+
+/*
+ * Door server operations dispatch functions and table.
+ *
+ * When the doors server gets a request for a particular operation
+ * this dispatch table controls what function will be invoked to
+ * service the request. The function is invoked via Linux thread
+ * so that it can call into native Linux code if necessary.
+ *
+ * Every handler receives the request buffer and its size, and reports
+ * the buffer to hand back to the door client through door_result and
+ * door_result_size (NULL and 0 when the request is rejected).
+ */
+static void lxt_server_gethost(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_getserv(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_openlog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_syslog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+static void lxt_server_closelog(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+
+typedef void (*lxt_op_func_t)(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size);
+
+/*
+ * lxt_server_loop() indexes this table directly with the request's
+ * lxt_sa_op value, so entries are presumably laid out in LXT_SERVER_OP_*
+ * order starting at 0 -- TODO confirm against sys/lx_thunk_server.h.
+ * The ping entry has no handler because lxt_server() answers pings itself
+ * without handing them to the Linux thread.
+ */
+static struct lxt_operations {
+ int lxto_index;
+ lxt_op_func_t lxto_fp;
+} lxt_operations[] = {
+ { LXT_SERVER_OP_PING, NULL },
+ { LXT_SERVER_OP_NAME2HOST, lxt_server_gethost },
+ { LXT_SERVER_OP_ADDR2HOST, lxt_server_gethost },
+ { LXT_SERVER_OP_NAME2SERV, lxt_server_getserv },
+ { LXT_SERVER_OP_PORT2SERV, lxt_server_getserv },
+ { LXT_SERVER_OP_OPENLOG, lxt_server_openlog },
+ { LXT_SERVER_OP_SYSLOG, lxt_server_syslog },
+ { LXT_SERVER_OP_CLOSELOG, lxt_server_closelog },
+};
+
+/*
+ * Structures for passing off requests from doors threads (which are
+ * solaris threads) to a Linux thread that can handle them.
+ *
+ * A door thread fills in an lxt_req_t on its own stack, publishes it
+ * through lxt_req_ptr and then sleeps on lxtr_complete_cv until the
+ * Linux thread has stored the result and set lxtr_complete.
+ */
+typedef struct lxt_req {
+	lxt_server_arg_t	*lxtr_request;		/* request buffer */
+	size_t			lxtr_request_size;	/* size of request */
+	char			*lxtr_result;		/* reply buffer */
+	size_t			lxtr_result_size;	/* size of reply */
+	int			lxtr_complete;		/* set when done */
+	cond_t			lxtr_complete_cv;	/* signalled when done */
+} lxt_req_t;
+
+/*
+ * lxt_req_lock protects lxt_req_ptr and the lxtr_complete field of the
+ * request it points at. lxt_req_cv is broadcast whenever lxt_req_ptr
+ * changes between NULL (slot free) and non-NULL (request pending).
+ */
+static mutex_t lxt_req_lock = DEFAULTMUTEX;
+static cond_t lxt_req_cv = DEFAULTCV;
+static lxt_req_t *lxt_req_ptr = NULL;
+
+/*
+ * lxt_pid is the pid reported through lxt_server_pid() while a syslog
+ * request is being serviced. It is an integer type, so it is initialized
+ * with 0 rather than the pointer constant NULL.
+ */
+static mutex_t lxt_pid_lock = DEFAULTMUTEX;
+static pid_t lxt_pid = 0;
+
+/*
+ * Interfaces used to call from lx_brand.so into Linux code.
+ *
+ * lookup_cb_arg_t is the private argument handed to lookup_cb() through
+ * Pobject_iter(): lca_ph is the libproc handle being walked and lca_ptr
+ * receives the address of the "dlsym" symbol once it has been found.
+ */
+typedef struct lookup_cb_arg {
+ struct ps_prochandle *lca_ph;
+ caddr_t lca_ptr;
+} lookup_cb_arg_t;
+
+/*
+ * Pobject_iter() callback: look for a defined "dlsym" symbol in the
+ * object currently being visited. Returns 0 to keep iterating when the
+ * object does not define the symbol, or 1 after recording its address in
+ * the lookup_cb_arg_t passed in through data.
+ */
+static int
+/*ARGSUSED*/
+lookup_cb(void *data, const prmap_t *pmp, const char *object)
+{
+	lookup_cb_arg_t	*arg = (lookup_cb_arg_t *)data;
+	GElf_Sym	dlsym_sym;
+	prsyminfo_t	dlsym_info;
+
+	/* Skip objects that lack the symbol or merely reference it. */
+	if ((Pxlookup_by_name(arg->lca_ph, LM_ID_BASE, object, "dlsym",
+	    &dlsym_sym, &dlsym_info) != 0) ||
+	    (dlsym_sym.st_shndx == SHN_UNDEF))
+		return (0);
+
+	/*
+	 * XXX: we should be more paranoid and verify that the symbol
+	 * we just looked up is libdl.so.2`dlsym
+	 */
+	arg->lca_ptr = (caddr_t)(uintptr_t)dlsym_sym.st_value;
+	return (1);
+}
+
+/*
+ * Locate the Linux dlsym() function in our own address space.
+ *
+ * Returns a handle wrapping the address of the Linux dlsym() on success,
+ * or NULL if the process is already multi-threaded, if we cannot grab
+ * ourselves through libproc, or if no loaded object defines "dlsym".
+ */
+lx_handle_dlsym_t
+lx_call_init(void)
+{
+	struct ps_prochandle	*ph;
+	lookup_cb_arg_t		lca;
+	extern int		__libc_threaded;
+	int			err;
+
+	lx_debug("lx_call_init(): looking up Linux dlsym");
+
+	/*
+	 * The handle is really the address of the Linux "dlsym" function.
+	 * Once we have this address we can call into the Linux "dlsym"
+	 * function to lookup other functions. It's the initial lookup
+	 * of "dlsym" that's difficult. To do this we'll leverage the
+	 * brand support that we added to librtld_db: we grab ourselves
+	 * via libproc/librtld_db and walk our own link maps looking for
+	 * the symbol.
+	 */
+
+	/* Make sure we're single threaded. */
+	if (__libc_threaded) {
+		lx_debug("lx_call_init() fail: "
+		    "process must be single threaded");
+		return (NULL);
+	}
+
+	/* Tell libproc.so where the real procfs is mounted. */
+	Pset_procfs_path("/native/proc");
+
+	/* Tell librtld_db.so where the real /native is */
+	(void) rd_ctl(RD_CTL_SET_HELPPATH, "/native");
+
+	/* Grab ourselves but don't stop ourselves. */
+	if ((ph = Pgrab(getpid(),
+	    PGRAB_FORCE | PGRAB_RDONLY | PGRAB_NOSTOP, &err)) == NULL) {
+		lx_debug("lx_call_init() fail: Pgrab failed: %s",
+		    Pgrab_error(err));
+		return (NULL);
+	}
+
+	/*
+	 * lookup_cb() only stores to lca_ptr when it finds the symbol, and
+	 * Pobject_iter() does not return -1 merely because no object
+	 * matched. Start from NULL and check the pointer itself so that a
+	 * fruitless walk is reported as a failure instead of handing back
+	 * uninitialized stack contents as a function address.
+	 */
+	lca.lca_ph = ph;
+	lca.lca_ptr = NULL;
+	if ((Pobject_iter(ph, lookup_cb, &lca) == -1) ||
+	    (lca.lca_ptr == NULL)) {
+		lx_debug("lx_call_init() fail: couldn't find Linux dlsym");
+		Prelease(ph, 0);
+		return (NULL);
+	}
+
+	/* The address stays valid; the libproc handle is no longer needed. */
+	Prelease(ph, 0);
+
+	lx_debug("lx_call_init(): Linux dlsym = 0x%p", lca.lca_ptr);
+	return ((lx_handle_dlsym_t)lca.lca_ptr);
+}
+
+#define LX_RTLD_DEFAULT ((void *)0)
+#define LX_RTLD_NEXT ((void *) -1l)
+
+/*
+ * Resolve the Linux symbol named by str by calling the Linux dlsym()
+ * (whose address is carried in lxh_dlsym) with LX_RTLD_DEFAULT as the
+ * library handle. Returns whatever the Linux dlsym() returned, wrapped
+ * as a symbol handle.
+ */
+lx_handle_sym_t
+lx_call_dlsym(lx_handle_dlsym_t lxh_dlsym, const char *str)
+{
+	lx_handle_sym_t	dlsym_fn = (lx_handle_sym_t)lxh_dlsym;
+	lx_handle_sym_t	sym;
+
+	lx_debug("lx_call_dlsym: calling Linux dlsym for: %s", str);
+
+	sym = (lx_handle_sym_t)lx_call2(dlsym_fn,
+	    (uintptr_t)LX_RTLD_DEFAULT, (uintptr_t)str);
+
+	lx_debug("lx_call_dlsym: Linux sym: \"%s\" = 0x%p", str, sym);
+	return (sym);
+}
+
+/*
+ * Common worker behind lx_call0() .. lx_call8(): call the Linux function
+ * whose address is carried in lx_ch, always passing eight uintptr_t
+ * arguments, and return its result.
+ *
+ * The call is bracketed by two lx_swap_gs() calls using the lxr_gs value
+ * from the registers returned by lx_syscall_regs(). Presumably the first
+ * loads the Linux %gs (saving the current one in cur_gs) and the second
+ * restores cur_gs while saving the Linux value back into the register
+ * set -- confirm against lx_swap_gs(). The ordering of these three
+ * statements must not be changed.
+ */
+static uintptr_t
+/*ARGSUSED*/
+lx_call(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2,
+ uintptr_t p3, uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7,
+ uintptr_t p8)
+{
+ typedef uintptr_t (*fp8_t)(uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ lx_regs_t *rp;
+ uintptr_t ret;
+ /* The handle is nothing more than the callee's address. */
+ fp8_t lx_funcp = (fp8_t)lx_ch;
+ long cur_gs;
+
+ rp = lx_syscall_regs();
+
+ lx_debug("lx_call: calling to Linux code at 0x%p", lx_ch);
+ lx_debug("lx_call: loading Linux gs, rp = 0x%p, gs = 0x%p",
+ rp, rp->lxr_gs);
+
+ /* Switch gs, run the Linux function, then switch back. */
+ lx_swap_gs(rp->lxr_gs, &cur_gs);
+ ret = lx_funcp(p1, p2, p3, p4, p5, p6, p7, p8);
+ lx_swap_gs(cur_gs, &rp->lxr_gs);
+
+ lx_debug("lx_call: returned from Linux code at 0x%p (%p)", lx_ch, ret);
+ lx_debug("lx_call: restored solaris gs 0x%p", cur_gs);
+ return (ret);
+}
+
+/*
+ * Fixed-arity front ends for lx_call(). Each one forwards the arguments
+ * it was given and pads the remaining lx_call() parameters with zeros.
+ */
+uintptr_t
+lx_call0(lx_handle_sym_t lx_ch)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, 0, 0, 0, 0, 0, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call1(lx_handle_sym_t lx_ch, uintptr_t p1)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, 0, 0, 0, 0, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call2(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, 0, 0, 0, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call3(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, 0, 0, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call4(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, p4, 0, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call5(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4, uintptr_t p5)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, p4, p5, 0, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call6(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4, uintptr_t p5, uintptr_t p6)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, p4, p5, p6, 0, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call7(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, 0);
+	return (rv);
+}
+
+uintptr_t
+lx_call8(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3,
+    uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, uintptr_t p8)
+{
+	uintptr_t rv;
+
+	rv = lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, p8);
+	return (rv);
+}
+
+/*
+ * Linux Thunking Interfaces - Server Side
+ */
+/*
+ * Check the length fields of a gethost request against the number of
+ * bytes actually received (x_size). Returns 0 when the fields are
+ * consistent and -1 when they are not.
+ */
+static int
+lxt_gethost_arg_check(lxt_gethost_arg_t *x, int x_size)
+{
+	/*
+	 * Reject the request if any of the following holds, tested in
+	 * this order:
+	 *  - the received size is not the structure plus its buffer
+	 *  - either length is negative
+	 *  - token and buf don't use up all the storage
+	 */
+	if ((x_size != sizeof (*x) + x->lxt_gh_buf_len - 1) ||
+	    (x->lxt_gh_token_len < 0) || (x->lxt_gh_buf_len < 0) ||
+	    ((x->lxt_gh_token_len + x->lxt_gh_buf_len) !=
+	    x->lxt_gh_storage_len))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Service a LXT_SERVER_OP_NAME2HOST or LXT_SERVER_OP_ADDR2HOST request by
+ * calling the Linux gethostbyname_r() or gethostbyaddr_r() through the
+ * handles in lxt_handles[].
+ *
+ * The lookup result is written into the request buffer itself, and that
+ * same buffer (request / request_size) is handed back through door_result
+ * and door_result_size. For a request judged invalid, door_result is set
+ * to NULL and door_result_size to 0.
+ */
+static void
+lxt_server_gethost(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_gethost_arg_t *data;
+ struct hostent *result, *rv;
+ int token_len, buf_len, type, data_size, i;
+ char *token, *buf;
+ int h_errnop;
+
+ assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) ||
+ (request->lxt_sa_op == LXT_SERVER_OP_ADDR2HOST));
+
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request) - 1;
+
+ /*
+ * NOTE(review): lxt_gethost_arg_check() returns 0 for a consistent
+ * request and -1 otherwise, so this condition is true only when the
+ * check passed. That looks inverted, but the data_size computation
+ * above may also be off and the two may be cancelling out -- confirm
+ * against the client and sys/lx_thunk_server.h before changing
+ * either one.
+ */
+ if (!lxt_gethost_arg_check(data, data_size)) {
+ lx_debug("lxt_server_gethost: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* Unpack the arguments. */
+ type = data->lxt_gh_type;
+ token = &data->lxt_gh_storage[0];
+ token_len = data->lxt_gh_token_len;
+ result = &data->lxt_gh_result;
+ /* The scratch buffer starts right after the token in the storage. */
+ buf = &data->lxt_gh_storage[data->lxt_gh_token_len];
+ buf_len = data->lxt_gh_buf_len - data->lxt_gh_token_len;
+
+ /* rv and h_errnop are outputs that only the Linux callee assigns. */
+ if (request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) {
+ (void) lx_call6(lxt_handles[LXTH_GETHOSTBYNAME_R].lxth_handle,
+ (uintptr_t)token, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv,
+ (uintptr_t)&h_errnop);
+ } else {
+ (void) lx_call8(lxt_handles[LXTH_GETHOSTBYADDR_R].lxth_handle,
+ (uintptr_t)token, token_len, type, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv,
+ (uintptr_t)&h_errnop);
+ }
+
+ if (rv == NULL) {
+ /* the lookup failed */
+ request->lxt_sa_success = 0;
+ /*
+ * NOTE(review): this reads the errno visible to this library;
+ * whether the Linux callee updates that same variable is not
+ * evident from this file -- confirm.
+ */
+ request->lxt_sa_errno = errno;
+ data->lxt_gh_h_errno = h_errnop;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+ return;
+ }
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+ data->lxt_gh_h_errno = 0;
+
+ /*
+ * The result structure that we would normally return contains a
+ * bunch of pointers, but those pointers are useless to our caller
+ * since they are in a different address space. So before returning
+ * we'll convert all the result pointers into offsets. The caller
+ * can then map the offsets back into pointers.
+ *
+ * The array elements are converted first, while h_aliases and
+ * h_addr_list are still real pointers that can be dereferenced.
+ */
+ for (i = 0; result->h_aliases[i] != NULL; i++) {
+ result->h_aliases[i] =
+ LXT_PTR_TO_OFFSET(result->h_aliases[i], buf);
+ }
+ for (i = 0; result->h_addr_list[i] != NULL; i++) {
+ result->h_addr_list[i] =
+ LXT_PTR_TO_OFFSET(result->h_addr_list[i], buf);
+ }
+ result->h_name = LXT_PTR_TO_OFFSET(result->h_name, buf);
+ result->h_aliases = LXT_PTR_TO_OFFSET(result->h_aliases, buf);
+ result->h_addr_list = LXT_PTR_TO_OFFSET(result->h_addr_list, buf);
+
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+/*
+ * Check the length fields of a getserv request against the number of
+ * bytes actually received (x_size). Returns 0 when the fields are
+ * consistent and -1 when they are not.
+ */
+static int
+lxt_getserv_arg_check(lxt_getserv_arg_t *x, int x_size)
+{
+	/*
+	 * Reject the request if any of the following holds, tested in
+	 * this order:
+	 *  - the received size is not the structure plus its buffer
+	 *  - either length is negative
+	 *  - token and buf don't use up all the storage
+	 */
+	if ((x_size != sizeof (*x) + x->lxt_gs_buf_len - 1) ||
+	    (x->lxt_gs_token_len < 0) || (x->lxt_gs_buf_len < 0) ||
+	    ((x->lxt_gs_token_len + x->lxt_gs_buf_len) !=
+	    x->lxt_gs_storage_len))
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Service a LXT_SERVER_OP_NAME2SERV or LXT_SERVER_OP_PORT2SERV request by
+ * calling the Linux getservbyname_r() or getservbyport_r() through the
+ * handles in lxt_handles[].
+ *
+ * The lookup result is written into the request buffer itself, and that
+ * same buffer (request / request_size) is handed back through door_result
+ * and door_result_size. For a request judged invalid, door_result is set
+ * to NULL and door_result_size to 0.
+ */
+static void
+lxt_server_getserv(lxt_server_arg_t *request, size_t request_size,
+ char **door_result, size_t *door_result_size)
+{
+ lxt_getserv_arg_t *data;
+ struct servent *result, *rv;
+ int token_len, buf_len, data_size, i, port;
+ char *token, *buf, *proto = NULL;
+
+ assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) ||
+ (request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV));
+
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+ data_size = request_size - sizeof (*request) - 1;
+
+ /*
+ * NOTE(review): lxt_getserv_arg_check() returns 0 for a consistent
+ * request and -1 otherwise, so this condition is true only when the
+ * check passed. That looks inverted, but the data_size computation
+ * above may also be off and the two may be cancelling out -- confirm
+ * against the client and sys/lx_thunk_server.h before changing
+ * either one.
+ */
+ if (!lxt_getserv_arg_check(data, data_size)) {
+ lx_debug("lxt_server_getserv: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* Unpack the arguments. */
+ token = &data->lxt_gs_storage[0];
+ token_len = data->lxt_gs_token_len;
+ result = &data->lxt_gs_result;
+ /* The scratch buffer starts right after the token in the storage. */
+ buf = &data->lxt_gs_storage[data->lxt_gs_token_len];
+ buf_len = data->lxt_gs_buf_len - data->lxt_gs_token_len;
+ /* An empty protocol string is passed to Linux as a NULL pointer. */
+ if (strlen(data->lxt_gs_proto) > 0)
+ proto = data->lxt_gs_proto;
+
+ /* Do more sanity checks */
+ if ((request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV) &&
+ (token_len != sizeof (int))) {
+ lx_debug("lxt_server_getserv: invalid request");
+ *door_result = NULL;
+ *door_result_size = 0;
+ return;
+ }
+
+ /* rv is an output that only the Linux callee assigns. */
+ if (request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) {
+ (void) lx_call6(lxt_handles[LXTH_GETSERVBYNAME_R].lxth_handle,
+ (uintptr_t)token, (uintptr_t)proto, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv);
+ } else {
+ /* For port lookups the token holds the port as a raw int. */
+ bcopy(token, &port, sizeof (int));
+ (void) lx_call6(lxt_handles[LXTH_GETSERVBYPORT_R].lxth_handle,
+ port, (uintptr_t)proto, (uintptr_t)result,
+ (uintptr_t)buf, buf_len, (uintptr_t)&rv);
+ }
+
+ if (rv == NULL) {
+ /* the lookup failed */
+ request->lxt_sa_success = 0;
+ /*
+ * NOTE(review): this reads the errno visible to this library;
+ * whether the Linux callee updates that same variable is not
+ * evident from this file -- confirm.
+ */
+ request->lxt_sa_errno = errno;
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+ return;
+ }
+ request->lxt_sa_success = 1;
+ request->lxt_sa_errno = 0;
+
+ /*
+ * The result structure that we would normally return contains a
+ * bunch of pointers, but those pointers are useless to our caller
+ * since they are in a different address space. So before returning
+ * we'll convert all the result pointers into offsets. The caller
+ * can then map the offsets back into pointers.
+ *
+ * The array elements are converted first, while s_aliases is still
+ * a real pointer that can be dereferenced.
+ */
+ for (i = 0; result->s_aliases[i] != NULL; i++) {
+ result->s_aliases[i] =
+ LXT_PTR_TO_OFFSET(result->s_aliases[i], buf);
+ }
+ result->s_proto = LXT_PTR_TO_OFFSET(result->s_proto, buf);
+ result->s_aliases = LXT_PTR_TO_OFFSET(result->s_aliases, buf);
+ result->s_name = LXT_PTR_TO_OFFSET(result->s_name, buf);
+
+ *door_result = (char *)request;
+ *door_result_size = request_size;
+}
+
+/*
+ * Service a LXT_SERVER_OP_OPENLOG request by calling the Linux openlog()
+ * with a private copy of the client's ident string.
+ *
+ * On success the request buffer is handed back through door_result and
+ * door_result_size with lxt_sa_success set; a request whose payload is
+ * not exactly one lxt_openlog_arg_t is answered with NULL / 0.
+ */
+static void
+/*ARGSUSED*/
+lxt_server_openlog(lxt_server_arg_t *request, size_t request_size,
+    char **door_result, size_t *door_result_size)
+{
+	static char		saved_ident[128];
+	lxt_openlog_arg_t	*ol_arg;
+	int			payload_size;
+
+	assert(request->lxt_sa_op == LXT_SERVER_OP_OPENLOG);
+
+	/*LINTED*/
+	ol_arg = (lxt_openlog_arg_t *)&request->lxt_sa_data[0];
+	payload_size = request_size - sizeof (*request);
+
+	if (payload_size == sizeof (*ol_arg)) {
+		/*
+		 * The ident pointer given to the Linux openlog() is kept
+		 * by Linux and dereferenced again on later syslog() calls,
+		 * so it has to point at storage that never goes away.
+		 * Copy the string into our static buffer for that reason.
+		 */
+		(void) mutex_lock(&lxt_pid_lock);
+		(void) strlcpy(saved_ident, ol_arg->lxt_ol_ident,
+		    sizeof (saved_ident));
+		(void) mutex_unlock(&lxt_pid_lock);
+
+		/* Hand the copy to the Linux openlog(). */
+		(void) lx_call3(lxt_handles[LXTH_OPENLOG].lxth_handle,
+		    (uintptr_t)saved_ident, ol_arg->lxt_ol_logopt,
+		    ol_arg->lxt_ol_facility);
+
+		request->lxt_sa_success = 1;
+		request->lxt_sa_errno = 0;
+		*door_result = (char *)request;
+		*door_result_size = request_size;
+	} else {
+		lx_debug("lxt_server_openlog: invalid request");
+		*door_result = NULL;
+		*door_result_size = 0;
+	}
+}
+
+/*
+ * Service a LXT_SERVER_OP_SYSLOG request by calling the Linux syslog()
+ * on behalf of the client, temporarily overriding the pid reported by
+ * lxt_server_pid() and the Linux __progname variable so the log entry
+ * carries the client's identity.
+ *
+ * On success the request buffer is handed back through door_result and
+ * door_result_size with lxt_sa_success set; a request whose payload is
+ * not exactly one lxt_syslog_arg_t is answered with NULL / 0.
+ */
+static void
+/*ARGSUSED*/
+lxt_server_syslog(lxt_server_arg_t *request, size_t request_size,
+    char **door_result, size_t *door_result_size)
+{
+	lxt_syslog_arg_t *data;
+	int data_size;
+	char *progname_ptr_new;
+	char *progname_ptr_old;
+
+	assert(request->lxt_sa_op == LXT_SERVER_OP_SYSLOG);
+
+	/*LINTED*/
+	data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0];
+	data_size = request_size - sizeof (*request);
+
+	if (data_size != sizeof (*data)) {
+		lx_debug("lxt_server_syslog: invalid request");
+		*door_result = NULL;
+		*door_result_size = 0;
+		return;
+	}
+	progname_ptr_new = data->lxt_sl_progname;
+
+	(void) mutex_lock(&lxt_pid_lock);
+
+	/*
+	 * Ensure the message has the correct pid.
+	 * We do this by telling our getpid() system call to return a
+	 * different value.
+	 */
+	lxt_pid = data->lxt_sl_pid;
+
+	/*
+	 * Ensure the message has the correct program name.
+	 * Normally instead of a program name an "ident" string is
+	 * used, this is the string passed to openlog(). But if
+	 * openlog() wasn't called before syslog() then Linux
+	 * syslog() will attempt to use the program name as
+	 * the ident string, and the program name is determined
+	 * by looking at the __progname variable. So we'll just
+	 * update the Linux __progname variable while we do the
+	 * call.
+	 */
+	(void) uucopy(lxt_handles[LXTH_PROGNAME].lxth_handle,
+	    &progname_ptr_old, sizeof (char *));
+	(void) uucopy(&progname_ptr_new,
+	    lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *));
+
+	/*
+	 * Call Linux syslog(). The message comes from the door client, so
+	 * it is passed as the argument of a constant "%s" format rather
+	 * than as the format itself; otherwise any '%' in the message
+	 * would be interpreted as a conversion against arguments that
+	 * were never supplied. (This assumes the client has already
+	 * expanded its own format string -- confirm against lx_thunk.so.)
+	 */
+	(void) lx_call3(lxt_handles[LXTH_SYSLOG].lxth_handle,
+	    data->lxt_sl_priority, (uintptr_t)"%s",
+	    (uintptr_t)data->lxt_sl_message);
+
+	/* Restore pid and program name. */
+	(void) uucopy(&progname_ptr_old,
+	    lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *));
+	lxt_pid = 0;
+
+	(void) mutex_unlock(&lxt_pid_lock);
+
+	request->lxt_sa_success = 1;
+	request->lxt_sa_errno = 0;
+	*door_result = (char *)request;
+	*door_result_size = request_size;
+}
+
+/*
+ * Service a LXT_SERVER_OP_CLOSELOG request by calling the Linux
+ * closelog(). The request carries no payload; anything other than a
+ * bare request header is answered with NULL / 0.
+ */
+static void
+/*ARGSUSED*/
+lxt_server_closelog(lxt_server_arg_t *request, size_t request_size,
+    char **door_result, size_t *door_result_size)
+{
+	int payload_size;
+
+	assert(request->lxt_sa_op == LXT_SERVER_OP_CLOSELOG);
+
+	payload_size = request_size - sizeof (*request);
+	if (payload_size == 0) {
+		/* Nothing to unpack, just invoke the Linux closelog(). */
+		(void) lx_call0(lxt_handles[LXTH_CLOSELOG].lxth_handle);
+
+		request->lxt_sa_success = 1;
+		request->lxt_sa_errno = 0;
+		*door_result = (char *)request;
+		*door_result_size = request_size;
+		return;
+	}
+
+	lx_debug("lxt_server_closelog: invalid request");
+	*door_result = NULL;
+	*door_result_size = 0;
+}
+
+/*
+ * Door server procedure. Runs on a solaris door thread, validates the
+ * incoming request, and (except for pings, which are answered directly)
+ * hands it to the Linux thread running lxt_server_loop(), waiting for
+ * the result before returning it to the door client.
+ *
+ * cookie is the door path given to door_create(); it is used to detach
+ * and remove the door file when the last client goes away.
+ */
+static void
+/*ARGSUSED*/
+lxt_server(void *cookie, char *argp, size_t request_size,
+    door_desc_t *dp, uint_t n_desc)
+{
+	/*LINTED*/
+	lxt_server_arg_t *request = (lxt_server_arg_t *)argp;
+	lxt_req_t lxt_req;
+	char *door_path = cookie;
+	const size_t nops =
+	    sizeof (lxt_operations) / sizeof (lxt_operations[0]);
+
+	/* Check if there's no callers left */
+	if (argp == DOOR_UNREF_DATA) {
+		(void) fdetach(door_path);
+		(void) unlink(door_path);
+		lx_debug("lxt_thunk_server: no clients, exiting");
+		exit(0);
+	}
+
+	/* Sanity check the incoming request. */
+	if (request_size < sizeof (*request)) {
+		/* the lookup failed */
+		lx_debug("lxt_thunk_server: invalid request size");
+		(void) door_return(NULL, 0, NULL, 0);
+		return;
+	}
+
+	/*
+	 * Besides the declared op range, make sure the op is a valid index
+	 * into lxt_operations[], since lxt_server_loop() uses it as one.
+	 * The cast turns a negative value into a huge one, so a single
+	 * comparison rejects both ends.
+	 */
+	if ((request->lxt_sa_op < LXT_SERVER_OP_MIN) ||
+	    (request->lxt_sa_op > LXT_SERVER_OP_MAX) ||
+	    ((size_t)request->lxt_sa_op >= nops)) {
+		lx_debug("lxt_thunk_server: invalid request op");
+		(void) door_return(NULL, 0, NULL, 0);
+		return;
+	}
+
+	/* Handle ping requests immediately, return here. */
+	if (request->lxt_sa_op == LXT_SERVER_OP_PING) {
+		lx_debug("lxt_thunk_server: handling ping request");
+		request->lxt_sa_success = 1;
+		(void) door_return((char *)request, request_size, NULL, 0);
+		return;
+	}
+
+	/* Never queue an op that the Linux thread has no handler for. */
+	if (lxt_operations[request->lxt_sa_op].lxto_fp == NULL) {
+		lx_debug("lxt_thunk_server: invalid request op");
+		(void) door_return(NULL, 0, NULL, 0);
+		return;
+	}
+
+	lx_debug("lxt_thunk_server: hand off request to Linux thread, "
+	    "request = 0x%p", request);
+
+	/* Pack the request up so we can pass it to a Linux thread. */
+	lxt_req.lxtr_request = request;
+	lxt_req.lxtr_request_size = request_size;
+	lxt_req.lxtr_result = NULL;
+	lxt_req.lxtr_result_size = 0;
+	lxt_req.lxtr_complete = 0;
+	(void) cond_init(&lxt_req.lxtr_complete_cv, USYNC_THREAD, NULL);
+
+	/*
+	 * Pass the request onto a Linux thread. There is a single hand-off
+	 * slot, so wait for it to be free before claiming it.
+	 */
+	(void) mutex_lock(&lxt_req_lock);
+	while (lxt_req_ptr != NULL)
+		(void) cond_wait(&lxt_req_cv, &lxt_req_lock);
+	lxt_req_ptr = &lxt_req;
+	(void) cond_broadcast(&lxt_req_cv);
+
+	/* Wait for the request to be completed. */
+	while (lxt_req.lxtr_complete == 0)
+		(void) cond_wait(&lxt_req.lxtr_complete_cv, &lxt_req_lock);
+	assert(lxt_req_ptr != &lxt_req);
+	(void) mutex_unlock(&lxt_req_lock);
+
+	lx_debug("lxt_thunk_server: hand off request completed, "
+	    "request = 0x%p", request);
+
+	/*
+	 * If door_return() is successful it never returns, so if we made
+	 * it here there was some kind of error, but there's nothing we can
+	 * really do about it.
+	 */
+	(void) door_return(
+	    lxt_req.lxtr_result, lxt_req.lxtr_result_size, NULL, 0);
+}
+
+/*
+ * Service loop run by the Linux thread. It repeatedly takes the request
+ * published by a door thread in lxt_req_ptr, dispatches it through
+ * lxt_operations[], stores the result in the request's lxt_req_t and
+ * wakes the door thread that is waiting for it. Never returns.
+ */
+static void
+lxt_server_loop(void)
+{
+	lxt_req_t *lxt_req;
+	lxt_server_arg_t *request;
+	size_t request_size;
+	char *door_result;
+	size_t door_result_size;
+	const size_t nops =
+	    sizeof (lxt_operations) / sizeof (lxt_operations[0]);
+
+	for (;;) {
+		/* Wait for a request from a doors server thread. */
+		(void) mutex_lock(&lxt_req_lock);
+		while (lxt_req_ptr == NULL)
+			(void) cond_wait(&lxt_req_cv, &lxt_req_lock);
+
+		/* We got a request, get a local pointer to it. */
+		lxt_req = lxt_req_ptr;
+		lxt_req_ptr = NULL;
+		(void) cond_broadcast(&lxt_req_cv);
+		(void) mutex_unlock(&lxt_req_lock);
+
+		/* Get a pointer to the request. */
+		request = lxt_req->lxtr_request;
+		request_size = lxt_req->lxtr_request_size;
+
+		lx_debug("lxt_server_loop: Linux thread request recieved, "
+		    "request = %p", request);
+
+		/*
+		 * Dispatch the request. The op must be past the ping entry
+		 * AND inside the table AND have a handler; all three have
+		 * to hold, so the conditions are joined with "&&".
+		 */
+		assert((request->lxt_sa_op > LXT_SERVER_OP_PING) &&
+		    ((size_t)request->lxt_sa_op < nops) &&
+		    (lxt_operations[request->lxt_sa_op].lxto_fp != NULL));
+
+		/* Defaults in case a handler leaves the results untouched. */
+		door_result = NULL;
+		door_result_size = 0;
+		lxt_operations[request->lxt_sa_op].lxto_fp(
+		    request, request_size, &door_result, &door_result_size);
+
+		lx_debug("lxt_server_loop: Linux thread request completed, "
+		    "request = %p", request);
+
+		(void) mutex_lock(&lxt_req_lock);
+
+		/* Set the result pointers for the calling door thread. */
+		lxt_req->lxtr_result = door_result;
+		lxt_req->lxtr_result_size = door_result_size;
+
+		/* Let the door thread know we're done. */
+		lxt_req->lxtr_complete = 1;
+		(void) cond_signal(&lxt_req->lxtr_complete_cv);
+
+		(void) mutex_unlock(&lxt_req_lock);
+	}
+	/*NOTREACHED*/
+}
+
+/*
+ * Turn the current process into a thunk server: verify the two FIFOs
+ * inherited from the parent, resolve the Linux symbols we need, create
+ * the door and attach it to the path sent by the parent, tell the parent
+ * we are ready, and enter the service loop. Exits the process on any
+ * setup failure and otherwise never returns.
+ *
+ * fifo1_wr is the descriptor used to signal readiness to the parent;
+ * fifo2_rd is the descriptor the door path is read from.
+ */
+static void
+lxt_server_enter(int fifo1_wr, int fifo2_rd)
+{
+	struct stat stat;
+	char door_path[MAXPATHLEN];
+	int i, dfd, junk = 0;
+	ssize_t n;
+
+	/*
+	 * Do some sanity checks. Make sure we've got the fifos
+	 * we need passed to us on the correct file descriptors.
+	 */
+	if ((fstat(fifo1_wr, &stat) != 0) ||
+	    ((stat.st_mode & S_IFMT) != S_IFIFO) ||
+	    (fstat(fifo2_rd, &stat) != 0) ||
+	    ((stat.st_mode & S_IFMT) != S_IFIFO)) {
+		lx_err("lx_thunk server aborting, can't contact parent");
+		exit(-1);
+	}
+
+	/*
+	 * Get the initial Linux call handle so we can invoke other
+	 * Linux calls.
+	 */
+	lxh_init = lx_call_init();
+	if (lxh_init == NULL) {
+		lx_err("lx_thunk server aborting, failed Linux call init");
+		exit(-1);
+	}
+
+	/* Now lookup other Linux symbols we'll need access to. */
+	for (i = 0; lxt_handles[i].lxth_name != NULL; i++) {
+		assert(lxt_handles[i].lxth_index == i);
+		if ((lxt_handles[i].lxth_handle = lx_call_dlsym(lxh_init,
+		    lxt_handles[i].lxth_name)) == NULL) {
+			lx_err("lx_thunk server aborting, "
+			    "failed Linux symbol lookup: %s",
+			    lxt_handles[i].lxth_name);
+			exit(-1);
+		}
+	}
+
+	/*
+	 * Get the path to the door server. A read of zero bytes means the
+	 * parent closed the FIFO without sending a path, so treat it as a
+	 * failure as well.
+	 */
+	n = read(fifo2_rd, door_path, sizeof (door_path));
+	if (n <= 0) {
+		lx_err("lxt_server_enter: failed to get door path");
+		exit(-1);
+	}
+
+	/*
+	 * read() does not terminate the buffer, and door_path is used as
+	 * a C string below, so terminate it right after the bytes we got.
+	 */
+	if ((size_t)n >= sizeof (door_path))
+		n = sizeof (door_path) - 1;
+	door_path[n] = '\0';
+	(void) close(fifo2_rd);
+
+	/*
+	 * Create the door server. door_path lives on this stack frame but
+	 * stays valid as the door cookie because this function never
+	 * returns.
+	 */
+	if ((dfd = door_create(lxt_server, door_path,
+	    DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
+		lx_err("lxt_server_enter: door_create() failed");
+		exit(-1);
+	}
+
+	/* Attach the door to a file system path. */
+	(void) fdetach(door_path);
+	if (fattach(dfd, door_path) < 0) {
+		lx_err("lxt_server_enter: fattach() failed");
+		exit(-1);
+	}
+
+	/* The door server is ready, signal this via a fifo write */
+	(void) write(fifo1_wr, &junk, 1);
+	(void) close(fifo1_wr);
+
+	lx_debug("lxt_server_enter: doors server initialized");
+	lxt_server_loop();
+	/*NOTREACHED*/
+}
+
+/*
+ * If lxt_server_init() flagged this process as a thunk server, take over
+ * the current Linux process and start serving; in that case this call
+ * does not come back. For any other process this is a no-op.
+ */
+void
+lxt_server_exec_check(void)
+{
+	if (lxt_server_processes != 0) {
+		lx_debug("lx_thunk server initalization starting");
+		lxt_server_enter(LXT_SERVER_FIFO_WR_FD,
+		    LXT_SERVER_FIFO_RD_FD);
+		/*NOTREACHED*/
+	}
+}
+
+/*
+ * Decide from the process arguments whether this process is the thunk
+ * server, and if so remember that for lxt_server_exec_check().
+ *
+ * The thunk server is a shell script named LXT_SERVER_BINARY that is run
+ * without parameters, which means the shell executing it is started with
+ * exactly two arguments: the name of the shell followed by the name of
+ * the script. We therefore look for an argument count of two and a
+ * second argument equal to LXT_SERVER_BINARY.
+ */
+void
+lxt_server_init(int argc, char *argv[])
+{
+	if ((argc == 2) && (strcmp(argv[1], LXT_SERVER_BINARY) == 0)) {
+		lxt_server_processes = 1;
+		lx_debug("lx_thunk server detected, delaying initalization");
+	}
+}
+
+/*
+ * Report the pid override currently in effect for a thunk server.
+ * Returns 0 and leaves *pid alone when this process is not a thunk
+ * server; otherwise stores the current lxt_pid value in *pid and
+ * returns 1.
+ */
+int
+lxt_server_pid(int *pid)
+{
+	int is_server = (lxt_server_processes != 0);
+
+	if (is_server)
+		*pid = lxt_pid;
+	return (is_server);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile b/usr/src/lib/brand/lx/lx_brand/common/mapfile
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mem.c b/usr/src/lib/brand/lx/lx_brand/common/mem.c
new file mode 100644
index 0000000000..15b077bd33
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mem.c
@@ -0,0 +1,210 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+
+/*
+ * There are two forms of mmap, mmap() and mmap2(). The only difference is that
+ * the final argument to mmap2() specifies the number of pages, not bytes.
+ * Linux has a number of additional flags, but they are all deprecated. We also
+ * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris.
+ *
+ * The Linux mmap() returns ENOMEM in some cases where Solaris returns
+ * EOVERFLOW, so we translate the errno as necessary.
+ */
+
+int pagesize; /* needed for mmap2() */
+
+#define LX_MAP_ANONYMOUS 0x00020
+#define LX_MAP_NORESERVE 0x04000
+
+/*
+ * Translate Linux mmap flags to their Solaris equivalents.  The MAP_TYPE and
+ * MAP_FIXED bits are passed through unchanged; the Linux anonymous and
+ * noreserve bits are mapped by name; every other bit is dropped.
+ */
+static int
+ltos_mmap_flags(int flags)
+{
+	int sflags = flags & (MAP_TYPE | MAP_FIXED);
+
+	sflags |= (flags & LX_MAP_ANONYMOUS) ? MAP_ANONYMOUS : 0;
+	sflags |= (flags & LX_MAP_NORESERVE) ? MAP_NORESERVE : 0;
+
+	return (sflags);
+}
+
+/*
+ * Shared implementation of lx_mmap() and lx_mmap2().  p1..p5 are the raw
+ * address, length, protection, Linux flags and file descriptor; p6 is the
+ * file offset in bytes.  Returns the mapped address truncated to int, or a
+ * negated errno (EOVERFLOW is reported as ENOMEM).
+ */
+static int
+mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5, off64_t p6)
+{
+	int lx_flags = (int)p4;
+	int prot = (int)p3;
+	int fd = (int)p5;
+	void *mapped;
+
+	if (lx_debug_enabled != 0) {
+		char path_buf[MAXPATHLEN];
+		char *path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+
+		lx_debug("\tmmap_common(): fd = %d - %s", fd,
+		    (path != NULL) ? path : "?");
+	}
+
+	/*
+	 * Linux ignores the file descriptor for anonymous (zfod) mappings;
+	 * Solaris wants it to be -1 to get the same behaviour.
+	 */
+	if (lx_flags & LX_MAP_ANONYMOUS)
+		fd = -1;
+
+	/*
+	 * Per the NOTES in the Linux mmap(2) man page, read protection may
+	 * implicitly include exec protection on some architectures, and
+	 * /proc/<pid>/maps on a native Linux system has been observed to
+	 * show the exec permission on userland mappings, data segments
+	 * included.  Mirror that by granting exec whenever read is asked for.
+	 */
+	if (prot & PROT_READ)
+		prot |= PROT_EXEC;
+
+	mapped = mmap64((void *)p1, (size_t)p2, prot,
+	    ltos_mmap_flags(lx_flags), fd, p6);
+
+	if (mapped != MAP_FAILED)
+		return ((int)mapped);
+
+	return ((errno == EOVERFLOW) ? -ENOMEM : -errno);
+}
+
+/*
+ * mmap() - the final argument is a byte offset, passed through as-is.
+ */
+int
+lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5, uintptr_t p6)
+{
+	off64_t byte_off = (off64_t)p6;
+
+	return (mmap_common(p1, p2, p3, p4, p5, byte_off));
+}
+
+/*
+ * mmap2() - the final argument counts pages, so it is scaled by the page
+ * size (looked up once and cached in the global 'pagesize').
+ */
+int
+lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5, uintptr_t p6)
+{
+	off64_t byte_off;
+
+	if (pagesize == 0)
+		pagesize = sysconf(_SC_PAGESIZE);
+
+	byte_off = (off64_t)p6 * pagesize;
+
+	return (mmap_common(p1, p2, p3, p4, p5, byte_off));
+}
+
+
+/*
+ * The locking family of system calls, as well as msync(), are identical. On
+ * Solaris, they are layered on top of the memcntl syscall, so they cannot be
+ * pass-thru.
+ */
+int
+lx_mlock(uintptr_t addr, uintptr_t len)
+{
+	/*
+	 * Mask the start address with PAGEMASK and extend the length by the
+	 * PAGEOFFSET bits that were masked away.
+	 */
+	uintptr_t slop = addr & PAGEOFFSET;
+	void *base = (void *)(addr & PAGEMASK);
+	size_t span = (size_t)(len + slop);
+
+	if (mlock(base, span) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* mlockall() - flags are handed to the native call unchanged. */
+int
+lx_mlockall(uintptr_t flags)
+{
+	if (mlockall(flags) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+int
+lx_munlock(uintptr_t addr, uintptr_t len)
+{
+	/*
+	 * Mask the start address with PAGEMASK and extend the length by the
+	 * PAGEOFFSET bits that were masked away.
+	 */
+	uintptr_t slop = addr & PAGEOFFSET;
+	void *base = (void *)(addr & PAGEMASK);
+	size_t span = (size_t)(len + slop);
+
+	if (munlock(base, span) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* munlockall() - returns 0 on success or a negated errno. */
+int
+lx_munlockall(void)
+{
+	if (munlockall() != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* msync() - arguments are handed to the native call unchanged. */
+int
+lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags)
+{
+	if (msync((void *)addr, (size_t)len, flags) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Solaris recognizes more flags than Linux, so we don't want to inadvertently
+ * use what would be an invalid flag on Linux. Linux also allows the length to
+ * be zero, while Solaris does not.
+ */
+int
+lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice)
+{
+	/* A zero length is a successful no-op. */
+	if (len == 0)
+		return (0);
+
+	/* Only these advice values are forwarded; anything else is EINVAL. */
+	if ((advice != MADV_NORMAL) && (advice != MADV_RANDOM) &&
+	    (advice != MADV_SEQUENTIAL) && (advice != MADV_WILLNEED) &&
+	    (advice != MADV_DONTNEED))
+		return (-EINVAL);
+
+	if (madvise((void *)start, len, advice) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN
+ * and PROT_GROWSUP, which have no equivalent on Solaris.
+ */
+#define LX_PROT_GROWSDOWN 0x01000000
+#define LX_PROT_GROWSUP 0x02000000
+
+int
+lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot)
+{
+	/* Strip the two Linux-only grow bits before calling mprotect(). */
+	uintptr_t sprot = prot & ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN);
+
+	if (mprotect((void *)start, len, sprot) != 0)
+		return (-errno);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c
new file mode 100644
index 0000000000..1cc37f1fbb
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c
@@ -0,0 +1,546 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <assert.h>
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <macros.h>
+#include <sys/brand.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/lx_fcntl.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <zone.h>
+
+extern int sethostname(char *, int);
+
+/* ARGUSED */
+int
+lx_rename(uintptr_t p1, uintptr_t p2)
+{
+	const char *from = (const char *)p1;
+	const char *to = (const char *)p2;
+
+	if (rename(from, to) >= 0)
+		return (0);
+
+	/*
+	 * In install mode a failed rename(2) is reported as success when the
+	 * source did not exist (ENOENT) or when the destination is the name
+	 * of a device currently in use (EBUSY).  In the EBUSY case the
+	 * original file is also removed so no extra files are left behind.
+	 */
+	if (lx_install != 0) {
+		switch (errno) {
+		case ENOENT:
+			return (0);
+		case EBUSY:
+			(void) unlink(from);
+			return (0);
+		default:
+			break;
+		}
+	}
+
+	return (-errno);
+}
+
+/*
+ * renameat() - rename p1 (relative to directory fd ext1) to p2 (relative to
+ * directory fd ext2).  The Linux LX_AT_FDCWD value is translated to the
+ * native AT_FDCWD for both descriptors.  Returns 0 on success or a negated
+ * errno.
+ */
+int
+lx_renameat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2)
+{
+	int ret;
+	int atfd1 = (int)ext1;
+	int atfd2 = (int)ext2;
+
+	if (atfd1 == LX_AT_FDCWD)
+		atfd1 = AT_FDCWD;
+
+	if (atfd2 == LX_AT_FDCWD)
+		atfd2 = AT_FDCWD;
+
+	ret = renameat(atfd1, (const char *)p1, atfd2, (const char *)p2);
+
+	if (ret < 0) {
+		/* see lx_rename() for why we check lx_install */
+		if (lx_install != 0) {
+			if (errno == ENOENT)
+				return (0);
+
+			if (errno == EBUSY) {
+				/*
+				 * Use the translated descriptor here; the raw
+				 * Linux value would not resolve LX_AT_FDCWD.
+				 */
+				(void) unlinkat(atfd1, (const char *)p1, 0);
+				return (0);
+			}
+		}
+
+		return (-errno);
+	}
+
+	return (0);
+}
+
+/*
+ * reboot() - validate the two Linux magic numbers, require an effective uid
+ * of 0, then map the Linux command onto reboot() with RB_HALT or
+ * RB_AUTOBOOT.  The CAD_ON/CAD_OFF commands are accepted and ignored.
+ */
+/*ARGSUSED*/
+int
+lx_reboot(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+	int m1 = (int)p1;
+	int m2 = (int)p2;
+	uint_t cmd = (int)p3;
+	int rv;
+
+	if (m1 != LINUX_REBOOT_MAGIC1)
+		return (-EINVAL);
+
+	if (!((m2 == LINUX_REBOOT_MAGIC2) || (m2 == LINUX_REBOOT_MAGIC2A) ||
+	    (m2 == LINUX_REBOOT_MAGIC2B) || (m2 == LINUX_REBOOT_MAGIC2C) ||
+	    (m2 == LINUX_REBOOT_MAGIC2D)))
+		return (-EINVAL);
+
+	if (geteuid() != 0)
+		return (-EPERM);
+
+	switch (cmd) {
+	case LINUX_REBOOT_CMD_CAD_ON:
+	case LINUX_REBOOT_CMD_CAD_OFF:
+		/* ignored */
+		return (0);
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+	case LINUX_REBOOT_CMD_HALT:
+		rv = reboot(RB_HALT, NULL);
+		break;
+
+	case LINUX_REBOOT_CMD_RESTART:
+	case LINUX_REBOOT_CMD_RESTART2:
+		/* RESTART2 may need more work */
+		lx_msg(gettext("Restarting system.\n"));
+		rv = reboot(RB_AUTOBOOT, NULL);
+		break;
+
+	default:
+		return (-EINVAL);
+	}
+
+	if (rv == -1)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * getcwd() - Linux syscall semantics are slightly different; we need to return
+ * the length of the pathname copied (+ 1 for the terminating NUL byte.)
+ *
+ * p1 is the caller's buffer and p2 its size in bytes.  Returns the string
+ * length plus one on success, or a negated errno if getcwd(), the temporary
+ * allocation (-ENOMEM) or uucopy() fails.
+ */
+int
+lx_getcwd(uintptr_t p1, uintptr_t p2)
+{
+ char *buf;
+ size_t buflen = (size_t)p2;
+ size_t copylen, local_len;
+ size_t len = 0;
+
+ /* Let the native getcwd() fill in the caller's buffer directly. */
+ if ((getcwd((char *)p1, (size_t)p2)) == NULL)
+ return (-errno);
+
+ /*
+ * We need the length of the pathname getcwd() copied but we never want
+ * to dereference a Linux pointer for any reason.
+ *
+ * Thus, to get the string length we will uucopy() up to copylen bytes
+ * at a time into a local buffer and will walk each chunk looking for
+ * the string-terminating NUL byte.
+ *
+ * We can use strlen() to find the length of the string in the
+ * local buffer by delimiting the buffer with a NUL byte in the
+ * last element that will never be overwritten.
+ */
+ copylen = min(buflen, MAXPATHLEN + 1);
+ buf = SAFE_ALLOCA(copylen + 1);
+ if (buf == NULL)
+ return (-ENOMEM);
+ buf[copylen] = '\0';
+
+ for (;;) {
+ /* 'len' is the number of bytes already scanned in earlier chunks. */
+ if (uucopy((char *)p1 + len, buf, copylen) != 0)
+ return (-errno);
+
+ local_len = strlen(buf);
+ len += local_len;
+
+ /*
+ * If the strlen() is less than copylen, we found the
+ * real end of the string -- not the NUL byte used to
+ * delimit the end of our buffer.
+ */
+ if (local_len != copylen)
+ break;
+
+ /* prepare to check the next chunk of the string */
+ buflen -= copylen;
+ copylen = min(buflen, copylen);
+ }
+
+ /* Account for the terminating NUL byte in the returned length. */
+ return (len + 1);
+}
+
+/*
+ * Return the zone's LX_KERN_VERSION_NUM attribute, falling back to
+ * LX_KERN_2_4 when the attribute cannot be read in full.
+ */
+int
+lx_get_kern_version(void)
+{
+	/*
+	 * This function is called quite often and zone_getattr() is slow, so
+	 * the answer is remembered here.  -1 means nothing is cached yet.
+	 */
+	static int kvers_cache = -1;
+	/* scratch destination for zone_getattr() */
+	int kvers;
+
+	if (kvers_cache == -1) {
+		if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, &kvers,
+		    sizeof (int)) == sizeof (int))
+			kvers_cache = kvers;
+		else
+			kvers_cache = LX_KERN_2_4;
+	}
+
+	return (kvers_cache);
+}
+
+/*
+ * uname() - fill in the Linux utsname structure at p1.  The node name comes
+ * from gethostname(), the domain name from sysinfo(SI_SRPC_DOMAIN) (empty on
+ * failure), and the remaining fields from fixed brand strings.  Returns 0 or
+ * a negated errno if gethostname() fails.
+ */
+int
+lx_uname(uintptr_t p1)
+{
+	struct lx_utsname *uts = (struct lx_utsname *)p1;
+	char domain[LX_SYS_UTS_LN + 1];
+
+	if (gethostname(uts->nodename, sizeof (uts->nodename)) == -1)
+		return (-errno);
+
+	(void) strlcpy(uts->sysname, LX_UNAME_SYSNAME, LX_SYS_UTS_LN);
+	(void) strlcpy(uts->release, lx_release, LX_SYS_UTS_LN);
+	(void) strlcpy(uts->version, LX_UNAME_VERSION, LX_SYS_UTS_LN);
+	(void) strlcpy(uts->machine, LX_UNAME_MACHINE, LX_SYS_UTS_LN);
+
+	if (sysinfo(SI_SRPC_DOMAIN, domain, LX_SYS_UTS_LN) >= 0)
+		(void) strlcpy(uts->domainname, domain, LX_SYS_UTS_LN);
+	else
+		uts->domainname[0] = '\0';
+
+	return (0);
+}
+
+/*
+ * {get,set}groups16() - Handle the conversion between 16-bit Linux gids and
+ * 32-bit Solaris gids.
+ */
+/*
+ * getgroups16() - p1 is the number of entries in the caller's array and p2
+ * the array of 16-bit gids.  The native 32-bit list is fetched into a
+ * temporary buffer and each entry narrowed with LX_GID32_TO_GID16().
+ * Returns the value of getgroups() on success or a negated errno.
+ */
+int
+lx_getgroups16(uintptr_t p1, uintptr_t p2)
+{
+	int count = (int)p1;
+	lx_gid16_t *grouplist = (lx_gid16_t *)p2;
+	gid_t *grouplist32;
+	int ret;
+	int i;
+
+	/*
+	 * Reject a negative count up front; otherwise it would be converted
+	 * to a huge unsigned allocation size below.
+	 */
+	if (count < 0)
+		return (-EINVAL);
+
+	grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t));
+	if (grouplist32 == NULL)
+		return (-ENOMEM);
+	if ((ret = getgroups(count, grouplist32)) < 0)
+		return (-errno);
+
+	for (i = 0; i < ret; i++)
+		grouplist[i] = LX_GID32_TO_GID16(grouplist32[i]);
+
+	return (ret);
+}
+
+/*
+ * setgroups16() - p1 is the number of entries and p2 the array of 16-bit
+ * gids.  Each entry is widened with LX_GID16_TO_GID32() into a temporary
+ * buffer that is then handed to setgroups().  Returns 0 or a negated errno.
+ */
+int
+lx_setgroups16(uintptr_t p1, uintptr_t p2)
+{
+	int count = (int)p1;
+	lx_gid16_t *grouplist = (lx_gid16_t *)p2;
+	gid_t *grouplist32;
+	int i;
+
+	/*
+	 * Reject a negative count up front; otherwise it would be converted
+	 * to a huge unsigned allocation size below.
+	 */
+	if (count < 0)
+		return (-EINVAL);
+
+	grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t));
+	if (grouplist32 == NULL)
+		return (-ENOMEM);
+	for (i = 0; i < count; i++)
+		grouplist32[i] = LX_GID16_TO_GID32(grouplist[i]);
+
+	return (setgroups(count, grouplist32) ? -errno : 0);
+}
+
+/*
+ * personality() - Solaris doesn't support Linux personalities, but we have to
+ * emulate enough to show that we support the basic personality.
+ */
+#define LX_PER_LINUX 0x0
+
+int
+lx_personality(uintptr_t p1)
+{
+	int per = (int)p1;
+
+	/* -1 asks for the current personality, which is always LX_PER_LINUX. */
+	if (per == -1)
+		return (LX_PER_LINUX);
+
+	/* Selecting the one personality we emulate succeeds. */
+	if (per == LX_PER_LINUX)
+		return (0);
+
+	return (-EINVAL);
+}
+
+/*
+ * mknod() - Since we don't have the SYS_CONFIG privilege within a zone, the
+ * only mode we have to support is S_IFIFO. We also have to distinguish between
+ * an invalid type and insufficient privileges.
+ */
+#define LX_S_IFMT 0170000
+#define LX_S_IFDIR 0040000
+#define LX_S_IFCHR 0020000
+#define LX_S_IFBLK 0060000
+#define LX_S_IFREG 0100000
+#define LX_S_IFIFO 0010000
+#define LX_S_IFLNK 0120000
+#define LX_S_IFSOCK 0140000
+
+/*
+ * p1 is the path, p2 the Linux mode (file type bits plus permission bits)
+ * and p3 the Linux device number.  Regular files are created with creat(),
+ * UNIX domain sockets with socket()/bind(), and fifos and device nodes with
+ * the native mknod().  Returns 0 on success or a negated errno; an
+ * unrecognized file type yields -EINVAL.
+ */
+/*ARGSUSED*/
+int
+lx_mknod(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char *path = (char *)p1;
+	lx_dev_t lx_dev = (lx_dev_t)p3;
+	struct sockaddr_un sockaddr;
+	struct stat statbuf;
+	mode_t mode, type;
+	dev_t dev;
+	int fd, err;
+
+	type = ((mode_t)p2 & LX_S_IFMT);
+	mode = ((mode_t)p2 & 07777);
+
+	switch (type) {
+	case 0:
+	case LX_S_IFREG:
+		/* create a regular file */
+		if (stat(path, &statbuf) == 0)
+			return (-EEXIST);
+
+		if (errno != ENOENT)
+			return (-errno);
+
+		if ((fd = creat(path, mode)) < 0)
+			return (-errno);
+
+		(void) close(fd);
+		return (0);
+
+	case LX_S_IFSOCK:
+		/*
+		 * Create a UNIX domain socket.
+		 *
+		 * Most programmers aren't even aware you can do this.
+		 *
+		 * Note you can also do this via Solaris' mknod(2), but
+		 * Linux allows anyone who can create a UNIX domain
+		 * socket via bind(2) to create one via mknod(2);
+		 * Solaris requires the caller to be privileged.
+		 */
+		if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+			return (-errno);
+
+		/*
+		 * From here on every exit goes through the close() below so
+		 * the socket descriptor is never leaked.  The error is saved
+		 * in 'err' first because close() may modify errno.
+		 */
+		if (stat(path, &statbuf) == 0) {
+			err = EEXIST;
+		} else if (errno != ENOENT) {
+			err = errno;
+		} else if (uucopy(path, &sockaddr.sun_path,
+		    sizeof (sockaddr.sun_path)) < 0) {
+			err = errno;
+		} else {
+			/* assure NUL termination of sockaddr.sun_path */
+			sockaddr.sun_path[sizeof (sockaddr.sun_path) - 1] =
+			    '\0';
+			sockaddr.sun_family = AF_UNIX;
+
+			if (bind(fd, (struct sockaddr *)&sockaddr,
+			    strlen(sockaddr.sun_path) +
+			    sizeof (sockaddr.sun_family)) < 0)
+				err = errno;
+			else
+				err = 0;
+		}
+
+		(void) close(fd);
+		return (-err);
+
+	case LX_S_IFIFO:
+		dev = 0;
+		break;
+
+	case LX_S_IFCHR:
+	case LX_S_IFBLK:
+		/*
+		 * The "dev" RPM package wants to create all possible Linux
+		 * device nodes, so just report its mknod()s as having
+		 * succeeded if we're in install mode.
+		 */
+		if (lx_install != 0) {
+			lx_debug("lx_mknod: install mode spoofed creation of "
+			    "Linux device [%lld, %lld]\n",
+			    LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+
+			return (0);
+		}
+
+		dev = makedevice(LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+		break;
+
+	default:
+		return (-EINVAL);
+	}
+
+	return (mknod(path, mode | type, dev) ? -errno : 0);
+}
+
+/* sethostname() - p1 is the new name, p2 its length. */
+int
+lx_sethostname(uintptr_t p1, uintptr_t p2)
+{
+	int namelen = (size_t)p2;
+
+	if (sethostname((char *)p1, namelen) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * setdomainname() - p1 is the new domain name, p2 its length.  Lengths
+ * outside [0, LX_SYS_UTS_LN) are rejected with -EINVAL; otherwise the name is
+ * set through sysinfo(SI_SET_SRPC_DOMAIN).
+ */
+int
+lx_setdomainname(uintptr_t p1, uintptr_t p2)
+{
+	int namelen = (size_t)p2;
+
+	if (!((namelen >= 0) && (namelen < LX_SYS_UTS_LN)))
+		return (-EINVAL);
+
+	if (sysinfo(SI_SET_SRPC_DOMAIN, (char *)p1, namelen) < 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * getpid() - a thunk server process reports the pid supplied by
+ * lxt_server_pid(); every other process asks the brand syscall.
+ */
+int
+lx_getpid(void)
+{
+	int pid;
+
+	/* The thunk server hook gets the first chance to answer. */
+	if (lxt_server_pid(&pid) == 0) {
+		pid = syscall(SYS_brand, B_EMULATE_SYSCALL + 20);
+		if (pid == -1)
+			return (-errno);
+	}
+
+	return (pid);
+}
+
+/*
+ * execve() - p1 is the path of the program, p2 the argument vector and p3
+ * the environment vector.  A request to run the lx_native helper is turned
+ * into a B_EXEC_NATIVE brand call on the helper's first argument; anything
+ * else goes to the regular execve().  Only returns on failure, with a
+ * negated errno.
+ */
+int
+lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *filename = (char *)p1;
+ char **argv = (char **)p2;
+ char **envp = (char **)p3;
+ char *nullist[] = { NULL };
+ char path[64];
+
+ /* First call the thunk server hook. */
+ lxt_server_exec_check();
+
+ /* Get a copy of the executable we're trying to run */
+ /* path stays an empty string if the copy below fails outright. */
+ path[0] = '\0';
+ (void) uucopystr(filename, path, sizeof (path));
+
+ /* Check if we're trying to run a native binary */
+ if (strncmp(path, "/native/usr/lib/brand/lx/lx_native",
+ sizeof (path)) == 0) {
+ /* Skip the first element in the argv array */
+ argv++;
+
+ /*
+ * The name of the new program to execute was the first
+ * parameter passed to lx_native.
+ */
+ /* Fetch that pointer with uucopy() instead of dereferencing argv. */
+ if (uucopy(argv, &filename, sizeof (char *)) != 0)
+ return (-errno);
+
+ (void) syscall(SYS_brand, B_EXEC_NATIVE, filename, argv, envp,
+ NULL, NULL, NULL);
+ return (-errno);
+ }
+
+ /* Substitute an empty argument list when the caller passed none. */
+ if (argv == NULL)
+ argv = nullist;
+
+ /* This is a normal exec call. */
+ (void) execve(filename, argv, envp);
+
+ return (-errno);
+}
+
+/*
+ * setgroups() - p1 is the number of gids and p2 the Linux array of 32-bit
+ * gids.  The list is copied in with uucopy(), out-of-range ids are clamped,
+ * and the result is passed to the emulated setgroups32 brand syscall.
+ */
+int
+lx_setgroups(uintptr_t p1, uintptr_t p2)
+{
+	int count = (int)p1;
+	gid_t *gids = NULL;
+	int idx, rv;
+
+	lx_debug("\tlx_setgroups(%d, 0x%p", count, p2);
+
+	if (count > 0) {
+		gids = (gid_t *)SAFE_ALLOCA(count * sizeof (gid_t));
+		if (gids == NULL)
+			return (-ENOMEM);
+
+		if (uucopy((void *)p2, gids, count * sizeof (gid_t)) != 0)
+			return (-errno);
+
+		/*
+		 * Linux doesn't check the validity of the group IDs, but
+		 * Solaris does, so anything above MAXUID is replaced with
+		 * MAXUID (a known, valid value).
+		 */
+		idx = 0;
+		while (idx < count) {
+			if (gids[idx] > MAXUID)
+				gids[idx] = MAXUID;
+			idx++;
+		}
+	}
+
+	rv = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_setgroups32,
+	    count, gids);
+
+	if (rv == -1)
+		return (-errno);
+
+	return (rv);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/module.c b/usr/src/lib/brand/lx/lx_brand/common/module.c
new file mode 100644
index 0000000000..3ec4164f71
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/module.c
@@ -0,0 +1,90 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * We don't support Linux modules, but we have to emulate enough of the system
+ * calls to show that we don't have any modules installed.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_misc.h>
+
+/*
+ * For query_module(), we provide an empty list of modules, and return ENOENT
+ * on any request for a specific module.
+ */
+#define LX_QM_MODULES 1
+#define LX_QM_DEPS 2
+#define LX_QM_REFS 3
+#define LX_QM_SYMBOLS 4
+#define LX_QM_INFO 5
+
+/*ARGSUSED*/
+int
+lx_query_module(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5)
+{
+	/*
+	 * p1 is the 'name' argument and is not examined.
+	 */
+	int which = (int)p2;
+	char *buf = (char *)p3;
+	size_t bufsize = (size_t)p4;
+	size_t *ret = (size_t *)p5;
+
+	/* Special case: a request of 0 always succeeds. */
+	if (which == 0)
+		return (0);
+
+	/* Listing the modules produces an empty list. */
+	if (which == LX_QM_MODULES) {
+		if (bufsize && buf)
+			buf[0] = '\0';
+		if (ret)
+			*ret = 0;
+		return (0);
+	}
+
+	/* Any request for information on a specific module is ENOENT. */
+	if ((which == LX_QM_DEPS) || (which == LX_QM_REFS) ||
+	    (which == LX_QM_SYMBOLS) || (which == LX_QM_INFO))
+		return (-ENOENT);
+
+	return (-EINVAL);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount.c b/usr/src/lib/brand/lx/lx_brand/common/mount.c
new file mode 100644
index 0000000000..3db9652eca
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mount.c
@@ -0,0 +1,719 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <assert.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <strings.h>
+#include <nfs/mount.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/lx_autofs.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_mount.h>
+
+/*
+ * support definitions
+ */
+/*
+ * Storage for an NFS file handle: overlays a struct nfs_fid, a struct
+ * nfs_fh3 and a raw byte array of NFS3_FHSIZE + 2 bytes.
+ * NOTE(review): fh2/fh3 presumably hold NFSv2 and NFSv3 handles -- confirm.
+ */
+union fh_buffer {
+ struct nfs_fid fh2;
+ struct nfs_fh3 fh3;
+ char fh_data[NFS3_FHSIZE + 2];
+};
+
+/* The kind of value, if any, that a mount option takes. */
+typedef enum mount_opt_type {
+ MOUNT_OPT_INVALID = 0,
+ MOUNT_OPT_NORMAL = 1, /* option value: none */
+ MOUNT_OPT_UINT = 2 /* option value: unsigned int */
+} mount_opt_type_t;
+
+/*
+ * One entry in a table of allowed mount options.  i_lx_opt_verify() walks a
+ * table until it reaches an entry whose mo_name is NULL.
+ */
+typedef struct mount_opt {
+ char *mo_name;
+ mount_opt_type_t mo_type;
+} mount_opt_t;
+
+
+/*
+ * Globals
+ */
+/*
+ * Tables of the mount options accepted for each file system type.  Every
+ * table must end with a { NULL, MOUNT_OPT_INVALID } entry: i_lx_opt_verify()
+ * walks a table until it finds a NULL mo_name.
+ */
+mount_opt_t lofs_options[] = {
+	{ NULL,			MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_proc_options[] = {
+	{ NULL,			MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_autofs_options[] = {
+	{ LX_MNTOPT_FD,		MOUNT_OPT_UINT },
+	{ LX_MNTOPT_PGRP,	MOUNT_OPT_UINT },
+	{ LX_MNTOPT_MINPROTO,	MOUNT_OPT_UINT },
+	{ LX_MNTOPT_MAXPROTO,	MOUNT_OPT_UINT },
+	{ NULL,			MOUNT_OPT_INVALID }
+};
+
+
+/*
+ * i_lx_opt_verify() - Check the mount options.
+ *
+ * You might wonder why we're being so strict about the mount options
+ * we allow. The reason is that normally all mount option verification
+ * is done by the Solaris userland mount command. Once mount options
+ * are passed to the kernel, invalid options are simply ignored. So
+ * if we actually want to catch requests for functionality that we
+ * don't support, or if we want to make sure that we don't randomly
+ * enable options that we haven't checked to make sure they have the
+ * same syntax on Linux and Solaris, we need to reject any options
+ * we don't know to be ok here.
+ */
+/*
+ * opts is a comma separated option string and mop a table of allowed
+ * options terminated by an entry with a NULL mo_name.
+ *
+ * Returns 1 if every option in opts is allowed (or opts is empty), 0 if the
+ * string is malformed or names an option or value that is not allowed, and
+ * -1 if the temporary copy of opts could not be allocated.
+ */
+static int
+i_lx_opt_verify(char *opts, mount_opt_t *mop)
+{
+	int opts_len;
+	char *opts_tmp, *opt;
+	int opt_len, i;
+
+	/* Check the pointers before touching the string. */
+	assert((opts != NULL) && (mop != NULL));
+	opts_len = strlen(opts);
+
+	/* If no options were specified, there's no problem. */
+	if (opts_len == 0)
+		return (1);
+
+	/* If no options are allowed, fail. */
+	if (mop[0].mo_name == NULL)
+		return (0);
+
+	/* Don't accept leading or trailing ','. */
+	if ((opts[0] == ',') || (opts[opts_len - 1] == ','))
+		return (0);
+
+	/* Don't accept sequential ','. */
+	for (i = 1; i < opts_len; i++)
+		if ((opts[i - 1] == ',') && (opts[i] == ','))
+			return (0);
+
+	/*
+	 * We're going to use strtok() which modifies the target
+	 * string so make a temporary copy, including the terminating
+	 * NUL byte.
+	 */
+	opts_tmp = SAFE_ALLOCA(opts_len + 1);
+	if (opts_tmp == NULL)
+		return (-1);
+	bcopy(opts, opts_tmp, opts_len + 1);
+
+	/*
+	 * Verify each option one at a time.  The string is non-empty and
+	 * does not start with ',', so the first strtok() yields a token.
+	 */
+	opt = strtok(opts_tmp, ",");
+	opt_len = strlen(opt);
+	for (;;) {
+
+		/* Check for matching option/value pair. */
+		for (i = 0; mop[i].mo_name != NULL; i++) {
+			char *ovalue;
+			int ovalue_len, mo_len;
+
+			/* If the option is too short don't bother comparing */
+			mo_len = strlen(mop[i].mo_name);
+			if (opt_len < mo_len) {
+				/* Keep trying to find a match. */
+				continue;
+			}
+
+			/* Compare the option to an allowed option. */
+			if (strncmp(mop[i].mo_name, opt, mo_len) != 0) {
+				/* Keep trying to find a match. */
+				continue;
+			}
+
+			if (mop[i].mo_type == MOUNT_OPT_NORMAL) {
+				/* The option doesn't take a value. */
+				if (opt_len == mo_len) {
+					/* This option is ok. */
+					break;
+				} else {
+					/* Keep trying to find a match. */
+					continue;
+				}
+			}
+
+			/* This option takes a value. */
+			if ((opt_len == mo_len) || (opt[mo_len] != '=')) {
+				/* Keep trying to find a match. */
+				continue;
+			}
+
+			/* We have an option match.  Verify option value. */
+			ovalue = &opt[mo_len] + 1;
+			ovalue_len = strlen(ovalue);
+
+			/* Value can't be zero length string. */
+			if (ovalue_len == 0)
+				return (0);
+
+			if (mop[i].mo_type == MOUNT_OPT_UINT) {
+				int j;
+				/*
+				 * Verify that value is an unsigned int.  The
+				 * cast keeps isdigit() defined for bytes with
+				 * the high bit set.
+				 */
+				for (j = 0; j < ovalue_len; j++)
+					if (!isdigit((unsigned char)ovalue[j]))
+						return (0);
+			} else {
+				/* Unknown option type specified. */
+				assert(0);
+			}
+
+			/* The option is ok. */
+			break;
+		}
+
+		/* If there were no matches this is an unsupported option. */
+		if (mop[i].mo_name == NULL)
+			return (0);
+
+		/* This option is ok, move onto the next option. */
+		if ((opt = strtok(NULL, ",")) == NULL)
+			break;
+		opt_len = strlen(opt);
+	}
+
+	/* We verified all the options. */
+	return (1);
+}
+
+/*
+ * Append 'option' to the comma separated option string in buf, which is
+ * buf_size bytes long.  A ',' separator is inserted unless buf is empty.
+ * Returns 0 on success or -EOVERFLOW if the result did not fit.
+ */
+static int
+i_add_option(char *option, char *buf, size_t buf_size)
+{
+	const char *fmt;
+	size_t used;
+
+	assert((option != NULL) && (strlen(option) > 0));
+	assert((buf != NULL) && (buf_size > 0));
+
+	/* Write after whatever is already in the buffer. */
+	used = strlen(buf);
+	fmt = (used == 0) ? "%s" : ",%s";
+
+	/*LINTED*/
+	if (snprintf(buf + used, buf_size - used, fmt, option) >
+	    (buf_size - used - 1))
+		return (-EOVERFLOW);
+
+	return (0);
+}
+
+/*
+ * Append "option=val" to the comma separated option string in buf, which is
+ * buf_size bytes long.  A ',' separator is inserted unless buf is empty.
+ * Returns 0 on success or -EOVERFLOW if the result did not fit.
+ */
+static int
+i_add_option_int(char *option, int val, char *buf, size_t buf_size)
+{
+	const char *fmt;
+	size_t used;
+
+	assert((option != NULL) && (strlen(option) > 0));
+	assert((buf != NULL) && (buf_size > 0));
+
+	/* Write after whatever is already in the buffer. */
+	used = strlen(buf);
+	fmt = (used == 0) ? "%s=%d" : ",%s=%d";
+
+	/*LINTED*/
+	if (snprintf(buf + used, buf_size - used, fmt, option, val) >
+	    (buf_size - used - 1))
+		return (-EOVERFLOW);
+
+	return (0);
+}
+
+static int
+i_make_nfs_args(lx_nfs_mount_data_t *lx_nmd, struct nfs_args *nfs_args,
+ struct netbuf *nfs_args_addr, struct knetconfig *nfs_args_knconf,
+ union fh_buffer *nfs_args_fh, struct sec_data *nfs_args_secdata,
+ char *fstype, char *options, int options_size)
+{
+ struct stat statbuf;
+ int i, rv, use_tcp;
+
+ /* Sanity check the incoming Linux request. */
+ if ((lx_nmd->nmd_rsize < 0) || (lx_nmd->nmd_wsize < 0) ||
+ (lx_nmd->nmd_timeo < 0) || (lx_nmd->nmd_retrans < 0) ||
+ (lx_nmd->nmd_acregmin < 0) || (lx_nmd->nmd_acregmax < 0) ||
+ (lx_nmd->nmd_acdirmax < 0)) {
+ return (-EINVAL);
+ }
+
+ /*
+ * Additional sanity checks of incoming request.
+ *
+ * Some of the sanity checks below should probably return
+ * EINVAL (or some other error code) instead of ENOTSUP,
+ * but without experimenting on Linux to see how it
+ * deals with certain strange values there is no way
+ * to really know what we should return, hence we return
+ * ENOTSUP to tell us that eventually if we see some
+ * application hitting the problem we can go to a real
+ * Linux system, figure out how it deals with the situation
+ * and update our code to handle it in the same fashion.
+ */
+ if (lx_nmd->nmd_version != 4) {
+ lx_unsupported("unsupported nfs mount request, "
+ "unrecognized NFS mount structure: %d\n",
+ lx_nmd->nmd_version);
+ return (-ENOTSUP);
+ }
+ if ((lx_nmd->nmd_flags & ~LX_NFS_MOUNT_SUPPORTED) != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "flags: 0x%x\n", lx_nmd->nmd_flags);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_addr.sin_family != AF_INET) {
+ lx_unsupported("unsupported nfs mount request, "
+ "transport address family: 0x%x\n",
+ lx_nmd->nmd_addr.sin_family);
+ return (-ENOTSUP);
+ }
+ for (i = 0; i < LX_NMD_MAXHOSTNAMELEN; i++) {
+ if (lx_nmd->nmd_hostname[i] == '\0')
+ break;
+ }
+ if (i == 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "no hostname specified\n");
+ return (-ENOTSUP);
+ }
+ if (i == LX_NMD_MAXHOSTNAMELEN) {
+ lx_unsupported("unsupported nfs mount request, "
+ "hostname not terminated\n");
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_namlen < i) {
+ lx_unsupported("unsupported nfs mount request, "
+ "invalid namlen value: 0x%x\n", lx_nmd->nmd_namlen);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_bsize != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "bsize value: 0x%x\n", lx_nmd->nmd_bsize);
+ return (-ENOTSUP);
+ }
+
+ /* Initialize and clear the output structure pointers passed in. */
+ bzero(nfs_args, sizeof (*nfs_args));
+ bzero(nfs_args_addr, sizeof (*nfs_args_addr));
+ bzero(nfs_args_knconf, sizeof (*nfs_args_knconf));
+ bzero(nfs_args_fh, sizeof (*nfs_args_fh));
+ bzero(nfs_args_secdata, sizeof (*nfs_args_secdata));
+ nfs_args->addr = nfs_args_addr;
+ nfs_args->knconf = nfs_args_knconf;
+ nfs_args->fh = (caddr_t)nfs_args_fh;
+ nfs_args->nfs_ext_u.nfs_extB.secdata = nfs_args_secdata;
+
+ /* Check if we're using tcp. */
+ use_tcp = (lx_nmd->nmd_flags & LX_NFS_MOUNT_TCP) ? 1 : 0;
+
+ /*
+ * These seem to be the default flags used by Solaris for v2 and v3
+ * nfs mounts.
+ *
+ * Don't bother with NFSMNT_TRYRDMA since we always specify a
+ * transport (either udp or tcp).
+ */
+ nfs_args->flags = NFSMNT_NEWARGS | NFSMNT_KNCONF | NFSMNT_INT |
+ NFSMNT_HOSTNAME;
+
+ /* Translate some Linux mount flags into Solaris mount flags. */
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_SOFT)
+ nfs_args->flags |= NFSMNT_SOFT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_INTR)
+ nfs_args->flags |= NFSMNT_INT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_POSIX)
+ nfs_args->flags |= NFSMNT_POSIX;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOCTO)
+ nfs_args->flags |= NFSMNT_NOCTO;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOAC)
+ nfs_args->flags |= NFSMNT_NOAC;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NONLM)
+ nfs_args->flags |= NFSMNT_LLOCK;
+
+ if ((lx_nmd->nmd_flags & LX_NFS_MOUNT_VER3) != 0) {
+ (void) strcpy(fstype, "nfs3");
+ if ((rv = i_add_option_int("vers", 3,
+ options, options_size)) != 0)
+ return (rv);
+
+ if (lx_nmd->nmd_root.lx_fh3_length >
+ sizeof (nfs_args_fh->fh3.fh3_u.data)) {
+ lx_unsupported("unsupported nfs mount request, "
+ "nfs file handle length: 0x%x\n",
+ lx_nmd->nmd_root.lx_fh3_length);
+ return (-ENOTSUP);
+ }
+
+ /* Set the v3 file handle info. */
+ nfs_args_fh->fh3.fh3_length = lx_nmd->nmd_root.lx_fh3_length;
+ bcopy(&lx_nmd->nmd_root.lx_fh3_data,
+ nfs_args_fh->fh3.fh3_u.data,
+ lx_nmd->nmd_root.lx_fh3_length);
+ } else {
+ /*
+ * Assume nfs v2. Note that this could also be a v1
+ * mount request but there doesn't seem to be any difference
+ * in the parameters passed to the Linux mount system
+ * call for v1 or v2 mounts so there is no way of really
+ * knowing.
+ */
+ (void) strcpy(fstype, "nfs");
+ if ((rv = i_add_option_int("vers", 2,
+ options, options_size)) != 0)
+ return (rv);
+
+ /* Solaris seems to add this flag when using v2. */
+ nfs_args->flags |= NFSMNT_SECDEFAULT;
+
+ /* Set the v2 file handle info. */
+ bcopy(&lx_nmd->nmd_old_root,
+ nfs_args_fh, sizeof (nfs_args_fh->fh2));
+ }
+
+ /*
+ * We can't use getnetconfig() here because there is no netconfig
+ * database in linux.
+ */
+ nfs_args_knconf->knc_protofmly = "inet";
+ if (use_tcp) {
+ /*
+ * TCP uses NC_TPI_COTS_ORD semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_COTS_ORD;
+ nfs_args_knconf->knc_proto = "tcp";
+ if ((rv = i_add_option("proto=tcp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/tcp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ } else {
+ /*
+ * Assume UDP. UDP uses NC_TPI_CLTS semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_CLTS;
+ nfs_args_knconf->knc_proto = "udp";
+ if ((rv = i_add_option("proto=udp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/udp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ }
+
+ /* Set the server address. */
+ nfs_args_addr->maxlen = nfs_args_addr->len =
+ sizeof (struct sockaddr_in);
+ nfs_args_addr->buf = (char *)&lx_nmd->nmd_addr;
+
+ /* Set the server hostname string. */
+ nfs_args->hostname = lx_nmd->nmd_hostname;
+
+ /* Translate Linux nfs mount parameters into Solaris mount options. */
+ if (lx_nmd->nmd_rsize != LX_NMD_DEFAULT_RSIZE) {
+ if ((rv = i_add_option_int("rsize", lx_nmd->nmd_rsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->rsize = lx_nmd->nmd_rsize;
+ nfs_args->flags |= NFSMNT_RSIZE;
+ }
+ if (lx_nmd->nmd_wsize != LX_NMD_DEFAULT_WSIZE) {
+ if ((rv = i_add_option_int("wsize", lx_nmd->nmd_wsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->wsize = lx_nmd->nmd_wsize;
+ nfs_args->flags |= NFSMNT_WSIZE;
+ }
+ if ((rv = i_add_option_int("timeo", lx_nmd->nmd_timeo,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->timeo = lx_nmd->nmd_timeo;
+ nfs_args->flags |= NFSMNT_TIMEO;
+ if ((rv = i_add_option_int("retrans", lx_nmd->nmd_retrans,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->retrans = lx_nmd->nmd_retrans;
+ nfs_args->flags |= NFSMNT_RETRANS;
+ if ((rv = i_add_option_int("acregmin", lx_nmd->nmd_acregmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmin = lx_nmd->nmd_acregmin;
+ nfs_args->flags |= NFSMNT_ACREGMIN;
+ if ((rv = i_add_option_int("acregmax", lx_nmd->nmd_acregmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmax = lx_nmd->nmd_acregmax;
+ nfs_args->flags |= NFSMNT_ACREGMAX;
+ if ((rv = i_add_option_int("acdirmin", lx_nmd->nmd_acdirmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmin = lx_nmd->nmd_acdirmin;
+ nfs_args->flags |= NFSMNT_ACDIRMIN;
+ if ((rv = i_add_option_int("acdirmax", lx_nmd->nmd_acdirmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmax = lx_nmd->nmd_acdirmax;
+ nfs_args->flags |= NFSMNT_ACDIRMAX;
+
+ /* We only support nfs with a security type of AUTH_SYS. */
+ nfs_args->nfs_args_ext = NFS_ARGS_EXTB;
+ nfs_args_secdata->secmod = AUTH_SYS;
+ nfs_args_secdata->rpcflavor = AUTH_SYS;
+ nfs_args_secdata->flags = 0;
+ nfs_args_secdata->uid = 0;
+ nfs_args_secdata->data = NULL;
+ nfs_args->nfs_ext_u.nfs_extB.next = NULL;
+
+ /*
+ * The Linux nfs mount command seems to pass an open socket fd
+ * to the kernel during the mount system call. We don't need
+ * this fd on Solaris so just close it.
+ */
+ (void) close(lx_nmd->nmd_fd);
+
+ return (0);
+}
+
+/*
+ * Copy the Linux mount option string in from the address datap (if it is
+ * not NULL) into options, which is options_size bytes long, and log the
+ * result.  When datap is NULL the contents of options are left untouched.
+ *
+ * Returns 0 on success or -EFAULT if the string could not be copied in or
+ * did not fit in the buffer.
+ */
+static int
+i_lx_copyin_options(const void *datap, char *options, size_t options_size)
+{
+	int rv;
+
+	if (datap != NULL) {
+		rv = uucopystr((void *)datap, options, options_size);
+		if ((rv == -1) || (rv == options_size))
+			return (-EFAULT);
+	}
+	lx_debug("\tlinux mount options: \"%s\"", options);
+	return (0);
+}
+
+/*
+ * Emulate the Linux mount(2) system call.
+ *
+ * p1 - source path, p2 - target path, p3 - file system type name,
+ * p4 - Linux mount flags, p5 - file system specific data (an option string,
+ * or for nfs a pointer to a Linux nfs mount data structure).
+ *
+ * Bind mounts are turned into lofs mounts, "proc" into lx_proc, "autofs"
+ * into LX_AUTOFS_NAME, and "nfs" into a native nfs/nfs3 mount.  Any other
+ * file system type is rejected.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+int
+lx_mount(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5)
+{
+	/* Linux input arguments. */
+	const char *sourcep = (const char *)p1;
+	const char *targetp = (const char *)p2;
+	const char *fstypep = (const char *)p3;
+	unsigned int flags = (unsigned int)p4;
+	const void *datap = (const void *)p5;
+
+	/* Variables needed for all mounts. */
+	char source[MAXPATHLEN], target[MAXPATHLEN];
+	char fstype[MAXPATHLEN], options[MAXPATHLEN];
+	int sflags, rv;
+
+	/* Variables needed for nfs mounts. */
+	lx_nfs_mount_data_t lx_nmd;
+	struct nfs_args nfs_args;
+	struct netbuf nfs_args_addr;
+	struct knetconfig nfs_args_knconf;
+	union fh_buffer nfs_args_fh;
+	struct sec_data nfs_args_secdata;
+	char *sdataptr = NULL;
+	int sdatalen = 0;
+
+	/* Initialize Solaris mount arguments. */
+	sflags = MS_OPTIONSTR;
+	options[0] = '\0';
+
+	/* Copy in parameters that are always present. */
+	rv = uucopystr((void *)sourcep, &source, sizeof (source));
+	if ((rv == -1) || (rv == sizeof (source)))
+		return (-EFAULT);
+
+	rv = uucopystr((void *)targetp, &target, sizeof (target));
+	if ((rv == -1) || (rv == sizeof (target)))
+		return (-EFAULT);
+
+	rv = uucopystr((void *)fstypep, &fstype, sizeof (fstype));
+	if ((rv == -1) || (rv == sizeof (fstype)))
+		return (-EFAULT);
+
+	lx_debug("\tlinux mount source: %s", source);
+	lx_debug("\tlinux mount target: %s", target);
+	lx_debug("\tlinux mount fstype: %s", fstype);
+
+	/* Make sure we support the requested mount flags. */
+	if ((flags & ~LX_MS_SUPPORTED) != 0) {
+		lx_unsupported(
+		    "unsupported mount flags: 0x%x", flags);
+		return (-ENOTSUP);
+	}
+
+	/* Do filesystem specific mount work. */
+	if (flags & LX_MS_BIND) {
+
+		/* If MS_BIND is set, we turn this into a lofs mount. */
+		(void) strcpy(fstype, "lofs");
+
+		/* Copy in Linux mount options. */
+		if ((rv = i_lx_copyin_options(datap,
+		    options, sizeof (options))) != 0)
+			return (rv);
+
+		/* Verify Linux mount options. */
+		if (i_lx_opt_verify(options, lofs_options) == 0) {
+			lx_unsupported("unsupported lofs mount options");
+			return (-ENOTSUP);
+		}
+	} else if (strcmp(fstype, "proc") == 0) {
+
+		/* Translate proc mount requests to lx_proc requests. */
+		(void) strcpy(fstype, "lx_proc");
+
+		/* Copy in Linux mount options. */
+		if ((rv = i_lx_copyin_options(datap,
+		    options, sizeof (options))) != 0)
+			return (rv);
+
+		/* Verify Linux mount options. */
+		if (i_lx_opt_verify(options, lx_proc_options) == 0) {
+			lx_unsupported("unsupported lx_proc mount options");
+			return (-ENOTSUP);
+		}
+	} else if (strcmp(fstype, "autofs") == 0) {
+
+		/* Translate autofs mount requests to lx_autofs requests. */
+		(void) strcpy(fstype, LX_AUTOFS_NAME);
+
+		/* Copy in Linux mount options. */
+		if ((rv = i_lx_copyin_options(datap,
+		    options, sizeof (options))) != 0)
+			return (rv);
+
+		/* Verify Linux mount options. */
+		if (i_lx_opt_verify(options, lx_autofs_options) == 0) {
+			lx_unsupported("unsupported lx_autofs mount options");
+			return (-ENOTSUP);
+		}
+	} else if (strcmp(fstype, "nfs") == 0) {
+
+		/*
+		 * Copy in Linux mount options.  Note that for Linux
+		 * nfs mounts the mount options pointer (which normally
+		 * points to a string) points to a structure.
+		 */
+		if (uucopy((void *)datap, &lx_nmd, sizeof (lx_nmd)) < 0)
+			return (-errno);
+
+		/*
+		 * For Solaris nfs mounts, the kernel expects a special
+		 * structure, but a pointer to this structure is passed
+		 * in via an extra parameter (sdataptr below.)
+		 */
+		if ((rv = i_make_nfs_args(&lx_nmd, &nfs_args,
+		    &nfs_args_addr, &nfs_args_knconf, &nfs_args_fh,
+		    &nfs_args_secdata, fstype,
+		    options, sizeof (options))) != 0)
+			return (rv);
+
+		/*
+		 * For nfs mounts we need to tell the mount system call
+		 * to expect extra parameters.
+		 */
+		sflags |= MS_DATA;
+		sdataptr = (char *)&nfs_args;
+		sdatalen = sizeof (nfs_args);
+	} else {
+		lx_unsupported(
+		    "unsupported mount filesystem type: %s", fstype);
+		return (-ENOTSUP);
+	}
+
+	/* Convert some Linux flags to Solaris flags. */
+	if (flags & LX_MS_RDONLY)
+		sflags |= MS_RDONLY;
+	if (flags & LX_MS_NOSUID)
+		sflags |= MS_NOSUID;
+	if (flags & LX_MS_REMOUNT)
+		sflags |= MS_REMOUNT;
+
+	/* Convert some Linux flags to Solaris option strings. */
+	if ((flags & LX_MS_NODEV) &&
+	    ((rv = i_add_option("nodev", options, sizeof (options))) != 0))
+		return (rv);
+	if ((flags & LX_MS_NOEXEC) &&
+	    ((rv = i_add_option("noexec", options, sizeof (options))) != 0))
+		return (rv);
+	if ((flags & LX_MS_NOATIME) &&
+	    ((rv = i_add_option("noatime", options, sizeof (options))) != 0))
+		return (rv);
+
+	lx_debug("\tsolaris mount fstype: %s", fstype);
+	lx_debug("\tsolaris mount options: \"%s\"", options);
+
+	return (mount(source, target, sflags, fstype, sdataptr, sdatalen,
+	    options, sizeof (options)) ? -errno : 0);
+}
+
+/*
+ * The Linux umount() call takes the same argument as the native one.  It
+ * still needs a wrapper because the native umount() is layered on top of
+ * umount2() and so cannot be used as a pass-thru system call.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+int
+lx_umount(uintptr_t p1)
+{
+	char *path = (char *)p1;
+
+	if (umount(path) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux umount2() behaves like the native call, except that its force flag
+ * (MNT_FORCE, the logical equivalent of MS_FORCE) has a different value.
+ */
+#define LX_MNT_FORCE 0x1
+
+/*
+ * Unmount the file system at the path given by p1.  p2 holds the Linux
+ * flags; anything other than LX_MNT_FORCE is rejected with -EINVAL.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+int
+lx_umount2(uintptr_t p1, uintptr_t p2)
+{
+	int sflags;
+
+	/* Only the force flag is understood. */
+	if ((p2 & ~LX_MNT_FORCE) != 0)
+		return (-EINVAL);
+
+	sflags = ((p2 & LX_MNT_FORCE) != 0) ? MS_FORCE : 0;
+
+	if (umount2((char *)p1, sflags) != 0)
+		return (-errno);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/open.c b/usr/src/lib/brand/lx/lx_brand/common/open.c
new file mode 100644
index 0000000000..e55d8fabe3
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/open.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/inttypes.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libintl.h>
+#include <stdio.h>
+
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Convert the flags argument of a Linux open call (p2) into the equivalent
+ * native open flags.  The access mode is translated first and then every
+ * Linux flag listed in the table below is mapped to its native counterpart.
+ */
+static int
+ltos_open_flags(uintptr_t p2)
+{
+	/*
+	 * Linux uses the LX_O_DIRECT flag to do raw, synchronous I/O to the
+	 * device backing the fd in question.  Solaris doesn't have similar
+	 * functionality, but we can attempt to simulate it using the flags
+	 * (O_RSYNC|O_SYNC) and directio(3C).
+	 *
+	 * The LX_O_DIRECT flag also requires that the transfer size and
+	 * alignment of I/O buffers be a multiple of the logical block size
+	 * for the underlying file system, but frankly there isn't an easy way
+	 * to support that functionality without doing something like adding
+	 * an fcntl(2) flag to denote LX_O_DIRECT mode.
+	 *
+	 * Since LX_O_DIRECT is merely a performance advisory, we'll just
+	 * emulate what we can and trust that the only applications expecting
+	 * an error when performing I/O from a misaligned buffer or when
+	 * passing a transfer size that is not a multiple of the underlying
+	 * file system block size will be test suites.
+	 */
+	static const struct {
+		uintptr_t	lf_linux;	/* Linux flag to look for */
+		int		lf_native;	/* native flag(s) to set */
+	} flag_map[] = {
+		{ LX_O_CREAT,		O_CREAT },
+		{ LX_O_EXCL,		O_EXCL },
+		{ LX_O_NOCTTY,		O_NOCTTY },
+		{ LX_O_TRUNC,		O_TRUNC },
+		{ LX_O_APPEND,		O_APPEND },
+		{ LX_O_NONBLOCK,	O_NONBLOCK },
+		{ LX_O_SYNC,		O_SYNC },
+		{ LX_O_LARGEFILE,	O_LARGEFILE },
+		{ LX_O_NOFOLLOW,	O_NOFOLLOW },
+		{ LX_O_DIRECT,		(O_RSYNC|O_SYNC) }
+	};
+	uintptr_t accmode = p2 & O_ACCMODE;
+	int flags;
+	uint_t idx;
+
+	/* Anything that is not read-only or write-only is read-write. */
+	flags = (accmode == LX_O_RDONLY) ? O_RDONLY :
+	    ((accmode == LX_O_WRONLY) ? O_WRONLY : O_RDWR);
+
+	for (idx = 0; idx < sizeof (flag_map) / sizeof (flag_map[0]); idx++) {
+		if ((p2 & flag_map[idx].lf_linux) != 0)
+			flags |= flag_map[idx].lf_native;
+	}
+
+	return (flags);
+}
+
+/*
+ * Apply the Linux open flags (p2) that can only be honoured once the file
+ * has been opened: LX_O_DIRECTORY, LX_O_DIRECT and LX_O_ASYNC.
+ *
+ * Returns fd on success.  On failure fd is closed and a negative errno
+ * value is returned.
+ */
+static int
+lx_open_postprocess(int fd, uintptr_t p2)
+{
+	struct stat64 statbuf;
+
+	/*
+	 * Check the file type AFTER opening the file to avoid a race condition
+	 * where the file we want to open could change types between a stat64()
+	 * and an open().
+	 */
+	if (p2 & LX_O_DIRECTORY) {
+		if (fstat64(fd, &statbuf) < 0) {
+			int ret = -errno;
+
+			(void) close(fd);
+			return (ret);
+		} else if (!S_ISDIR(statbuf.st_mode)) {
+			(void) close(fd);
+			return (-ENOTDIR);
+		}
+	}
+
+	/* Direct I/O is only advisory, so a failure here is ignored. */
+	if (p2 & LX_O_DIRECT)
+		(void) directio(fd, DIRECTIO_ON);
+
+	/*
+	 * Set the ASYNC flag if passed.  F_SETFL replaces the complete set
+	 * of file status flags, so fetch the current flags first and add
+	 * FASYNC to them; otherwise flags requested at open time (such as
+	 * O_APPEND or O_NONBLOCK) would be cleared again.
+	 */
+	if (p2 & LX_O_ASYNC) {
+		int cur;
+
+		if (((cur = fcntl(fd, F_GETFL)) < 0) ||
+		    (fcntl(fd, F_SETFL, cur | FASYNC) < 0)) {
+			int ret = -errno;
+
+			(void) close(fd);
+			return (ret);
+		}
+	}
+
+	return (fd);
+}
+
+/*
+ * Emulate the Linux openat(2) system call.
+ *
+ * ext1 - directory fd (LX_AT_FDCWD is mapped to the native AT_FDCWD),
+ * p1 - path, p2 - Linux open flags, p3 - creation mode, which is only
+ * used when the translated flags include O_CREAT.
+ *
+ * Returns the new fd on success or a negative errno value on failure.
+ */
+int
+lx_openat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char *path = (char *)p1;
+	int atfd = ((int)ext1 == LX_AT_FDCWD) ? AT_FDCWD : (int)ext1;
+	int flags = ltos_open_flags(p2);
+	mode_t mode = ((flags & O_CREAT) != 0) ? (mode_t)p3 : 0;
+	int fd;
+
+	lx_debug("\topenat(%d, %s, 0%o, 0%o)", atfd, path, flags, mode);
+
+	fd = openat(atfd, path, flags, mode);
+	if (fd < 0)
+		return (-errno);
+
+	/* Handle the flags that have no direct native open equivalent. */
+	return (lx_open_postprocess(fd, p2));
+}
+
+/*
+ * Emulate the Linux open(2) system call.
+ *
+ * p1 - path, p2 - Linux open flags, p3 - creation mode, which is only used
+ * when the translated flags include O_CREAT.
+ *
+ * Returns the new fd on success or a negative errno value on failure.
+ */
+int
+lx_open(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	char *path = (char *)p1;
+	int flags = ltos_open_flags(p2);
+	mode_t mode = ((flags & O_CREAT) != 0) ? (mode_t)p3 : 0;
+	int fd;
+
+	lx_debug("\topen(%s, 0%o, 0%o)", path, flags, mode);
+
+	fd = open(path, flags, mode);
+	if (fd < 0)
+		return (-errno);
+
+	/* Handle the flags that have no direct native open equivalent. */
+	return (lx_open_postprocess(fd, p2));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/pgrp.c b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
new file mode 100644
index 0000000000..1eada7c185
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Return the process group of the caller.  A process group equal to
+ * zoneinit_pid is reported as LX_INIT_PGID, the value Linux expects for
+ * the init process.  Failures are returned as a negative errno value.
+ */
+int
+lx_getpgrp(void)
+{
+	int pgrp = getpgrp();
+
+	/* Hide the native pid of the init process from the caller. */
+	if (pgrp == zoneinit_pid)
+		return (LX_INIT_PGID);
+
+	if (pgrp == -1)
+		return (-errno);
+
+	return (pgrp);
+}
+
+/*
+ * Return the process group id of the process whose Linux pid is p1.
+ *
+ * Negative pids fail with -ESRCH.  Both a supplied pid and a resulting
+ * pgid equal to zoneinit_pid are reported as LX_INIT_PGID.  Other failures
+ * are returned as a negative errno value.
+ */
+int
+lx_getpgid(uintptr_t p1)
+{
+	int lpid = (int)p1;
+	pid_t spid;
+	int rv;
+
+	if (lpid < 0)
+		return (-ESRCH);
+
+	/* The init process always gets the pgid Linux expects. */
+	if (lpid == zoneinit_pid)
+		return (LX_INIT_PGID);
+
+	/* Translate the Linux pid into the native one. */
+	rv = lx_lpid_to_spid(lpid, &spid);
+	if (rv < 0)
+		return (rv);
+
+	rv = getpgid(spid);
+
+	/* A group led by the init process is reported the Linux way too. */
+	if (rv == zoneinit_pid)
+		return (LX_INIT_PGID);
+
+	if (rv == -1)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * Set the process group of the process with Linux pid p1 to the Linux
+ * process group p2.  A pgid of zero means "use the (translated) pid of the
+ * target process".
+ *
+ * Returns 0 on success, -ESRCH for a negative pid, -EINVAL for a negative
+ * pgid, or another negative errno value on failure.
+ */
+int
+lx_setpgid(uintptr_t p1, uintptr_t p2)
+{
+	pid_t lpid = (pid_t)p1;
+	pid_t lpgid = (pid_t)p2;
+	pid_t spid, spgid;
+	int rv;
+
+	if (lpid < 0)
+		return (-ESRCH);
+
+	if (lpgid < 0)
+		return (-EINVAL);
+
+	/* Translate the Linux pid into the native one. */
+	rv = lx_lpid_to_spid(lpid, &spid);
+	if (rv < 0)
+		return (rv);
+
+	if (lpgid != 0) {
+		rv = lx_lpid_to_spid(lpgid, &spgid);
+		if (rv < 0)
+			return (rv);
+	} else {
+		spgid = spid;
+	}
+
+	if (setpgid(spid, spgid) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Return the session id of the process whose Linux pid is p1.
+ *
+ * Negative pids fail with -ESRCH.  Both a supplied pid and a resulting
+ * session id equal to zoneinit_pid are reported as LX_INIT_SID.  Other
+ * failures are returned as a negative errno value.
+ */
+int
+lx_getsid(uintptr_t p1)
+{
+	int lpid = (int)p1;
+	pid_t spid;
+	int rv;
+
+	if (lpid < 0)
+		return (-ESRCH);
+
+	/* The init process always gets the sid Linux expects. */
+	if (lpid == zoneinit_pid)
+		return (LX_INIT_SID);
+
+	/* Translate the Linux pid into the native one. */
+	rv = lx_lpid_to_spid(lpid, &spid);
+	if (rv < 0)
+		return (rv);
+
+	rv = getsid(spid);
+
+	/* A session led by the init process is reported the Linux way. */
+	if (rv == zoneinit_pid)
+		return (LX_INIT_SID);
+
+	if (rv == -1)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * Create a new session for the caller.  A resulting session id equal to
+ * zoneinit_pid is reported as LX_INIT_SID, the value Linux expects for the
+ * init process.  Failures are returned as a negative errno value.
+ */
+int
+lx_setsid(void)
+{
+	int sid = setsid();
+
+	/* Hide the native pid of the init process from the caller. */
+	if (sid == zoneinit_pid)
+		return (LX_INIT_SID);
+
+	if (sid == -1)
+		return (-errno);
+
+	return (sid);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c
new file mode 100644
index 0000000000..5eba21c652
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c
@@ -0,0 +1,215 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <alloca.h>
+#include <signal.h>
+#include <strings.h>
+#include <sys/param.h>
+#include <sys/brand.h>
+#include <sys/poll.h>
+#include <sys/syscall.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_poll.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+
+extern int select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
+    struct timeval *tv);
+
+/*
+ * Emulate the Linux select(2) system call.
+ *
+ * p1 - nfds, p2/p3/p4 - addresses of the read/write/exception fd sets
+ * (each may be 0), p5 - address of the timeout (may be 0).
+ *
+ * Only howmany(nfds, 8) bytes of each fd set are copied in and back out.
+ * When a timeout is supplied it is rewritten on success with the time that
+ * was left when the call returned, as Linux does.
+ *
+ * Returns the value returned by the native select call on success or a
+ * negative errno value on failure.
+ */
+int
+lx_select(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+    uintptr_t p5)
+{
+	int nfds = (int)p1;
+	fd_set *rfdsp = NULL;
+	fd_set *wfdsp = NULL;
+	fd_set *efdsp = NULL;
+	struct timeval tv, *tvp = NULL;
+	int fd_set_len = howmany(nfds, 8);
+	int r;
+	hrtime_t start = 0, end;
+
+	/* Log the addresses the caller passed in, one per conversion. */
+	lx_debug("\tselect(%d, 0x%p, 0x%p, 0x%p, 0x%p)",
+	    nfds, (void *)p2, (void *)p3, (void *)p4, (void *)p5);
+
+	if (nfds > 0) {
+		if (p2 != 0) {
+			rfdsp = SAFE_ALLOCA(fd_set_len);
+			if (rfdsp == NULL)
+				return (-ENOMEM);
+			if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
+				return (-errno);
+		}
+		if (p3 != 0) {
+			wfdsp = SAFE_ALLOCA(fd_set_len);
+			if (wfdsp == NULL)
+				return (-ENOMEM);
+			if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
+				return (-errno);
+		}
+		if (p4 != 0) {
+			efdsp = SAFE_ALLOCA(fd_set_len);
+			if (efdsp == NULL)
+				return (-ENOMEM);
+			if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
+				return (-errno);
+		}
+	}
+	if (p5 != 0) {
+		tvp = &tv;
+		if (uucopy((void *)p5, &tv, sizeof (tv)) != 0)
+			return (-errno);
+		/* Remember when we started so the timeout can be updated. */
+		start = gethrtime();
+	}
+
+	if (nfds >= FD_SETSIZE)
+		r = select_large_fdset(nfds, rfdsp, wfdsp, efdsp, tvp);
+	else
+		r = select(nfds, rfdsp, wfdsp, efdsp, tvp);
+	if (r < 0)
+		return (-errno);
+
+	if (tvp != NULL) {
+		long long tv_total;
+
+		/*
+		 * Linux updates the timeval parameter for select() calls
+		 * with the amount of time that was left before the select
+		 * would have timed out.
+		 *
+		 * The arithmetic is done in 64 bits; tv_sec * MICROSEC
+		 * would otherwise overflow a 32-bit long for timeouts
+		 * longer than about 35 minutes.
+		 */
+		end = gethrtime();
+		tv_total = ((long long)tv.tv_sec * MICROSEC) + tv.tv_usec;
+		tv_total -= ((end - start) / (NANOSEC / MICROSEC));
+		if (tv_total < 0) {
+			tv.tv_sec = 0;
+			tv.tv_usec = 0;
+		} else {
+			tv.tv_sec = tv_total / MICROSEC;
+			tv.tv_usec = tv_total % MICROSEC;
+		}
+
+		if (uucopy(&tv, (void *)p5, sizeof (tv)) != 0)
+			return (-errno);
+	}
+
+	/* Copy the resulting fd sets back out to the caller. */
+	if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
+		return (-errno);
+	if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
+		return (-errno);
+	if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
+		return (-errno);
+
+	return (r);
+}
+
+/*
+ * Emulate the Linux poll(2) system call.
+ *
+ * p1 - address of the caller's array of pollfd structures, p2 - number of
+ * entries in that array, p3 - timeout passed through to the native poll.
+ *
+ * The Linux event bits are translated into native ones for the call and
+ * the returned event bits are translated back before the array is copied
+ * out again.  A request for any event outside LX_POLL_SUPPORTED_EVENTS
+ * fails with -ENOTSUP.
+ *
+ * Returns the value returned by the native poll on success or a negative
+ * errno value on failure.
+ */
+int
+lx_poll(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	struct pollfd *lfds, *sfds;
+	nfds_t nfds = (nfds_t)p2;
+	int fds_size, i, rval, revents;
+
+	/*
+	 * Note: we are assuming that the Linux and Solaris pollfd
+	 * structures are identical.  Copy in the linux poll structure.
+	 *
+	 * nfds comes straight from the caller, so make sure the size of
+	 * the array fits in fds_size without wrapping.  Otherwise the
+	 * buffers allocated below would be smaller than the number of
+	 * entries the loops walk over.
+	 */
+	fds_size = sizeof (struct pollfd) * nfds;
+	if ((fds_size < 0) ||
+	    (((size_t)fds_size / sizeof (struct pollfd)) != nfds))
+		return (-EINVAL);
+
+	lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+	if (lfds == NULL)
+		return (-ENOMEM);
+	if (uucopy((void *)p1, lfds, fds_size) != 0)
+		return (-errno);
+
+	/*
+	 * The poll system call modifies the poll structures passed in
+	 * so we'll need to make an extra copy of them.
+	 */
+	sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
+	if (sfds == NULL)
+		return (-ENOMEM);
+
+	/* Convert the Linux events bitmask into the Solaris equivalent. */
+	for (i = 0; i < nfds; i++) {
+		/*
+		 * If the caller is polling for an unsupported event, we
+		 * have to bail out.
+		 */
+		if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
+			lx_unsupported("unsupported poll events requested: "
+			    "events=0x%x", lfds[i].events);
+			return (-ENOTSUP);
+		}
+
+		sfds[i].fd = lfds[i].fd;
+		sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
+		if (lfds[i].events & LX_POLLWRNORM)
+			sfds[i].events |= POLLWRNORM;
+		if (lfds[i].events & LX_POLLWRBAND)
+			sfds[i].events |= POLLWRBAND;
+		sfds[i].revents = 0;
+	}
+
+	/* nfds_t is wider than the %u conversion expects, so narrow it. */
+	lx_debug("\tpoll(0x%p, %u, %d)", sfds, (unsigned int)nfds, (int)p3);
+
+	if ((rval = poll(sfds, nfds, (int)p3)) < 0)
+		return (-errno);
+
+	/* Convert the Solaris revents bitmask into the Linux equivalent */
+	for (i = 0; i < nfds; i++) {
+		revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
+		if (sfds[i].revents & POLLWRBAND)
+			revents |= LX_POLLWRBAND;
+
+		/*
+		 * Be careful because on solaris POLLOUT and POLLWRNORM
+		 * are defined to the same values but on linux they
+		 * are not.  Only report the one(s) the caller asked for.
+		 */
+		if (sfds[i].revents & POLLOUT) {
+			if ((lfds[i].events & LX_POLLOUT) == 0)
+				revents &= ~LX_POLLOUT;
+			if (lfds[i].events & LX_POLLWRNORM)
+				revents |= LX_POLLWRNORM;
+		}
+
+		lfds[i].revents = revents;
+	}
+
+	/* Copy out the results */
+	if (uucopy(lfds, (void *)p1, fds_size) != 0)
+		return (-errno);
+
+	return (rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/priority.c b/usr/src/lib/brand/lx/lx_brand/common/priority.c
new file mode 100644
index 0000000000..1519c18a71
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/priority.c
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/resource.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Emulate the Linux getpriority(2) system call.
+ *
+ * p1 - which (values above PRIO_USER are rejected with -EINVAL),
+ * p2 - who.  A request for process 1 is redirected to zoneinit_pid.
+ *
+ * On success the value returned is 20 minus the native priority; on
+ * failure a negative errno value is returned.
+ */
+int
+lx_getpriority(uintptr_t p1, uintptr_t p2)
+{
+	uint_t which = (int)p1;
+	id_t who = (id_t)p2;
+	int prio;
+
+	/*
+	 * 'which' is unsigned, so this one comparison rejects negative
+	 * values as well as everything past PRIO_USER, the last class
+	 * linux supports.
+	 */
+	if (which > PRIO_USER)
+		return (-EINVAL);
+
+	lx_debug("\tgetpriority(%d, %d)", which, who);
+
+	/* -1 is a legitimate priority, so errno has to tell us apart. */
+	errno = 0;
+
+	if ((which == PRIO_PROCESS) && (who == 1))
+		who = zoneinit_pid;
+
+	prio = getpriority(which, who);
+
+	/*
+	 * The return value of the getpriority syscall is biased by 20 to
+	 * avoid returning negative values when successful.
+	 */
+	if ((prio != -1) || (errno == 0))
+		return (20 - prio);
+
+	return (-errno);
+}
+
+/*
+ * Emulate the Linux setpriority(2) system call.
+ *
+ * p1 - which (values above PRIO_USER are rejected with -EINVAL),
+ * p2 - who, p3 - the new priority.  A request for process 1 is redirected
+ * to zoneinit_pid.
+ *
+ * Returns the result of the native setpriority call on success or a
+ * negative errno value on failure.
+ */
+int
+lx_setpriority(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int which = (int)p1;
+	id_t who = (id_t)p2;
+	int prio = (int)p3;
+	int rv;
+
+	if (which > PRIO_USER)
+		return (-EINVAL);
+
+	lx_debug("\tsetpriority(%d, %d, %d)", which, who, prio);
+
+	if ((which == PRIO_PROCESS) && (who == 1))
+		who = zoneinit_pid;
+
+	rv = setpriority(which, who, prio);
+	if (rv == -1)
+		return (-errno);
+
+	return (rv);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
new file mode 100644
index 0000000000..0c9dd5e461
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
@@ -0,0 +1,2114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_thread.h>
+#include <sys/lwp.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/frame.h>
+#include <strings.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <sys/auxv.h>
+#include <thread.h>
+#include <pthread.h>
+#include <synch.h>
+#include <elf.h>
+#include <ieeefp.h>
+#include <assert.h>
+#include <libintl.h>
+
+/*
+ * Linux ptrace compatibility.
+ *
+ * The brand support for ptrace(2) is built on top of the Solaris /proc
+ * interfaces, mounted at /native/proc in the zone. This gets quite
+ * complicated due to the way ptrace works and the Solaris realization of the
+ * Linux threading model.
+ *
+ * ptrace can only interact with a process if we are tracing it, and it is
+ * currently stopped. There are two ways a process can begin tracing another
+ * process:
+ *
+ * PTRACE_TRACEME
+ *
+ * A child process can use PTRACE_TRACEME to indicate that it wants to be
+ * traced by the parent. This sets the ptrace compatibility flag in /proc
+ * which causes the ptrace consumer to be notified through the wait(2)
+ * system call of events of interest. PTRACE_TRACEME is typically used by
+ * the debugger by forking a process, using PTRACE_TRACEME, and finally
+ * doing an exec of the specified program.
+ *
+ *
+ * PTRACE_ATTACH
+ *
+ * We can attach to a process using PTRACE_ATTACH. This is considerably
+ * more complicated than the previous case. On Linux, the traced process is
+ * effectively reparented to the ptrace consumer so that event notification
+ * can go through the normal wait(2) system call. Solaris has no such
+ * ability to reparent a process (nor should it) so some trickery was
+ * required.
+ *
+ * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child
+ * process. The monitor enables the /proc ptrace flag for itself and uses
+ * the native /proc mechanisms to observe the traced process and wait for
+ * events of interest. When the traced process stops, the monitor process
+ * sends itself a SIGTRAP thus rousting its parent process (the ptrace
+ * consumer) out of wait(2). We then translate the process id and status
+ * code from wait(2) to those of the traced process.
+ *
+ * To detach from the process we just have to clean up tracing flags and
+ * clean up the monitor.
+ *
+ * ptrace can only interact with a process if we have traced it, and it is
+ * currently stopped (see is_traced()). For threads, there's no way to
+ * distinguish whether ptrace() has been called for all threads or some
+ * subset. Since most clients will be tracing all threads, and erroneously
+ * allowing ptrace to access a non-traced thread is non-fatal (or at least
+ * would be fatal on linux), we ignore this aspect of the problem.
+ */
+
+#define LX_PTRACE_TRACEME 0 /* request codes; presumably the Linux ptrace(2) numbering - confirm */
+#define LX_PTRACE_PEEKTEXT 1
+#define LX_PTRACE_PEEKDATA 2
+#define LX_PTRACE_PEEKUSER 3
+#define LX_PTRACE_POKETEXT 4
+#define LX_PTRACE_POKEDATA 5
+#define LX_PTRACE_POKEUSER 6
+#define LX_PTRACE_CONT 7
+#define LX_PTRACE_KILL 8
+#define LX_PTRACE_SINGLESTEP 9 /* 10 and 11 are not defined here */
+#define LX_PTRACE_GETREGS 12
+#define LX_PTRACE_SETREGS 13
+#define LX_PTRACE_GETFPREGS 14
+#define LX_PTRACE_SETFPREGS 15
+#define LX_PTRACE_ATTACH 16
+#define LX_PTRACE_DETACH 17
+#define LX_PTRACE_GETFPXREGS 18
+#define LX_PTRACE_SETFPXREGS 19 /* 20 through 23 are not defined here */
+#define LX_PTRACE_SYSCALL 24
+
+/*
+ * This corresponds to the user_i387_struct Linux structure.
+ *
+ * getfpregs() fills it in from the lwp's native floating-point state and
+ * setfpregs() applies it back. lxuf_st_space holds the eight floating-point
+ * registers packed back to back at 10 bytes apiece.
+ */
+typedef struct lx_user_fpregs {
+ long lxuf_cwd;
+ long lxuf_swd;
+ long lxuf_twd;
+ long lxuf_fip;
+ long lxuf_fcs;
+ long lxuf_foo;
+ long lxuf_fos;
+ long lxuf_st_space[20];
+} lx_user_fpregs_t;
+
+/*
+ * This corresponds to the user_fxsr_struct Linux structure.
+ *
+ * getfpxregs() and setfpxregs() translate between this layout and the lwp's
+ * native floating-point state. Each of the eight floating-point registers
+ * starts at a four-long stride within lxux_st_space. getfpxregs() does not
+ * write lxux_reserved or lxux_padding.
+ */
+typedef struct lx_user_fpxregs {
+ uint16_t lxux_cwd;
+ uint16_t lxux_swd;
+ uint16_t lxux_twd;
+ uint16_t lxux_fop;
+ long lxux_fip;
+ long lxux_fcs;
+ long lxux_foo;
+ long lxux_fos;
+ long lxux_mxcsr;
+ long lxux_reserved;
+ long lxux_st_space[32];
+ long lxux_xmm_space[32];
+ long lxux_padding[56];
+} lx_user_fpxregs_t;
+
+/*
+ * This corresponds to the user_regs_struct Linux structure.
+ *
+ * getregs() produces it and setregs() consumes it. When the target is not
+ * inside an emulated system call getregs() reports lxur_orig_eax as 0, except
+ * on return from execve where it is set to LX_SYS_execve.
+ */
+typedef struct lx_user_regs {
+ long lxur_ebx;
+ long lxur_ecx;
+ long lxur_edx;
+ long lxur_esi;
+ long lxur_edi;
+ long lxur_ebp;
+ long lxur_eax;
+ long lxur_xds;
+ long lxur_xes;
+ long lxur_xfs;
+ long lxur_xgs;
+ long lxur_orig_eax;
+ long lxur_eip;
+ long lxur_xcs;
+ long lxur_eflags;
+ long lxur_esp;
+ long lxur_xss;
+} lx_user_regs_t;
+
+typedef struct lx_user { /* layout that peek/poke user offsets index into */
+ lx_user_regs_t lxu_regs; /* general registers: getregs()/setregs() */
+ int lxu_fpvalid; /* peek not supported */
+ lx_user_fpregs_t lxu_i387; /* peek served by getfpregs() */
+ ulong_t lxu_tsize; /* peek not supported */
+ ulong_t lxu_dsize; /* peek not supported */
+ ulong_t lxu_ssize; /* peek not supported */
+ ulong_t lxu_start_code; /* peek not supported */
+ ulong_t lxu_start_stack; /* peek not supported */
+ long lxu_signal; /* peek not supported */
+ int lxu_reserved; /* peek not supported */
+ lx_user_regs_t *lxu_ar0; /* peek not supported */
+ lx_user_fpregs_t *lxu_fpstate; /* peek not supported */
+ ulong_t lxu_magic; /* peek not supported */
+ char lxu_comm[32]; /* peek not supported */
+ int lxu_debugreg[8]; /* emulated through debug_registers() */
+} lx_user_t;
+
+typedef struct ptrace_monitor_map { /* one entry per monitor process */
+ struct ptrace_monitor_map *pmm_next; /* next entry in the list */
+ pid_t pmm_monitor; /* monitor child process */
+ pid_t pmm_target; /* traced Linux pid */
+ pid_t pmm_pid; /* Solaris pid */
+ lwpid_t pmm_lwpid; /* Solaris lwpid */
+ uint_t pmm_exiting; /* set by monitor_kill() when it could not reap the monitor */
+} ptrace_monitor_map_t;
+
+typedef struct ptrace_state_map { /* per-process emulated debug state */
+ struct ptrace_state_map *psm_next; /* next entry in the list */
+ pid_t psm_pid; /* Solaris pid */
+ uintptr_t psm_debugreg[8]; /* pseudo debug registers; ptrace_cont() keeps the single-step flag in slot 4 */
+} ptrace_state_map_t;
+
+static ptrace_monitor_map_t *ptrace_monitor_map = NULL; /* list of monitors, walked under ptrace_map_mtx */
+static ptrace_state_map_t *ptrace_state_map = NULL; /* list of per-pid debug register sets */
+static mutex_t ptrace_map_mtx = DEFAULTMUTEX; /* held while either list above is walked or changed */
+
+extern void *_START_; /* subtracted from &lx_emulate_done in syscall_regs(); presumably the start of this library's image - confirm */
+
+static sigset_t blockable_sigs; /* every signal but SIGKILL and SIGSTOP; see ptrace_init() */
+
+#pragma init(ptrace_init)
+/*
+ * Initializer: build blockable_sigs as the full signal set with SIGKILL and
+ * SIGSTOP removed.
+ */
+void
+ptrace_init(void)
+{
+	static const int excluded[] = { SIGKILL, SIGSTOP };
+	size_t i;
+
+	(void) sigfillset(&blockable_sigs);
+	for (i = 0; i < sizeof (excluded) / sizeof (excluded[0]); i++)
+		(void) sigdelset(&blockable_sigs, excluded[i]);
+}
+
+/*
+ * Open /native/proc/<pid>/<name> with the supplied open(2) mode and hand back
+ * whatever open() returns.
+ */
+static int
+open_procfile(pid_t pid, int mode, const char *name)
+{
+	char procpath[MAXPATHLEN];
+
+	(void) snprintf(procpath, sizeof (procpath), "/native/proc/%d/%s",
+	    pid, name);
+	return (open(procpath, mode));
+}
+
+/*
+ * Open /native/proc/<pid>/lwp/<lwpid>/<name> with the supplied open(2) mode
+ * and hand back whatever open() returns.
+ */
+static int
+open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name)
+{
+	char lwppath[MAXPATHLEN];
+
+	(void) snprintf(lwppath, sizeof (lwppath),
+	    "/native/proc/%d/lwp/%d/%s", pid, lwpid, name);
+	return (open(lwppath, mode));
+}
+
+/*
+ * Read the process status file of 'pid' into *psp.
+ *
+ * Returns 0 on success, -ESRCH if the file cannot be opened and -EIO if a
+ * complete pstatus_t could not be read.
+ */
+static int
+get_status(pid_t pid, pstatus_t *psp)
+{
+	ssize_t nread;
+	int statfd;
+
+	statfd = open_procfile(pid, O_RDONLY, "status");
+	if (statfd < 0)
+		return (-ESRCH);
+
+	nread = read(statfd, psp, sizeof (pstatus_t));
+	(void) close(statfd);
+
+	return (nread == sizeof (pstatus_t) ? 0 : -EIO);
+}
+
+/*
+ * Read the lwpstatus file of the given lwp into *lsp.
+ *
+ * Returns 0 on success, -ESRCH if the file cannot be opened and -EIO if a
+ * complete lwpstatus_t could not be read.
+ */
+static int
+get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp)
+{
+	ssize_t nread;
+	int statfd;
+
+	statfd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus");
+	if (statfd < 0)
+		return (-ESRCH);
+
+	nread = read(statfd, lsp, sizeof (lwpstatus_t));
+	(void) close(statfd);
+
+	return (nread == sizeof (lwpstatus_t) ? 0 : -EIO);
+}
+
+static uintptr_t
+syscall_regs(int fd, uintptr_t fp, pid_t pid)
+{
+ uintptr_t addr, done;
+ struct frame fr;
+ auxv_t auxv;
+ int afd;
+ Elf32_Phdr phdr;
+
+ /*
+ * Locate the saved register set of a target that is stopped inside an
+ * emulated system call.
+ *
+ * 'fd' is read with pread() at target addresses (callers pass the
+ * target's "as" file), 'fp' is the frame pointer the walk starts from
+ * and 'pid' names the target whose auxv file is consulted. Returns
+ * the target address of the register set, or 0 if any step fails.
+ *
+ * Try to walk the stack looking for a return address that corresponds
+ * to the traced process's lx_emulate_done symbol. This relies on the
+ * fact that the brand library in the traced process is the same as the
+ * brand library in this process (indeed, this is true of all processes
+ * in a given branded zone).
+ */
+
+ /*
+ * Find the base address for the brand library in the traced process
+ * by grabbing the AT_PHDR auxv entry, reading in the program header
+ * at that location and subtracting off the p_vaddr member. We use
+ * this to compute the location of lx_emulate_done in the traced
+ * process.
+ */
+ if ((afd = open_procfile(pid, O_RDONLY, "auxv")) < 0)
+ return (0);
+
+ do {
+ if (read(afd, &auxv, sizeof (auxv)) != sizeof (auxv)) {
+ (void) close(afd);
+ return (0);
+ }
+ } while (auxv.a_type != AT_PHDR);
+
+ (void) close(afd);
+
+ if (pread(fd, &phdr, sizeof (phdr), auxv.a_un.a_val) != sizeof (phdr)) {
+ lx_debug("failed to read brand library's phdr");
+ return (0);
+ }
+
+ addr = auxv.a_un.a_val - phdr.p_vaddr;
+ done = (uintptr_t)&lx_emulate_done - (uintptr_t)&_START_ + addr;
+
+ fr.fr_savfp = fp;
+
+ do {
+ addr = fr.fr_savfp;
+ if (pread(fd, &fr, sizeof (fr), addr) != sizeof (fr)) {
+ lx_debug("ptrace read failed for stack walk");
+ return (0);
+ }
+
+ if (addr >= fr.fr_savfp) { /* each saved frame pointer must be higher than the last, else give up */
+ lx_debug("ptrace stack not monotonically increasing "
+ "%p %p (%p)", addr, fr.fr_savfp, done);
+ return (0);
+ }
+ } while (fr.fr_savpc != done);
+
+ /*
+ * The first argument to lx_emulate is known to be an lx_regs_t
+ * structure and the ABI specifies that it will be placed on the stack
+ * immediately preceding the return address.
+ */
+ addr += sizeof (fr);
+ if (pread(fd, &addr, sizeof (addr), addr) != sizeof (addr)) {
+ lx_debug("ptrace stack failed to read register set address");
+ return (0);
+ }
+
+ return (addr);
+}
+
+/*
+ * Build the Linux register view of the given lwp in *rp.
+ *
+ * When syscall_regs() finds a saved register set the general registers come
+ * from it; otherwise they come from the lwp status reported by /proc. The
+ * segment registers other than %gs, and the flags, always come from /proc.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+getregs(pid_t pid, lwpid_t lwpid, lx_user_regs_t *rp)
+{
+	lwpstatus_t lwps;
+	lx_regs_t emul;
+	uintptr_t regaddr;
+	int asfd, err;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	asfd = open_procfile(pid, O_RDONLY, "as");
+	if (asfd < 0)
+		return (-ESRCH);
+
+	/* A non-zero address means the target is in an emulated syscall. */
+	regaddr = syscall_regs(asfd, lwps.pr_reg[EBP], pid);
+	if (regaddr != 0 &&
+	    pread(asfd, &emul, sizeof (emul), regaddr) != sizeof (emul)) {
+		(void) close(asfd);
+		lx_debug("ptrace failed to read register set");
+		return (-EIO);
+	}
+	(void) close(asfd);
+
+	/* These are taken from /proc whichever way the rest is obtained. */
+	rp->lxur_xds = lwps.pr_reg[DS];
+	rp->lxur_xes = lwps.pr_reg[ES];
+	rp->lxur_xfs = lwps.pr_reg[FS];
+	rp->lxur_xcs = lwps.pr_reg[CS];
+	rp->lxur_eflags = lwps.pr_reg[EFL];
+	rp->lxur_xss = lwps.pr_reg[SS];
+
+	if (regaddr != 0) {
+		rp->lxur_ebx = emul.lxr_ebx;
+		rp->lxur_ecx = emul.lxr_ecx;
+		rp->lxur_edx = emul.lxr_edx;
+		rp->lxur_esi = emul.lxr_esi;
+		rp->lxur_edi = emul.lxr_edi;
+		rp->lxur_ebp = emul.lxr_ebp;
+		rp->lxur_eax = emul.lxr_eax;
+		rp->lxur_xgs = emul.lxr_gs;
+		rp->lxur_orig_eax = emul.lxr_orig_eax;
+		rp->lxur_eip = emul.lxr_eip;
+		rp->lxur_esp = emul.lxr_esp;
+		return (0);
+	}
+
+	rp->lxur_ebx = lwps.pr_reg[EBX];
+	rp->lxur_ecx = lwps.pr_reg[ECX];
+	rp->lxur_edx = lwps.pr_reg[EDX];
+	rp->lxur_esi = lwps.pr_reg[ESI];
+	rp->lxur_edi = lwps.pr_reg[EDI];
+	rp->lxur_ebp = lwps.pr_reg[EBP];
+	rp->lxur_eax = lwps.pr_reg[EAX];
+	rp->lxur_xgs = lwps.pr_reg[GS];
+	rp->lxur_orig_eax = 0;
+	rp->lxur_eip = lwps.pr_reg[EIP];
+	rp->lxur_esp = lwps.pr_reg[UESP];
+
+	/*
+	 * A target that has just come back from exec is not sitting in the
+	 * emulation function, so %eax and orig_eax are faked up to show a
+	 * successful return from execve.
+	 */
+	if (lwps.pr_why == PR_SYSEXIT && lwps.pr_what == SYS_execve) {
+		rp->lxur_eax = 0;
+		rp->lxur_orig_eax = LX_SYS_execve;
+	}
+
+	return (0);
+}
+
+/*
+ * Apply the Linux register view in *rp to the given lwp.
+ *
+ * When syscall_regs() finds a saved register set, the general registers are
+ * written there and only the remaining registers go through /proc; otherwise
+ * everything is set through /proc with PCSREG.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+setregs(pid_t pid, lwpid_t lwpid, const lx_user_regs_t *rp)
+{
+	long ctl[1 + sizeof (prgregset_t) / sizeof (long)];
+	lwpstatus_t lwps;
+	uintptr_t regaddr;
+	int fd, err;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	fd = open_procfile(pid, O_RDWR, "as");
+	if (fd < 0)
+		return (-ESRCH);
+
+	regaddr = syscall_regs(fd, lwps.pr_reg[EBP], pid);
+	if (regaddr != 0) {
+		lx_regs_t emul;
+
+		emul.lxr_ebx = rp->lxur_ebx;
+		emul.lxr_ecx = rp->lxur_ecx;
+		emul.lxr_edx = rp->lxur_edx;
+		emul.lxr_esi = rp->lxur_esi;
+		emul.lxr_edi = rp->lxur_edi;
+		emul.lxr_ebp = rp->lxur_ebp;
+		emul.lxr_eax = rp->lxur_eax;
+		emul.lxr_gs = rp->lxur_xgs;
+		emul.lxr_orig_eax = rp->lxur_orig_eax;
+		emul.lxr_eip = rp->lxur_eip;
+		emul.lxr_esp = rp->lxur_esp;
+
+		if (pwrite(fd, &emul, sizeof (emul), regaddr) !=
+		    sizeof (emul)) {
+			(void) close(fd);
+			lx_debug("ptrace failed to write register set");
+			return (-EIO);
+		}
+	} else {
+		lwps.pr_reg[EBX] = rp->lxur_ebx;
+		lwps.pr_reg[ECX] = rp->lxur_ecx;
+		lwps.pr_reg[EDX] = rp->lxur_edx;
+		lwps.pr_reg[ESI] = rp->lxur_esi;
+		lwps.pr_reg[EDI] = rp->lxur_edi;
+		lwps.pr_reg[EBP] = rp->lxur_ebp;
+		lwps.pr_reg[EAX] = rp->lxur_eax;
+		lwps.pr_reg[GS] = rp->lxur_xgs;
+		lwps.pr_reg[EIP] = rp->lxur_eip;
+		lwps.pr_reg[UESP] = rp->lxur_esp;
+	}
+	(void) close(fd);
+
+	/* These go through /proc in both cases. */
+	lwps.pr_reg[DS] = rp->lxur_xds;
+	lwps.pr_reg[ES] = rp->lxur_xes;
+	lwps.pr_reg[FS] = rp->lxur_xfs;
+	lwps.pr_reg[CS] = rp->lxur_xcs;
+	lwps.pr_reg[EFL] = rp->lxur_eflags;
+	lwps.pr_reg[SS] = rp->lxur_xss;
+
+	fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl");
+	if (fd < 0)
+		return (-ESRCH);
+
+	/* The control message is the PCSREG code followed by the registers. */
+	ctl[0] = PCSREG;
+	bcopy(lwps.pr_reg, &ctl[1], sizeof (prgregset_t));
+
+	if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+		(void) close(fd);
+		return (-EIO);
+	}
+
+	(void) close(fd);
+	return (0);
+}
+
+/*
+ * Fill *rp with the floating-point state of the given lwp, taken from its
+ * lwp status. Returns 0 on success or the error from get_lwpstatus().
+ */
+static int
+getfpregs(pid_t pid, lwpid_t lwpid, lx_user_fpregs_t *rp)
+{
+	lwpstatus_t lwps;
+	struct _fpstate *fps;
+	char *dst;
+	int err, regno;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	fps = (struct _fpstate *)&lwps.pr_fpreg.fp_reg_set.fpchip_state;
+
+	rp->lxuf_cwd = fps->cw;
+	rp->lxuf_swd = fps->sw;
+	rp->lxuf_twd = fps->tag;
+	rp->lxuf_fip = fps->ipoff;
+	rp->lxuf_fcs = fps->cssel;
+	rp->lxuf_foo = fps->dataoff;
+	rp->lxuf_fos = fps->datasel;
+
+	/*
+	 * Each floating-point register takes 10 bytes in the Linux structure,
+	 * so the eight registers are packed one after another.
+	 */
+	dst = (char *)&rp->lxuf_st_space[0];
+	for (regno = 0; regno < 8; regno++, dst += 10)
+		bcopy(&fps->_st[regno], dst, 10);
+
+	return (0);
+}
+
+/*
+ * Apply the floating-point state in *rp to the given lwp. The lwp's current
+ * state is read first and only the fields carried by *rp are overwritten
+ * before the result is written back with PCSFPREG.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+setfpregs(pid_t pid, lwpid_t lwpid, const lx_user_fpregs_t *rp)
+{
+	lwpstatus_t lwps;
+	struct {
+		long cmd;
+		prfpregset_t regs;
+	} ctl;
+	struct _fpstate *fps = (struct _fpstate *)&ctl.regs;
+	char *src;
+	int err, regno, ctlfd;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	bcopy(&lwps.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
+
+	fps->cw = rp->lxuf_cwd;
+	fps->sw = rp->lxuf_swd;
+	fps->tag = rp->lxuf_twd;
+	fps->ipoff = rp->lxuf_fip;
+	fps->cssel = rp->lxuf_fcs;
+	fps->dataoff = rp->lxuf_foo;
+	fps->datasel = rp->lxuf_fos;
+
+	/*
+	 * Each floating-point register takes 10 bytes in the Linux structure,
+	 * so the eight registers are unpacked one after another.
+	 */
+	src = (char *)&rp->lxuf_st_space[0];
+	for (regno = 0; regno < 8; regno++, src += 10)
+		bcopy(src, &fps->_st[regno], 10);
+
+	ctlfd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl");
+	if (ctlfd < 0)
+		return (-ESRCH);
+
+	ctl.cmd = PCSFPREG;
+	if (write(ctlfd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+		(void) close(ctlfd);
+		return (-EIO);
+	}
+
+	(void) close(ctlfd);
+	return (0);
+}
+
+
+/*
+ * Fill *rp with the extended floating-point state of the given lwp, taken
+ * from its lwp status. lxux_reserved and lxux_padding are left untouched.
+ * Returns 0 on success or the error from get_lwpstatus().
+ */
+static int
+getfpxregs(pid_t pid, lwpid_t lwpid, lx_user_fpxregs_t *rp)
+{
+	lwpstatus_t lwps;
+	struct _fpstate *fps;
+	int err, regno;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	fps = (struct _fpstate *)&lwps.pr_fpreg.fp_reg_set.fpchip_state;
+
+	rp->lxux_cwd = (uint16_t)fps->cw;
+	rp->lxux_swd = (uint16_t)fps->sw;
+	rp->lxux_twd = (uint16_t)fps->tag;
+	/* The upper half of cssel supplies fop, the lower half fcs. */
+	rp->lxux_fop = (uint16_t)(fps->cssel >> 16);
+	rp->lxux_fip = fps->ipoff;
+	rp->lxux_fcs = (uint16_t)fps->cssel;
+	rp->lxux_foo = fps->dataoff;
+	rp->lxux_fos = fps->datasel;
+	rp->lxux_mxcsr = lwps.pr_fpreg.fp_reg_set.fpchip_state.mxcsr;
+
+	bcopy(fps->xmm, rp->lxux_xmm_space, sizeof (rp->lxux_xmm_space));
+
+	/* Clear the register area, then drop each register at a 4-long stride. */
+	bzero(rp->lxux_st_space, sizeof (rp->lxux_st_space));
+	for (regno = 0; regno < 8; regno++) {
+		long *slot = &rp->lxux_st_space[regno * 4];
+
+		bcopy(&fps->_st[regno], slot, sizeof (fps->_st[regno]));
+	}
+
+	return (0);
+}
+
+/*
+ * Apply the extended floating-point state in *rp to the given lwp. The lwp's
+ * current state is read first and only the fields carried by *rp are
+ * overwritten before the result is written back with PCSFPREG.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+setfpxregs(pid_t pid, lwpid_t lwpid, const lx_user_fpxregs_t *rp)
+{
+	lwpstatus_t lwps;
+	struct {
+		long cmd;
+		prfpregset_t regs;
+	} ctl;
+	struct _fpstate *fps = (struct _fpstate *)&ctl.regs;
+	int err, regno, ctlfd;
+
+	if ((err = get_lwpstatus(pid, lwpid, &lwps)) != 0)
+		return (err);
+
+	bcopy(&lwps.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
+
+	fps->cw = rp->lxux_cwd;
+	fps->sw = rp->lxux_swd;
+	fps->tag = rp->lxux_twd;
+	fps->ipoff = rp->lxux_fip;
+	/* fop travels in the upper half of cssel. */
+	fps->cssel = rp->lxux_fcs | (rp->lxux_fop << 16);
+	fps->dataoff = rp->lxux_foo;
+	fps->datasel = rp->lxux_fos;
+
+	bcopy(rp->lxux_xmm_space, fps->xmm, sizeof (rp->lxux_xmm_space));
+	for (regno = 0; regno < 8; regno++) {
+		const long *slot = &rp->lxux_st_space[regno * 4];
+
+		bcopy(slot, &fps->_st[regno], sizeof (fps->_st[regno]));
+	}
+
+	ctlfd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl");
+	if (ctlfd < 0)
+		return (-ESRCH);
+
+	ctl.cmd = PCSFPREG;
+	if (write(ctlfd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+		(void) close(ctlfd);
+		return (-EIO);
+	}
+
+	(void) close(ctlfd);
+	return (0);
+}
+
+/*
+ * Solaris does not let a process manipulate debug registers, whether its
+ * own or another process's. Linux ptrace(2) does, and gdb relies on that
+ * for its watchpoint implementation.
+ *
+ * A pseudo set of debug registers is therefore kept for every traced
+ * process, and their contents are mapped onto the appropriate PCWATCH /proc
+ * operations when gdb activates them.
+ *
+ * For background on how the debug registers work on x86 machines, see
+ * section 13.1 of the AMD x86-64 Architecture Programmer's Manual,
+ * Volume 2, System Programming.
+ *
+ * Returns the pseudo register array for 'pid', creating it on first use, or
+ * NULL if it has to be created and the allocation fails.
+ */
+static uintptr_t *
+debug_registers(pid_t pid)
+{
+	ptrace_state_map_t *ent;
+	uintptr_t *regs = NULL;
+
+	(void) mutex_lock(&ptrace_map_mtx);
+
+	ent = ptrace_state_map;
+	while (ent != NULL && ent->psm_pid != pid)
+		ent = ent->psm_next;
+
+	if (ent == NULL) {
+		ent = malloc(sizeof (*ent));
+		if (ent != NULL) {
+			/* New entries are pushed onto the head of the list. */
+			bzero(ent, sizeof (*ent));
+			ent->psm_pid = pid;
+			ent->psm_next = ptrace_state_map;
+			ent->psm_debugreg[6] = 0xffff0ff0; /* read as ones */
+			ptrace_state_map = ent;
+		}
+	}
+
+	if (ent != NULL)
+		regs = ent->psm_debugreg;
+
+	(void) mutex_unlock(&ptrace_map_mtx);
+
+	return (regs);
+}
+
+/*
+ * Unlink and free the pseudo debug register entry for 'pid', if one exists.
+ */
+static void
+free_debug_registers(pid_t pid)
+{
+	ptrace_state_map_t **linkp = &ptrace_state_map;
+	ptrace_state_map_t *ent;
+
+	/* ASSERT(MUTEX_HELD(&ptrace_map_mtx)) */
+	while ((ent = *linkp) != NULL) {
+		if (ent->psm_pid == pid) {
+			*linkp = ent->psm_next;
+			free(ent);
+			return;
+		}
+		linkp = &ent->psm_next;
+	}
+}
+
+/*
+ * Rebuild the target's /proc watched areas from the pseudo debug registers.
+ *
+ * Up to four existing watched areas are read from the target's "watch" file
+ * and cleared, then one area is established for each of the four slots that
+ * is enabled in debugreg[7]. For slot i the enable bit is bit 2*i and the
+ * four-bit length/mode field starts at bit 16 + 4*i: the upper two bits
+ * select a size of 1, 2, 8 or 4 bytes and the lower two bits select
+ * execute, write, (skipped) or read/write.
+ *
+ * Returns 0 on success, -ESRCH if a /proc file cannot be opened, or the
+ * negated errno of a failed read or write.
+ */
+static int
+setup_watchpoints(pid_t pid, uintptr_t *debugreg)
+{
+	int dr7 = debugreg[7];
+	int lrw;
+	int fd;
+	size_t size = 0;
+	prwatch_t prwatch[4];
+	ssize_t nread;
+	int nwatch;
+	int i;
+	int wflags = 0;
+	int error;
+	struct {
+		long req;
+		prwatch_t prwatch;
+	} ctl;
+
+	/* find all watched areas */
+	if ((fd = open_procfile(pid, O_RDONLY, "watch")) < 0)
+		return (-ESRCH);
+	nread = read(fd, prwatch, sizeof (prwatch));
+	error = -errno;		/* only meaningful if the read failed */
+	(void) close(fd);
+
+	/*
+	 * A failed read must not be turned into an entry count: dividing -1
+	 * by an unsigned size yields a huge value and the loop below would
+	 * then run off the end of prwatch[].
+	 */
+	if (nread < 0)
+		return (error);
+	nwatch = (size_t)nread / sizeof (prwatch_t);
+
+	if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
+		return (-ESRCH);
+
+	/* clear all watched areas */
+	for (i = 0; i < nwatch; i++) {
+		ctl.req = PCWATCH;
+		ctl.prwatch = prwatch[i];
+		ctl.prwatch.pr_wflags = 0;
+		if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+			error = -errno;
+			(void) close(fd);
+			return (error);
+		}
+	}
+
+	/* establish all new watched areas */
+	for (i = 0; i < 4; i++) {
+		if ((dr7 & (1 << (2 * i))) == 0)	/* enabled? */
+			continue;
+		lrw = (dr7 >> (16 + (4 * i))) & 0xf;
+		switch (lrw >> 2) {	/* length */
+		case 0: size = 1; break;
+		case 1: size = 2; break;
+		case 2: size = 8; break;
+		case 3: size = 4; break;
+		}
+		switch (lrw & 0x3) {	/* mode */
+		case 0: wflags = WA_EXEC; break;
+		case 1: wflags = WA_WRITE; break;
+		case 2: continue;
+		case 3: wflags = WA_READ | WA_WRITE; break;
+		}
+		ctl.req = PCWATCH;
+		ctl.prwatch.pr_vaddr = debugreg[i];
+		ctl.prwatch.pr_size = size;
+		ctl.prwatch.pr_wflags = wflags | WA_TRAPAFTER;
+		if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
+			error = -errno;
+			(void) close(fd);
+			return (error);
+		}
+	}
+	(void) close(fd);
+	return (0);
+}
+
+/*
+ * Returns TRUE if the process is traced, FALSE otherwise. A process counts
+ * as traced when it carries the ptrace flag, is our child and is currently
+ * stopped (the PTRACE_TRACEME case), or when one of our monitors lists it as
+ * its target (the PTRACE_ATTACH case).
+ */
+static int
+is_traced(pid_t pid)
+{
+	ptrace_monitor_map_t *mon;
+	pstatus_t ps;
+	int found = 0;
+
+	if (get_status(pid, &ps) != 0)
+		return (0);
+
+	if ((ps.pr_flags & PR_PTRACE) &&
+	    (ps.pr_ppid == getpid()) &&
+	    (ps.pr_lwp.pr_flags & PR_ISTOP))
+		return (1);
+
+	(void) mutex_lock(&ptrace_map_mtx);
+	for (mon = ptrace_monitor_map; mon != NULL && !found;
+	    mon = mon->pmm_next) {
+		if (mon->pmm_target == pid)
+			found = 1;
+	}
+	(void) mutex_unlock(&ptrace_map_mtx);
+
+	return (found);
+}
+
+/*
+ * Issue the /proc control messages shared by the tracing setups on the
+ * already-open control file 'fd': trace every signal, trace no faults, and
+ * clear the PR_FORK flag.
+ *
+ * Returns 0 on success or -1 as soon as one of the writes comes up short.
+ */
+static int
+ptrace_trace_common(int fd)
+{
+	struct {
+		long cmd;
+		union {
+			long flags;
+			sigset_t signals;
+			fltset_t faults;
+		} arg;
+	} ctl;
+	const size_t hdrlen = sizeof (long);
+	size_t msglen;
+
+	/* Stop on all signals. */
+	ctl.cmd = PCSTRACE;
+	prfillset(&ctl.arg.signals);
+	msglen = hdrlen + sizeof (sigset_t);
+	if (write(fd, &ctl, msglen) != msglen)
+		return (-1);
+
+	/* Stop on no faults. */
+	ctl.cmd = PCSFAULT;
+	premptyset(&ctl.arg.faults);
+	msglen = hdrlen + sizeof (fltset_t);
+	if (write(fd, &ctl, msglen) != msglen)
+		return (-1);
+
+	/* Turn off inherit-on-fork. */
+	ctl.cmd = PCUNSET;
+	ctl.arg.flags = PR_FORK;
+	msglen = hdrlen + sizeof (long);
+	if (write(fd, &ctl, msglen) != msglen)
+		return (-1);
+
+	return (0);
+}
+
+/*
+ * Notify the parent that we wish to be traced. This is the equivalent of:
+ *
+ * 1. Stop on all signals, and nothing else
+ * 2. Turn off inherit-on-fork flag
+ * 3. Set ptrace compatible flag
+ *
+ * If we are not the main thread, then the client is asking for one of its
+ * own threads to be traced. We don't support this mode of operation.
+ */
+static int
+ptrace_traceme(void)
+{
+	pid_t self = getpid();
+	pstatus_t ps;
+	long ctl[2];
+	int ctlfd, err;
+
+	if (_lwp_self() != 1) {
+		lx_unsupported(gettext(
+		    "thread %d calling PTRACE_TRACEME is unsupported"),
+		    _lwp_self());
+		return (-ENOTSUP);
+	}
+
+	if ((err = get_status(self, &ps)) != 0)
+		return (err);
+
+	/*
+	 * It is unclear why a process would ask for this twice, but there's
+	 * a conformance test which wants the second request to fail just so.
+	 */
+	if (ps.pr_flags & PR_PTRACE)
+		return (-EPERM);
+
+	ctlfd = open_procfile(self, O_WRONLY, "ctl");
+	if (ctlfd < 0)
+		return (-errno);
+
+	ctl[0] = PCSET;
+	ctl[1] = PR_PTRACE;
+	if (write(ctlfd, ctl, sizeof (ctl)) == sizeof (ctl) &&
+	    ptrace_trace_common(ctlfd) == 0)
+		err = 0;
+	else
+		err = -errno;
+
+	(void) close(ctlfd);
+	return (err);
+}
+
+/*
+ * Fetch one word from 'addr' in the target's address space and copy it out
+ * to 'ret'. The lwpid is not needed since this acts on the whole process.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+ptrace_peek(pid_t pid, uintptr_t addr, int *ret)
+{
+	ssize_t nread;
+	int asfd, word;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	asfd = open_procfile(pid, O_RDONLY, "as");
+	if (asfd < 0)
+		return (-ESRCH);
+
+	nread = pread(asfd, &word, sizeof (word), addr);
+	(void) close(asfd);
+	if (nread != sizeof (word))
+		return (-EIO);
+
+	return (uucopy(&word, ret, sizeof (word)) != 0 ? -errno : 0);
+}
+
+/*
+ * Offset of the first byte past member 'm' of lx_user_t.
+ */
+#define	LX_USER_BOUND(m)	\
+(offsetof(lx_user_t, m) + sizeof (((lx_user_t *)NULL)->m))
+
+/*
+ * Read the word at offset 'off' of the Linux user structure for the given
+ * lwp and copy it out to 'ret'.
+ *
+ * Only the general registers, the floating-point registers and the debug
+ * registers can be read. Returns 0 on success, -ESRCH if the process is not
+ * traced, -EINVAL if the offset is not word-aligned (as ptrace_poke_user()
+ * does), -ENOTSUP for parts of the structure that are not supported, or a
+ * negated errno value from fetching the registers or copying out.
+ */
+static int
+ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret)
+{
+	int err, data;
+	uintptr_t *debugreg;
+	int dreg;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	/*
+	 * A whole word is read starting at 'off', so an unaligned offset near
+	 * the end of a sub-structure would read past the local copy below.
+	 */
+	if (off & 0x3)
+		return (-EINVAL);
+
+	/*
+	 * The offset specified by the user is an offset into the Linux
+	 * user structure (seriously). Rather than constructing a full
+	 * user structure, we figure out which part of the user structure
+	 * the offset is in, and fill in just that component.
+	 */
+	if (off < LX_USER_BOUND(lxu_regs)) {
+		lx_user_regs_t regs;
+
+		if ((err = getregs(pid, lwpid, &regs)) != 0)
+			return (err);
+
+		data = *(int *)((uintptr_t)&regs + off -
+		    offsetof(lx_user_t, lxu_regs));
+
+	} else if (off < LX_USER_BOUND(lxu_fpvalid)) {
+		lx_err(gettext("offset = %lu\n"), off);
+		assert(0);
+		/* don't copy out an unset value if assert is compiled out */
+		return (-ENOTSUP);
+	} else if (off < LX_USER_BOUND(lxu_i387)) {
+		lx_user_fpregs_t regs;
+
+		if ((err = getfpregs(pid, lwpid, &regs)) != 0)
+			return (err);
+
+		data = *(int *)((uintptr_t)&regs + off -
+		    offsetof(lx_user_t, lxu_i387));
+
+	} else if (off < LX_USER_BOUND(lxu_comm)) {
+		/*
+		 * Everything from lxu_tsize through lxu_comm is unsupported.
+		 */
+		lx_err(gettext("offset = %lu\n"), off);
+		assert(0);
+		/* don't copy out an unset value if assert is compiled out */
+		return (-ENOTSUP);
+	} else if (off < LX_USER_BOUND(lxu_debugreg)) {
+		dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int);
+		if (dreg == 4)		/* aliased */
+			dreg = 6;
+		else if (dreg == 5)	/* aliased */
+			dreg = 7;
+		if ((debugreg = debug_registers(pid)) != NULL)
+			data = debugreg[dreg];
+		else
+			data = 0;
+	} else {
+		lx_unsupported(gettext(
+		    "unsupported ptrace %s user offset: 0x%x\n"), "peek", off);
+		assert(0);
+		return (-ENOTSUP);
+	}
+
+	if (uucopy(&data, ret, sizeof (data)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Store one word at 'addr' in the target's address space. The lwpid is not
+ * needed since this acts on the whole process. An address that is not
+ * word-aligned is refused with EINVAL.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+ptrace_poke(pid_t pid, uintptr_t addr, int data)
+{
+	ssize_t nwritten;
+	int asfd;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	if ((addr & 0x3) != 0)
+		return (-EINVAL);
+
+	asfd = open_procfile(pid, O_WRONLY, "as");
+	if (asfd < 0)
+		return (-ESRCH);
+
+	nwritten = pwrite(asfd, &data, sizeof (data), addr);
+	(void) close(asfd);
+
+	return (nwritten == sizeof (data) ? 0 : -EIO);
+}
+
+/*
+ * Store the word 'data' at offset 'off' of the Linux user structure for the
+ * given lwp. Only the general registers and the debug registers can be
+ * written; a write to debug register 7 re-establishes the watchpoints.
+ *
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data)
+{
+	const uintptr_t regs_start = offsetof(lx_user_t, lxu_regs);
+	const uintptr_t dbg_start = offsetof(lx_user_t, lxu_debugreg);
+	lx_user_regs_t regs;
+	uintptr_t *debugreg;
+	int err = 0;
+	int dreg;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	if ((off & 0x3) != 0)
+		return (-EINVAL);
+
+	/* General registers: read, patch one word, write back. */
+	if (off < regs_start + sizeof (lx_user_regs_t)) {
+		if ((err = getregs(pid, lwpid, &regs)) != 0)
+			return (err);
+		*(int *)((uintptr_t)&regs + off - regs_start) = data;
+		return (setregs(pid, lwpid, &regs));
+	}
+
+	/* Debug registers: update the pseudo register set. */
+	if (off >= dbg_start && off < dbg_start + 8 * sizeof (int)) {
+		dreg = (off - dbg_start) / sizeof (int);
+		switch (dreg) {
+		case 4:			/* aliased */
+			dreg = 6;
+			break;
+		case 5:			/* aliased */
+			dreg = 7;
+			break;
+		default:
+			break;
+		}
+
+		debugreg = debug_registers(pid);
+		if (debugreg != NULL) {
+			debugreg[dreg] = data;
+			if (dreg == 7)
+				err = setup_watchpoints(pid, debugreg);
+		}
+		return (err);
+	}
+
+	lx_unsupported(gettext("unsupported ptrace %s user offset: 0x%x\n"),
+	    "poke", off);
+	assert(0);
+	return (-ENOTSUP);
+}
+
+/*
+ * Assemble and write a single batch of control messages to 'fd': clear the
+ * current signal, optionally post signal 'sig' (a Linux signal number), and,
+ * if 'run' is set, set the lwp running, single-stepping when 'step' is set.
+ *
+ * Returns 0 on success or -EIO if the write comes up short.
+ */
+static int
+ptrace_cont_common(int fd, int sig, int run, int step)
+{
+	long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2];
+	size_t nwords = 0;
+	size_t size;
+
+	assert(0 <= sig && sig < LX_NSIG);
+	assert(!step || run);
+
+	/*
+	 * Clear the current signal.
+	 */
+	ctl[nwords++] = PCCSIG;
+
+	/*
+	 * Send a signal if one was specified.
+	 */
+	if (sig != 0 && sig != LX_SIGSTOP) {
+		siginfo_t *infop;
+
+		ctl[nwords++] = PCSSIG;
+		infop = (siginfo_t *)&ctl[nwords];
+		bzero(infop, sizeof (siginfo_t));
+		infop->si_signo = ltos_signo[sig];
+
+		nwords += sizeof (siginfo_t) / sizeof (long);
+	}
+
+	/*
+	 * If run is true, set the lwp running.
+	 */
+	if (run) {
+		ctl[nwords++] = PCRUN;
+		ctl[nwords++] = step ? PRSTEP : 0;
+	}
+
+	size = nwords * sizeof (long);
+	assert(size <= sizeof (ctl));
+
+	if (write(fd, ctl, size) != size) {
+		lx_debug("failed to continue %s", strerror(errno));
+		return (-EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * Set the monitor process described by 'p' running again, clearing its
+ * current signal. Returns 0 on success or -EIO on any failure.
+ */
+static int
+ptrace_cont_monitor(ptrace_monitor_map_t *p)
+{
+	long ctl[2] = { PCRUN, PRCSIG };
+	ssize_t nwritten;
+	int ctlfd;
+
+	ctlfd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl");
+	if (ctlfd < 0) {
+		lx_debug("failed to open monitor ctl %d",
+		    errno);
+		return (-EIO);
+	}
+
+	nwritten = write(ctlfd, ctl, sizeof (ctl));
+	(void) close(ctlfd);
+
+	return (nwritten == sizeof (ctl) ? 0 : -EIO);
+}
+
+/*
+ * Resume the traced lwp, optionally delivering Linux signal 'sig' and
+ * optionally single-stepping, then get the monitor for 'lxpid' moving if
+ * there is one.
+ *
+ * Returns 0 on success, -ESRCH if the process is not traced or its lwpctl
+ * file cannot be opened, -EINVAL for an out-of-range signal, or the error
+ * from resuming the lwp or its monitor.
+ */
+static int
+ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step)
+{
+	ptrace_monitor_map_t *p;
+	uintptr_t *debugreg;
+	int fd, ret;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	if (sig < 0 || sig >= LX_NSIG)
+		return (-EINVAL);
+
+	if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+		return (-ESRCH);
+
+	if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) {
+		(void) close(fd);
+		return (ret);
+	}
+
+	(void) close(fd);
+
+	/* kludge: use debugreg[4] to remember the single-step flag */
+	if ((debugreg = debug_registers(pid)) != NULL)
+		debugreg[4] = step;
+
+	/*
+	 * Check for a monitor and get it moving if we find it. If any of the
+	 * /proc operations fail, we're kind of sunk so just return an error.
+	 * The error is carried out of the loop in 'ret' so that the map
+	 * mutex is released on every path.
+	 */
+	ret = 0;
+	(void) mutex_lock(&ptrace_map_mtx);
+	for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
+		if (p->pmm_target == lxpid) {
+			ret = ptrace_cont_monitor(p);
+			break;
+		}
+	}
+	(void) mutex_unlock(&ptrace_map_mtx);
+
+	return (ret);
+}
+
+/*
+ * Dispose of the monitor for this traced process, if one exists.
+ *
+ * Its ptrace flag is turned off first so that we are not notified of its
+ * impending demise; errors from that step are ignored since they indicate
+ * only that the monitor has been damaged due to pilot error. The monitor is
+ * then killed and waited for. If the wait succeeds the corpse can be
+ * disposed of; otherwise another thread's wait call has collected it, and a
+ * flag is set in the structure so that it can be picked up in wait.
+ *
+ * The pseudo debug registers kept for 'pid' are freed as well.
+ */
+static void
+monitor_kill(pid_t lxpid, pid_t pid)
+{
+	ptrace_monitor_map_t **linkp, *mon;
+	long ctl[2];
+	pid_t mpid;
+	int ctlfd;
+
+	(void) mutex_lock(&ptrace_map_mtx);
+	free_debug_registers(pid);
+
+	/* Find the entry for this target, keeping track of the link to it. */
+	linkp = &ptrace_monitor_map;
+	while ((mon = *linkp) != NULL && mon->pmm_target != lxpid)
+		linkp = &mon->pmm_next;
+
+	if (mon != NULL) {
+		mpid = mon->pmm_monitor;
+
+		ctlfd = open_procfile(mpid, O_WRONLY, "ctl");
+		if (ctlfd >= 0) {
+			ctl[0] = PCUNSET;
+			ctl[1] = PR_PTRACE;
+			(void) write(ctlfd, ctl, sizeof (ctl));
+			(void) close(ctlfd);
+		}
+
+		(void) kill(mpid, SIGKILL);
+
+		if (waitpid(mpid, NULL, 0) == mpid) {
+			*linkp = mon->pmm_next;
+			free(mon);
+		} else {
+			mon->pmm_exiting = 1;
+		}
+	}
+
+	(void) mutex_unlock(&ptrace_map_mtx);
+}
+
+/*
+ * Send SIGKILL to the traced process `pid' and dispose of any monitor
+ * registered for `lxpid'. Returns -ESRCH if `pid' is not traced, otherwise
+ * the return value of kill(2).
+ */
+static int
+ptrace_kill(pid_t lxpid, pid_t pid)
+{
+	int killrv;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	killrv = kill(pid, SIGKILL);
+
+	/* The monitor (if any) goes away regardless of how kill() fared. */
+	monitor_kill(lxpid, pid);
+
+	return (killrv);
+}
+
+/*
+ * Single-step variant of ptrace_cont(): identical except that the step
+ * argument is set.
+ */
+static int
+ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+	const int single_step = 1;
+
+	return (ptrace_cont(lxpid, pid, lwpid, sig, single_step));
+}
+
+/*
+ * Fetch the registers of pid/lwpid with getregs() and copy them out to the
+ * caller-supplied address `addr'. Returns 0, -ESRCH if the process is not
+ * traced, the getregs() error, or -errno if the copy-out fails.
+ */
+static int
+ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_regs_t snapshot;
+	int err;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	err = getregs(pid, lwpid, &snapshot);
+	if (err != 0)
+		return (err);
+
+	if (uucopy(&snapshot, (void *)addr, sizeof (snapshot)) == 0)
+		return (0);
+
+	return (-errno);
+}
+
+/*
+ * Copy a register set in from the caller-supplied address `addr' and apply
+ * it to pid/lwpid with setregs(). Returns -ESRCH if the process is not
+ * traced, -errno if the copy-in fails, otherwise the setregs() result.
+ */
+static int
+ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_regs_t incoming;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	if (uucopy((void *)addr, &incoming, sizeof (incoming)) == 0)
+		return (setregs(pid, lwpid, &incoming));
+
+	return (-errno);
+}
+
+/*
+ * Fetch the floating point registers of pid/lwpid with getfpregs() and copy
+ * them out to `addr'. Returns 0, -ESRCH if the process is not traced, the
+ * getfpregs() error, or -errno if the copy-out fails.
+ */
+static int
+ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_fpregs_t snapshot;
+	int err;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	err = getfpregs(pid, lwpid, &snapshot);
+	if (err != 0)
+		return (err);
+
+	if (uucopy(&snapshot, (void *)addr, sizeof (snapshot)) == 0)
+		return (0);
+
+	return (-errno);
+}
+
+/*
+ * Copy a floating point register set in from `addr' and apply it to
+ * pid/lwpid with setfpregs(). Returns -ESRCH if the process is not traced,
+ * -errno if the copy-in fails, otherwise the setfpregs() result.
+ */
+static int
+ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_fpregs_t incoming;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	if (uucopy((void *)addr, &incoming, sizeof (incoming)) == 0)
+		return (setfpregs(pid, lwpid, &incoming));
+
+	return (-errno);
+}
+
+/*
+ * Fetch the extended floating point registers of pid/lwpid with
+ * getfpxregs() and copy them out to `addr'. Returns 0, -ESRCH if the
+ * process is not traced, the getfpxregs() error, or -errno if the copy-out
+ * fails.
+ */
+static int
+ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_fpxregs_t snapshot;
+	int err;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	err = getfpxregs(pid, lwpid, &snapshot);
+	if (err != 0)
+		return (err);
+
+	if (uucopy(&snapshot, (void *)addr, sizeof (snapshot)) == 0)
+		return (0);
+
+	return (-errno);
+}
+
+/*
+ * Copy an extended floating point register set in from `addr' and apply it
+ * to pid/lwpid with setfpxregs(). Returns -ESRCH if the process is not
+ * traced, -errno if the copy-in fails, otherwise the setfpxregs() result.
+ */
+static int
+ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
+{
+	lx_user_fpxregs_t incoming;
+
+	if (is_traced(pid) == 0)
+		return (-ESRCH);
+
+	if (uucopy((void *)addr, &incoming, sizeof (incoming)) == 0)
+		return (setfpxregs(pid, lwpid, &incoming));
+
+	return (-errno);
+}
+
+/*
+ * Body of the monitor process forked by ptrace_attach_common(). `fd' is the
+ * control file descriptor of the traced LWP, inherited from the parent.
+ *
+ * The monitor first configures itself through its own /proc ctl file: it
+ * traces only SIGTRAP, clears its fault set, clears PR_FORK and sets
+ * PR_PTRACE. It then loops forever: wait (PCWSTOP) for the traced LWP to
+ * stop, then send itself a SIGTRAP. The function never returns; it exits
+ * via _exit() when the wait on the traced LWP fails, with status 0 if errno
+ * was ENOENT and 1 otherwise.
+ */
+static void __NORETURN
+ptrace_monitor(int fd)
+{
+ struct {
+ long cmd;
+ union {
+ long flags;
+ sigset_t signals;
+ fltset_t faults;
+ } arg;
+ } ctl;
+ size_t size;
+ int monfd;
+ int rv;
+
+ /*
+ * NOTE(review): the result of this open is not checked; the writes
+ * below are annotated as unable to fail -- confirm that also covers a
+ * failed open.
+ */
+ monfd = open_procfile(getpid(), O_WRONLY, "ctl");
+
+ /* each message is the command word followed by its operand */
+ ctl.cmd = PCSTRACE; /* trace only SIGTRAP */
+ premptyset(&ctl.arg.signals);
+ praddset(&ctl.arg.signals, SIGTRAP);
+ size = sizeof (long) + sizeof (sigset_t);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCSFAULT; /* empty fault set */
+ premptyset(&ctl.arg.faults);
+ size = sizeof (long) + sizeof (fltset_t);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCUNSET;
+ ctl.arg.flags = PR_FORK;
+ size = sizeof (long) + sizeof (long);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ ctl.cmd = PCSET; /* wait()able by the parent */
+ ctl.arg.flags = PR_PTRACE;
+ size = sizeof (long) + sizeof (long);
+ (void) write(monfd, &ctl, size); /* can't fail */
+
+ (void) close(monfd);
+
+ /* from here on ctl holds just the PCWSTOP command, reused every pass */
+ ctl.cmd = PCWSTOP;
+ size = sizeof (long);
+
+ for (;;) {
+ /*
+ * Wait for the traced process to stop.
+ */
+ if (write(fd, &ctl, size) != size) {
+ /* rv is computed before lx_debug() can disturb errno */
+ rv = (errno == ENOENT)? 0 : 1;
+ lx_debug("monitor failed to wait for LWP to stop: %s",
+ strerror(errno));
+ _exit(rv);
+ }
+
+ lx_debug("monitor caught traced LWP");
+
+ /*
+ * Pull the ptrace trigger by sending ourself a SIGTRAP. This
+ * will cause this, the monitor process, to stop which will
+ * cause the parent's waitid(2) call to return this process
+ * id. In lx_wait(), we remap the monitor process's pid and
+ * status to those of the traced LWP. When the parent process
+ * uses ptrace to resume the traced LWP, it will additionally
+ * restart this process.
+ */
+ (void) _lwp_kill(_lwp_self(), SIGTRAP);
+
+ lx_debug("monitor was resumed");
+ }
+}
+
+/*
+ * Common attach logic shared by ptrace_attach() and ptrace_catch_fork().
+ *
+ * fd is an open control file descriptor for the target LWP. lxpid, pid and
+ * lwpid are recorded in the new monitor map entry (as pmm_target, pmm_pid
+ * and pmm_lwpid). If `run' is non-zero a PCRUN command is appended to the
+ * control message so the target is set running once exec tracing is armed.
+ *
+ * Returns 0 on success; -EIO if the map entry cannot be allocated, a /proc
+ * write fails or the monitor cannot be forked; -EPERM if the target already
+ * has PR_PTRACE set; or the error from get_status().
+ */
+static int
+ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run)
+{
+ pid_t child;
+ ptrace_monitor_map_t *p;
+ sigset_t unblock;
+ pstatus_t status;
+ /* room for PCSEXIT + its sysset_t operand + an optional PCRUN pair */
+ long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2];
+ long *ctlp = ctl;
+ size_t size;
+ sysset_t *sysp;
+ int ret;
+
+ /*
+ * We're going to need this structure so better to fail now before it's
+ * too late to turn back.
+ */
+ if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL)
+ return (-EIO);
+
+ if ((ret = get_status(pid, &status)) != 0) {
+ free(p);
+ return (ret);
+ }
+
+ /*
+ * If this process is already traced, bail.
+ */
+ if (status.pr_flags & PR_PTRACE) {
+ free(p);
+ return (-EPERM);
+ }
+
+ /*
+ * Turn on the appropriate tracing flags. It's exceedingly unlikely
+ * that this operation will fail; any failure would probably be due
+ * to another /proc consumer mucking around.
+ */
+ if (ptrace_trace_common(fd) != 0) {
+ free(p);
+ return (-EIO);
+ }
+
+ /*
+ * Native ptrace automatically catches processes when they exec so we
+ * have to do that explicitly here.
+ */
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ praddset(sysp, SYS_execve);
+ if (run) {
+ *ctlp++ = PCRUN;
+ *ctlp++ = 0;
+ }
+
+ /* number of bytes actually filled in, which depends on `run' */
+ size = (char *)ctlp - (char *)&ctl[0];
+
+ if (write(fd, ctl, size) != size) {
+ free(p);
+ return (-EIO);
+ }
+
+ /*
+ * Spawn the monitor process to notify this process of events of
+ * interest in the traced process. We block signals here both so
+ * we're not interrupted during this operation and so that the
+ * monitor process doesn't accept signals.
+ */
+ (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock);
+ /* in the child, ptrace_monitor() never returns */
+ if ((child = fork1()) == 0)
+ ptrace_monitor(fd);
+ (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
+
+ if (child == -1) {
+ lx_debug("failed to fork monitor process\n");
+ free(p);
+ return (-EIO);
+ }
+
+ p->pmm_monitor = child;
+ p->pmm_target = lxpid;
+ p->pmm_pid = pid;
+ p->pmm_lwpid = lwpid;
+ p->pmm_exiting = 0;
+
+ /* publish the fully initialized entry at the head of the map */
+ (void) mutex_lock(&ptrace_map_mtx);
+ p->pmm_next = ptrace_monitor_map;
+ ptrace_monitor_map = p;
+ (void) mutex_unlock(&ptrace_map_mtx);
+
+ return (0);
+}
+
+/*
+ * Attach to the LWP pid/lwpid: open its lwpctl file exclusively, direct it
+ * to stop, and hand off to ptrace_attach_common().
+ *
+ * Returns -EPERM for Linux pid 1 or when the exclusive open fails with
+ * EBUSY, -ESRCH for any other open failure, otherwise the result of
+ * ptrace_attach_common().
+ */
+static int
+ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid)
+{
+	long stop_cmd = PCSTOP;
+	int ctlfd, rv;
+
+	/*
+	 * Linux doesn't let you trace process 1 -- go figure.
+	 */
+	if (lxpid == 1)
+		return (-EPERM);
+
+	ctlfd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl");
+	if (ctlfd < 0) {
+		if (errno == EBUSY)
+			return (-EPERM);
+		return (-ESRCH);
+	}
+
+	if (write(ctlfd, &stop_cmd, sizeof (stop_cmd)) != sizeof (stop_cmd)) {
+		lx_err(gettext("failed to stop %d/%d\n"), (int)pid, (int)lwpid);
+		assert(0);
+	}
+
+	rv = ptrace_attach_common(ctlfd, lxpid, pid, lwpid, 0);
+
+	(void) close(ctlfd);
+
+	return (rv);
+}
+
+/*
+ * Stop tracing the LWP pid/lwpid and let it run again, optionally
+ * delivering signal `sig'.
+ *
+ * Returns 0 on success, -ESRCH if the target is not traced or its lwpctl
+ * file cannot be opened, -EINVAL if `sig' is outside [0, LX_NSIG), -EIO if
+ * a /proc control write fails, a negative errno if the brand system call
+ * fails, or the result of ptrace_cont_common().
+ */
+static int
+ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+	long ctl[2];
+	int fd, ret;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	if (sig < 0 || sig >= LX_NSIG)
+		return (-EINVAL);
+
+	if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
+		return (-ESRCH);
+
+	/*
+	 * The /proc ptrace flag may not be set, but we clear it
+	 * unconditionally since doing so doesn't hurt anything.
+	 */
+	ctl[0] = PCUNSET;
+	ctl[1] = PR_PTRACE;
+	if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
+		(void) close(fd);
+		return (-EIO);
+	}
+
+	/*
+	 * Clear the brand-specific system call tracing flag to ensure that
+	 * the target doesn't stop unexpectedly some time in the future.
+	 */
+	if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) {
+		/*
+		 * syscall() reports failure as -1 with the cause in errno, so
+		 * simply negating the return value would hand the caller a
+		 * meaningless positive 1. Capture errno before close() can
+		 * overwrite it.
+		 */
+		ret = (ret == -1) ? -errno : -ret;
+		(void) close(fd);
+		return (ret);
+	}
+
+	/* kill off the monitor process, if any */
+	monitor_kill(lxpid, pid);
+
+	/*
+	 * Turn on the run-on-last-close flag so that all tracing flags will be
+	 * cleared when we close the control file descriptor.
+	 */
+	ctl[0] = PCSET;
+	ctl[1] = PR_RLC;
+	if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
+		(void) close(fd);
+		return (-EIO);
+	}
+
+	/*
+	 * Clear the current signal (if any) and possibly send the traced
+	 * process a new signal.
+	 */
+	ret = ptrace_cont_common(fd, sig, 0, 0);
+
+	(void) close(fd);
+
+	return (ret);
+}
+
+/*
+ * Turn on the brand-specific system call tracing flag for pid/lwpid and
+ * then resume the LWP through ptrace_cont().
+ *
+ * Returns -ESRCH if the target is not traced, a negative errno if the brand
+ * system call fails, otherwise the result of ptrace_cont().
+ */
+static int
+ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
+{
+	int ret;
+
+	if (!is_traced(pid))
+		return (-ESRCH);
+
+	if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0) {
+		/*
+		 * syscall() reports failure as -1 with the cause in errno;
+		 * negating the return value itself would yield a bogus
+		 * positive 1 instead of a negative errno.
+		 */
+		return ((ret == -1) ? -errno : -ret);
+	}
+
+	return (ptrace_cont(lxpid, pid, lwpid, sig, 0));
+}
+
+/*
+ * Entry point for the emulated ptrace system call.
+ *
+ * p1 is the LX_PTRACE_* request, p2 the Linux pid of the target, and p3/p4
+ * the request-specific address and data arguments, which are forwarded to
+ * the per-request helpers. Returns the helper's result, -ESRCH if the
+ * Linux pid cannot be translated, or -EINVAL for an unknown request.
+ */
+int
+lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ pid_t pid, lxpid = (pid_t)p2;
+ lwpid_t lwpid;
+
+ /*
+ * Every request except TRACEME names a target, so translate the Linux
+ * pid into a pid/lwpid pair. For TRACEME, pid and lwpid stay unset;
+ * that case passes neither of them on.
+ */
+ if ((p1 != LX_PTRACE_TRACEME) &&
+ (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0))
+ return (-ESRCH);
+
+ switch (p1) {
+ case LX_PTRACE_TRACEME:
+ return (ptrace_traceme());
+
+ case LX_PTRACE_PEEKTEXT:
+ case LX_PTRACE_PEEKDATA:
+ return (ptrace_peek(pid, p3, (int *)p4));
+
+ case LX_PTRACE_PEEKUSER:
+ return (ptrace_peek_user(pid, lwpid, p3, (int *)p4));
+
+ case LX_PTRACE_POKETEXT:
+ case LX_PTRACE_POKEDATA:
+ return (ptrace_poke(pid, p3, (int)p4));
+
+ case LX_PTRACE_POKEUSER:
+ return (ptrace_poke_user(pid, lwpid, p3, (int)p4));
+
+ case LX_PTRACE_CONT:
+ return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0));
+
+ case LX_PTRACE_KILL:
+ return (ptrace_kill(lxpid, pid));
+
+ case LX_PTRACE_SINGLESTEP:
+ return (ptrace_step(lxpid, pid, lwpid, (int)p4));
+
+ case LX_PTRACE_GETREGS:
+ return (ptrace_getregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETREGS:
+ return (ptrace_setregs(pid, lwpid, p4));
+
+ case LX_PTRACE_GETFPREGS:
+ return (ptrace_getfpregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETFPREGS:
+ return (ptrace_setfpregs(pid, lwpid, p4));
+
+ case LX_PTRACE_ATTACH:
+ return (ptrace_attach(lxpid, pid, lwpid));
+
+ case LX_PTRACE_DETACH:
+ return (ptrace_detach(lxpid, pid, lwpid, (int)p4));
+
+ case LX_PTRACE_GETFPXREGS:
+ return (ptrace_getfpxregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SETFPXREGS:
+ return (ptrace_setfpxregs(pid, lwpid, p4));
+
+ case LX_PTRACE_SYSCALL:
+ return (ptrace_syscall(lxpid, pid, lwpid, (int)p4));
+
+ default:
+ return (-EINVAL);
+ }
+}
+
+/*
+ * Raise SIGWAITING on the calling LWP so that a tracer watching this
+ * process can tell it has reached this point.
+ */
+void
+lx_ptrace_fork(void)
+{
+	lwpid_t self = _lwp_self();
+
+	/*
+	 * SIGWAITING has no Linux equivalent, which makes it usable as a
+	 * private marker for this particularly special case: this process
+	 * ignores the signal, but /proc consumers tracing this process
+	 * notice it.
+	 */
+	(void) _lwp_kill(self, SIGWAITING);
+}
+
+/*
+ * Handle a traced process that has signalled (via SIGWAITING) that it is
+ * about to fork: run it through the fork system call under /proc control,
+ * pick up the child's pid, and attach to the child as if by an explicit
+ * attach.
+ *
+ * pid is the process to drive through the fork. `monitor' is non-zero when
+ * the caller found the process through a monitor map entry; in that case
+ * PR_PTRACE is left alone and exec tracing is re-armed afterwards.
+ * Failures are logged with lx_debug() and otherwise abandoned, since there
+ * is no way to report them.
+ */
+static void
+ptrace_catch_fork(pid_t pid, int monitor)
+{
+ /*
+ * Sized for the longest message built below: 14 command/operand words
+ * plus two sysset_t operands (the !monitor case).
+ */
+ long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)];
+ long *ctlp;
+ sysset_t *sysp;
+ size_t size;
+ pstatus_t ps;
+ pid_t child;
+ int fd, err;
+
+ /*
+ * If any of this fails, we're really sunk since the child
+ * will be stuck in the middle of lx_ptrace_fork().
+ * Fortunately it's practically assured to succeed unless
+ * something is seriously wrong on the system.
+ */
+ if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) {
+ lx_debug("lx_catch_fork: failed to control %d",
+ (int)pid);
+ return;
+ }
+
+ /*
+ * Turn off the /proc PR_PTRACE flag so the parent doesn't get
+ * spurious wake ups while we're working our dark magic. Arrange to
+ * catch the process when it exits from fork, and turn on the /proc
+ * inherit-on-fork flag so we catch the child as well. We then run
+ * the process, wait for it to stop on the fork1(2) call and reset
+ * the tracing flags to their original state.
+ */
+ ctlp = ctl;
+ *ctlp++ = PCCSIG;
+ if (!monitor) {
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_PTRACE;
+ }
+ *ctlp++ = PCSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */
+ *ctlp++ = PCRUN;
+ *ctlp++ = 0;
+ *ctlp++ = PCWSTOP;
+ /* everything after PCWSTOP puts the tracing state back */
+ if (!monitor) {
+ *ctlp++ = PCSET;
+ *ctlp++ = PR_PTRACE;
+ }
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ if (monitor)
+ praddset(sysp, SYS_execve);
+
+ size = (char *)ctlp - (char *)&ctl[0];
+ assert(size <= sizeof (ctl));
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to set %d running",
+ (int)pid);
+ return;
+ }
+
+ /*
+ * Get the status so we can find the value returned from fork1() --
+ * the child process's pid.
+ */
+ if (get_status(pid, &ps) != 0) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to get status for %d",
+ (int)pid);
+ return;
+ }
+
+ /*
+ * NOTE(review): the "child < 0" test further down relies on a failed
+ * fork1() leaving a negative value in R_R0; that is not visible from
+ * here -- confirm.
+ */
+ child = (pid_t)ps.pr_lwp.pr_reg[R_R0];
+
+ /*
+ * We're done with the parent -- off you go.
+ */
+ ctl[0] = PCRUN;
+ ctl[1] = 0;
+ size = 2 * sizeof (long);
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to set %d running",
+ (int)pid);
+ return;
+ }
+
+ (void) close(fd);
+
+ /*
+ * If fork1(2) failed, we're done.
+ */
+ if (child < 0) {
+ lx_debug("lx_catch_fork: fork1 failed");
+ return;
+ }
+
+ /*
+ * Now we need to screw with the child process.
+ */
+ if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) {
+ lx_debug("lx_catch_fork: failed to control %d",
+ (int)child);
+ return;
+ }
+
+ /* clear PR_FORK and the syscall-exit trace set in the child */
+ ctlp = ctl;
+ *ctlp++ = PCUNSET;
+ *ctlp++ = PR_FORK;
+ *ctlp++ = PCSEXIT;
+ sysp = (sysset_t *)ctlp;
+ ctlp += sizeof (sysset_t) / sizeof (long);
+ premptyset(sysp);
+ size = (char *)ctlp - (char *)&ctl[0];
+
+ if (write(fd, ctl, size) != size) {
+ (void) close(fd);
+ lx_debug("lx_catch_fork: failed to clear trace flags for %d",
+ (int)child);
+ return;
+ }
+
+ /*
+ * Now treat the child as though we had attached to it explicitly.
+ * The child's pid is used as both its Linux pid and its native pid,
+ * with LWP id 1, and it is set running once attached.
+ */
+ err = ptrace_attach_common(fd, child, child, 1, 1);
+ assert(err == 0);
+
+ (void) close(fd);
+}
+
+/*
+ * Synthesize the emulated debug status register (debugreg[6]) for process
+ * `pid' from the siginfo of the SIGTRAP it stopped on.
+ *
+ * A TRAP_TRACE code sets the single-step bit (0x4000). For the watchpoint
+ * codes, each of the four slots whose enable bit (bit 2*i) is set in
+ * debugreg[7] is checked, and bit i is set when the faulting address falls
+ * inside [debugreg[i], debugreg[i] + length). Does nothing if no debug
+ * register state exists for `pid'.
+ */
+static void
+set_dr6(pid_t pid, siginfo_t *infop)
+{
+	uintptr_t *debugreg;
+	uintptr_t addr;
+	uintptr_t base;
+	size_t size = 0;	/* a byte count, so start from 0, not NULL */
+	int dr7;
+	int lrw;
+	int i;
+
+	if ((debugreg = debug_registers(pid)) == NULL)
+		return;
+
+	debugreg[6] = 0xffff0ff0;	/* read as ones */
+	switch (infop->si_code) {
+	case TRAP_TRACE:
+		debugreg[6] |= 0x4000;	/* single-step */
+		break;
+	case TRAP_RWATCH:
+	case TRAP_WWATCH:
+	case TRAP_XWATCH:
+		dr7 = debugreg[7];
+		addr = (uintptr_t)infop->si_addr;
+		for (i = 0; i < 4; i++) {
+			if ((dr7 & (1 << (2 * i))) == 0)	/* enabled? */
+				continue;
+			/* 4-bit field for slot i; its top two bits are the length */
+			lrw = (dr7 >> (16 + (4 * i))) & 0xf;
+			switch (lrw >> 2) {	/* length */
+			case 0: size = 1; break;
+			case 1: size = 2; break;
+			case 2: size = 8; break;
+			case 3: size = 4; break;
+			}
+			base = debugreg[i];
+			if (addr >= base && addr < base + size)
+				debugreg[6] |= (1 << i);
+		}
+		/*
+		 * Were we also attempting a single-step?
+		 * (kludge: we use debugreg[4] for this flag.)
+		 */
+		if (debugreg[4])
+			debugreg[6] |= 0x4000;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * This is called from the emulation of the wait4 and waitpid system call to
+ * take into account the monitor processes which we spawn to observe other
+ * processes from ptrace_attach().
+ *
+ * infop is the siginfo_t produced by waitid(2); when the event came from a
+ * monitor it is rewritten in place to describe the traced LWP instead.
+ * Returns 0 when the (possibly rewritten) event should be reported, or -1
+ * when the event was spurious and the caller should ignore the result and
+ * wait again.
+ */
+int
+lx_ptrace_wait(siginfo_t *infop)
+{
+	ptrace_monitor_map_t *p, **pp;
+	pid_t lxpid, pid = infop->si_pid;
+	lwpid_t lwpid;
+	int fd;
+	int monitor_gone;
+	pstatus_t status;
+
+	/*
+	 * If the process observed by waitid(2) corresponds to the monitor
+	 * process for a traced thread, we need to rewhack the siginfo_t to
+	 * look like it came from the traced thread with the flags set
+	 * according to the current state.
+	 */
+	(void) mutex_lock(&ptrace_map_mtx);
+	for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
+		if (p->pmm_monitor == pid) {
+			assert(infop->si_code == CLD_EXITED ||
+			    infop->si_code == CLD_KILLED ||
+			    infop->si_code == CLD_DUMPED ||
+			    infop->si_code == CLD_TRAPPED);
+			goto found;
+		}
+	}
+	(void) mutex_unlock(&ptrace_map_mtx);
+
+	/*
+	 * If the traced process got a SIGWAITING, we must be in the middle
+	 * of a clone(2) with CLONE_PTRACE set.
+	 */
+	if (infop->si_code == CLD_TRAPPED && infop->si_status == SIGWAITING) {
+		ptrace_catch_fork(pid, 0);
+		return (-1);
+	}
+
+	if (get_status(pid, &status) == 0 &&
+	    (status.pr_lwp.pr_flags & PR_STOPPED) &&
+	    status.pr_lwp.pr_why == PR_SIGNALLED &&
+	    status.pr_lwp.pr_info.si_signo == SIGTRAP)
+		set_dr6(pid, &status.pr_lwp.pr_info);
+
+	return (0);
+
+found:
+	/*
+	 * We arrive here with ptrace_map_mtx held.
+	 *
+	 * If the monitor is in the exiting state, ignore the event and free
+	 * the monitor structure if the monitor has exited. By returning -1 we
+	 * indicate to the caller that this was a spurious return from
+	 * waitid(2) and that it should ignore the result and try again.
+	 *
+	 * The mutex is released on both outcomes; if the monitor has merely
+	 * stopped (CLD_TRAPPED) the entry stays in the map but the lock must
+	 * still be dropped before returning.
+	 */
+	if (p->pmm_exiting) {
+		monitor_gone = (infop->si_code == CLD_EXITED ||
+		    infop->si_code == CLD_KILLED ||
+		    infop->si_code == CLD_DUMPED);
+
+		if (monitor_gone)
+			*pp = p->pmm_next;
+		(void) mutex_unlock(&ptrace_map_mtx);
+		if (monitor_gone)
+			free(p);
+
+		return (-1);
+	}
+
+	lxpid = p->pmm_target;
+	pid = p->pmm_pid;
+	lwpid = p->pmm_lwpid;
+	(void) mutex_unlock(&ptrace_map_mtx);
+
+	/*
+	 * If we can't find the traced process, kill off its monitor.
+	 */
+	if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) {
+		assert(errno == ENOENT);
+		monitor_kill(lxpid, pid);
+		infop->si_code = CLD_EXITED;
+		infop->si_status = 0;
+		infop->si_pid = lxpid;
+		return (0);
+	}
+
+	if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) !=
+	    sizeof (status.pr_lwp)) {
+		lx_err(gettext("read lwpstatus failed %d %s"),
+		    fd, strerror(errno));
+		assert(0);
+	}
+
+	(void) close(fd);
+
+	/*
+	 * If the traced process isn't stopped, this is a truly spurious
+	 * event probably caused by another /proc consumer tracing the
+	 * monitor.
+	 */
+	if (!(status.pr_lwp.pr_flags & PR_STOPPED)) {
+		(void) ptrace_cont_monitor(p);
+		return (-1);
+	}
+
+	switch (status.pr_lwp.pr_why) {
+	case PR_SIGNALLED:
+		/*
+		 * If the traced process got a SIGWAITING, we must be in the
+		 * middle of a clone(2) with CLONE_PTRACE set.
+		 */
+		if (status.pr_lwp.pr_what == SIGWAITING) {
+			ptrace_catch_fork(lxpid, 1);
+			(void) ptrace_cont_monitor(p);
+			return (-1);
+		}
+		infop->si_code = CLD_TRAPPED;
+		infop->si_status = status.pr_lwp.pr_what;
+		if (status.pr_lwp.pr_info.si_signo == SIGTRAP)
+			set_dr6(pid, &status.pr_lwp.pr_info);
+		break;
+
+	case PR_REQUESTED:
+		/*
+		 * Make it look like the traced process stopped on an
+		 * event of interest.
+		 */
+		infop->si_code = CLD_TRAPPED;
+		infop->si_status = SIGTRAP;
+		break;
+
+	case PR_JOBCONTROL:
+		/*
+		 * Ignore this as it was probably caused by another /proc
+		 * consumer tracing the monitor.
+		 */
+		(void) ptrace_cont_monitor(p);
+		return (-1);
+
+	case PR_SYSEXIT:
+		/*
+		 * Processes traced via a monitor (rather than using the
+		 * native Solaris ptrace support) explicitly trace returns
+		 * from exec system calls since it's an implicit ptrace
+		 * trace point. Accordingly we need to present a process
+		 * in that state as though it had reached the ptrace trace
+		 * point.
+		 */
+		if (status.pr_lwp.pr_what == SYS_execve) {
+			infop->si_code = CLD_TRAPPED;
+			infop->si_status = SIGTRAP;
+			break;
+		}
+
+		/*FALLTHROUGH*/
+
+	case PR_SYSENTRY:
+	case PR_FAULTED:
+	case PR_SUSPENDED:
+	default:
+		lx_err(gettext("didn't expect %d (%d %d)"),
+		    status.pr_lwp.pr_why,
+		    status.pr_lwp.pr_what, status.pr_lwp.pr_flags);
+		assert(0);
+	}
+
+	infop->si_pid = lxpid;
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/rlimit.c b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c
new file mode 100644
index 0000000000..97498c6d4a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c
@@ -0,0 +1,233 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/resource.h>
+#include <sys/sysconfig.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Linux resource numbers whose slot in the translation table below is -1,
+ * followed by the number of Linux resource numbers the table covers.
+ */
+#define LX_RLIMIT_RSS 5
+#define LX_RLIMIT_NPROC 6
+#define LX_RLIMIT_MEMLOCK 8
+#define LX_RLIMIT_LOCKS 10
+#define LX_RLIMIT_NLIMITS 11
+
+/*
+ * Linux supports many of the same resources that we do, but the numbering
+ * is slightly different. This table is used to translate Linux resource
+ * limit keys into their Solaris equivalents. It is indexed by the Linux
+ * resource number; an entry of -1 marks a resource with no native
+ * counterpart in this table.
+ */
+static int ltos_resource[LX_RLIMIT_NLIMITS] = {
+ RLIMIT_CPU,
+ RLIMIT_FSIZE,
+ RLIMIT_DATA,
+ RLIMIT_STACK,
+ RLIMIT_CORE,
+ -1, /* RSS */
+ -1, /* NPROC */
+ RLIMIT_NOFILE,
+ -1, /* MEMLOCK */
+ RLIMIT_AS,
+ -1 /* LOCKS */
+};
+
+/* number of entries in ltos_resource */
+#define NLIMITS (sizeof (ltos_resource) / sizeof (int))
+
+/*
+ * Magic values Linux uses to indicate infinity
+ * (_O is passed by lx_oldgetrlimit(), _N by lx_getrlimit()).
+ */
+#define LX_RLIM_INFINITY_O (0x7fffffffUL)
+#define LX_RLIM_INFINITY_N (0xffffffffUL)
+
+/*
+ * Array to store the rlimits that we track but do not enforce.
+ * Indexed by Linux resource number, one { rlim_cur, rlim_max } pair per
+ * row. The functions below only read or write the rows whose
+ * ltos_resource entry is -1.
+ */
+static struct rlimit fake_limits[NLIMITS] = {
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIMIT_RSS */
+ RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIMIT_NPROC */
+ 0, 0,
+ RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIMIT_MEMLOCK */
+ 0, 0,
+ RLIM_INFINITY, RLIM_INFINITY /* LX_RLIMIT_LOCKS */
+};
+
+/*
+ * Shared implementation of the two Linux getrlimit variants.
+ *
+ * `resource' is the Linux resource number, `rlp' the caller's result buffer
+ * and `inf' the value to report in place of RLIM_INFINITY. Resources with
+ * a native equivalent are answered by getrlimit(2); the tracked-only ones
+ * are answered from fake_limits. Returns 0 on success, -EINVAL for an
+ * out-of-range resource, -ENOTSUP for an untracked resource with no native
+ * equivalent, or -errno from getrlimit()/uucopy().
+ */
+static int
+lx_getrlimit_common(int resource, struct rlimit *rlp, int inf)
+{
+	struct rlimit fake;
+	int native;
+
+	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
+		return (-EINVAL);
+
+	native = ltos_resource[resource];
+
+	/* Resources with a native counterpart go straight to the system. */
+	if (native != -1) {
+		if (getrlimit(native, rlp) < 0)
+			return (-errno);
+
+		if (rlp->rlim_cur == RLIM_INFINITY)
+			rlp->rlim_cur = inf;
+
+		if (rlp->rlim_max == RLIM_INFINITY)
+			rlp->rlim_max = inf;
+
+		return (0);
+	}
+
+	if (resource != LX_RLIMIT_MEMLOCK && resource != LX_RLIMIT_RSS &&
+	    resource != LX_RLIMIT_LOCKS && resource != LX_RLIMIT_NPROC) {
+		lx_unsupported("Unsupported resource type %d\n",
+		    resource);
+		return (-ENOTSUP);
+	}
+
+	/* Tracked-only resource: report what a previous setrlimit recorded. */
+	fake.rlim_max = fake_limits[resource].rlim_max;
+	fake.rlim_cur = fake_limits[resource].rlim_cur;
+
+	if (fake.rlim_cur == RLIM_INFINITY)
+		fake.rlim_cur = inf;
+	if (fake.rlim_max == RLIM_INFINITY)
+		fake.rlim_max = inf;
+
+	if (uucopy(&fake, rlp, sizeof (fake)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * The 'new' getrlimit (called getrlimit or ugetrlimit in Linux headers and
+ * code), which modern code uses by default. It differs from the old
+ * getrlimit (getrlimit or old_getrlimit) only in the value reported for
+ * RLIM_INFINITY, which is larger here.
+ *
+ * p1 is the Linux resource number, p2 the user's struct rlimit buffer.
+ */
+int
+lx_getrlimit(uintptr_t p1, uintptr_t p2)
+{
+	return (lx_getrlimit_common((int)p1, (struct rlimit *)p2,
+	    LX_RLIM_INFINITY_N));
+}
+
+/*
+ * The 'old' getrlimit (called getrlimit or old_getrlimit in Linux headers
+ * and code). It differs from the new getrlimit (getrlimit or ugetrlimit)
+ * only in the value reported for RLIM_INFINITY, which is smaller here.
+ *
+ * p1 is the Linux resource number, p2 the user's struct rlimit buffer.
+ */
+int
+lx_oldgetrlimit(uintptr_t p1, uintptr_t p2)
+{
+	return (lx_getrlimit_common((int)p1, (struct rlimit *)p2,
+	    LX_RLIM_INFINITY_O));
+}
+
+/*
+ * Emulated setrlimit. p1 is the Linux resource number and p2 the user's
+ * struct rlimit.
+ *
+ * Resources with a native equivalent are passed to setrlimit(2). The
+ * tracked-only resources are validated and recorded in fake_limits without
+ * being enforced. Returns 0 on success, -EINVAL for a bad resource number
+ * or an inconsistent limit pair, -ENOTSUP for an untracked resource with no
+ * native equivalent, or -errno from uucopy()/setrlimit().
+ */
+int
+lx_setrlimit(uintptr_t p1, uintptr_t p2)
+{
+	int resource = (int)p1;
+	struct rlimit req;
+	int native;
+
+	if (resource < 0 || resource >= LX_RLIMIT_NLIMITS)
+		return (-EINVAL);
+
+	native = ltos_resource[resource];
+
+	if (native != -1) {
+		if (setrlimit(native, (struct rlimit *)p2) < 0)
+			return (-errno);
+		return (0);
+	}
+
+	if (uucopy((void *)p2, &req, sizeof (req)) != 0)
+		return (-errno);
+
+	if (resource != LX_RLIMIT_MEMLOCK && resource != LX_RLIMIT_RSS &&
+	    resource != LX_RLIMIT_LOCKS && resource != LX_RLIMIT_NPROC) {
+		lx_unsupported("Unsupported resource type %d\n", resource);
+		return (-ENOTSUP);
+	}
+
+	/* With a finite hard limit, the soft limit must be finite and within it. */
+	if (req.rlim_max != LX_RLIM_INFINITY_N &&
+	    (req.rlim_cur == LX_RLIM_INFINITY_N ||
+	    req.rlim_cur > req.rlim_max))
+		return (-EINVAL);
+
+	/* Store the native notion of infinity rather than the Linux one. */
+	fake_limits[resource].rlim_max =
+	    (req.rlim_max == LX_RLIM_INFINITY_N) ?
+	    RLIM_INFINITY : req.rlim_max;
+	fake_limits[resource].rlim_cur =
+	    (req.rlim_cur == LX_RLIM_INFINITY_N) ?
+	    RLIM_INFINITY : req.rlim_cur;
+
+	return (0);
+}
+
+/*
+ * Emulated getrusage. Linux and Solaris share the same rusage structure,
+ * so only the `who' selector (p1) needs translating; p2 is the user's
+ * struct rusage buffer. Returns 0, -EINVAL for an unknown selector, or
+ * -errno from getrusage().
+ */
+int
+lx_getrusage(uintptr_t p1, uintptr_t p2)
+{
+	int who = (int)p1;
+	int swho;
+
+	if (who != LX_RUSAGE_SELF && who != LX_RUSAGE_CHILDREN)
+		return (-EINVAL);
+
+	swho = (who == LX_RUSAGE_SELF) ?
+	    _RUSAGESYS_GETRUSAGE : _RUSAGESYS_GETRUSAGE_CHLD;
+
+	if (getrusage(swho, (struct rusage *)p2) < 0)
+		return (-errno);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c
new file mode 100644
index 0000000000..f37ab83aee
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c
@@ -0,0 +1,610 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/cred_impl.h>
+#include <sys/ucred.h>
+#include <ucred.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sched.h>
+#include <strings.h>
+#include <pthread.h>
+#include <time.h>
+#include <thread.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_sched.h>
+
+/*
+ * Translate a Linux scheduling policy into its native counterpart. Linux
+ * only has three valid policies: SCHED_FIFO, SCHED_RR and SCHED_OTHER.
+ * Anything else is logged and rejected with -EINVAL.
+ */
+static int
+validate_policy(int policy)
+{
+	if (policy == LX_SCHED_FIFO)
+		return (SCHED_FIFO);
+
+	if (policy == LX_SCHED_RR)
+		return (SCHED_RR);
+
+	if (policy == LX_SCHED_OTHER)
+		return (SCHED_OTHER);
+
+	lx_debug("validate_policy: illegal policy: %d", policy);
+	return (-EINVAL);
+}
+
+/*
+ * Decide whether the caller may change the scheduler parameters or policy
+ * of process `pid'. Linux demands that such requests fail with EPERM
+ * unless the caller's euid equals the euid or ruid of the target process.
+ *
+ * Returns 0 if permitted, -EPERM if not (or if the target's credentials
+ * are inaccessible), -ENOMEM if the credential buffer cannot be allocated,
+ * or another negative errno from the credential lookup.
+ */
+static int
+check_schedperms(pid_t pid)
+{
+	uid_t caller_euid = geteuid();
+	ucred_t *target_cred;
+	size_t credsz;
+
+	/*
+	 * Checking ourselves needs no credential lookup: just compare our
+	 * euid against our own ruid.
+	 */
+	if (pid == getpid())
+		return ((caller_euid == getuid()) ? 0 : -EPERM);
+
+	/*
+	 * ucred_get(3C) is off limits because it calls malloc(3C), which the
+	 * brand library cannot use, so the ucred_t is allocated here instead.
+	 * The space comes from SAFE_ALLOCA(), so it never has to be freed.
+	 */
+	credsz = ucred_size();
+	target_cred = (ucred_t *)SAFE_ALLOCA(credsz);
+	if (target_cred == NULL)
+		return (-ENOMEM);
+
+	/*
+	 * Being denied access to the target's credentials is reported as
+	 * EPERM, since the operation would not have succeeded anyway.
+	 */
+	if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, target_cred) != 0)
+		return ((errno == EACCES) ? -EPERM : -errno);
+
+	if (caller_euid == ucred_geteuid(target_cred) ||
+	    caller_euid == ucred_getruid(target_cred))
+		return (0);
+
+	return (-EPERM);
+}
+
+/*
+ * Convert a Linux sched_param into a native one.
+ *
+ * policy is passed to sched_get_priority_min()/max() to obtain the native
+ * priority range. lsp is the Linux structure, copied in with uucopy(); sp
+ * is the native structure to fill in (zeroed first). Returns 0 on success
+ * or -errno if the copy-in fails.
+ */
+static int
+ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
+ return (-errno);
+
+ bzero(sp, sizeof (struct sched_param));
+
+ /*
+ * Linux has a fixed priority range, 0 - 99, which we need to convert to
+ * Solaris's dynamic range. Linux considers lower numbers to be
+ * higher priority, so we'll invert the priority within Solaris's range.
+ *
+ * The formula to convert between ranges is:
+ *
+ * L * (smax - smin)
+ * S = ----------------- + smin
+ * (lmax - lmin)
+ *
+ * where S is the Solaris equivalent of the linux priority L.
+ *
+ * To invert the priority, we use:
+ * S' = smax - S + smin
+ *
+ * Together, these two formulas become:
+ *
+ * L * (smax - smin)
+ * S = smax - ----------------- + 2smin
+ * 99
+ *
+ * NOTE(review): substituting the first formula into the second gives
+ * smax - L * (smax - smin) / 99, with no smin term left over. The
+ * "+ 2smin" used here (and in the expression below) only agrees with
+ * that when smin is 0 -- confirm which is intended. stol_sparam()
+ * uses the same 2smin term.
+ */
+ sp->sched_priority = smax -
+ ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
+
+ lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
+ "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
+ smin, smax);
+
+ return (0);
+}
+
+/*
+ * Convert a native sched_param into a Linux one.
+ *
+ * policy is passed to sched_get_priority_min()/max() to obtain the native
+ * priority range. sp is the native structure to convert; lsp is the
+ * destination, written with uucopy(). Returns 0 on success or -errno if
+ * the copy-out fails.
+ */
+static int
+stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (policy == SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ ls.lx_sched_prio = 0;
+ } else {
+ /*
+ * Convert Solaris's dynamic, inverted priority range to the
+ * fixed Linux range of 1 - 99.
+ *
+ * The formula is (see above):
+ *
+ * (smax - s + 2smin) * 99
+ * l = -----------------------
+ * smax - smin
+ *
+ * NOTE(review): this divides by (smax - smin); nothing here
+ * guards against the two being equal -- confirm that cannot
+ * happen for the policies that reach this branch.
+ */
+ ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
+ LX_PRI_MAX) / (smax - smin);
+ }
+
+ lx_debug("stol_sparam: Solaris prio %d = linux prio %d "
+ "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio,
+ smin, smax);
+
+ return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
+ ? -errno : 0);
+}
+
+/*
+ * Helpers for addressing bit `ind' of a CPU mask stored as an array of
+ * ulong_t: BITINDEX() gives the array element holding the bit, BITSHIFT()
+ * the single-bit mask within that element.
+ *
+ * The argument is parenthesized so that expressions can be passed safely,
+ * and the shifted constant is an unsigned long so that the mask is as wide
+ * as the element it is applied to (a plain int 1 cannot reach the upper
+ * bits of a wider ulong_t, and shifting it into the sign bit is undefined).
+ */
+#define	BITINDEX(ind)	((ind) / (sizeof (ulong_t) * 8))
+#define	BITSHIFT(ind)	(1UL << ((ind) % (sizeof (ulong_t) * 8)))
+
+/*
+ * Emulated sched_getaffinity.
+ *
+ * pid, len and maskp are handed unchanged to the B_GET_AFFINITY_MASK brand
+ * call, whose return value is used as the mask size in bytes. If the mask
+ * it stored at maskp is all zeros, a default mask is built with a bit set
+ * for every CPU id that p_online() recognises and written back to maskp.
+ *
+ * Returns the mask size on success, -errno if the brand call fails,
+ * -ENOMEM if the scratch buffers cannot be allocated, or -EFAULT if maskp
+ * cannot be read or written.
+ */
+/* ARGSUSED */
+int
+lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int sz;
+ ulong_t *lmask, *zmask;
+ int i;
+
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
+ if (sz == -1)
+ return (-errno);
+
+ /*
+ * If the target LWP hasn't ever had an affinity mask set, the kernel
+ * will return a mask of all 0's. If that is the case we must build a
+ * default mask that has all valid bits turned on.
+ */
+ lmask = SAFE_ALLOCA(sz);
+ zmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL || zmask == NULL)
+ return (-ENOMEM);
+
+ /* zmask is an all-zero reference to compare the returned mask against */
+ bzero(zmask, sz);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ /* a non-zero mask is already in the caller's buffer; nothing to add */
+ if (bcmp(lmask, zmask, sz) != 0)
+ return (sz);
+
+ /* lmask is known to be all zeros here, so only bits need setting */
+ for (i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) != -1) {
+ lmask[BITINDEX(i)] |= BITSHIFT(i);
+ }
+ }
+
+ if (uucopy(lmask, (void *)maskp, sz) != 0)
+ return (-EFAULT);
+
+ return (sz);
+}
+
+/*
+ * Emulate sched_setaffinity(2) for a thread of the calling process.
+ *
+ * The user's mask is reduced to the CPUs that p_online() recognises.  If
+ * exactly one CPU remains the thread is bound to it, otherwise any existing
+ * binding is cleared; a failure to bind or unbind is only logged.  The
+ * reduced mask is then recorded with the kernel.
+ *
+ * Returns 0 on success or a negated errno: EINVAL for a negative pid or a
+ * mask with no present CPUs, ESRCH if the pid cannot be translated, EPERM
+ * if the target is in another process, ENOMEM or EFAULT for buffer
+ * problems.
+ */
+/* ARGSUSED */
+int
+lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+	int ret;
+	int sz;
+	int i;
+	int found;
+	ulong_t *lmask;
+	pid_t s_pid;
+	lwpid_t s_tid;
+	/* Only meaningful once "found" is non-zero; it is an id, not a ptr */
+	processorid_t cpuid = 0;
+
+	if ((pid_t)pid < 0)
+		return (-EINVAL);
+
+	if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
+		return (-ESRCH);
+
+	/*
+	 * We only support setting affinity masks for threads in
+	 * the calling process.
+	 */
+	if (s_pid != getpid())
+		return (-EPERM);
+
+	/*
+	 * First, get the minimum bitmask size from the kernel.
+	 */
+	sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
+	if (sz == -1)
+		return (-errno);
+
+	lmask = SAFE_ALLOCA(sz);
+	if (lmask == NULL)
+		return (-ENOMEM);
+
+	if (uucopy((void *)maskp, lmask, sz) != 0)
+		return (-EFAULT);
+
+	/*
+	 * Make sure the mask contains at least one processor that is
+	 * physically on the system.  Reduce the user's mask to the set of
+	 * physically present CPUs.  Keep track of how many valid
+	 * bits are set in the user's mask.
+	 */
+	for (found = 0, i = 0; i < sz * 8; i++) {
+		if (p_online(i, P_STATUS) == -1) {
+			/*
+			 * This CPU doesn't exist, so clear this bit from
+			 * the user's mask.
+			 */
+			lmask[BITINDEX(i)] &= ~BITSHIFT(i);
+			continue;
+		}
+
+		if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
+			found++;
+			cpuid = i;
+		}
+	}
+
+	if (found == 0) {
+		lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
+		return (-EINVAL);
+	}
+
+	/*
+	 * If only one bit is set, bind the thread to that processor;
+	 * otherwise, clear the binding.
+	 */
+	if (found == 1) {
+		lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
+		    s_tid, cpuid);
+		if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0) {
+			/*
+			 * It could be that the requested processor is offline,
+			 * so we'll just abandon our good-natured attempt to
+			 * bind to it.
+			 */
+			lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
+			    cpuid, strerror(errno));
+		}
+	} else {
+		lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
+		    s_tid);
+		if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
+			lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
+			    s_tid, strerror(errno));
+		}
+	}
+
+	/*
+	 * Finally, ask the kernel to make a note of our current (though fairly
+	 * meaningless) affinity mask.
+	 */
+	ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
+
+	return ((ret == 0) ? 0 : -errno);
+}
+
+/*
+ * Emulate sched_getparam(2): look up the scheduling parameters of the
+ * target and copy them out in Linux form via stol_sparam().
+ *
+ * Returns 0 on success or a negated errno.
+ */
+int
+lx_sched_getparam(uintptr_t pid, uintptr_t param)
+{
+	struct sched_param s_param;
+	lwpid_t s_tid;
+	pid_t s_pid;
+	int policy, err;
+
+	if ((pid_t)pid < 0 || param == NULL)
+		return (-EINVAL);
+
+	if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+		return (-ESRCH);
+
+	if (s_pid != getpid()) {
+		/* Another process: only process-wide data is available. */
+		if (sched_getparam(s_pid, &s_param) == -1)
+			return (-errno);
+
+		policy = sched_getscheduler(s_pid);
+		if (policy < 0)
+			return (-errno);
+	} else {
+		/* Our own process: query the individual thread. */
+		err = pthread_getschedparam(s_tid, &policy, &s_param);
+		if (err != 0)
+			return (-err);
+	}
+
+	return (stol_sparam(policy, &s_param, (struct lx_sched_param *)param));
+}
+
+/*
+ * Emulate sched_setparam(2): convert the caller's Linux scheduling
+ * parameters to their Solaris form and apply them to the target thread (if
+ * it lives in this process) or process, keeping the current policy.
+ *
+ * Returns 0 on success or a negated errno.
+ */
+int
+lx_sched_setparam(uintptr_t pid, uintptr_t param)
+{
+	struct lx_sched_param lx_param;
+	struct sched_param s_param;
+	lwpid_t s_tid;
+	pid_t s_pid;
+	int policy, rc;
+
+	if ((pid_t)pid < 0 || param == NULL)
+		return (-EINVAL);
+
+	if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+		return (-ESRCH);
+
+	/* Find out which policy is currently in force for the target. */
+	if (s_pid != getpid()) {
+		policy = sched_getscheduler(s_pid);
+		if (policy < 0)
+			return (-errno);
+	} else {
+		struct sched_param unused;
+
+		rc = pthread_getschedparam(s_tid, &policy, &unused);
+		if (rc != 0)
+			return (-rc);
+	}
+
+	lx_debug("sched_setparam(): current policy %d", policy);
+
+	if (uucopy((void *)param, &lx_param, sizeof (lx_param)) != 0)
+		return (-errno);
+
+	/* Linux permits only priority 0 under SCHED_OTHER. */
+	if (policy == SCHED_OTHER && lx_param.lx_sched_prio != 0)
+		return (-EINVAL);
+
+	rc = ltos_sparam(policy, &lx_param, &s_param);
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Verify that we are allowed to change the target's scheduling.
+	 *
+	 * For a thread, pthread_setschedparam() alone is not enough of a
+	 * check: all threads live in a single Solaris process, so Solaris
+	 * will allow the modification.
+	 *
+	 * For a process, sched_setparam() alone is not enough either:
+	 * Solaris lets the call succeed when the parameters being installed
+	 * do not differ from the current ones, whereas Linux wants the call
+	 * to fail.
+	 */
+	rc = check_schedperms(s_pid);
+	if (rc != 0)
+		return (rc);
+
+	if (s_pid == getpid()) {
+		rc = pthread_setschedparam(s_tid, policy, &s_param);
+		return ((rc != 0) ? -rc : 0);
+	}
+
+	if (sched_setparam(s_pid, &s_param) == -1)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Emulate sched_rr_get_interval(2): fetch the round-robin time quantum of
+ * the process identified by the Linux pid and store it in the caller's
+ * timespec buffer.
+ *
+ * Returns 0 on success or a negated errno: EINVAL for a negative pid, ESRCH
+ * if the pid cannot be translated, the uucopy() errno for a bad buffer, or
+ * whatever sched_rr_get_interval() reports.
+ */
+int
+lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
+{
+	struct timespec ts;
+	pid_t s_pid;
+
+	if ((pid_t)pid < 0)
+		return (-EINVAL);
+
+	if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
+		return (-ESRCH);
+
+	/*
+	 * Read the caller's buffer first so that a bad pointer is reported
+	 * before any scheduler error, as it was previously.
+	 */
+	if (uucopy((struct timespec *)timespec, &ts,
+	    sizeof (struct timespec)) != 0)
+		return (-errno);
+
+	if (sched_rr_get_interval(s_pid, &ts) == -1)
+		return (-errno);
+
+	/*
+	 * Hand the interval back to the caller; without this copy-out the
+	 * result would never leave our local timespec.
+	 */
+	if (uucopy(&ts, (struct timespec *)timespec,
+	    sizeof (struct timespec)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Emulate sched_getscheduler(2): report the scheduling policy of the target
+ * as a Linux policy constant.
+ *
+ * Returns the Linux policy on success or a negated errno.
+ */
+int
+lx_sched_getscheduler(uintptr_t pid)
+{
+	lwpid_t s_tid;
+	pid_t s_pid;
+	int policy;
+
+	if ((pid_t)pid < 0)
+		return (-EINVAL);
+
+	if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+		return (-ESRCH);
+
+	if (s_pid != getpid()) {
+		policy = sched_getscheduler(s_pid);
+		if (policy < 0)
+			return (-errno);
+	} else {
+		struct sched_param unused;
+		int err;
+
+		err = pthread_getschedparam(s_tid, &policy, &unused);
+		if (err != 0)
+			return (-err);
+	}
+
+	/*
+	 * Linux knows only a handful of policies; anything it would not
+	 * recognise is reported as LX_SCHED_OTHER so as not to confuse
+	 * applications.
+	 */
+	if (policy == SCHED_FIFO)
+		return (LX_SCHED_FIFO);
+
+	if (policy == SCHED_RR)
+		return (LX_SCHED_RR);
+
+	return (LX_SCHED_OTHER);
+}
+
+/*
+ * Emulate sched_setscheduler(2): validate the requested Linux policy and
+ * parameters, translate them, and install them on the target thread (if it
+ * lives in this process) or process.
+ *
+ * Returns the previous policy on success or a negated errno.
+ *
+ * NOTE(review): the Linux sched_setscheduler(2) system call is documented
+ * to return 0 on success; confirm that returning the previous policy is
+ * what callers of this emulation expect.
+ */
+int
+lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
+{
+	int rt_pol;
+	int rv;
+	pid_t s_pid;
+	lwpid_t s_tid;
+	struct lx_sched_param lp;
+	struct sched_param sp;
+
+	if (((pid_t)pid < 0) || (param == NULL))
+		return (-EINVAL);
+
+	if ((rt_pol = validate_policy((int)policy)) < 0)
+		return (rt_pol);
+
+	if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
+	    &sp)) != 0)
+		return (rv);
+
+	if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+		return (-errno);
+
+	/*
+	 * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+	 */
+	if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0))
+		return (-EINVAL);
+
+	if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+		return (-ESRCH);
+
+	/*
+	 * Check if we're allowed to change the scheduler for the process.
+	 *
+	 * If we're operating on a thread, we can't just call
+	 * pthread_setschedparam() because as all threads reside within a
+	 * single Solaris process, Solaris will allow the modification.
+	 *
+	 * If we're operating on a process, we can't just call
+	 * sched_setscheduler() because Solaris will allow the call to succeed
+	 * if the scheduler and scheduler parameters do not differ from those
+	 * being installed, but Linux wants the call to fail.
+	 */
+	if ((rv = check_schedperms(s_pid)) != 0)
+		return (rv);
+
+	if (s_pid == getpid()) {
+		struct sched_param cur_sp;	/* discarded; only pol is used */
+		int pol;
+
+		/*
+		 * sched_getscheduler() returns the policy itself, so only
+		 * -1 signals a failure; any other value is a valid answer.
+		 */
+		if (sched_getscheduler(s_pid) == -1)
+			return (-errno);
+
+		/*
+		 * sched_setscheduler() returns the previous scheduling policy
+		 * on success, so call pthread_getschedparam() to get the
+		 * current thread's scheduling policy and return that if the
+		 * call to pthread_setschedparam() succeeds.
+		 */
+		if ((rv = pthread_getschedparam(s_tid, &pol, &cur_sp)) != 0)
+			return (-rv);
+
+		return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
+		    ? -rv : pol);
+	}
+
+	return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
+	    ? -errno : rv);
+}
+
+/*
+ * Emulate sched_get_priority_min(2).  Linux priority limits are fixed per
+ * policy, so the answer comes straight from constants with no Solaris
+ * translation.  Returns -EINVAL for a policy Linux does not define.
+ */
+int
+lx_sched_get_priority_min(uintptr_t policy)
+{
+	if (policy == LX_SCHED_FIFO || policy == LX_SCHED_RR)
+		return (LX_SCHED_PRIORITY_MIN_RRFIFO);
+
+	if (policy == LX_SCHED_OTHER)
+		return (LX_SCHED_PRIORITY_MIN_OTHER);
+
+	return (-EINVAL);
+}
+
+/*
+ * Emulate sched_get_priority_max(2).  Linux priority limits are fixed per
+ * policy, so the answer comes straight from constants with no Solaris
+ * translation.  Returns -EINVAL for a policy Linux does not define.
+ */
+int
+lx_sched_get_priority_max(uintptr_t policy)
+{
+	if (policy == LX_SCHED_FIFO || policy == LX_SCHED_RR)
+		return (LX_SCHED_PRIORITY_MAX_RRFIFO);
+
+	if (policy == LX_SCHED_OTHER)
+		return (LX_SCHED_PRIORITY_MAX_OTHER);
+
+	return (-EINVAL);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sendfile.c b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
new file mode 100644
index 0000000000..1c4af9bf74
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
@@ -0,0 +1,97 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_sendfile() and lx_sendfile64() are just branded versions of the
+ * library calls available in the Solaris libsendfile (see sendfile(3EXT)).
+ */
+
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+
+/*
+ * Emulate the 32-bit-offset Linux sendfile(2) with a single-element
+ * sendfilev request.
+ *
+ *	p1	descriptor to write to
+ *	p2	descriptor to read from
+ *	p3	user address of the off_t holding the starting offset; updated
+ *		on return when data was transferred
+ *	p4	number of bytes to send
+ *
+ * Returns the system call's first return value on success or a negated
+ * errno.
+ *
+ * NOTE(review): Linux documents a NULL offset pointer as "use and advance
+ * the file's own offset"; here a NULL p3 with a non-zero count is handed
+ * to uucopy() like any other address -- confirm whether that case needs
+ * support.
+ */
+int
+lx_sendfile(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+	sysret_t rval;
+	off_t off = 0;
+	off_t *offp = (off_t *)p3;
+	int error;
+	struct sendfilevec sfv;
+	size_t xferred;
+	size_t sz = (size_t)p4;
+
+	if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+		return (-errno);
+
+	sfv.sfv_fd = p2;
+	sfv.sfv_flag = 0;
+	sfv.sfv_off = off;
+	sfv.sfv_len = sz;
+	error = __systemcall(&rval, SYS_sendfilev, SENDFILEV, p1, &sfv,
+	    1, &xferred);
+
+	if (error == 0 && xferred > 0) {
+		off += xferred;
+		/*
+		 * uucopy() reports failure as -1 with the reason in errno;
+		 * pick up errno so the negation below yields a real error
+		 * number rather than +1.
+		 */
+		if (uucopy(&off, offp, sizeof (off)) != 0)
+			error = errno;
+	}
+
+	return (error ? -error : (int)rval.sys_rval1);
+}
+
+/*
+ * Emulate the 64-bit-offset Linux sendfile64(2) with a single-element
+ * sendfilev request.
+ *
+ *	p1	descriptor to write to
+ *	p2	descriptor to read from
+ *	p3	user address of the off64_t holding the starting offset;
+ *		updated on return when data was transferred
+ *	p4	number of bytes to send
+ *
+ * Returns the system call's first return value on success or a negated
+ * errno.
+ *
+ * NOTE(review): Linux documents a NULL offset pointer as "use and advance
+ * the file's own offset"; here a NULL p3 with a non-zero count is handed
+ * to uucopy() like any other address -- confirm whether that case needs
+ * support.
+ */
+int
+lx_sendfile64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+	sysret_t rval;
+	off64_t off = 0;
+	off64_t *offp = (off64_t *)p3;
+	size_t sz = (size_t)p4;
+	int error;
+	struct sendfilevec64 sfv;
+	size_t xferred;
+
+	if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+		return (-errno);
+
+	sfv.sfv_fd = p2;
+	sfv.sfv_flag = 0;
+	sfv.sfv_off = off;
+	sfv.sfv_len = sz;
+	error = __systemcall(&rval, SYS_sendfilev, SENDFILEV64, p1, &sfv,
+	    1, &xferred);
+
+	if (error == 0 && xferred > 0) {
+		off += xferred;
+		/*
+		 * uucopy() reports failure as -1 with the reason in errno;
+		 * pick up errno so the negation below yields a real error
+		 * number rather than +1.
+		 */
+		if (uucopy(&off, offp, sizeof (off)) != 0)
+			error = errno;
+	}
+
+	return (error ? -error : (int)rval.sys_rval1);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c
new file mode 100644
index 0000000000..b6fea626b7
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c
@@ -0,0 +1,1714 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/segments.h>
+#include <sys/lx_types.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <thread.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <libintl.h>
+#include <ieeefp.h>
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Solaris and Linux.
+ *
+ * To support user-level signal handlers, the brand uses a double layer of
+ * indirection to process and deliver signals to branded threads.
+ *
+ * When a Linux process sends a signal using the kill(2) system call, we must
+ * translate the signal into the Solaris equivalent before handing control off
+ * to the standard signalling mechanism. When a signal is delivered to a Linux
+ * process, we translate the signal number from Solaris back to Linux.
+ * Translating signals both at generation and delivery time ensures both that
+ * Solaris signals are sent properly to Linux applications and that signals'
+ * default behavior works as expected.
+ *
+ * In a normal Solaris process, signal delivery is interposed on for any thread
+ * registering a signal handler by libc. Libc needs to do various bits of magic
+ * to provide thread-safe critical regions, so it registers its own handler,
+ * named sigacthandler(), using the sigaction(2) system call. When a signal is
+ * received, sigacthandler() is called, and after some processing, libc turns
+ * around and calls the user's signal handler via a routine named
+ * call_user_handler().
+ *
+ * Adding a Linux branded thread to the mix complicates things somewhat.
+ *
+ * First, when a thread receives a signal, it may be running with a Linux value
+ * in the x86 %gs segment register as opposed to the value Solaris threads
+ * expect; if control were passed directly to Solaris code, such as libc's
+ * sigacthandler(), that code would experience a segmentation fault the first
+ * time it tried to dereference a memory location using %gs.
+ *
+ * Second, the signal number translation referenced above must take place.
+ * Further, as was the case with Solaris libc, before the Linux signal handler
+ * is called, the value of the %gs segment register MUST be restored to the
+ * value Linux code expects.
+ *
+ * This need to translate signal numbers and manipulate the %gs register means
+ * that while with standard Solaris libc, following a signal from generation to
+ * delivery looks something like:
+ *
+ * kernel ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * user signal handler
+ *
+ * while for the brand's Linux threads, this would look like:
+ *
+ * kernel ->
+ * lx_sigacthandler() ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * lx_call_user_handler() ->
+ * Linux user signal handler
+ *
+ * The new additions are:
+ *
+ * lx_sigacthandler
+ * ================
+ * This routine is responsible for setting the %gs segment register to the
+ * value Solaris code expects, and jumping to Solaris' libc signal
+ * interposition handler, sigacthandler().
+ *
+ * lx_call_user_handler
+ * ====================
+ * This routine is responsible for translating Solaris signal numbers to
+ * their Linux equivalents, building a Linux signal stack based on the
+ * information Solaris has provided, and passing the stack to the
+ * registered Linux signal handler. It is, in effect, the Linux thread
+ * equivalent to libc's call_user_handler().
+ *
+ * Installing lx_sigacthandler() is a bit tricky, as normally libc's
+ * sigacthandler() routine is hidden from user programs. To facilitate this, a
+ * new private function was added to libc, setsigacthandler():
+ *
+ * void setsigacthandler(void (*new_handler)(int, siginfo_t *, void *),
+ * void (**old_handler)(int, siginfo_t *, void *))
+ *
+ * The routine works by replacing the address of libc's own interposition
+ * handler, which libc already tracks in a per-thread data structure, with
+ * the address passed in; the old handler's address is set in the pointer
+ * pointed to by the second argument, if it is non-NULL, mimicking the behavior
+ * of sigaction() itself. Once setsigacthandler() has been executed, all
+ * future branded threads this thread may create will automatically have the
+ * proper interposition handler installed as the result of a normal
+ * sigaction() call.
+ *
+ * Note that none of this interposition is necessary unless a Linux thread
+ * registers a user signal handler, as the default action for all signals is the
+ * same between Solaris and Linux save for one signal, SIGPWR. For this reason,
+ * the brand ALWAYS installs its own internal signal handler for SIGPWR that
+ * translates the action to the Linux default, to terminate the process.
+ * (Solaris' default action is to ignore SIGPWR.)
+ *
+ * It is also important to note that when signals are not translated, the brand
+ * relies upon code interposing upon the wait(2) system call to translate
+ * signals to their proper values for any Linux threads retrieving the status
+ * of others. So while the Solaris signal number for a particular signal is set
+ * in a process' data structures (and would be returned as the result of say,
+ * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
+ * translating the value WTERMSIG() would return from a Solaris signal number
+ * to the appropriate Linux value.
+ *
+ * The process of returning to an interrupted thread of execution from a user
+ * signal handler is entirely different between Solaris and Linux. While
+ * Solaris generally expects to set the context to the interrupted one on a
+ * normal return from a signal handler, in the normal case Linux instead calls
+ * code that calls a specific Linux system call, sigreturn(2). Thus when a
+ * Linux signal handler completes execution, instead of returning through what
+ * would in libc be a call to setcontext(2), the sigreturn(2) Linux system call
+ * is responsible for accomplishing much the same thing.
+ *
+ * This trampoline code looks something like this:
+ *
+ * pop %eax
+ * mov LX_SYS_rt_sigreturn, %eax
+ * int $0x80
+ *
+ * so when the Linux user signal handler is eventually called, the stack looks
+ * like this (in the case of an "lx_sigstack" stack):
+ *
+ * =========================================================
+ * | Pointer to actual trampoline code (in code segment) |
+ * =========================================================
+ * | Linux signal number |
+ * =========================================================
+ * | Pointer to Linux siginfo_t (or NULL) |
+ * =========================================================
+ * | Pointer to Linux ucontext_t (or NULL) |
+ * =========================================================
+ * | Linux siginfo_t |
+ * =========================================================
+ * | Linux ucontext_t |
+ * =========================================================
+ * | Linux struct _fpstate |
+ * =========================================================
+ * | Trampoline code (marker for gdb, not really executed) |
+ * =========================================================
+ *
+ * The brand takes the approach of intercepting the Linux sigreturn(2) system
+ * call in order to turn it into the return through the libc call stack that
+ * Solaris expects. This is done by the lx_sigreturn() and lx_rt_sigreturn()
+ * routines, which remove the Linux signal frame from the stack and pass the
+ * resulting stack pointer to another routine, lx_sigreturn_tolibc(), which
+ * makes libc believe the user signal handler it had called returned.
+ *
+ * (Note that the trampoline code actually lives in a proper executable segment
+ * and not on the stack, but gdb checks for the exact code sequence of the
+ * trampoline code on the stack to determine whether it is in a signal stack
+ * frame or not. Really.)
+ *
+ * When control then returns to libc's call_user_handler() routine, a
+ * setcontext(2) will be done that (in most cases) returns the thread executing
+ * the code back to the location originally interrupted by receipt of the
+ * signal.
+ */
+
+/*
+ * Two flavors of Linux signal stacks:
+ *
+ * lx_sigstack - used for "modern" signal handlers, in practice those
+ * that have the sigaction(2) flag SA_SIGINFO set
+ *
+ * lx_oldsigstack - used for legacy signal handlers, those that do not have
+ * the sigaction(2) flag SA_SIGINFO set or that were setup via
+ * the signal(2) call.
+ *
+ * NOTE: Since these structures will be placed on the stack and stack math will
+ * be done with their sizes, they must be word aligned in size (32 bits)
+ * so the stack remains word aligned per the i386 ABI.
+ *
+ * Both structures begin with the handler's return address followed by the
+ * signal number, and both end with an 8-byte buffer that holds a copy of the
+ * trampoline code.
+ */
+struct lx_sigstack {
+ void (*retaddr)(); /* address of real lx_rt_sigreturn code */
+ int sig; /* signal number */
+ lx_siginfo_t *sip; /* points to "si" if valid, NULL if not */
+ lx_ucontext_t *ucp; /* points to "uc" if valid, NULL if not */
+ lx_siginfo_t si; /* saved signal information */
+ lx_ucontext_t uc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ char trampoline[8]; /* code for trampoline to lx_rt_sigreturn() */
+};
+
+struct lx_oldsigstack {
+ void (*retaddr)(); /* address of real lx_sigreturn code */
+ int sig; /* signal number */
+ lx_sigcontext_t sigc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */
+ char trampoline[8]; /* code for trampoline to lx_sigreturn() */
+};
+
+/*
+ * libc_sigacthandler is set to the address of the libc signal interposition
+ * routine, sigacthandler().
+ */
+void (*libc_sigacthandler)(int, siginfo_t *, void*);
+
+/*
+ * The lx_sighandlers structure needs to be a global due to the semantics of
+ * clone().
+ *
+ * If CLONE_SIGHAND is set, the calling process and child share signal
+ * handlers, and if either calls sigaction(2) it should change the behavior
+ * in the other thread. Each thread does, however, have its own signal mask
+ * and set of pending signals.
+ *
+ * If CLONE_SIGHAND is not set, the child process should inherit a copy of
+ * the signal handlers at the time of the clone() but later calls to
+ * sigaction(2) should only affect the individual thread calling it.
+ *
+ * This maps perfectly to a thr_create(3C) thread semantic in the first
+ * case and a fork(2)-type semantic in the second case. By making
+ * lx_sighandlers global, we automatically get the correct behavior.
+ */
+static lx_sighandlers_t lx_sighandlers;
+
+/*
+ * stol_stack() and ltos_stack() convert between Solaris and Linux stack_t
+ * structures.
+ *
+ * These routines are needed because although the two structures have the same
+ * contents, their contents are declared in a different order, so the content
+ * of the structures cannot be copied with a simple bcopy().
+ */
+static void
+stol_stack(stack_t *fr, lx_stack_t *to)
+{
+	/* Solaris -> Linux, one member at a time. */
+	to->ss_size = fr->ss_size;
+	to->ss_flags = fr->ss_flags;
+	to->ss_sp = fr->ss_sp;
+}
+
+static void
+ltos_stack(lx_stack_t *fr, stack_t *to)
+{
+	/* Linux -> Solaris, one member at a time. */
+	to->ss_size = fr->ss_size;
+	to->ss_flags = fr->ss_flags;
+	to->ss_sp = fr->ss_sp;
+}
+
+/*
+ * Build a Solaris sigset_t from the Linux signal set at user address
+ * lx_sigsetp.  Linux signals whose ltos_signo[] entry is not positive are
+ * dropped.  Returns 0, or a negated errno if the set cannot be read.
+ */
+static int
+ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
+{
+	lx_sigset_t lx_set;
+	int lsig;
+
+	if (uucopy(lx_sigsetp, &lx_set, sizeof (lx_sigset_t)) != 0)
+		return (-errno);
+
+	(void) sigemptyset(s_sigsetp);
+
+	for (lsig = 1; lsig < LX_NSIG; lsig++) {
+		if (lx_sigismember(&lx_set, lsig)) {
+			int ssig = ltos_signo[lsig];
+
+			if (ssig > 0)
+				(void) sigaddset(s_sigsetp, ssig);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Build a Linux signal set from a Solaris sigset_t and copy it out to the
+ * user address lx_sigsetp.  Solaris signals whose stol_signo[] entry is not
+ * positive are dropped.  Returns 0, or a negated errno if the copy-out
+ * fails.
+ */
+static int
+stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
+{
+	lx_sigset_t lx_set;
+	int ssig;
+
+	bzero(&lx_set, sizeof (lx_sigset_t));
+
+	for (ssig = 1; ssig < NSIG; ssig++) {
+		if (sigismember(s_sigsetp, ssig)) {
+			int lsig = stol_signo[ssig];
+
+			if (lsig > 0)
+				lx_sigaddset(&lx_set, lsig);
+		}
+	}
+
+	if (uucopy(&lx_set, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Build a Solaris sigset_t from the old-style Linux signal mask at user
+ * address lx_osigsetp.  Returns 0, or a negated errno if the mask cannot be
+ * read.
+ */
+static int
+ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
+{
+	lx_osigset_t omask;
+	int lsig;
+
+	if (uucopy(lx_osigsetp, &omask, sizeof (lx_osigset_t)) != 0)
+		return (-errno);
+
+	(void) sigemptyset(s_sigsetp);
+
+	for (lsig = 1; lsig <= OSIGSET_NBITS; lsig++) {
+		if (omask & OSIGSET_BITSET(lsig)) {
+			int ssig = ltos_signo[lsig];
+
+			if (ssig > 0)
+				(void) sigaddset(s_sigsetp, ssig);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Build an old-style Linux signal mask from a Solaris sigset_t and copy it
+ * out to the user address lx_osigsetp.  Returns 0, or a negated errno if
+ * the copy-out fails.
+ */
+static int
+stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
+{
+	lx_osigset_t omask = 0;
+	int ssig;
+
+	/*
+	 * An lx_osigset_t has room only for signals [1 .. OSIGSET_NBITS];
+	 * a signal that is present in the Solaris set but maps outside that
+	 * range simply cannot be represented and is left out.
+	 */
+	for (ssig = 1; ssig < NSIG; ssig++) {
+		if (sigismember(s_sigsetp, ssig)) {
+			int lsig = stol_signo[ssig];
+
+			if (lsig > 0 && lsig <= OSIGSET_NBITS)
+				omask |= OSIGSET_BITSET(lsig);
+		}
+	}
+
+	if (uucopy(&omask, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Translate a Solaris siginfo si_code into its Linux counterpart.  Codes
+ * with no explicit translation are passed through unchanged.
+ */
+static int
+stol_sigcode(int si_code)
+{
+	if (si_code == SI_USER)
+		return (LX_SI_USER);
+
+	if (si_code == SI_LWP)
+		return (LX_SI_TKILL);
+
+	if (si_code == SI_QUEUE)
+		return (LX_SI_QUEUE);
+
+	if (si_code == SI_TIMER)
+		return (LX_SI_TIMER);
+
+	if (si_code == SI_ASYNCIO)
+		return (LX_SI_ASYNCIO);
+
+	if (si_code == SI_MESGQ)
+		return (LX_SI_MESGQ);
+
+	return (si_code);
+}
+
+/*
+ * Convert a Solaris siginfo_t into a Linux lx_siginfo_t and copy the result
+ * out to lx_siginfop.
+ *
+ * Which signal-specific members are filled in depends on the translated
+ * signal number: band/fd for SIGPOLL, child status and times for SIGCHLD,
+ * the faulting address for SIGILL, SIGBUS, SIGFPE and SIGSEGV, and the
+ * sender's pid/uid for everything else.
+ *
+ * Returns 0 on success.  If the signal has no Linux equivalent, returns -1
+ * with errno set to EINVAL; if the copy-out fails, returns a negated errno.
+ * NOTE(review): the two failure conventions differ -- confirm what callers
+ * expect before unifying them.
+ */
+int
+stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
+{
+	lx_siginfo_t lx_siginfo;
+
+	/* Size the clear by the local object, not via the user pointer */
+	bzero(&lx_siginfo, sizeof (lx_siginfo));
+
+	if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	lx_siginfo.lsi_code = stol_sigcode(siginfop->si_code);
+	lx_siginfo.lsi_errno = siginfop->si_errno;
+
+	switch (lx_siginfo.lsi_signo) {
+		/*
+		 * Semantics ARE defined for SIGKILL, but since
+		 * we can't catch it, we can't translate it. :-(
+		 */
+		case LX_SIGPOLL:
+			lx_siginfo.lsi_band = siginfop->si_band;
+			lx_siginfo.lsi_fd = siginfop->si_fd;
+			break;
+
+		case LX_SIGCHLD:
+			lx_siginfo.lsi_pid = siginfop->si_pid;
+			lx_siginfo.lsi_status = siginfop->si_status;
+			lx_siginfo.lsi_utime = siginfop->si_utime;
+			lx_siginfo.lsi_stime = siginfop->si_stime;
+
+			break;
+
+		/*
+		 * Fault signals carry the offending address.  SIGSEGV
+		 * belongs here too: a handler for it needs si_addr, not
+		 * the sender's pid/uid.
+		 */
+		case LX_SIGILL:
+		case LX_SIGBUS:
+		case LX_SIGFPE:
+		case LX_SIGSEGV:
+			lx_siginfo.lsi_addr = siginfop->si_addr;
+			break;
+
+		default:
+			lx_siginfo.lsi_pid = siginfop->si_pid;
+			lx_siginfo.lsi_uid =
+			    LX_UID32_TO_UID16(siginfop->si_uid);
+			break;
+	}
+
+	return ((uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
+	    ? -errno : 0);
+}
+
+/*
+ * Fill in a Linux lx_fpstate_t from a Solaris floating point register set.
+ * The leading part of the structure is copied verbatim, the 16-bit fields are
+ * masked, and the XMM registers are copied only when mxcsr is non-zero.
+ */
+static void
+stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
+{
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+ size_t copy_len;
+
+ /*
+ * The Solaris struct _fpstate and lx_fpstate_t are identical from the
+ * beginning of the structure to the lx_fpstate_t "magic" field, so
+ * just bcopy() those entries.  copy_len is the byte offset of "magic"
+ * within an lx_fpstate_t.
+ */
+ copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
+ bcopy(fpsp, lfpr, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ lfpr->cw &= 0xffff; /* x87 control word */
+ lfpr->tag &= 0xffff; /* x87 tag word */
+ lfpr->cssel &= 0xffff; /* cs selector */
+ lfpr->datasel &= 0xffff; /* ds selector */
+
+ /*
+ * Linux wants the x87 status word field to contain the value of the
+ * x87 saved exception status word.
+ */
+ lfpr->sw = lfpr->status & 0xffff; /* x87 status word */
+
+ lfpr->mxcsr = fpsp->mxcsr;
+
+ if (fpsp->mxcsr != 0) {
+ /*
+ * Linux uses the "magic" field to denote whether the XMM
+ * registers contain legal data or not. Since we can't get to
+ * %cr4 from userland to check the status of the OSFXSR bit,
+ * check the mxcsr field to see if it's 0, which it should
+ * never be on a system with the OSFXSR bit enabled.
+ */
+ lfpr->magic = LX_X86_FXSR_MAGIC;
+ bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
+ } else {
+ lfpr->magic = LX_X86_FXSR_NONE;
+ }
+}
+
+/*
+ * Fill in a Solaris floating point register set from a Linux lx_fpstate_t.
+ * The leading part of the structure is copied verbatim, the 16-bit fields are
+ * masked, and the XMM registers are copied only when the Linux "magic" field
+ * is LX_X86_FXSR_MAGIC.
+ */
+static void
+ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
+{
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+ size_t copy_len;
+
+ /*
+ * The lx_fpstate_t and Solaris struct _fpstate are identical from the
+ * beginning of the structure to the struct _fpstate "mxcsr" field, so
+ * just bcopy() those entries.  copy_len is the byte offset of "mxcsr"
+ * within a struct _fpstate.
+ *
+ * Note that we do NOT have to propagate changes the user may have made
+ * to the "status" word back to the "sw" word, unlike the way we have
+ * to deal with processing the ESP and UESP register values on return
+ * from a signal handler.
+ */
+ copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
+ bcopy(lfpr, fpsp, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ fpsp->cw &= 0xffff; /* x87 control word */
+ fpsp->sw &= 0xffff; /* x87 status word */
+ fpsp->tag &= 0xffff; /* x87 tag word */
+ fpsp->cssel &= 0xffff; /* cs selector */
+ fpsp->datasel &= 0xffff; /* ds selector */
+ fpsp->status &= 0xffff; /* saved status */
+
+ fpsp->mxcsr = lfpr->mxcsr;
+
+ if (lfpr->magic == LX_X86_FXSR_MAGIC)
+ bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
+}
+
+/*
+ * The brand needs a lx version of this because the format of the lx stack_t
+ * differs from the Solaris stack_t not really in content but in ORDER,
+ * so we can't simply pass pointers and expect things to work (sigh...)
+ */
+int
+lx_sigaltstack(uintptr_t nsp, uintptr_t osp)
+{
+	lx_stack_t lx_ss;
+	stack_t s_new, s_old;
+	stack_t *newp = NULL;
+	stack_t *oldp = NULL;
+
+	/* Pull in and validate the new alternate stack, if one was given. */
+	if (nsp) {
+		if (uucopy((void *)nsp, &lx_ss, sizeof (lx_stack_t)) != 0)
+			return (-errno);
+
+		if (!(lx_ss.ss_flags & LX_SS_DISABLE) &&
+		    lx_ss.ss_size < LX_MINSIGSTKSZ)
+			return (-ENOMEM);
+
+		s_new.ss_flags = lx_ss.ss_flags;
+		s_new.ss_size = (long)lx_ss.ss_size;
+		s_new.ss_sp = (int *)lx_ss.ss_sp;
+		newp = &s_new;
+	}
+
+	if (osp)
+		oldp = &s_old;
+
+	if (sigaltstack(newp, oldp) != 0)
+		return (-errno);
+
+	/* Report the previous alternate stack back in Linux layout. */
+	if (osp) {
+		lx_ss.ss_flags = s_old.ss_flags;
+		lx_ss.ss_size = (size_t)s_old.ss_size;
+		lx_ss.ss_sp = (void *)s_old.ss_sp;
+
+		if (uucopy(&lx_ss, (void *)osp, sizeof (lx_stack_t)) != 0)
+			return (-errno);
+	}
+
+	return (0);
+}
+
+/*
+ * The following routines are needed because sigset_ts and siginfo_ts are
+ * different in format between Linux and Solaris.
+ *
+ * Note that there are two different lx_sigset structures, lx_sigset_ts and
+ * lx_osigset_ts:
+ *
+ * + An lx_sigset_t is the equivalent of a Solaris sigset_t and supports
+ * more than 32 signals.
+ *
+ * + An lx_osigset_t is simply a uint32_t, so it by definition only supports
+ * 32 signals.
+ *
+ * When there are two versions of a routine, one prefixed with lx_rt_ and
+ * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
+ * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately,
+ * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
+ */
+/*
+ * Emulate sigpending(2) using the old-style Linux signal mask.  Returns 0
+ * or a negated errno.
+ */
+int
+lx_sigpending(uintptr_t sigpend)
+{
+	lx_osigset_t *lx_outp = (lx_osigset_t *)sigpend;
+	sigset_t pending;
+
+	if (sigpending(&pending) != 0)
+		return (-errno);
+
+	return (stol_osigset(&pending, lx_outp));
+}
+
+int
+lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
+{
+	sigset_t pending;
+
+	/* Only accept a set size that matches lx_sigset_t exactly. */
+	if ((size_t)setsize == sizeof (lx_sigset_t)) {
+		if (sigpending(&pending) != 0)
+			return (-errno);
+
+		/* Hand the native set back in lx_sigset_t format. */
+		return (stol_sigset(&pending, (lx_sigset_t *)sigpend));
+	}
+
+	return (-EINVAL);
+}
+
+/*
+ * Create a common routine to encapsulate all of the sigprocmask code,
+ * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
+ * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
+ * the setting of the "sigset_type" flag.
+ *
+ *   how         - Linux LX_SIG_BLOCK/LX_SIG_UNBLOCK/LX_SIG_SETMASK value; it
+ *                 is only examined (and translated) when l_setp is non-zero
+ *   l_setp      - address of the new Linux mask, or 0 to leave the mask alone
+ *   l_osetp     - address that receives the previous mask, or 0
+ *   sigset_type - USE_SIGSET for lx_sigset_t buffers, anything else selects
+ *                 lx_osigset_t buffers
+ *
+ * Returns 0 on success or a negative error value: -EINVAL for an unknown
+ * "how", -errno if sigprocmask(2) fails, or whatever the set conversion
+ * routines report.
+ */
+static int
+lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
+    uintptr_t sigset_type)
+{
+	int err;
+	sigset_t set, oset;
+	sigset_t *s_setp = NULL;
+	sigset_t *s_osetp;
+
+	if (l_setp) {
+		/* Translate the Linux "how" into its native counterpart. */
+		switch (how) {
+		case LX_SIG_BLOCK:
+			how = SIG_BLOCK;
+			break;
+
+		case LX_SIG_UNBLOCK:
+			how = SIG_UNBLOCK;
+			break;
+
+		case LX_SIG_SETMASK:
+			how = SIG_SETMASK;
+			break;
+
+		default:
+			return (-EINVAL);
+		}
+
+		s_setp = &set;
+
+		if (sigset_type == USE_SIGSET)
+			err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
+		else
+			err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
+
+		if (err != 0)
+			return (err);
+	}
+
+	s_osetp = (l_osetp ? &oset : NULL);
+
+	/*
+	 * In a multithreaded environment, a call to sigprocmask(2) should
+	 * only affect the current thread's signal mask so we don't need to
+	 * explicitly call thr_sigsetmask(3C) here.
+	 */
+	if (sigprocmask(how, s_setp, s_osetp) != 0)
+		return (-errno);
+
+	if (l_osetp) {
+		if (sigset_type == USE_SIGSET)
+			err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
+		else
+			err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
+
+		if (err != 0) {
+			/*
+			 * Encountered a fault while writing to the old signal
+			 * mask buffer, so unwind the signal mask change made
+			 * above.
+			 *
+			 * The saved mask has to be reinstated wholesale, so
+			 * SIG_SETMASK is required here no matter what "how"
+			 * was; re-using SIG_BLOCK or SIG_UNBLOCK would apply
+			 * the old mask as a delta instead of restoring it.
+			 */
+			(void) sigprocmask(SIG_SETMASK, s_osetp,
+			    (sigset_t *)NULL);
+			return (err);
+		}
+	}
+
+	return (0);
+}
+
+int
+lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
+{
+	int rc;
+
+	/* The non-rt entry point always deals in lx_osigset_t masks. */
+	rc = lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET);
+
+	return (rc);
+}
+
+int
+lx_sgetmask(void)
+{
+	lx_osigset_t curmask;
+	int rc;
+
+	/*
+	 * No new mask is supplied, so the common routine never examines the
+	 * "how" argument; only the current mask is fetched.
+	 */
+	rc = lx_sigprocmask_common(SIG_SETMASK, NULL, (uintptr_t)&curmask,
+	    USE_OSIGSET);
+
+	if (rc != 0)
+		return (-errno);
+
+	return ((int)curmask);
+}
+
+/*
+ * Replace the calling thread's signal mask with the old-style 32-bit mask
+ * passed in "sigmask".
+ *
+ * Returns the previous mask on success, or the negative error value reported
+ * by lx_sigprocmask_common() on failure.
+ */
+int
+lx_ssetmask(uintptr_t sigmask)
+{
+	lx_osigset_t newmask, oldmask;
+	int rc;
+
+	newmask = (lx_osigset_t)sigmask;
+
+	/*
+	 * lx_sigprocmask_common() switches on the Linux LX_SIG_* constants
+	 * whenever a new mask is supplied, so the Linux constant (not the
+	 * native SIG_SETMASK) must be passed here.
+	 */
+	rc = lx_sigprocmask_common(LX_SIG_SETMASK, (uintptr_t)&newmask,
+	    (uintptr_t)&oldmask, USE_OSIGSET);
+
+	/*
+	 * The helper already returns a negative error value; errno is not
+	 * guaranteed to describe the failure (e.g. its -EINVAL path).
+	 */
+	return ((rc != 0) ? rc : (int)oldmask);
+}
+
+int
+lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
+    uintptr_t setsize)
+{
+	/* The rt flavour works on lx_sigset_t masks of exactly that size. */
+	if ((size_t)setsize == sizeof (lx_sigset_t))
+		return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
+
+	return (-EINVAL);
+}
+
+int
+lx_sigsuspend(uintptr_t set)
+{
+	sigset_t waitmask;
+
+	/* Convert the old-style Linux mask to a native sigset_t. */
+	if (ltos_osigset((lx_osigset_t *)set, &waitmask) != 0)
+		return (-errno);
+
+	if (sigsuspend(&waitmask) == -1)
+		return (-errno);
+
+	return (0);
+}
+
+int
+lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
+{
+	sigset_t waitmask;
+
+	/* Reject any set size other than that of an lx_sigset_t. */
+	if ((size_t)setsize != sizeof (lx_sigset_t))
+		return (-EINVAL);
+
+	/* Convert the Linux mask to a native sigset_t. */
+	if (ltos_sigset((lx_sigset_t *)set, &waitmask) != 0)
+		return (-errno);
+
+	if (sigsuspend(&waitmask) == -1)
+		return (-errno);
+
+	return (0);
+}
+
+int
+lx_sigwaitinfo(uintptr_t set, uintptr_t sinfo)
+{
+	lx_siginfo_t *lx_infop = (lx_siginfo_t *)sinfo;
+	sigset_t waitset;
+	siginfo_t info;
+	int signo;
+
+	if (ltos_osigset((lx_osigset_t *)set, &waitset) != 0)
+		return (-errno);
+
+	/* No siginfo buffer supplied: just wait and report the result. */
+	if (lx_infop == NULL) {
+		signo = sigwaitinfo(&waitset, NULL);
+		return ((signo == -1) ? -errno : signo);
+	}
+
+	signo = sigwaitinfo(&waitset, &info);
+	if (signo == -1)
+		return (-errno);
+
+	/* Translate the native siginfo into the caller's Linux buffer. */
+	if (stol_siginfo(&info, lx_infop) != 0)
+		return (-errno);
+
+	return (signo);
+}
+
+int
+lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
+{
+	lx_siginfo_t *lx_infop = (lx_siginfo_t *)sinfo;
+	sigset_t waitset;
+	siginfo_t info;
+	int signo;
+
+	/* Reject any set size other than that of an lx_sigset_t. */
+	if ((size_t)setsize != sizeof (lx_sigset_t))
+		return (-EINVAL);
+
+	if (ltos_sigset((lx_sigset_t *)set, &waitset) != 0)
+		return (-errno);
+
+	/* No siginfo buffer supplied: just wait and report the result. */
+	if (lx_infop == NULL) {
+		signo = sigwaitinfo(&waitset, NULL);
+		return ((signo == -1) ? -errno : signo);
+	}
+
+	signo = sigwaitinfo(&waitset, &info);
+	if (signo == -1)
+		return (-errno);
+
+	/* Translate the native siginfo into the caller's Linux buffer. */
+	if (stol_siginfo(&info, lx_infop) != 0)
+		return (-errno);
+
+	return (signo);
+}
+
+int
+lx_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp)
+{
+	lx_siginfo_t *lx_infop = (lx_siginfo_t *)sinfo;
+	struct timespec *timeout = (struct timespec *)toutp;
+	sigset_t waitset;
+	siginfo_t info;
+	int signo;
+
+	if (ltos_osigset((lx_osigset_t *)set, &waitset) != 0)
+		return (-errno);
+
+	/* No siginfo buffer supplied: just wait and report the result. */
+	if (lx_infop == NULL) {
+		signo = sigtimedwait(&waitset, NULL, timeout);
+		return ((signo == -1) ? -errno : signo);
+	}
+
+	signo = sigtimedwait(&waitset, &info, timeout);
+	if (signo == -1)
+		return (-errno);
+
+	/* Translate the native siginfo into the caller's Linux buffer. */
+	if (stol_siginfo(&info, lx_infop) != 0)
+		return (-errno);
+
+	return (signo);
+}
+
+int
+lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
+    uintptr_t setsize)
+{
+	lx_siginfo_t *lx_infop = (lx_siginfo_t *)sinfo;
+	struct timespec *timeout = (struct timespec *)toutp;
+	sigset_t waitset;
+	siginfo_t info;
+	int signo;
+
+	/* Reject any set size other than that of an lx_sigset_t. */
+	if ((size_t)setsize != sizeof (lx_sigset_t))
+		return (-EINVAL);
+
+	if (ltos_sigset((lx_sigset_t *)set, &waitset) != 0)
+		return (-errno);
+
+	/* No siginfo buffer supplied: just wait and report the result. */
+	if (lx_infop == NULL) {
+		signo = sigtimedwait(&waitset, NULL, timeout);
+		return ((signo == -1) ? -errno : signo);
+	}
+
+	signo = sigtimedwait(&waitset, &info, timeout);
+	if (signo == -1)
+		return (-errno);
+
+	/* Translate the native siginfo into the caller's Linux buffer. */
+	if (stol_siginfo(&info, lx_infop) != 0)
+		return (-errno);
+
+	return (signo);
+}
+
+/*
+ * Intercept the Linux sigreturn() syscall to turn it into the return through
+ * the libc call stack that Solaris expects.
+ *
+ * When control returns to libc's call_user_handler() routine, a setcontext(2)
+ * will be done that returns thread execution to the point originally
+ * interrupted by receipt of the signal.
+ *
+ * The routine walks back over the struct lx_oldsigstack found at the saved
+ * stack pointer, copies the (possibly handler-modified) general registers,
+ * FP state and signal mask from it into the Solaris ucontext_t, and then
+ * hands the resulting stack pointer to lx_sigreturn_tolibc(). The trailing
+ * return statement is marked NOTREACHED.
+ */
+int
+lx_sigreturn(void)
+{
+ struct lx_oldsigstack *lx_ossp;
+ lx_sigset_t lx_sigset;
+ lx_regs_t *rp;
+ ucontext_t *ucp;
+ uintptr_t sp;
+
+ rp = lx_syscall_regs();
+
+ /*
+ * NOTE: The sp saved in the context is eight bytes off of where we
+ * need it to be.
+ */
+ sp = (uintptr_t)rp->lxr_esp - 8;
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_oldsigstack that lx_build_old_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ossp = (struct lx_oldsigstack *)sp;
+ sp += sizeof (struct lx_oldsigstack);
+
+ /*
+ * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it
+ * creates the struct lx_oldsigstack.
+ *
+ * If we don't find it here, the stack's been corrupted and we need to
+ * kill ourselves.
+ */
+ if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
+ lx_err_fatal(gettext(
+ "sp @ 0x%p, expected 0x%x, found 0x%x!"),
+ sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
+
+ sp += sizeof (uint32_t);
+
+ /*
+ * For signal mask handling to be done properly, this call needs to
+ * return to the libc routine that originally called the signal handler
+ * rather than directly set the context back to the place the signal
+ * interrupted execution as the original Linux code would do.
+ *
+ * Here *sp points to the Solaris ucontext_t, so we need to copy
+ * machine registers the Linux signal handler may have modified
+ * back to the Solaris version.
+ */
+ ucp = (ucontext_t *)(*(uint32_t *)sp);
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * changes made to uc_mcontext.gregs[ESP] will be reflected when the
+ * interrupted thread resumes execution after the signal handler. To
+ * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
+ * match uc_mcontext.gregs[ESP] as Solaris will restore the UESP
+ * value to ESP.
+ */
+ lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
+ bcopy(&lx_ossp->sigc, &ucp->uc_mcontext, sizeof (gregset_t));
+
+ /* copy back FP regs if present */
+ if (lx_ossp->sigc.sc_fpstate != NULL)
+ ltos_fpstate(&lx_ossp->fpstate, &ucp->uc_mcontext.fpregs);
+
+ /*
+ * convert Linux signal mask back to its Solaris equivalent; the old
+ * style frame carries the first mask word in sigc.sc_mask and the
+ * second in sig_extra, so reassemble an lx_sigset_t from the two.
+ */
+ bzero(&lx_sigset, sizeof (lx_sigset_t));
+ lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
+ lx_sigset.__bits[1] = lx_ossp->sig_extra;
+ (void) ltos_sigset(&lx_sigset, &ucp->uc_sigmask);
+
+ /*
+ * At this point sp contains the value of the stack pointer when
+ * lx_call_user_handler() was called.
+ *
+ * Pop one more value off the stack and pass the new sp to
+ * lx_sigreturn_tolibc(), which will in turn manipulate the x86
+ * registers to make it appear to libc's call_user_handler() as if the
+ * handler it had called returned.
+ */
+ sp += sizeof (uint32_t);
+ lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
+ lx_sigreturn_tolibc(sp);
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+/*
+ * Counterpart of lx_sigreturn() for frames built by lx_build_signal_frame().
+ *
+ * Walks back over the struct lx_sigstack found at the saved stack pointer,
+ * and, when the frame's lx_ucontext_t pointer is non-NULL, copies the
+ * general registers, FP state, signal mask and alternate stack description
+ * from it into the Solaris ucontext_t before handing the resulting stack
+ * pointer to lx_sigreturn_tolibc(). The trailing return statement is marked
+ * NOTREACHED.
+ */
+int
+lx_rt_sigreturn(void)
+{
+ struct lx_sigstack *lx_ssp;
+ lx_regs_t *rp;
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp;
+ uintptr_t sp;
+
+ rp = lx_syscall_regs();
+
+ /*
+ * NOTE: Because of some silly compatibility measures done in the
+ * signal trampoline code to make sure it uses the _exact same_
+ * instruction sequence Linux does, we have to manually "pop"
+ * one extra four byte instruction off the stack here before
+ * passing the stack address to the syscall because the
+ * trampoline code isn't allowed to do it.
+ *
+ * No, I'm not kidding.
+ *
+ * The sp saved in the context is eight bytes off of where we
+ * need it to be, so the need to pop the extra four byte
+ * instruction means we need to subtract a net four bytes from
+ * the sp before "popping" the struct lx_sigstack off the stack.
+ * This will yield the value the stack pointer had before
+ * lx_sigdeliver() created the stack frame for the Linux signal
+ * handler.
+ */
+ sp = (uintptr_t)rp->lxr_esp - 4;
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_sigstack that lx_build_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ssp = (struct lx_sigstack *)sp;
+ sp += sizeof (struct lx_sigstack);
+
+ /*
+ * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it
+ * creates the struct lx_sigstack (and possibly struct lx_fpstate_t).
+ *
+ * If we don't find it here, the stack's been corrupted and we need to
+ * kill ourselves.
+ */
+ if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
+ lx_err_fatal(gettext("sp @ 0x%p, expected 0x%x, found 0x%x!"),
+ sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
+
+ sp += sizeof (uint32_t);
+
+ /*
+ * For signal mask handling to be done properly, this call needs to
+ * return to the libc routine that originally called the signal handler
+ * rather than directly set the context back to the place the signal
+ * interrupted execution as the original Linux code would do.
+ *
+ * Here *sp points to the Solaris ucontext_t, so we need to copy
+ * machine registers the Linux signal handler may have modified
+ * back to the Solaris version.
+ */
+ ucp = (ucontext_t *)(*(uint32_t *)sp);
+
+ /* the frame's own pointer to its Linux ucontext; skipped if NULL */
+ lx_ucp = lx_ssp->ucp;
+
+ if (lx_ucp != NULL) {
+ /*
+ * General registers copy across as-is, except Linux expects
+ * that changes made to uc_mcontext.gregs[ESP] will be reflected
+ * when the interrupted thread resumes execution after the
+ * signal handler. To emulate this behavior, we must modify
+ * uc_mcontext.gregs[UESP] to match uc_mcontext.gregs[ESP] as
+ * Solaris will restore the UESP value to ESP.
+ */
+ lx_ucp->uc_sigcontext.sc_esp_at_signal =
+ lx_ucp->uc_sigcontext.sc_esp;
+ bcopy(&lx_ucp->uc_sigcontext, &ucp->uc_mcontext.gregs,
+ sizeof (gregset_t));
+
+ /* copy back FP regs if present */
+ if (lx_ucp->uc_sigcontext.sc_fpstate != NULL)
+ ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
+ &ucp->uc_mcontext.fpregs);
+
+ /*
+ * Convert the Linux signal mask and stack back to their
+ * Solaris equivalents.
+ */
+ (void) ltos_sigset(&lx_ucp->uc_sigmask, &ucp->uc_sigmask);
+ ltos_stack(&lx_ucp->uc_stack, &ucp->uc_stack);
+ }
+
+ /*
+ * At this point sp contains the value of the stack pointer when
+ * lx_call_user_handler() was called.
+ *
+ * Pop one more value off the stack and pass the new sp to
+ * lx_sigreturn_tolibc(), which will in turn manipulate the x86
+ * registers to make it appear to libc's call_user_handler() as if the
+ * handler it had called returned.
+ */
+ sp += sizeof (uint32_t);
+ lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
+ lx_sigreturn_tolibc(sp);
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+/*
+ * Build signal frame for processing for "old" (legacy) Linux signals
+ *
+ *   lx_sig - Linux signal number; stored in the frame and used to look up
+ *            the registered lx_sigaction
+ *   sip    - Solaris siginfo_t for the signal, may be NULL
+ *   p      - Solaris ucontext_t describing the interrupted context
+ *   sp     - address at which the struct lx_oldsigstack is filled in
+ */
+static void
+lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
+{
+ extern void lx_sigreturn_tramp();
+
+ lx_sigset_t lx_sigset;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigaction *lxsap;
+ struct lx_oldsigstack *lx_ossp = sp;
+
+ lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ lx_ossp->sig = lx_sig;
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ /*
+ * Return through the application-supplied restorer when one was
+ * registered with LX_SA_RESTORER, otherwise through our trampoline.
+ */
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ lx_ossp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
+ } else {
+ lx_ossp->retaddr = lx_sigreturn_tramp;
+ lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
+ }
+
+ lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
+
+ /* convert Solaris signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
+ lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
+ lx_ossp->sig_extra = lx_sigset.__bits[1];
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
+ lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
+
+ /*
+ * cr2 contains the faulting address, and Linux only sets cr2 for a
+ * segmentation fault.
+ */
+ lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0);
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
+ lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
+ } else {
+ lx_ossp->sigc.sc_fpstate = NULL;
+ }
+
+ /*
+ * Believe it or not, gdb wants to SEE the trampoline code on the
+ * bottom of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is no longer actually
+ * called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
+ sizeof (lx_ossp->trampoline));
+}
+
+/*
+ * Build signal frame for processing for modern Linux signals
+ *
+ *   lx_sig - Linux signal number; stored in the frame and used to look up
+ *            the registered lx_sigaction
+ *   sip    - Solaris siginfo_t for the signal, may be NULL
+ *   p      - Solaris ucontext_t describing the interrupted context
+ *   sp     - address at which the struct lx_sigstack is filled in
+ */
+static void
+lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
+{
+ extern void lx_rt_sigreturn_tramp();
+
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigstack *lx_ssp = sp;
+ struct lx_sigaction *lxsap;
+
+ lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ /* the Linux ucontext lives inside the frame; point the frame at it */
+ lx_ucp = &lx_ssp->uc;
+ lx_ssp->ucp = lx_ucp;
+ lx_ssp->sig = lx_sig;
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ /*
+ * Return through the application-supplied restorer when one was
+ * registered with LX_SA_RESTORER, otherwise through our trampoline.
+ */
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ lx_ssp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
+ } else {
+ lx_ssp->retaddr = lx_rt_sigreturn_tramp;
+ lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
+ }
+
+ /* Linux has these fields but always clears them to 0 */
+ lx_ucp->uc_flags = 0;
+ lx_ucp->uc_link = NULL;
+
+ /* convert Solaris signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
+ stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
+ lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
+
+ /*
+ * cr2 contains the faulting address, which Linux only sets for a
+ * segmentation fault.
+ */
+ lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0;
+
+ /*
+ * Point the lx_siginfo_t pointer to the signal stack's lx_siginfo_t
+ * if there was a Solaris siginfo_t to convert, otherwise set it to
+ * NULL.
+ */
+ if ((sip) && (stol_siginfo(sip, &lx_ssp->si) == 0))
+ lx_ssp->sip = &lx_ssp->si;
+ else
+ lx_ssp->sip = NULL;
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ /*
+ * Copy FP regs to the appropriate place in the lx_sigstack
+ * structure.
+ */
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
+ lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
+ } else
+ lx_ucp->uc_sigcontext.sc_fpstate = NULL;
+
+ /*
+ * Believe it or not, gdb wants to SEE the trampoline code on the
+ * bottom of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is no longer actually
+ * called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
+ sizeof (lx_ssp->trampoline));
+}
+
+/*
+ * This is the second level interposition handler for Linux signals.
+ *
+ * It is the routine installed as the native handler by
+ * lx_sigaction_common() and, for SIGPWR, by lx_siginit(). It translates the
+ * Solaris signal number to its Linux equivalent, looks up the registered
+ * Linux disposition, selects the frame layout (LX_SA_SIGINFO picks the
+ * struct lx_sigstack builder, otherwise the struct lx_oldsigstack one) and
+ * passes everything to lx_sigdeliver().
+ *
+ *   sig - Solaris signal number
+ *   sip - Solaris siginfo_t for the signal
+ *   p   - Solaris ucontext_t for the interrupted context
+ */
+static void
+lx_call_user_handler(int sig, siginfo_t *sip, void *p)
+{
+ void (*user_handler)();
+ void (*stk_builder)();
+
+ lx_tsd_t *lx_tsd;
+ struct lx_sigaction *lxsap;
+ ucontext_t *ucp = (ucontext_t *)p;
+ uintptr_t gs;
+ size_t stksize;
+ int err, lx_sig;
+
+ /*
+ * If Solaris signal has no Linux equivalent, effectively
+ * ignore it.
+ */
+ if ((lx_sig = stol_signo[sig]) == -1) {
+ lx_debug("caught solaris signal %d, no Linux equivalent", sig);
+ return;
+ }
+
+ lx_debug("interpose caught solaris signal %d, translating to Linux "
+ "signal %d", sig, lx_sig);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
+ /* Linux SIG_DFL for SIGPWR is to terminate */
+ exit(LX_SIGPWR | 0x80);
+ }
+
+ /*
+ * This handler is only interposed for real handler functions, so a
+ * SIG_DFL or SIG_IGN entry here is treated as fatal.
+ */
+ if ((lxsap->lxsa_handler == SIG_DFL) ||
+ (lxsap->lxsa_handler == SIG_IGN))
+ lx_err_fatal(gettext("%s set to %s? How?!?!?"),
+ "lxsa_handler",
+ ((lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"),
+ lxsap->lxsa_handler);
+
+ if ((err = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
+ lx_err_fatal(gettext(
+ "%s: unable to read thread-specific data: %s"),
+ "lx_call_user_handler", strerror(err));
+
+ assert(lx_tsd != 0);
+
+ gs = lx_tsd->lxtsd_gs & 0xffff; /* gs is only 16 bits */
+
+ /*
+ * Any zero %gs value should be caught when a save is attempted in
+ * lx_emulate(), but this extra check will catch any zero values due to
+ * bugs in the library.
+ */
+ assert(gs != 0);
+
+ /* pick the frame size and builder matching the handler's style */
+ if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
+ stksize = sizeof (struct lx_sigstack);
+ stk_builder = lx_build_signal_frame;
+ } else {
+ stksize = sizeof (struct lx_oldsigstack);
+ stk_builder = lx_build_old_signal_frame;
+ }
+
+ /* capture the handler before a possible LX_SA_RESETHAND reset below */
+ user_handler = lxsap->lxsa_handler;
+
+ lx_debug("delivering %d (lx %d) to handler at 0x%p with gs 0x%x", sig,
+ lx_sig, lxsap->lxsa_handler, gs);
+
+ if (lxsap->lxsa_flags & LX_SA_RESETHAND)
+ lxsap->lxsa_handler = SIG_DFL;
+
+ /*
+ * lx_sigdeliver() doesn't return, so it relies on the Linux
+ * signal handlers to clean up the stack, reset the current
+ * signal mask and return to the code interrupted by the signal.
+ */
+ lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, gs);
+}
+
+/*
+ * Common routine to modify sigaction characteristics of a thread.
+ *
+ * We shouldn't need any special locking code here as we actually use
+ * libc's sigaction() to do all the real work, so its thread locking should
+ * take care of any issues for us.
+ *
+ *   lx_sig - Linux signal number; must be in the range 1 .. LX_NSIG - 1
+ *   lxsp   - new Linux disposition to install, or NULL
+ *   olxsp  - buffer that receives the currently recorded Linux disposition,
+ *            or NULL
+ *
+ * Returns 0 on success, -EINVAL for an out of range signal number, or the
+ * negated errno of whichever copy, mask or sigaction() operation failed.
+ * Note that the old disposition is copied out before the new one is
+ * examined.
+ */
+static int
+lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
+ struct lx_sigaction *olxsp)
+{
+ struct lx_sigaction *lxsap;
+ struct sigaction sa;
+
+ if (lx_sig <= 0 || lx_sig >= LX_NSIG)
+ return (-EINVAL);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
+
+ if ((olxsp != NULL) &&
+ ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
+ return (-errno);
+
+ if (lxsp != NULL) {
+ int err, sig;
+ struct lx_sigaction lxsa;
+ sigset_t new_set, oset;
+
+ if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
+ return (-errno);
+
+ if ((sig = ltos_signo[lx_sig]) != -1) {
+ /*
+ * Block this signal while messing with its disposition
+ */
+ (void) sigemptyset(&new_set);
+ (void) sigaddset(&new_set, sig);
+
+ if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
+ err = errno;
+ lx_debug("unable to block signal %d: %s", sig,
+ strerror(err));
+ return (-err);
+ }
+
+ /*
+ * We don't really need the old signal disposition at
+ * this point, but this weeds out signals that would
+ * cause sigaction() to return an error before we change
+ * anything other than the current signal mask.
+ */
+ if (sigaction(sig, NULL, &sa) < 0) {
+ err = errno;
+ lx_debug("sigaction() to get old "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ return (-err);
+ }
+
+ if ((lxsa.lxsa_handler != SIG_DFL) &&
+ (lxsa.lxsa_handler != SIG_IGN)) {
+ sa.sa_handler = lx_call_user_handler;
+
+ /*
+ * The interposition signal handler needs the
+ * information provided via the SA_SIGINFO flag.
+ */
+ sa.sa_flags = SA_SIGINFO;
+
+ /* translate the Linux flags we support */
+ if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
+ sa.sa_flags |= SA_NOCLDSTOP;
+ if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
+ sa.sa_flags |= SA_NOCLDWAIT;
+ if (lxsa.lxsa_flags & LX_SA_ONSTACK)
+ sa.sa_flags |= SA_ONSTACK;
+ if (lxsa.lxsa_flags & LX_SA_RESTART)
+ sa.sa_flags |= SA_RESTART;
+ if (lxsa.lxsa_flags & LX_SA_NODEFER)
+ sa.sa_flags |= SA_NODEFER;
+
+ /*
+ * Can't use RESETHAND with SIGPWR due to
+ * different default actions between Linux
+ * and Solaris.
+ */
+ if ((sig != SIGPWR) &&
+ (lxsa.lxsa_flags & LX_SA_RESETHAND))
+ sa.sa_flags |= SA_RESETHAND;
+
+ if (ltos_sigset(&lxsa.lxsa_mask,
+ &sa.sa_mask) != 0) {
+ err = errno;
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+
+ lx_debug("interposing handler @ 0x%p for "
+ "signal %d (lx %d), flags 0x%x",
+ lxsa.lxsa_handler, sig, lx_sig,
+ lxsa.lxsa_flags);
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction() to set new "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ } else if ((sig != SIGPWR) ||
+ ((sig == SIGPWR) &&
+ (lxsa.lxsa_handler == SIG_IGN))) {
+ /*
+ * There's no need to interpose for SIG_DFL or
+ * SIG_IGN so just call libc's sigaction(), but
+ * don't allow SIG_DFL for SIGPWR due to
+ * differing default actions between Linux and
+ * Solaris.
+ *
+ * Get the previous disposition first so things
+ * like sa_mask and sa_flags are preserved over
+ * a transition to SIG_DFL or SIG_IGN, which is
+ * what Linux expects.
+ *
+ * (That previous disposition is what the
+ * sigaction(sig, NULL, &sa) query above left in
+ * sa; only the handler is replaced here.)
+ */
+
+ sa.sa_handler = lxsa.lxsa_handler;
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction(%d, %s) failed: %s",
+ sig, ((sa.sa_handler == SIG_DFL) ?
+ "SIG_DFL" : "SIG_IGN"),
+ strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ }
+ } else {
+ lx_debug("Linux signal with no kill support "
+ "specified: %d", lx_sig);
+ }
+
+ /*
+ * Save the new disposition for the signal in the global
+ * lx_sighandlers structure.
+ */
+ bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
+
+ /*
+ * Reset the signal mask to what we came in with if
+ * we were modifying a kill-supported signal.
+ */
+ if (sig != -1)
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ }
+
+ return (0);
+}
+
+/*
+ * Old-style sigaction(2): the caller's structures are struct lx_osigaction,
+ * whose mask is an lx_osigset_t, so they are converted to and from the
+ * struct lx_sigaction form that lx_sigaction_common() works with.
+ *
+ *   lx_sig - Linux signal number
+ *   actp   - address of the new struct lx_osigaction, or 0
+ *   oactp  - address that receives the old struct lx_osigaction, or 0
+ *
+ * Returns 0 on success, the negated errno if either caller buffer cannot be
+ * copied, or the error returned by lx_sigaction_common().
+ */
+int
+lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
+{
+	int val;
+	struct lx_osigaction ouser;
+	struct lx_sigaction sa, osa;
+	struct lx_sigaction *sap, *osap;
+
+	sap = (actp ? &sa : NULL);
+	osap = (oactp ? &osa : NULL);
+
+	/*
+	 * If we have a source pointer, convert source lxsa_mask from
+	 * lx_osigset_t to lx_sigset_t format.
+	 *
+	 * The caller's structure is fetched with uucopy() rather than being
+	 * dereferenced in place so that a bad address yields an error return
+	 * instead of a fault inside the emulation library.
+	 */
+	if (sap != NULL) {
+		if (uucopy((void *)actp, &ouser, sizeof (ouser)) != 0)
+			return (-errno);
+
+		sap->lxsa_handler = ouser.lxsa_handler;
+
+		bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));
+
+		for (val = 1; val <= OSIGSET_NBITS; val++)
+			if (ouser.lxsa_mask & OSIGSET_BITSET(val))
+				(void) lx_sigaddset(&sap->lxsa_mask, val);
+
+		sap->lxsa_flags = ouser.lxsa_flags;
+		sap->lxsa_restorer = ouser.lxsa_restorer;
+	}
+
+	if ((val = lx_sigaction_common(lx_sig, sap, osap)))
+		return (val);
+
+	/*
+	 * If we have a save pointer, convert the old lxsa_mask from
+	 * lx_sigset_t to lx_osigset_t format.
+	 *
+	 * The result is assembled locally and then written out with uucopy()
+	 * for the same reason as above.
+	 */
+	if (osap != NULL) {
+		bzero(&ouser, sizeof (ouser));
+
+		ouser.lxsa_handler = osap->lxsa_handler;
+
+		for (val = 1; val <= OSIGSET_NBITS; val++)
+			if (lx_sigismember(&osap->lxsa_mask, val))
+				ouser.lxsa_mask |= OSIGSET_BITSET(val);
+
+		ouser.lxsa_flags = osap->lxsa_flags;
+		ouser.lxsa_restorer = osap->lxsa_restorer;
+
+		if (uucopy(&ouser, (void *)oactp, sizeof (ouser)) != 0)
+			return (-errno);
+	}
+
+	return (0);
+}
+
+int
+lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
+    uintptr_t setsize)
+{
+	struct lx_sigaction *newp = (struct lx_sigaction *)actp;
+	struct lx_sigaction *oldp = (struct lx_sigaction *)oactp;
+
+	/*
+	 * Unlike the old call, rt_sigaction validates the size of the
+	 * signal set it is handed.
+	 */
+	if ((size_t)setsize == sizeof (lx_sigset_t))
+		return (lx_sigaction_common(lx_sig, newp, oldp));
+
+	return (-EINVAL);
+}
+
+/*
+ * Convert signal syscall to a call to the lx_sigaction() syscall
+ *
+ *   lx_sig  - Linux signal number
+ *   handler - new handler (or SIG_DFL / SIG_IGN) as passed by the caller
+ *
+ * Returns the previously registered handler on success, or the negative
+ * error value from lx_sigaction() on failure.
+ */
+int
+lx_signal(uintptr_t lx_sig, uintptr_t handler)
+{
+	struct lx_osigaction act;
+	struct lx_osigaction oact;
+	int rc;
+
+	/*
+	 * Use sigaction to mimic SYSV signal() behavior; glibc will
+	 * actually call sigaction(2) itself, so we're really reaching
+	 * back for signal(2) semantics here.
+	 *
+	 * lx_sigaction() interprets both buffers as struct lx_osigaction and
+	 * their flags as LX_SA_* values, so the request must be built in
+	 * that form rather than as a native struct sigaction with native
+	 * SA_* flags. Zeroing the structure leaves an empty mask and no
+	 * restorer.
+	 */
+	bzero(&act, sizeof (act));
+	act.lxsa_handler = (void (*)())handler;
+	act.lxsa_flags = LX_SA_RESETHAND | LX_SA_NODEFER;
+
+	rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
+	return ((rc == 0) ? ((int)oact.lxsa_handler) : rc);
+}
+
+int
+lx_tgkill(uintptr_t tgid, uintptr_t pid, uintptr_t sig)
+{
+	pid_t group = (pid_t)tgid;
+	pid_t target = (pid_t)pid;
+
+	/* Both ids must be strictly positive. */
+	if ((group <= 0) || (target <= 0))
+		return (-EINVAL);
+
+	if (tgid == pid) {
+		/*
+		 * The trailing NULLs pad the lx_tkill() call out to the
+		 * prototype that IN_KERNEL_SYSCALL generates for it in
+		 * lx_brand.c.
+		 */
+		return (lx_tkill(pid, sig, NULL, NULL, NULL, NULL));
+	}
+
+	/* Signalling a thread other than the group leader isn't supported. */
+	lx_unsupported(gettext(
+	    "BrandZ tgkill(2) does not support gid != pid\n"));
+	return (-ENOTSUP);
+}
+
+/*
+ * This C routine to save the passed %gs value into the thread-specific save
+ * area is called by the assembly routine lx_sigacthandler.
+ */
+void
+lx_sigsavegs(uintptr_t signalled_gs)
+{
+	lx_tsd_t *tsdp;
+	int rc;
+
+	/* %gs is a 16-bit register, so discard everything above that. */
+	signalled_gs = signalled_gs & 0xffff;
+
+	/*
+	 * A %gs of zero is technically legal provided the application never
+	 * dereferences memory through it, but Solaris treats a zero %gs
+	 * specially in _update_sregs(): a 32-bit user process running with a
+	 * zero %gs on a 64-bit kernel has the hidden %gs base register
+	 * stomped on at return from a system call. The wrong base address
+	 * then stays in place until %gs is next reloaded (which forces the
+	 * base to be re-read from the appropriate descriptor table), only
+	 * for the kernel to stomp on THAT base address at the next system
+	 * call return, ending in an application segmentation fault.
+	 *
+	 * Refuse to save a zero %gs here so that any Linux process taking a
+	 * signal with a zero %gs installed is caught.
+	 */
+	assert(signalled_gs != 0);
+
+	/* Nothing is recorded when the value is LWPGS_SEL. */
+	if (signalled_gs == LWPGS_SEL)
+		return;
+
+	rc = thr_getspecific(lx_tsd_key, (void **)&tsdp);
+	if (rc != 0)
+		lx_err_fatal(gettext(
+		    "%s: unable to read thread-specific data: %s"),
+		    "sigsavegs", strerror(rc));
+
+	assert(tsdp != 0);
+
+	tsdp->lxtsd_gs = signalled_gs;
+
+	lx_debug("lx_sigsavegs(): gsp 0x%p, saved gs: 0x%x\n",
+	    tsdp, signalled_gs);
+}
+
+/*
+ * One-time setup of the signal interposition machinery.
+ *
+ * With all signals blocked, this ignores native signals that have no Linux
+ * analog, installs lx_sigacthandler() via setsigacthandler(), records
+ * already-ignored signals in lx_sighandlers, interposes on SIGPWR, and
+ * disables libc's setcontext enforcement before restoring the original
+ * signal mask.
+ *
+ * Always returns 0; failures are reported through lx_err_fatal().
+ */
+int
+lx_siginit(void)
+{
+	extern void set_setcontext_enforcement(int);
+	extern void lx_sigacthandler(int, siginfo_t *, void *);
+
+	struct sigaction sa;
+	sigset_t new_set, oset;
+	int lx_sig, sig;
+
+	/*
+	 * Block all signals possible while setting up the signal
+	 * interposition mechanism.
+	 */
+	(void) sigfillset(&new_set);
+
+	if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
+		lx_err_fatal(gettext("unable to block signals while setting up "
+		    "imposition mechanism: %s"), strerror(errno));
+
+	/*
+	 * Ignore any signals that have no Linux analog so that those
+	 * signals cannot be sent to Linux processes from the global zone
+	 */
+	for (sig = 1; sig < NSIG; sig++)
+		if (stol_signo[sig] < 0)
+			(void) sigignore(sig);
+
+	/*
+	 * As mentioned previously, when a user signal handler is installed
+	 * via sigaction(), libc interposes on the mechanism by actually
+	 * installing an internal routine sigacthandler() as the signal
+	 * handler. On receipt of the signal, libc does some thread-related
+	 * processing via sigacthandler(), then calls the registered user
+	 * signal handler on behalf of the user.
+	 *
+	 * We need to interpose on that mechanism to make sure the correct
+	 * %gs segment register value is installed before the libc routine
+	 * is called, otherwise the libc code will die with a segmentation
+	 * fault.
+	 *
+	 * The private libc routine setsigacthandler() will set our
+	 * interposition routine, lx_sigacthandler(), as the default
+	 * "sigacthandler" routine for all new signal handlers for this
+	 * thread.
+	 */
+	setsigacthandler(lx_sigacthandler, &libc_sigacthandler);
+	lx_debug("lx_sigacthandler installed, libc_sigacthandler = 0x%p",
+	    libc_sigacthandler);
+
+	/*
+	 * Mark any signals that are ignored as ignored in our interposition
+	 * handler array
+	 */
+	for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) {
+		/*
+		 * A Linux signal with no native equivalent has no native
+		 * disposition to query; skip it so that "sa" is never read
+		 * while uninitialized or while still holding the result for
+		 * a previous signal.
+		 */
+		if ((sig = ltos_signo[lx_sig]) == -1)
+			continue;
+
+		if (sigaction(sig, NULL, &sa) < 0)
+			lx_err_fatal(gettext("unable to determine previous "
+			    "disposition for signal %d: %s"),
+			    sig, strerror(errno));
+
+		if (sa.sa_handler == SIG_IGN) {
+			lx_debug("marking signal %d (lx %d) as SIG_IGN",
+			    sig, lx_sig);
+			lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
+		}
+	}
+
+	/*
+	 * Have our interposition handler handle SIGPWR to start with,
+	 * as it has a default action of terminating the process in Linux
+	 * but its default is to be ignored in Solaris.
+	 */
+	(void) sigemptyset(&sa.sa_mask);
+	sa.sa_sigaction = lx_call_user_handler;
+	sa.sa_flags = SA_SIGINFO;
+
+	if (sigaction(SIGPWR, &sa, NULL) < 0)
+		lx_err_fatal(gettext("%s failed: %s"), "sigaction(SIGPWR)",
+		    strerror(errno));
+
+	/*
+	 * Solaris' libc forces certain register values in the ucontext_t
+	 * used to restore a post-signal user context to be those Solaris
+	 * expects; however that is not what we want to happen if the signal
+	 * was taken while branded code was executing, so we must disable
+	 * that behavior.
+	 */
+	set_setcontext_enforcement(0);
+
+	/*
+	 * Reset the signal mask to what we came in with
+	 */
+	(void) sigprocmask(SIG_SETMASK, &oset, NULL);
+
+	lx_debug("interposition handler setup for SIGPWR");
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c
new file mode 100644
index 0000000000..c5a402e8f1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c
@@ -0,0 +1,1487 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libintl.h>
+#include <strings.h>
+#include <alloca.h>
+#include <ucred.h>
+
+#include <sys/param.h>
+#include <sys/brand.h>
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <netinet/tcp.h>
+#include <netinet/igmp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_socket.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+
+/*
+ * This string is used to prefix all abstract namespace unix sockets, ie all
+ * abstract namespace sockets are converted to regular sockets in the /tmp
+ * directory with .ABSK_ prefixed to their names.
+ */
+#define ABST_PRFX "/tmp/.ABSK_"
+#define ABST_PRFX_LEN 11
+
+static int lx_socket(ulong_t *);
+static int lx_bind(ulong_t *);
+static int lx_connect(ulong_t *);
+static int lx_listen(ulong_t *);
+static int lx_accept(ulong_t *);
+static int lx_getsockname(ulong_t *);
+static int lx_getpeername(ulong_t *);
+static int lx_socketpair(ulong_t *);
+static int lx_send(ulong_t *);
+static int lx_recv(ulong_t *);
+static int lx_sendto(ulong_t *);
+static int lx_recvfrom(ulong_t *);
+static int lx_shutdown(ulong_t *);
+static int lx_setsockopt(ulong_t *);
+static int lx_getsockopt(ulong_t *);
+static int lx_sendmsg(ulong_t *);
+static int lx_recvmsg(ulong_t *);
+
+typedef int (*sockfn_t)(ulong_t *);
+
+/*
+ * Table of socket subcommand handlers. Each entry pairs a handler with the
+ * number of ulong_t arguments that handler takes from its args array.
+ *
+ * NOTE(review): the entry order presumably mirrors the Linux socketcall(2)
+ * subcommand numbering; the dispatcher that indexes this table is not
+ * visible here, so confirm before reordering or inserting entries.
+ */
+static struct {
+ sockfn_t s_fn; /* Function implementing the subcommand */
+ int s_nargs; /* Number of arguments the function takes */
+} sockfns[] = {
+ lx_socket, 3,
+ lx_bind, 3,
+ lx_connect, 3,
+ lx_listen, 2,
+ lx_accept, 3,
+ lx_getsockname, 3,
+ lx_getpeername, 3,
+ lx_socketpair, 4,
+ lx_send, 4,
+ lx_recv, 4,
+ lx_sendto, 6,
+ lx_recvfrom, 6,
+ lx_shutdown, 2,
+ lx_setsockopt, 5,
+ lx_getsockopt, 5,
+ lx_sendmsg, 3,
+ lx_recvmsg, 3
+};
+
+/*
+ * What follows are a series of tables we use to translate Linux constants
+ * into equivalent Solaris constants and back again. I wish this were
+ * cleaner, more programmatic, and generally nicer. Sadly, life is messy,
+ * and Unix networking even more so.
+ */
+/*
+ * Linux address family to Solaris address family, indexed by the Linux
+ * family number. Families without a Solaris counterpart are recorded as
+ * AF_NOTSUPPORTED.
+ */
+static const int ltos_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, AF_UNIX, AF_INET, AF_CCITT, AF_IPX,
+ AF_APPLETALK, AF_NOTSUPPORTED, AF_OSI, AF_NOTSUPPORTED,
+ AF_X25, AF_INET6, AF_CCITT, AF_DECnet,
+ AF_802, AF_POLICY, AF_KEY, AF_ROUTE,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_SNA, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED
+};
+
+/*
+ * Table lookup that yields AF_INVAL for values above LX_AF_MAX. Only the
+ * upper bound is checked, so a caller holding a signed value must reject
+ * negative numbers before using this macro.
+ */
+#define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL)
+
+/*
+ * Linux socket type to Solaris socket type, indexed by the Linux type
+ * number. Types without a Solaris counterpart are recorded as
+ * SOCK_NOTSUPPORTED.
+ */
+static const int ltos_socktype[LX_SOCK_PACKET + 1] = {
+ SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW,
+ SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED,
+ SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED
+};
+
+/*
+ * Table lookup that yields SOCK_INVAL for values above LX_SOCK_PACKET.
+ * Only the upper bound is checked; negative values must be rejected by the
+ * caller first.
+ */
+#define LTOS_SOCKTYPE(t) \
+ ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL)
+
+/*
+ * Linux socket option type definitions
+ *
+ * The protocol `levels` are well defined (see in.h). The option values are
+ * not so well defined. Linux often uses different values to Solaris
+ * although they mean the same thing. For example, IP_TOS in Linux is
+ * defined as value 1 but in Solaris it is defined as value 3. This table
+ * maps all the Protocol levels to their options and maps them between
+ * Linux and Solaris and vice versa. Hence the reason for the complexity.
+ */
+
+typedef struct lx_proto_opts {
+ const int *proto; /* Linux to Solaris mapping table */
+ int maxentries; /* max entries in this table */
+} lx_proto_opts_t;
+
+#define OPTNOTSUP -1 /* we don't support it */
+
+static const int ltos_ip_sockopts[LX_IP_DROP_MEMBERSHIP + 1] = {
+ OPTNOTSUP, IP_TOS, IP_TTL, IP_HDRINCL,
+ IP_OPTIONS, OPTNOTSUP, IP_RECVOPTS, IP_RETOPTS,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IP_RECVTTL, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IP_MULTICAST_IF, IP_MULTICAST_TTL, IP_MULTICAST_LOOP,
+ IP_ADD_MEMBERSHIP, IP_DROP_MEMBERSHIP
+};
+
+static const int ltos_tcp_sockopts[LX_TCP_QUICKACK + 1] = {
+ OPTNOTSUP, TCP_NODELAY, TCP_MAXSEG, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ TCP_KEEPALIVE, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP
+};
+
+static const int ltos_igmp_sockopts[IGMP_MTRACE + 1] = {
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IGMP_MINLEN, OPTNOTSUP, OPTNOTSUP, /* XXX: was IGMP_TIMER_SCALE */
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, IGMP_MEMBERSHIP_QUERY,
+ IGMP_V1_MEMBERSHIP_REPORT, IGMP_DVMRP,
+ IGMP_PIM, OPTNOTSUP, IGMP_V2_MEMBERSHIP_REPORT,
+ IGMP_V2_LEAVE_GROUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ IGMP_MTRACE_RESP, IGMP_MTRACE
+};
+
+static const int ltos_socket_sockopts[LX_SO_ACCEPTCONN + 1] = {
+ OPTNOTSUP, SO_DEBUG, SO_REUSEADDR, SO_TYPE,
+ SO_ERROR, SO_DONTROUTE, SO_BROADCAST, SO_SNDBUF,
+ SO_RCVBUF, SO_KEEPALIVE, SO_OOBINLINE, OPTNOTSUP,
+ OPTNOTSUP, SO_LINGER, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, SO_RCVLOWAT, SO_SNDLOWAT,
+ SO_RCVTIMEO, SO_SNDTIMEO, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
+ OPTNOTSUP, OPTNOTSUP, SO_ACCEPTCONN
+};
+
+#define PROTO_SOCKOPTS(opts) \
+ { (opts), sizeof ((opts)) / sizeof ((opts)[0]) }
+
+/*
+ * The main Linux to Solaris protocol to options mapping table
+ * IPPROTO_TAB_SIZE can be set up to IPPROTO_MAX. All entries above
+ * IPPROTO_TAB_SIZE are in effect not implemented.
+ */
+
+#define IPPROTO_TAB_SIZE 8
+
+static const lx_proto_opts_t ltos_proto_opts[IPPROTO_TAB_SIZE] = {
+ /* IPPROTO_IP 0 */
+ PROTO_SOCKOPTS(ltos_ip_sockopts),
+ /* SOL_SOCKET 1 */
+ PROTO_SOCKOPTS(ltos_socket_sockopts),
+ /* IPPROTO_IGMP 2 */
+ PROTO_SOCKOPTS(ltos_igmp_sockopts),
+ /* NOT IMPLEMENTED 3 */
+ { NULL, 0 },
+ /* NOT IMPLEMENTED 4 */
+ { NULL, 0 },
+ /* NOT IMPLEMENTED 5 */
+ { NULL, 0 },
+ /* IPPROTO_TCP 6 */
+ PROTO_SOCKOPTS(ltos_tcp_sockopts),
+ /* NOT IMPLEMENTED 7 */
+ { NULL, 0 }
+};
+
+/*
+ * Lifted from socket.h, since these definitions are contained within
+ * _KERNEL guards.
+ */
+/* Control message headers are aligned on 4-byte boundaries. */
+#define _CMSG_HDR_ALIGNMENT 4
+/* Round x up to the next multiple of _CMSG_HDR_ALIGNMENT. */
+#define _CMSG_HDR_ALIGN(x) (((uintptr_t)(x) + _CMSG_HDR_ALIGNMENT - 1) & \
+ ~(_CMSG_HDR_ALIGNMENT - 1))
+/*
+ * First control message header of m, or a null pointer when the control
+ * buffer is too small to hold even one struct cmsghdr.
+ */
+#define CMSG_FIRSTHDR(m) \
+ (((m)->msg_controllen < sizeof (struct cmsghdr)) ? \
+ (struct cmsghdr *)0 : (struct cmsghdr *)((m)->msg_control))
+
+/*
+ * Control message header following c in m, or a null pointer when a
+ * further header would not fit inside the control buffer. A null c yields
+ * the first header. The step is taken from c's own cmsg_len, and m is
+ * accessed as a struct lx_msghdr. Both arguments are evaluated more than
+ * once, so they must be free of side effects.
+ */
+#define CMSG_NXTHDR(m, c) \
+ (((c) == 0) ? CMSG_FIRSTHDR(m) : \
+ ((((uintptr_t)_CMSG_HDR_ALIGN((char *)(c) + \
+ ((struct cmsghdr *)(c))->cmsg_len) + sizeof (struct cmsghdr)) > \
+ (((uintptr_t)((struct lx_msghdr *)(m))->msg_control) + \
+ ((uintptr_t)((struct lx_msghdr *)(m))->msg_controllen))) ? \
+ ((struct cmsghdr *)0) : \
+ ((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \
+ ((struct cmsghdr *)(c))->cmsg_len))))
+
+#define LX_TO_SOL 1
+#define SOL_TO_LX 2
+
+/*
+ * Rewrite, in place, the level and type of every control message attached
+ * to msg from the Linux encoding to the Solaris one (direction LX_TO_SOL)
+ * or the other way around (any other direction value).
+ *
+ * Only SOL_SOCKET level messages carrying SCM_RIGHTS or credentials are
+ * understood. The walk stops at the first message that cannot be
+ * translated; the problem is reported through lx_unsupported() using
+ * caller as the name of the operation.
+ *
+ * Returns 0 on success or ENOTSUP (a positive errno value) on failure.
+ */
+static int
+convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller)
+{
+	struct cmsghdr *cmsg;
+	int err = 0;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL && err == 0;
+	    cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (direction == LX_TO_SOL) {
+			if (cmsg->cmsg_level != LX_SOL_SOCKET) {
+				err = ENOTSUP;
+				continue;
+			}
+
+			/* The level is rewritten before the type is checked */
+			cmsg->cmsg_level = SOL_SOCKET;
+			if (cmsg->cmsg_type == LX_SCM_RIGHTS)
+				cmsg->cmsg_type = SCM_RIGHTS;
+			else if (cmsg->cmsg_type == LX_SCM_CRED)
+				cmsg->cmsg_type = SCM_UCRED;
+			else
+				err = ENOTSUP;
+		} else {
+			if (cmsg->cmsg_level != SOL_SOCKET) {
+				err = ENOTSUP;
+				continue;
+			}
+
+			cmsg->cmsg_level = LX_SOL_SOCKET;
+			if (cmsg->cmsg_type == SCM_RIGHTS)
+				cmsg->cmsg_type = LX_SCM_RIGHTS;
+			else if (cmsg->cmsg_type == SCM_UCRED)
+				cmsg->cmsg_type = LX_SCM_CRED;
+			else
+				err = ENOTSUP;
+		}
+	}
+
+	if (err)
+		lx_unsupported("Unsupported socket control message in %s.",
+		    caller);
+
+	return (err);
+}
+
+/*
+ * Copy the Linux socket address at inaddr (inlen bytes) into addr and
+ * rewrite it as the equivalent Solaris address, storing the resulting
+ * length in *len.
+ *
+ * The buffer at addr may need to be larger than inlen:
+ *  - if inaddr is an abstract namespace unix socket, addr must have room
+ *    for the expanded socket name, i.e. inlen + ABST_PRFX_LEN bytes;
+ *  - if inaddr is an AF_INET6 address, addr must have room for a full
+ *    struct sockaddr_in6, which is one uint32_t longer than inlen.
+ *
+ * Returns 0 on success or a negated errno value on failure.
+ */
+static int
+convert_sockaddr(struct sockaddr *addr, socklen_t *len,
+    struct sockaddr *inaddr, socklen_t inlen)
+{
+	sa_family_t family;
+	int lx_in6_len;
+	int size;
+	int i, orig_len;
+
+	/*
+	 * Note that if the buffer at inaddr is ever smaller than inlen bytes,
+	 * we may erroneously return EFAULT rather than a possible EINVAL
+	 * as the copy comes before the various checks as to whether inlen
+	 * is of the proper length for the socket type.
+	 *
+	 * This isn't an issue at present because all callers to this routine
+	 * do meet that constraint.
+	 */
+	if ((ssize_t)inlen < 0)
+		return (-EINVAL);
+	if (uucopy(inaddr, addr, inlen) != 0)
+		return (-errno);
+
+	/*
+	 * Anything shorter than the family field leaves addr->sa_family
+	 * uninitialized, so there is nothing meaningful to translate.
+	 */
+	if (inlen < sizeof (addr->sa_family))
+		return (-EINVAL);
+
+	family = LTOS_FAMILY(addr->sa_family);
+
+	switch (family) {
+	case (sa_family_t)AF_NOTSUPPORTED:
+		return (-EPROTONOSUPPORT);
+	case (sa_family_t)AF_INVAL:
+		return (-EAFNOSUPPORT);
+	case AF_INET:
+		size = sizeof (struct sockaddr);
+
+		if (inlen < size)
+			return (-EINVAL);
+
+		*len = size;
+		break;
+
+	case AF_INET6:
+		/*
+		 * The Solaris sockaddr_in6 has one more 32-bit
+		 * field than the Linux version.
+		 */
+		size = sizeof (struct sockaddr_in6);
+		lx_in6_len = size - sizeof (uint32_t);
+
+		if (inlen != lx_in6_len)
+			return (-EINVAL);
+
+		*len = (sizeof (struct sockaddr_in6));
+		bzero((char *)addr + lx_in6_len, sizeof (uint32_t));
+		break;
+
+	case AF_UNIX:
+		if (inlen > sizeof (struct sockaddr_un))
+			return (-EINVAL);
+
+		*len = inlen;
+
+		/*
+		 * Linux supports abstract unix sockets, which are
+		 * simply sockets that do not exist on the file system.
+		 * These sockets are denoted by beginning the path with
+		 * a NULL character. To support these, we strip out the
+		 * leading NULL character and change the path to point
+		 * to a real place in /tmp directory, by prepending
+		 * ABST_PRFX and replacing all illegal characters with
+		 * '_'.
+		 *
+		 * The first path byte is only examined when the caller
+		 * actually supplied one; otherwise sa_data[0] was never
+		 * copied in and the length arithmetic below would go
+		 * negative.
+		 */
+		if (inlen > sizeof (addr->sa_family) &&
+		    addr->sa_data[0] == '\0') {
+
+			/*
+			 * inlen is the entire size of the sockaddr_un
+			 * data structure, including the sun_family, so
+			 * we need to subtract this out. We subtract
+			 * 1 since we want to overwrite the leading NULL
+			 * character, and thus do not include it in the
+			 * length.
+			 */
+			orig_len = inlen - sizeof (addr->sa_family) - 1;
+
+			/*
+			 * Since abstract paths can contain illegal
+			 * filename characters, we simply replace these
+			 * with '_'
+			 */
+			for (i = 1; i < orig_len + 1; i++) {
+				if (addr->sa_data[i] == '\0' ||
+				    addr->sa_data[i] == '/')
+					addr->sa_data[i] = '_';
+			}
+
+			/*
+			 * prepend ABST_PRFX to file name, minus the
+			 * leading NULL character. This places the
+			 * socket as a hidden file in the /tmp
+			 * directory.
+			 */
+			(void) memmove(addr->sa_data + ABST_PRFX_LEN,
+			    addr->sa_data + 1, orig_len);
+			bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN);
+
+			/*
+			 * Since abstract socket paths may not be NULL
+			 * terminated, we must explicitly NULL terminate
+			 * our string.
+			 */
+			addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0';
+
+			/*
+			 * Make len reflect the new len of our string.
+			 * Although we removed the NULL character at the
+			 * beginning of the string, we added a NULL
+			 * character to the end, so the net gain in
+			 * length is simply ABST_PRFX_LEN.
+			 */
+			*len = inlen + ABST_PRFX_LEN;
+		}
+		break;
+
+	default:
+		*len = inlen;
+	}
+
+	addr->sa_family = family;
+	return (0);
+}
+
+/*
+ * Validate the domain, type and protocol arguments of a Linux socket
+ * creation call and translate the domain and type to their Solaris values.
+ *
+ * On success 0 is returned and the translated values are stored through
+ * out_dom and out_type; on failure a negated errno value is returned and
+ * neither output is written.
+ */
+static int
+convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom,
+    int *out_type)
+{
+	int sol_dom;
+	int sol_type;
+
+	/* The lookup macros only guard the upper bound */
+	if (in_dom < 0 || in_type < 0 || in_protocol < 0) {
+		return (-EINVAL);
+	}
+
+	sol_dom = LTOS_FAMILY(in_dom);
+	if (sol_dom == AF_NOTSUPPORTED || sol_dom == AF_UNSPEC) {
+		return (-EAFNOSUPPORT);
+	} else if (sol_dom == AF_INVAL) {
+		return (-EINVAL);
+	}
+
+	sol_type = LTOS_SOCKTYPE(in_type);
+	if (sol_type == SOCK_NOTSUPPORTED) {
+		return (-ESOCKTNOSUPPORT);
+	} else if (sol_type == SOCK_INVAL) {
+		return (-EINVAL);
+	}
+
+	/*
+	 * A raw socket combined with IPPROTO_IP is refused: Linux does not
+	 * let an application pick that protocol for a raw socket, whereas
+	 * Solaris would accept it.
+	 */
+	if (sol_type == SOCK_RAW && in_protocol == IPPROTO_IP) {
+		return (-ESOCKTNOSUPPORT);
+	}
+
+	*out_type = sol_type;
+	*out_dom = sol_dom;
+	return (0);
+}
+
+/*
+ * Translate a Linux MSG_* flag word into the Solaris equivalent.
+ *
+ * Flags that have a Solaris counterpart are carried over. Flags without
+ * one are reported through lx_unsupported() and otherwise dropped. Bits
+ * not mentioned here are ignored.
+ */
+static int
+convert_sockflags(int lx_flags)
+{
+	static const struct {
+		int lx_bit;	/* Linux flag */
+		int sol_bit;	/* matching Solaris flag */
+	} xlate[] = {
+		{ LX_MSG_OOB,		MSG_OOB },
+		{ LX_MSG_PEEK,		MSG_PEEK },
+		{ LX_MSG_DONTROUTE,	MSG_DONTROUTE },
+		{ LX_MSG_CTRUNC,	MSG_CTRUNC },
+		{ LX_MSG_TRUNC,		MSG_TRUNC },
+		{ LX_MSG_WAITALL,	MSG_WAITALL },
+		{ LX_MSG_DONTWAIT,	MSG_DONTWAIT },
+		{ LX_MSG_EOR,		MSG_EOR }
+	};
+	int result = 0;
+	int idx;
+
+	for (idx = 0; idx < (int)(sizeof (xlate) / sizeof (xlate[0])); idx++) {
+		if ((lx_flags & xlate[idx].lx_bit) != 0)
+			result |= xlate[idx].sol_bit;
+	}
+
+	/* The remaining flags have no Solaris counterpart */
+	if ((lx_flags & LX_MSG_PROXY) != 0)
+		lx_unsupported("socket operation with MSG_PROXY flag set");
+	if ((lx_flags & LX_MSG_FIN) != 0)
+		lx_unsupported("socket operation with MSG_FIN flag set");
+	if ((lx_flags & LX_MSG_SYN) != 0)
+		lx_unsupported("socket operation with MSG_SYN flag set");
+	if ((lx_flags & LX_MSG_CONFIRM) != 0)
+		lx_unsupported("socket operation with MSG_CONFIRM set");
+	if ((lx_flags & LX_MSG_RST) != 0)
+		lx_unsupported("socket operation with MSG_RST flag set");
+	if ((lx_flags & LX_MSG_MORE) != 0)
+		lx_unsupported("socket operation with MSG_MORE flag set");
+
+	return (result);
+}
+
+/*
+ * socket(2) emulation. args[0], args[1] and args[2] are the Linux domain,
+ * type and protocol. Returns the new descriptor, or a negated errno value
+ * on failure.
+ */
+static int
+lx_socket(ulong_t *args)
+{
+	int protocol = (int)args[2];
+	int domain, type;
+	int rv;
+
+	rv = convert_sock_args((int)args[0], (int)args[1], protocol,
+	    &domain, &type);
+	if (rv != 0)
+		return (rv);
+
+	lx_debug("\tsocket(%d, %d, %d)", domain, type, protocol);
+
+	/* Right now IPv6 sockets don't work */
+	if (domain == AF_INET6)
+		return (-EAFNOSUPPORT);
+
+	/*
+	 * Clients of the auditing subsystem used by CentOS 4 and 5 expect to
+	 * be able to create AF_ROUTE SOCK_RAW sockets to communicate with the
+	 * auditing daemons. Failure to create these sockets will cause login,
+	 * ssh and useradd, among other programs, to fail. To trick these
+	 * programs into working, we convert the socket domain and type to
+	 * something that we do support. Then when sendto is called on these
+	 * sockets, we return an error code. See lx_sendto.
+	 */
+	if (domain == AF_ROUTE && type == SOCK_RAW) {
+		protocol = 0;
+		type = SOCK_STREAM;
+		domain = AF_INET;
+	}
+
+	rv = socket(domain, type, protocol);
+	if (rv < 0) {
+		/* Linux reports an unusable protocol as a bad socket type */
+		return ((errno == EPROTONOSUPPORT) ?
+		    -ESOCKTNOSUPPORT : -errno);
+	}
+
+	return (rv);
+}
+
+/*
+ * bind(2) emulation. args[0] is the socket descriptor, args[1] the Linux
+ * socket address and args[2] its length. Returns the result of bind() or a
+ * negated errno value on failure.
+ */
+static int
+lx_bind(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	struct stat64 statbuf;
+	struct sockaddr *name, oldname;
+	socklen_t len;
+	int r, r2, ret, tmperrno;
+	int abst_sock;
+	struct stat sb;
+
+	if (uucopy((struct sockaddr *)args[1], &oldname,
+	    sizeof (struct sockaddr)) != 0)
+		return (-errno);
+
+	/*
+	 * Handle Linux abstract sockets, which are UNIX sockets whose path
+	 * begins with a NULL character.
+	 */
+	abst_sock = (oldname.sa_family == AF_UNIX) &&
+	    (oldname.sa_data[0] == '\0');
+
+	/*
+	 * convert_sockaddr will expand the socket path if it is abstract, and
+	 * appends one 32-bit field to an AF_INET6 address, so we need to
+	 * allocate extra memory for both cases now. Without the additional
+	 * sizeof (uint32_t) the AF_INET6 conversion writes past the end of
+	 * the buffer.
+	 */
+	if ((name = SAFE_ALLOCA((socklen_t)args[2] +
+	    abst_sock * ABST_PRFX_LEN + sizeof (uint32_t))) == NULL)
+		return (-EINVAL);
+
+	if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1],
+	    (socklen_t)args[2])) < 0)
+		return (r);
+
+	/*
+	 * Linux abstract namespace unix sockets are simply sockets that do not
+	 * exist on the filesystem. We emulate them by changing their paths
+	 * in convert_sockaddr so that they point to real file names on the
+	 * filesystem. Because in Linux they do not exist on the filesystem
+	 * applications do not have to worry about deleting files, however in
+	 * our filesystem based emulation we do. To solve this problem, we first
+	 * check to see if the socket already exists before we create one. If it
+	 * does we attempt to connect to it to see if it is in use, or just
+	 * left over from a previous lx_bind call. If we are unable to connect,
+	 * we assume it is not in use and remove the file, then continue on
+	 * as if the file never existed.
+	 */
+	if (abst_sock && stat(name->sa_data, &sb) == 0 &&
+	    S_ISSOCK(sb.st_mode)) {
+		if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+			return (-ENOSR);
+		ret = connect(r2, name, len);
+		tmperrno = errno;
+		if (close(r2) < 0)
+			return (-EINVAL);
+
+		/*
+		 * if we can't connect to the socket, assume no one is using it
+		 * and remove it, otherwise assume it is in use and return
+		 * EADDRINUSE.
+		 */
+		if ((ret < 0) && (tmperrno == ECONNREFUSED)) {
+			if (unlink(name->sa_data) < 0) {
+				return (-EADDRINUSE);
+			}
+		} else {
+			return (-EADDRINUSE);
+		}
+	}
+
+	lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len);
+
+	if (name->sa_family == AF_UNIX)
+		lx_debug("\t\tAF_UNIX, path = %s", name->sa_data);
+
+	r = bind(sockfd, name, len);
+
+	/*
+	 * Linux returns EADDRINUSE for attempts to bind to UNIX domain
+	 * sockets that aren't sockets.
+	 */
+	if ((r < 0) && (errno == EINVAL) && (name->sa_family == AF_UNIX) &&
+	    ((stat64(name->sa_data, &statbuf) == 0) &&
+	    (!S_ISSOCK(statbuf.st_mode))))
+		return (-EADDRINUSE);
+
+	return ((r < 0) ? -errno : r);
+}
+
+/*
+ * connect(2) emulation. args[0] is the socket descriptor, args[1] the
+ * Linux socket address and args[2] its length. Returns the result of
+ * connect() or a negated errno value on failure.
+ */
+static int
+lx_connect(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	struct sockaddr *name, oldname;
+	socklen_t len;
+	int r;
+	int abst_sock;
+
+	if (uucopy((struct sockaddr *)args[1], &oldname,
+	    sizeof (struct sockaddr)) != 0)
+		return (-errno);
+
+	/* Handle Linux abstract sockets */
+	abst_sock = (oldname.sa_family == AF_UNIX) &&
+	    (oldname.sa_data[0] == '\0');
+
+	/*
+	 * convert_sockaddr will expand the socket path, if it is abstract,
+	 * and appends one 32-bit field to an AF_INET6 address, so we need to
+	 * allocate extra memory for both cases now. Without the additional
+	 * sizeof (uint32_t) the AF_INET6 conversion writes past the end of
+	 * the buffer.
+	 */
+	if ((name = SAFE_ALLOCA((socklen_t)args[2] +
+	    abst_sock * ABST_PRFX_LEN + sizeof (uint32_t))) == NULL)
+		return (-EINVAL);
+
+	if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1],
+	    (socklen_t)args[2])) < 0)
+		return (r);
+
+	lx_debug("\tconnect(%d, 0x%p, %d)", sockfd, name, len);
+
+	if (name->sa_family == AF_UNIX)
+		lx_debug("\t\tAF_UNIX, path = %s", name->sa_data);
+
+	r = connect(sockfd, name, len);
+
+	return ((r < 0) ? -errno : r);
+}
+
+/*
+ * listen(2) emulation. args[0] is the socket descriptor and args[1] the
+ * backlog. Returns the result of listen() or a negated errno value.
+ */
+static int
+lx_listen(ulong_t *args)
+{
+	int backlog = (int)args[1];
+	int sockfd = (int)args[0];
+	int rv;
+
+	lx_debug("\tlisten(%d, %d)", sockfd, backlog);
+
+	rv = listen(sockfd, backlog);
+	if (rv < 0)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * accept(2) emulation. args[0] is the listening descriptor, args[1] the
+ * buffer that receives the peer address (may be NULL) and args[2] a pointer
+ * to its length. Returns the new descriptor or a negated errno value.
+ *
+ * If the call fails after the connection has been accepted, the new
+ * descriptor is closed before returning; otherwise it would stay open with
+ * the caller never learning its number.
+ */
+static int
+lx_accept(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	struct sockaddr *name = (struct sockaddr *)args[1];
+	socklen_t namelen = 0;
+	int r, err;
+
+	lx_debug("\taccept(%d, 0x%p, 0x%p)", sockfd, args[1], args[2]);
+
+	/*
+	 * The Linux man page says that -1 is returned and errno is set to
+	 * EFAULT if the "name" address is bad, but it is silent on what to
+	 * set errno to if the "namelen" address is bad. Experimentation
+	 * shows that Linux (at least the 2.4.21 kernel in CentOS) actually
+	 * sets errno to EINVAL in both cases.
+	 *
+	 * Note that we must first check the name pointer, as the Linux
+	 * docs state nothing is copied out if the "name" pointer is NULL.
+	 * If it is NULL, we don't care about the namelen pointer's value
+	 * or about dereferencing it.
+	 *
+	 * Happily, Solaris' accept(3SOCKET) treats NULL name pointers and
+	 * zero namelens the same way.
+	 */
+	if ((name != NULL) &&
+	    (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0))
+		return ((errno == EFAULT) ? -EINVAL : -errno);
+
+	lx_debug("\taccept namelen = %d", namelen);
+
+	if ((r = accept(sockfd, name, &namelen)) < 0)
+		return ((errno == EFAULT) ? -EINVAL : -errno);
+
+	lx_debug("\taccept namelen returned %d bytes", namelen);
+
+	/*
+	 * In Linux, accept()ed sockets do not inherit anything set by
+	 * fcntl(), so filter those out.
+	 */
+	if (fcntl(r, F_SETFL, 0) < 0) {
+		/* save errno: close() may overwrite it */
+		err = errno;
+		(void) close(r);
+		return (-err);
+	}
+
+	/*
+	 * Once again, a bad "namelen" address sets errno to EINVAL, not
+	 * EFAULT. If namelen was zero, there's no need to copy a zero back
+	 * out.
+	 *
+	 * Logic might dictate that we should check if we can write to
+	 * the namelen pointer earlier so we don't accept a pending connection
+	 * only to fail the call because we can't write the namelen value back
+	 * out. However, testing shows Linux does indeed fail the call after
+	 * accepting the connection so we must behave in a compatible manner.
+	 * The accepted connection is dropped in that case.
+	 */
+	if ((name != NULL) && (namelen != 0) &&
+	    (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0)) {
+		err = (errno == EFAULT) ? EINVAL : errno;
+		(void) close(r);
+		return (-err);
+	}
+
+	return (r);
+}
+
+/*
+ * getsockname(2) emulation. args[0] is the socket descriptor, args[1] the
+ * buffer that receives the address and args[2] a pointer to its length.
+ * Returns 0 on success or a negated errno value.
+ */
+static int
+lx_getsockname(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	void *user_name = (void *)args[1];
+	void *user_lenp = (void *)args[2];
+	struct sockaddr *name = NULL;
+	socklen_t buflen, namelen;
+
+	if (uucopy(user_lenp, &namelen, sizeof (socklen_t)) != 0)
+		return (-errno);
+
+	/* Remember how much room the caller actually provided */
+	buflen = namelen;
+
+	lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))",
+	    sockfd, args[1], args[2], namelen);
+
+	/* With a zero length no buffer is allocated and name stays NULL */
+	if (namelen > 0) {
+		name = SAFE_ALLOCA(namelen);
+		if (name == NULL)
+			return (-EINVAL);
+		bzero(name, namelen);
+	}
+
+	if (getsockname(sockfd, name, &namelen) < 0)
+		return (-errno);
+
+	/*
+	 * If the name that getsockname() wants to return is larger
+	 * than namelen, getsockname() will copy out the maximum amount
+	 * of data possible and then update namelen to indicate the
+	 * actual size of all the data that it wanted to copy out. For
+	 * that reason the copy below uses the original length.
+	 */
+	if (uucopy(name, user_name, buflen) != 0)
+		return (-errno);
+
+	if (uucopy(&namelen, user_lenp, sizeof (socklen_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * getpeername(2) emulation. args[0] is the socket descriptor, args[1] the
+ * buffer that receives the peer address and args[2] a pointer to its
+ * length. Returns 0 on success or a negated errno value.
+ */
+static int
+lx_getpeername(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	struct sockaddr *name;
+	socklen_t namelen, namelen_orig;
+
+	if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0)
+		return (-errno);
+	namelen_orig = namelen;
+
+	lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))",
+	    sockfd, args[1], args[2], namelen);
+
+	/*
+	 * Linux returns EFAULT in this case, even if the namelen parameter
+	 * is 0. This check will not catch other illegal addresses, but
+	 * the benefit catching a non-null illegal address here is not
+	 * worth the cost of another system call.
+	 */
+	if ((void *)args[1] == NULL)
+		return (-EFAULT);
+
+	if ((name = SAFE_ALLOCA(namelen)) == NULL)
+		return (-EINVAL);
+	if ((getpeername(sockfd, name, &namelen)) < 0)
+		return (-errno);
+
+	/*
+	 * As lx_getsockname() notes for getsockname(), namelen may come back
+	 * larger than the buffer that was supplied when the address had to
+	 * be truncated. Never copy more than the caller's buffer (and our
+	 * own allocation) can hold.
+	 */
+	if (uucopy(name, (void *)args[1],
+	    (namelen < namelen_orig) ? namelen : namelen_orig) != 0)
+		return (-errno);
+
+	if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * socketpair(2) emulation. args[0], args[1] and args[2] are the Linux
+ * domain, type and protocol; args[3] points at the two-element array that
+ * receives the descriptors. Returns 0 on success or a negated errno value.
+ */
+static int
+lx_socketpair(ulong_t *args)
+{
+	int protocol = (int)args[2];
+	int *sv = (int *)args[3];
+	int domain, type;
+	int pair[2];
+	int rv;
+
+	rv = convert_sock_args((int)args[0], (int)args[1], protocol,
+	    &domain, &type);
+	if (rv != 0)
+		return (rv);
+
+	lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv);
+
+	if (socketpair(domain, type, protocol, pair) != 0) {
+		/* Linux reports an unusable protocol as a bad socket type */
+		if (errno == EPROTONOSUPPORT)
+			return (-ESOCKTNOSUPPORT);
+		return (-errno);
+	}
+
+	if (uucopy(pair, sv, sizeof (pair)) == 0)
+		return (0);
+
+	/* The caller can't be told about the pair, so don't leave it open */
+	rv = errno;
+	(void) close(pair[0]);
+	(void) close(pair[1]);
+	return (-rv);
+}
+
+/*
+ * send(2) emulation. args[0] is the socket descriptor, args[1] the buffer,
+ * args[2] its length and args[3] the Linux flags. Returns the number of
+ * bytes sent or a negated errno value.
+ *
+ * NOTE(review): the forward declaration of this function near the top of
+ * the file uses an int return type while this definition uses ssize_t; the
+ * two agree only where ssize_t is int -- confirm for every build target.
+ */
+static ssize_t
+lx_send(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	void *buf = (void *)args[1];
+	size_t len = (size_t)args[2];
+	int flags = (int)args[3];
+	ssize_t r;
+	int err;
+
+	int nosigpipe = flags & LX_MSG_NOSIGNAL;
+	struct sigaction newact, oact;
+
+	lx_debug("\tsend(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags);
+
+	flags = convert_sockflags(flags);
+
+	/*
+	 * If nosigpipe is set, we want to emulate the Linux action of
+	 * not sending a SIGPIPE to the caller if the remote socket has
+	 * already been closed.
+	 *
+	 * As SIGPIPE is a directed signal sent only to the thread that
+	 * performed the action, we can emulate this behavior by momentarily
+	 * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+	 * call, and resetting the action back to its previous value.
+	 */
+	if (nosigpipe) {
+		newact.sa_handler = SIG_IGN;
+		newact.sa_flags = 0;
+		(void) sigemptyset(&newact.sa_mask);
+
+		if (sigaction(SIGPIPE, &newact, &oact) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "send()");
+	}
+
+	r = send(sockfd, buf, len, flags);
+
+	/*
+	 * Capture errno now: the sigaction() call below is allowed to
+	 * change it even when it succeeds.
+	 */
+	err = errno;
+
+	if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+		lx_err_fatal(
+		    gettext("%s: could not reset SIGPIPE handler to "
+		    "emulate LX_MSG_NOSIGNAL"), "send()");
+
+	return ((r < 0) ? -err : r);
+}
+
+/*
+ * recv(2) emulation. args[0] is the socket descriptor, args[1] the buffer,
+ * args[2] its length and args[3] the Linux flags. Returns the number of
+ * bytes received or a negated errno value.
+ *
+ * NOTE(review): the forward declaration of this function near the top of
+ * the file uses an int return type while this definition uses ssize_t; the
+ * two agree only where ssize_t is int -- confirm for every build target.
+ */
+static ssize_t
+lx_recv(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	void *buf = (void *)args[1];
+	size_t len = (size_t)args[2];
+	int flags = (int)args[3];
+	ssize_t r;
+	int err;
+
+	int nosigpipe = flags & LX_MSG_NOSIGNAL;
+	struct sigaction newact, oact;
+
+	lx_debug("\trecv(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags);
+
+	flags = convert_sockflags(flags);
+
+	/*
+	 * If nosigpipe is set, we want to emulate the Linux action of
+	 * not sending a SIGPIPE to the caller if the remote socket has
+	 * already been closed.
+	 *
+	 * As SIGPIPE is a directed signal sent only to the thread that
+	 * performed the action, we can emulate this behavior by momentarily
+	 * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+	 * call, and resetting the action back to its previous value.
+	 */
+	if (nosigpipe) {
+		newact.sa_handler = SIG_IGN;
+		newact.sa_flags = 0;
+		(void) sigemptyset(&newact.sa_mask);
+
+		if (sigaction(SIGPIPE, &newact, &oact) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "recv()");
+	}
+
+	r = recv(sockfd, buf, len, flags);
+
+	/*
+	 * Capture errno now: the sigaction() call below is allowed to
+	 * change it even when it succeeds.
+	 */
+	err = errno;
+
+	if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+		lx_err_fatal(
+		    gettext("%s: could not reset SIGPIPE handler to "
+		    "emulate LX_MSG_NOSIGNAL"), "recv()");
+
+	return ((r < 0) ? -err : r);
+}
+
+/*
+ * sendto(2) emulation. args[0] is the socket descriptor, args[1] the
+ * buffer, args[2] its length, args[3] the Linux flags, args[4] the optional
+ * Linux destination address and args[5] that address's length. Returns the
+ * number of bytes sent or a negated errno value.
+ *
+ * NOTE(review): the forward declaration of this function near the top of
+ * the file uses an int return type while this definition uses ssize_t; the
+ * two agree only where ssize_t is int -- confirm for every build target.
+ */
+static ssize_t
+lx_sendto(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	void *buf = (void *)args[1];
+	size_t len = (size_t)args[2];
+	int flags = (int)args[3];
+	struct sockaddr *to = NULL, oldto;
+	socklen_t tolen = 0;
+	ssize_t r;
+	int abst_sock;
+	int err;
+
+	int nosigpipe = flags & LX_MSG_NOSIGNAL;
+	struct sigaction newact, oact;
+
+	if ((args[4] != NULL) && (args[5] > 0)) {
+		if (uucopy((struct sockaddr *)args[4], &oldto,
+		    sizeof (struct sockaddr)) != 0)
+			return (-errno);
+
+		/* Handle Linux abstract sockets */
+		abst_sock = (oldto.sa_family == AF_UNIX) &&
+		    (oldto.sa_data[0] == '\0');
+
+		/*
+		 * convert_sockaddr will expand the socket path, if it is
+		 * abstract, and appends one 32-bit field to an AF_INET6
+		 * address, so we need to allocate extra memory for both
+		 * cases now. Without the additional sizeof (uint32_t) the
+		 * AF_INET6 conversion writes past the end of the buffer.
+		 */
+		if ((to = SAFE_ALLOCA(args[5] + abst_sock * ABST_PRFX_LEN +
+		    sizeof (uint32_t))) == NULL)
+			return (-EINVAL);
+
+		if ((r = convert_sockaddr(to, &tolen,
+		    (struct sockaddr *)args[4], (socklen_t)args[5])) < 0)
+			return (r);
+	}
+
+
+	lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len,
+	    flags, to, tolen);
+
+	flags = convert_sockflags(flags);
+
+	/* return this error to make auditing subsystem happy */
+	if (to && to->sa_family == AF_ROUTE) {
+		return (-ECONNREFUSED);
+	}
+
+	/*
+	 * If nosigpipe is set, we want to emulate the Linux action of
+	 * not sending a SIGPIPE to the caller if the remote socket has
+	 * already been closed.
+	 *
+	 * As SIGPIPE is a directed signal sent only to the thread that
+	 * performed the action, we can emulate this behavior by momentarily
+	 * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+	 * call, and resetting the action back to its previous value.
+	 */
+	if (nosigpipe) {
+		newact.sa_handler = SIG_IGN;
+		newact.sa_flags = 0;
+		(void) sigemptyset(&newact.sa_mask);
+
+		if (sigaction(SIGPIPE, &newact, &oact) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "sendto()");
+	}
+
+	r = sendto(sockfd, buf, len, flags, to, tolen);
+
+	/*
+	 * Capture errno now: the sigaction() call below is allowed to
+	 * change it even when it succeeds.
+	 */
+	err = errno;
+
+	if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+		lx_err_fatal(
+		    gettext("%s: could not reset SIGPIPE handler to "
+		    "emulate LX_MSG_NOSIGNAL"), "sendto()");
+
+	if (r < 0) {
+		/*
+		 * according to the man page and LTP, the expected error in
+		 * this case is EPIPE.
+		 */
+		if (err == ENOTCONN)
+			return (-EPIPE);
+		else
+			return (-err);
+	}
+	return (r);
+}
+
+/*
+ * recvfrom(2) emulation. args[0] is the socket descriptor, args[1] the
+ * buffer, args[2] its length, args[3] the Linux flags, args[4] the buffer
+ * for the sender's address and args[5] a pointer to its length. Both of
+ * the latter are handed to recvfrom() untranslated. Returns the number of
+ * bytes received or a negated errno value.
+ *
+ * NOTE(review): the forward declaration of this function near the top of
+ * the file uses an int return type while this definition uses ssize_t; the
+ * two agree only where ssize_t is int -- confirm for every build target.
+ */
+static ssize_t
+lx_recvfrom(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	void *buf = (void *)args[1];
+	size_t len = (size_t)args[2];
+	int flags = (int)args[3];
+	struct sockaddr *from = (struct sockaddr *)args[4];
+	socklen_t *from_lenp = (socklen_t *)args[5];
+	ssize_t r;
+	int err;
+
+	int nosigpipe = flags & LX_MSG_NOSIGNAL;
+	struct sigaction newact, oact;
+
+	lx_debug("\trecvfrom(%d, 0x%p, 0x%d, 0x%x, 0x%x, 0x%p)", sockfd, buf,
+	    len, flags, from, from_lenp);
+
+	flags = convert_sockflags(flags);
+
+	/*
+	 * If nosigpipe is set, we want to emulate the Linux action of
+	 * not sending a SIGPIPE to the caller if the remote socket has
+	 * already been closed.
+	 *
+	 * As SIGPIPE is a directed signal sent only to the thread that
+	 * performed the action, we can emulate this behavior by momentarily
+	 * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+	 * call, and resetting the action back to its previous value.
+	 */
+	if (nosigpipe) {
+		newact.sa_handler = SIG_IGN;
+		newact.sa_flags = 0;
+		(void) sigemptyset(&newact.sa_mask);
+
+		if (sigaction(SIGPIPE, &newact, &oact) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "recvfrom()");
+	}
+
+	r = recvfrom(sockfd, buf, len, flags, from, from_lenp);
+
+	/*
+	 * Capture errno now: the sigaction() call below is allowed to
+	 * change it even when it succeeds.
+	 */
+	err = errno;
+
+	if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+		lx_err_fatal(
+		    gettext("%s: could not reset SIGPIPE handler to "
+		    "emulate LX_MSG_NOSIGNAL"), "recvfrom()");
+
+	return ((r < 0) ? -err : r);
+}
+
+/*
+ * shutdown(2) emulation. args[0] is the socket descriptor and args[1] the
+ * "how" value, which is passed through untranslated. Returns the result of
+ * shutdown() or a negated errno value.
+ */
+static int
+lx_shutdown(ulong_t *args)
+{
+	int how = (int)args[1];
+	int sockfd = (int)args[0];
+	int rv;
+
+	lx_debug("\tshutdown(%d, %d)", sockfd, how);
+
+	rv = shutdown(sockfd, how);
+	if (rv < 0)
+		return (-errno);
+
+	return (rv);
+}
+
+/*
+ * setsockopt(2) emulation. args[0] is the socket descriptor, args[1] the
+ * Linux protocol level, args[2] the Linux option name, args[3] the option
+ * value and args[4] its length. The option name is translated through the
+ * ltos_proto_opts tables before the native call is made. Returns the
+ * result of setsockopt() or a negated errno value.
+ */
+static int
+lx_setsockopt(ulong_t *args)
+{
+ int sockfd = (int)args[0];
+ int level = (int)args[1];
+ int optname = (int)args[2];
+ void *optval = (void *)args[3];
+ int optlen = (int)args[4];
+ int internal_opt;
+ int r;
+
+ lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname,
+ optval, optlen);
+
+ /*
+ * The kernel returns EFAULT for all invalid addresses except NULL,
+ * for which it returns EINVAL. Linux wants EFAULT for NULL too.
+ */
+ if (optval == NULL)
+ return (-EFAULT);
+
+ /*
+ * Do a table lookup of the Solaris equivalent of the given option
+ */
+ /* Levels outside the table have no translation at all */
+ if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE)
+ return (-ENOPROTOOPT);
+
+ /*
+ * Reject levels whose table is empty, and option names that are not
+ * a valid index into that level's table (index 0 is never valid).
+ */
+ if (ltos_proto_opts[level].maxentries == 0 ||
+ optname <= 0 || optname >= (ltos_proto_opts[level].maxentries))
+ return (-ENOPROTOOPT);
+
+ /*
+ * Linux sets this option when it wants to send credentials over a
+ * socket. Currently we just ignore it to make Linux programs happy.
+ */
+ if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED))
+ return (0);
+
+
+ if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
+ /*
+ * TCP_CORK is a Linux-only option that instructs the TCP
+ * stack not to send out partial frames. Solaris doesn't
+ * include this option but some apps require it. So, we do
+ * our best to emulate the option by disabling TCP_NODELAY.
+ * If the app requests that we disable TCP_CORK, we just
+ * ignore it since enabling TCP_NODELAY may be
+ * overcompensating.
+ */
+ optname = TCP_NODELAY;
+ if (optlen != sizeof (int))
+ return (-EINVAL);
+ if (uucopy(optval, &internal_opt, sizeof (int)) != 0)
+ return (-errno);
+ if (internal_opt == 0)
+ return (0);
+ /*
+ * NOTE(review): the comment above speaks of disabling
+ * TCP_NODELAY, yet the value handed to setsockopt() below is 1;
+ * confirm which of the two is intended.
+ */
+ internal_opt = 1;
+ optval = &internal_opt;
+ } else {
+ /* Everything else goes through the per-level table */
+ optname = ltos_proto_opts[level].proto[optname];
+
+ if (optname == OPTNOTSUP)
+ return (-ENOPROTOOPT);
+ }
+
+ /* The socket level itself is numbered differently on Solaris */
+ if (level == LX_SOL_SOCKET)
+ level = SOL_SOCKET;
+
+ r = setsockopt(sockfd, level, optname, optval, optlen);
+
+ return ((r < 0) ? -errno : r);
+}
+
+/*
+ * Emulate the Linux getsockopt(2) socketcall subcommand.
+ *
+ *	args[0]	socket descriptor
+ *	args[1]	Linux protocol level
+ *	args[2]	Linux option name
+ *	args[3]	buffer receiving the option value
+ *	args[4]	pointer to the value-result buffer length
+ *
+ * SO_PASSCRED and TCP_CORK are always reported as unset, SO_PEERCRED is
+ * synthesized from getpeerucred(), and everything else is translated through
+ * ltos_proto_opts[] and passed to the native getsockopt().  Returns 0 on
+ * success or a negated errno on failure.
+ */
+static int
+lx_getsockopt(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	int level = (int)args[1];
+	int optname = (int)args[2];
+	void *optval = (void *)args[3];
+	int *optlenp = (int *)args[4];
+	int r;
+
+	lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname,
+	    optval, optlenp);
+
+	/*
+	 * According to the Linux man page, a NULL optval should indicate
+	 * (as in Solaris) that no return value is expected.  Instead, it
+	 * actually triggers an EFAULT error.
+	 */
+	if (optval == NULL)
+		return (-EFAULT);
+
+	/*
+	 * Do a table lookup of the Solaris equivalent of the given option
+	 */
+	if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE)
+		return (-EOPNOTSUPP);
+
+	if (ltos_proto_opts[level].maxentries == 0 ||
+	    optname <= 0 || optname >= (ltos_proto_opts[level].maxentries))
+		return (-ENOPROTOOPT);
+
+	if (((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) ||
+	    ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK))) {
+		/*
+		 * Linux sets LX_SO_PASSCRED when it wants to send credentials
+		 * over a socket.  Since we do not support it, it is never set
+		 * and we return 0.
+		 *
+		 * We don't support TCP_CORK but some apps rely on it.  So,
+		 * rather than return an error we just return 0.  This
+		 * isn't exactly a lie, since this option really isn't set,
+		 * but it's not the whole truth either.  Fortunately, we
+		 * aren't under oath.
+		 */
+		r = 0;
+		if (uucopy(&r, optval, sizeof (int)) != 0)
+			return (-errno);
+		r = sizeof (int);
+		if (uucopy(&r, optlenp, sizeof (int)) != 0)
+			return (-errno);
+		return (0);
+	}
+	if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) {
+		struct lx_ucred lx_ucred;
+		ucred_t *ucp;
+
+		/*
+		 * We don't support SO_PEERCRED, but we do have equivalent
+		 * functionality in getpeerucred() so invoke that here.
+		 */
+
+		/*
+		 * Verify there's going to be enough room for the results.
+		 * The length is a signed int supplied by the caller; reject
+		 * negative values explicitly, since comparing them against
+		 * sizeof would promote them to a huge unsigned value and let
+		 * them slip past the size check.
+		 */
+		if (uucopy(optlenp, &r, sizeof (int)) != 0)
+			return (-errno);
+		if (r < 0)
+			return (-EINVAL);
+		if ((size_t)r < sizeof (struct lx_ucred))
+			return (-EOVERFLOW);
+
+		/*
+		 * We allocate a ucred_t ourselves rather than allow
+		 * getpeerucred() to do it for us because getpeerucred()
+		 * uses malloc(3C) and we'd rather use SAFE_ALLOCA().
+		 */
+		if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL)
+			return (-ENOMEM);
+
+		/* Get the credential for the remote end of this socket. */
+		if (getpeerucred(sockfd, &ucp) != 0)
+			return (-errno);
+		if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) ||
+		    ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) ||
+		    ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) {
+			return (-errno);
+		}
+
+		/* Copy out the results. */
+		if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0)
+			return (-errno);
+		r = sizeof (lx_ucred);
+		if ((uucopy(&r, optlenp, sizeof (int))) != 0)
+			return (-errno);
+		return (0);
+	}
+
+	optname = ltos_proto_opts[level].proto[optname];
+
+	if (optname == OPTNOTSUP)
+		return (-ENOPROTOOPT);
+
+	/* The socket level constant differs between Linux and Solaris. */
+	if (level == LX_SOL_SOCKET)
+		level = SOL_SOCKET;
+
+	r = getsockopt(sockfd, level, optname, optval, optlenp);
+
+	return ((r < 0) ? -errno : r);
+}
+
+/*
+ * libc routines that issue these system calls. We bypass the libsocket
+ * wrappers since they explicitly turn off the MSG_XPG4_2 flag we need for
+ * Linux compatibility.
+ */
+extern int _so_sendmsg();
+extern int _so_recvmsg();
+
+/*
+ * Emulate the Linux sendmsg(2) socketcall subcommand.
+ *
+ *	args[0]	socket descriptor
+ *	args[1]	pointer to the caller's struct lx_msghdr
+ *	args[2]	Linux MSG_* flags
+ *
+ * Any control messages are copied into local storage and converted from
+ * Linux to Solaris form before the message is sent.  Returns the value from
+ * _so_sendmsg() on success or a negated errno on failure (ENOTCONN is
+ * reported as EPIPE).
+ */
+static int
+lx_sendmsg(ulong_t *args)
+{
+	int fd = (int)args[0];
+	int flags = (int)args[2];
+	int suppress_sigpipe = flags & LX_MSG_NOSIGNAL;
+	struct lx_msghdr hdr;
+	struct cmsghdr *ctl;
+	struct sigaction ignore_act, saved_act;
+	int rv;
+
+	lx_debug("\tsendmsg(%d, 0x%p, 0x%x)", fd, (void *)args[1], flags);
+
+	flags = convert_sockflags(flags);
+
+	if (uucopy((void *)args[1], &hdr, sizeof (hdr)) != 0)
+		return (-errno);
+
+	/*
+	 * Control messages, when present, are pulled into our own buffer so
+	 * they can be rewritten in Solaris format without touching the
+	 * caller's copy.
+	 */
+	if (hdr.msg_control != NULL) {
+		ctl = NULL;
+		if (hdr.msg_controllen != 0) {
+			ctl = SAFE_ALLOCA(hdr.msg_controllen);
+			if (ctl == NULL)
+				return (-EINVAL);
+		}
+		if (uucopy(hdr.msg_control, ctl, hdr.msg_controllen) != 0)
+			return (-errno);
+		hdr.msg_control = ctl;
+		rv = convert_cmsgs(LX_TO_SOL, &hdr, "sendmsg()");
+		if (rv != 0)
+			return (-rv);
+	}
+
+	/*
+	 * LX_MSG_NOSIGNAL asks that no SIGPIPE be raised when the peer has
+	 * already closed the socket.  SIGPIPE is directed at the thread that
+	 * performed the write, so we get the same effect by ignoring the
+	 * signal for the duration of the call and then restoring the
+	 * previous disposition.
+	 */
+	if (suppress_sigpipe) {
+		ignore_act.sa_handler = SIG_IGN;
+		ignore_act.sa_flags = 0;
+		(void) sigemptyset(&ignore_act.sa_mask);
+
+		if (sigaction(SIGPIPE, &ignore_act, &saved_act) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "sendmsg()");
+	}
+
+	rv = _so_sendmsg(fd, (struct msghdr *)&hdr, flags | MSG_XPG4_2);
+
+	if (suppress_sigpipe) {
+		if (sigaction(SIGPIPE, &saved_act, NULL) < 0)
+			lx_err_fatal(
+			    gettext("%s: could not reset SIGPIPE handler to "
+			    "emulate LX_MSG_NOSIGNAL"), "sendmsg()");
+	}
+
+	if (rv >= 0)
+		return (rv);
+
+	/*
+	 * according to the man page and LTP, the expected error for an
+	 * unconnected socket is EPIPE.
+	 */
+	return ((errno == ENOTCONN) ? -EPIPE : -errno);
+}
+
+/*
+ * Emulate the Linux recvmsg(2) socketcall subcommand.
+ *
+ *	args[0]	socket descriptor
+ *	args[1]	pointer to the caller's struct lx_msghdr
+ *	args[2]	Linux MSG_* flags
+ *
+ * Control messages are received into a private buffer, converted from
+ * Solaris to Linux form, and then copied out to the caller's control
+ * buffer.  The msghdr itself is copied back so the caller sees the fields
+ * updated by the receive.  Returns the value from _so_recvmsg() on success
+ * or a negated errno on failure.
+ */
+static int
+lx_recvmsg(ulong_t *args)
+{
+	int sockfd = (int)args[0];
+	struct lx_msghdr msg;
+	struct lx_msghdr *msgp = (struct lx_msghdr *)args[1];
+	struct cmsghdr *cmsg = NULL;	/* caller's control buffer, if any */
+	int flags = (int)args[2];
+	int r, err;
+	int recv_errno;
+
+	int nosigpipe = flags & LX_MSG_NOSIGNAL;
+	struct sigaction newact, oact;
+
+	lx_debug("\trecvmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags);
+
+	flags = convert_sockflags(flags);
+
+	if ((uucopy(msgp, &msg, sizeof (msg))) != 0)
+		return (-errno);
+
+	/*
+	 * If we are expecting to have to convert any control messages,
+	 * then we should receive them into our address space instead of
+	 * the app's.  The app's pointer is remembered in cmsg so it can be
+	 * restored before the msghdr is handed back.
+	 */
+	if (msg.msg_control != NULL) {
+		cmsg = msg.msg_control;
+		if (msg.msg_controllen == 0) {
+			msg.msg_control = NULL;
+		} else {
+			msg.msg_control = SAFE_ALLOCA(msg.msg_controllen);
+			if (msg.msg_control == NULL)
+				return (-EINVAL);
+		}
+	}
+
+	/*
+	 * If nosigpipe is set, we want to emulate the Linux action of
+	 * not sending a SIGPIPE to the caller if the remote socket has
+	 * already been closed.
+	 *
+	 * As SIGPIPE is a directed signal sent only to the thread that
+	 * performed the action, we can emulate this behavior by momentarily
+	 * resetting the action for SIGPIPE to SIG_IGN, performing the socket
+	 * call, and resetting the action back to its previous value.
+	 */
+	if (nosigpipe) {
+		newact.sa_handler = SIG_IGN;
+		newact.sa_flags = 0;
+		(void) sigemptyset(&newact.sa_mask);
+
+		if (sigaction(SIGPIPE, &newact, &oact) < 0)
+			lx_err_fatal(gettext(
+			    "%s: could not ignore SIGPIPE to emulate "
+			    "LX_MSG_NOSIGNAL"), "recvmsg()");
+	}
+
+	r = _so_recvmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2);
+
+	/*
+	 * Capture errno right away; the calls made below are allowed to
+	 * modify it even when they succeed.
+	 */
+	recv_errno = errno;
+
+	if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0))
+		lx_err_fatal(
+		    gettext("%s: could not reset SIGPIPE handler to "
+		    "emulate LX_MSG_NOSIGNAL"), "recvmsg()");
+
+	if (r >= 0 && msg.msg_control != NULL) {
+		/*
+		 * If there are control messages bundled in this message,
+		 * we need to convert them from Solaris to Linux.
+		 */
+		if ((err = convert_cmsgs(SOL_TO_LX, &msg, "recvmsg()")) != 0)
+			return (-err);
+
+		if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0)
+			return (-errno);
+	}
+
+	/*
+	 * Put the caller's own control buffer pointer back before the
+	 * header is copied out.  Without this the caller would be handed a
+	 * pointer to our temporary buffer (or NULL) in place of the buffer
+	 * it supplied.
+	 */
+	msg.msg_control = cmsg;
+
+	/*
+	 * A handful of the values in the msghdr are set by the recvmsg()
+	 * call, so copy their values back to the caller.  Rather than iterate,
+	 * just copy the whole structure back.
+	 */
+	if (uucopy(&msg, msgp, sizeof (msg)) != 0)
+		return (-errno);
+
+	return ((r < 0) ? -recv_errno : r);
+}
+
+/*
+ * Entry point for the multiplexed Linux socketcall(2) system call.
+ *
+ * p1 is the 1-based subcommand number and p2 points at the subcommand's
+ * argument array in the caller's address space.  The arguments are copied in
+ * and the matching handler from sockfns[] is invoked; its return value is
+ * passed straight back.
+ */
+int
+lx_socketcall(uintptr_t p1, uintptr_t p2)
+{
+	int idx = (int)p1 - 1;	/* subcommands start at 1 - not 0 */
+	ulong_t argv[6];
+
+	if (idx < 0 || idx >= LX_RECVMSG)
+		return (-EINVAL);
+
+	/*
+	 * Pull in only as many arguments as this subcommand takes; a bad
+	 * pointer shows up as a uucopy() failure.
+	 */
+	if (uucopy((void *)p2, argv,
+	    sockfns[idx].s_nargs * sizeof (ulong_t)) != 0)
+		return (-errno);
+
+	return ((sockfns[idx].s_fn)(argv));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/stat.c b/usr/src/lib/brand/lx/lx_brand/common/stat.c
new file mode 100644
index 0000000000..7ba2312565
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/stat.c
@@ -0,0 +1,551 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * when a stat() is done for a non-device file, the devt returned
+ * via the stat is the devt of the device backing the filesystem which
+ * contains the file the stat was performed on. these devts are currently
+ * untranslated. if this turns out to cause problems in the future then
+ * we might want to add more devt translators to convert sd and cmdk
+ * devts into linux devts that normally represent disks.
+ *
+ * XXX this may not be the best place to have the devt translation code.
+ * devt translation will also be needed for /proc fs support, which will
+ * probably be done in the kernel. we may need to move this code into
+ * the kernel and add a brand syscall to do the translation for us. this
+ * will need to be worked out before putback.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/lx_types.h>
+#include <sys/lx_stat.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_audio.h>
+#include <sys/lx_fcntl.h>
+#include <sys/modctl.h>
+
+/* define _KERNEL to get the devt manipulation macros */
+#define _KERNEL
+#include <sys/sysmacros.h>
+#undef _KERNEL
+
+
+/*
+ * range of linux major numbers used for pts devices, and the resulting
+ * upper bound on the pts minor numbers pts_devt_translator() will accept
+ */
+#define LX_PTS_MAJOR_MIN 136
+#define LX_PTS_MAJOR_MAX 143
+#define LX_PTS_MAX \
+ ((LX_PTS_MAJOR_MAX - LX_PTS_MAJOR_MIN + 1) * LX_MINORMASK)
+
+/* linux major/minor pair reported by ptm_devt_translator() */
+#define LX_PTM_MAJOR 5
+#define LX_PTM_MINOR 2
+
+/* values for dt_type */
+#define DTT_INVALID 0
+#define DTT_LIST 1
+#define DTT_CUSTOM 2
+
+/* convenience macros for accessing the dt_minor union */
+#define dt_list dt_minor.dtm_list
+#define dt_custom dt_minor.dtm_custom
+
+/*
+ * structure used to define devt translators
+ *
+ * a minor_translator_t maps one solaris device node to a fixed linux
+ * major/minor pair. a devt_translator_t describes how every minor of one
+ * solaris driver is translated: either through a list of minor translators
+ * (DTT_LIST) or through a callback (DTT_CUSTOM).
+ */
+typedef struct minor_translator {
+ char *mt_path; /* solaris minor node path */
+ minor_t mt_minor; /* solaris minor node number */
+ int mt_lx_major; /* linux major node number */
+ int mt_lx_minor; /* linux minor node number */
+} minor_translator_t;
+
+typedef struct devt_translator {
+ char *dt_driver; /* solaris driver name */
+ major_t dt_major; /* solaris driver number */
+
+ /* dt_type dictates how we interpret dt_minor */
+ int dt_type;
+ union {
+ uintptr_t dtm_foo; /* required to compile */
+ minor_translator_t *dtm_list;
+ int (*dtm_custom)(dev_t, lx_dev_t *, int);
+ } dt_minor;
+} devt_translator_t;
+
+
+/*
+ * forward declarations
+ */
+static devt_translator_t devt_translators[];
+
+/*
+ * called to initialize the devt translation subsystem
+ *
+ * for every entry in devt_translators[] this looks up the solaris major
+ * number bound to the driver name and records it in dt_major. for
+ * DTT_LIST entries it additionally stat()s each listed device path and
+ * records the solaris minor number in mt_minor. lookup failures are
+ * reported via lx_err() but are not fatal; the function always returns 0.
+ */
+int
+lx_stat_init()
+{
+ minor_translator_t *mt;
+ struct stat st;
+ major_t major;
+ char *driver;
+ int i, j, ret;
+
+ for (i = 0; devt_translators[i].dt_driver != NULL; i++) {
+
+ assert(devt_translators[i].dt_type != DTT_INVALID);
+
+ /* figure out the major numbers for our devt translators */
+ driver = devt_translators[i].dt_driver;
+ ret = modctl(MODGETMAJBIND,
+ driver, strlen(driver) + 1, &major);
+ if (ret != 0) {
+ lx_err(gettext("%s%s) failed: %s\n"),
+ "lx_stat_init(): modctl(MODGETMAJBIND, ",
+ driver, strerror(errno));
+ lx_err(gettext("%s: %s translator disabled for: %s\n"),
+ "lx_stat_init()", "devt", driver);
+ /* store (major_t)-1 as the major for a disabled translator */
+ devt_translators[i].dt_major = (major_t)-1;
+ continue;
+ }
+
+ /* save the major node value */
+ devt_translators[i].dt_major = major;
+
+ /* if this translator doesn't use a list mapping we're done. */
+ if (devt_translators[i].dt_type != DTT_LIST)
+ continue;
+
+ /* for each device listed, lookup the minor node number */
+ mt = devt_translators[i].dt_list;
+ for (j = 0; mt[j].mt_path != NULL; j++) {
+
+ /* stat the device */
+ ret = stat(mt[j].mt_path, &st);
+ if (ret != 0) {
+ lx_err(gettext("%s%s) failed: %s\n"),
+ "lx_stat_init(): stat(",
+ mt[j].mt_path, strerror(errno));
+ lx_err(gettext(
+ "%s: %s translator disabled for: %s\n"),
+ "lx_stat_init()", "devt",
+ mt[j].mt_path);
+ /* the minor saved below is then taken from NODEV */
+ st.st_rdev = NODEV;
+ } else {
+ /* make sure the major node matches */
+ assert(getmajor(st.st_rdev) == major);
+ /*
+ * NOTE(review): this checks the mt_minor value
+ * currently in the table, not the value from
+ * st.st_rdev that is stored just below - confirm
+ * which one was intended.
+ */
+ assert(mt[j].mt_minor < LX_MINORMASK);
+ }
+
+ /* save the minor node value */
+ mt[j].mt_minor = getminor(st.st_rdev);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Custom devt translator for pts devices.
+ *
+ * Spreads the Solaris pts minor number across the Linux pts major numbers
+ * (LX_PTS_MAJOR_MIN and up) and stores the resulting Linux devt in *jdev.
+ * Returns 0 on success or -EOVERFLOW when the minor number is too large to
+ * be represented.  The error is negated because the value is propagated
+ * unchanged by s2l_devt() and stat_convert() as the system call result,
+ * where every other failure is a negated errno.
+ */
+static int
+/*ARGSUSED*/
+pts_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+	minor_t min = getminor(dev);
+	int lx_maj;
+	int lx_min;
+
+	/*
+	 * linux has a really small minor number name space (8 bits).
+	 * so if pts devices are limited to one major number you could
+	 * only have 256 of them.  linux addresses this issue by using
+	 * multiple major numbers for pts devices.
+	 */
+	if (min >= LX_PTS_MAX)
+		return (-EOVERFLOW);
+
+	/*
+	 * NOTE(review): the split below divides by LX_MINORMASK; if that
+	 * macro is a bit mask (e.g. 0xff) rather than a count of minors per
+	 * major, the divisor is one too small - confirm against its
+	 * definition.
+	 */
+	lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MINORMASK);
+	lx_min = min % LX_MINORMASK;
+
+	*jdev = LX_MAKEDEVICE(lx_maj, lx_min);
+	return (0);
+}
+
+
+/*
+ * Custom devt translator for the ptm driver: every device is reported as
+ * the single Linux devt (LX_PTM_MAJOR, LX_PTM_MINOR).  Always returns 0.
+ */
+static int
+/*ARGSUSED*/
+ptm_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+	lx_dev_t ptm_dev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR);
+
+	*jdev = ptm_dev;
+	return (0);
+}
+
+/*
+ * Custom devt translator for the lx audio driver.
+ *
+ * The minor number is taken from dev when no descriptor is available
+ * (fd == -1) and otherwise obtained from the driver via ioctl.  DSP nodes
+ * map to Linux (14, 3) and mixer nodes to (14, 0).  Returns 0 on success or
+ * -EINVAL if the minor cannot be determined or is not recognized.
+ */
+static int
+audio_devt_translator(dev_t dev, lx_dev_t *jdev, int fd)
+{
+	int native_minor;
+	int linux_minor;
+
+	if (fd != -1) {
+		/*
+		 * this is a cloning device so we have to ask the driver
+		 * what kind of minor node this is
+		 */
+		if (ioctl(fd, LXA_IOC_GETMINORNUM, &native_minor) < 0)
+			return (-EINVAL);
+	} else {
+		native_minor = getminor(dev);
+	}
+
+	if (native_minor == LXA_MINORNUM_DSP)
+		linux_minor = 3;
+	else if (native_minor == LXA_MINORNUM_MIXER)
+		linux_minor = 0;
+	else
+		return (-EINVAL);
+
+	*jdev = LX_MAKEDEVICE(14, linux_minor);
+	return (0);
+}
+
+/*
+ * Emit a debug trace of a Solaris devt and the Linux devt it was
+ * translated to.  Does nothing unless debugging is enabled.
+ */
+static void
+s2l_dev_report(dev_t dev, lx_dev_t jdev)
+{
+	if (lx_debug_enabled != 0) {
+		major_t sol_maj = getmajor(dev);
+		minor_t sol_min = getminor(dev);
+		int lnx_maj = LX_GETMAJOR(jdev);
+		int lnx_min = LX_GETMINOR(jdev);
+
+		lx_debug("\ttranslated devt [%d, %d] -> [%d, %d]",
+		    sol_maj, sol_min, lnx_maj, lnx_min);
+	}
+}
+
+/*
+ * translate a solaris devt into a linux devt, stored in *jdev.
+ *
+ * the solaris major number is looked up in devt_translators[]; a matching
+ * entry either maps the minor through its list or defers to its custom
+ * callback (which is passed fd). devices without a translator, or whose
+ * minor is not in the list, are passed through with their solaris
+ * major/minor values. returns 0 on success, or whatever non-zero value a
+ * custom translator returned.
+ */
+static int
+s2l_devt(dev_t dev, lx_dev_t *jdev, int fd)
+{
+ minor_translator_t *mt;
+ int i, j, err;
+ major_t maj = getmajor(dev);
+ minor_t min = getminor(dev);
+
+ /* look for a devt translator for this major number */
+ for (i = 0; devt_translators[i].dt_driver != NULL; i++) {
+ if (devt_translators[i].dt_major == maj)
+ break;
+ }
+ /* if the loop ran off the end, i indexes the NULL terminator entry */
+ if (devt_translators[i].dt_driver != NULL) {
+
+ /* try to translate the solaris devt to a linux devt */
+ switch (devt_translators[i].dt_type) {
+ case DTT_LIST:
+ mt = devt_translators[i].dt_list;
+ for (j = 0; mt[j].mt_path != NULL; j++) {
+ if (mt[j].mt_minor == min) {
+ assert(mt[j].mt_minor < LX_MINORMASK);
+
+ /* found a translation */
+ *jdev = LX_MAKEDEVICE(
+ mt[j].mt_lx_major,
+ mt[j].mt_lx_minor);
+ s2l_dev_report(dev, *jdev);
+ return (0);
+ }
+ }
+ /* no minor matched; fall out to the untranslated case */
+ break;
+
+ case DTT_CUSTOM:
+ err = devt_translators[i].dt_custom(dev, jdev, fd);
+ if (err == 0)
+ s2l_dev_report(dev, *jdev);
+ return (err);
+ /* not reached: the return above always exits first */
+ break;
+ }
+ }
+
+ /* we don't have a translator for this device */
+ *jdev = LX_MAKEDEVICE(maj, min);
+ return (0);
+}
+
+/*
+ * Convert a native struct stat into a struct lx_stat and copy it out to
+ * the user address lx_statp.  fd is forwarded to the devt translators.
+ *
+ * Returns 0 on success, -EOVERFLOW when a value does not fit the Linux
+ * structure, the error from devt translation, or a negated errno if the
+ * copy-out fails.  Note that s->st_size is zeroed in place for block
+ * devices.
+ */
+static int
+stat_convert(uintptr_t lx_statp, struct stat *s, int fd)
+{
+	struct lx_stat out;
+	lx_dev_t lx_devno, lx_rdevno;
+	int rv;
+
+	rv = s2l_devt(s->st_dev, &lx_devno, fd);
+	if (rv != 0)
+		return (rv);
+
+	rv = s2l_devt(s->st_rdev, &lx_rdevno, fd);
+	if (rv != 0)
+		return (rv);
+
+	/* Reject anything that would be truncated in the Linux layout. */
+	if (lx_devno > USHRT_MAX || lx_rdevno > USHRT_MAX)
+		return (-EOVERFLOW);
+	if (s->st_nlink > USHRT_MAX || s->st_size > ULONG_MAX)
+		return (-EOVERFLOW);
+
+	/* Linux seems to report a 0 st_size for all block devices */
+	if ((s->st_mode & S_IFMT) == S_IFBLK)
+		s->st_size = 0;
+
+	bzero(&out, sizeof (out));
+
+	out.st_dev = lx_devno;
+	out.st_rdev = lx_rdevno;
+	out.st_ino = s->st_ino;
+	out.st_mode = s->st_mode;
+	out.st_nlink = s->st_nlink;
+	out.st_uid = LX_UID32_TO_UID16(s->st_uid);
+	out.st_gid = LX_GID32_TO_GID16(s->st_gid);
+	out.st_size = s->st_size;
+	out.st_blksize = s->st_blksize;
+	out.st_blocks = s->st_blocks;
+
+	out.st_atime.ts_sec = s->st_atim.tv_sec;
+	out.st_atime.ts_nsec = s->st_atim.tv_nsec;
+	out.st_mtime.ts_sec = s->st_mtim.tv_sec;
+	out.st_mtime.ts_nsec = s->st_mtim.tv_nsec;
+	out.st_ctime.ts_sec = s->st_ctim.tv_sec;
+	out.st_ctime.ts_nsec = s->st_ctim.tv_nsec;
+
+	if (uucopy(&out, (void *)lx_statp, sizeof (out)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Convert a native struct stat64 into a struct lx_stat64 and copy it out
+ * to the user address lx_statp.  fd is forwarded to the devt translators.
+ *
+ * Returns 0 on success, the error from devt translation, or a negated
+ * errno if the copy-out fails.  Note that s->st_size is zeroed in place
+ * for block devices.
+ */
+static int
+stat64_convert(uintptr_t lx_statp, struct stat64 *s, int fd)
+{
+	struct lx_stat64 out;
+	lx_dev_t lx_devno, lx_rdevno;
+	int rv;
+
+	rv = s2l_devt(s->st_dev, &lx_devno, fd);
+	if (rv != 0)
+		return (rv);
+
+	rv = s2l_devt(s->st_rdev, &lx_rdevno, fd);
+	if (rv != 0)
+		return (rv);
+
+	/* Linux seems to report a 0 st_size for all block devices */
+	if ((s->st_mode & S_IFMT) == S_IFBLK)
+		s->st_size = 0;
+
+	bzero(&out, sizeof (out));
+
+	out.st_dev = lx_devno;
+	out.st_rdev = lx_rdevno;
+	/* The inode is stored twice: truncated and at full width. */
+	out.st_small_ino = (lx_ino_t)(s->st_ino & UINT_MAX);
+	out.st_ino = (lx_ino64_t)s->st_ino;
+	out.st_mode = s->st_mode;
+	out.st_nlink = s->st_nlink;
+	out.st_uid = s->st_uid;
+	out.st_gid = s->st_gid;
+	out.st_size = s->st_size;
+	out.st_blksize = s->st_blksize;
+	out.st_blocks = s->st_blocks;
+
+	out.st_atime.ts_sec = s->st_atim.tv_sec;
+	out.st_atime.ts_nsec = s->st_atim.tv_nsec;
+	out.st_mtime.ts_sec = s->st_mtim.tv_sec;
+	out.st_mtime.ts_nsec = s->st_mtim.tv_nsec;
+	out.st_ctime.ts_sec = s->st_ctim.tv_sec;
+	out.st_ctime.ts_nsec = s->st_ctim.tv_nsec;
+
+	if (uucopy(&out, (void *)lx_statp, sizeof (out)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux stat(2): p1 is the path, p2 the user address of the Linux stat
+ * buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_stat(uintptr_t p1, uintptr_t p2)
+{
+	struct stat native;
+	char *pathname = (char *)p1;
+
+	lx_debug("\tstat(%s, ...)", pathname);
+
+	if (stat(pathname, &native) != 0)
+		return (-errno);
+
+	return (stat_convert(p2, &native, -1));
+}
+
+
+/*
+ * Linux fstat(2): p1 is the descriptor, p2 the user address of the Linux
+ * stat buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_fstat(uintptr_t p1, uintptr_t p2)
+{
+	struct stat native;
+	char namebuf[MAXPATHLEN];
+	int fd = (int)p1;
+
+	/* The path lookup is only needed for the trace message. */
+	if (lx_debug_enabled != 0) {
+		char *name = lx_fd_to_path(fd, namebuf, sizeof (namebuf));
+
+		lx_debug("\tfstat(%d - %s, ...)", fd,
+		    (name == NULL) ? "?" : name);
+	}
+
+	if (fstat(fd, &native) != 0)
+		return (-errno);
+
+	return (stat_convert(p2, &native, fd));
+}
+
+
+/*
+ * Linux lstat(2): p1 is the path, p2 the user address of the Linux stat
+ * buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_lstat(uintptr_t p1, uintptr_t p2)
+{
+	struct stat native;
+	char *pathname = (char *)p1;
+
+	lx_debug("\tlstat(%s, ...)", pathname);
+
+	if (lstat(pathname, &native) != 0)
+		return (-errno);
+
+	return (stat_convert(p2, &native, -1));
+}
+
+/*
+ * Linux stat64(2): p1 is the path, p2 the user address of the Linux
+ * stat64 buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_stat64(uintptr_t p1, uintptr_t p2)
+{
+	struct stat64 native;
+	char *pathname = (char *)p1;
+
+	lx_debug("\tstat64(%s, ...)", pathname);
+
+	if (stat64(pathname, &native) != 0)
+		return (-errno);
+
+	return (stat64_convert(p2, &native, -1));
+}
+
+
+/*
+ * Linux fstat64(2): p1 is the descriptor, p2 the user address of the
+ * Linux stat64 buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_fstat64(uintptr_t p1, uintptr_t p2)
+{
+	struct stat64 native;
+	char namebuf[MAXPATHLEN];
+	int fd = (int)p1;
+
+	/* The path lookup is only needed for the trace message. */
+	if (lx_debug_enabled != 0) {
+		char *name = lx_fd_to_path(fd, namebuf, sizeof (namebuf));
+
+		lx_debug("\tfstat64(%d - %s, ...)", fd,
+		    (name == NULL) ? "?" : name);
+	}
+
+	if (fstat64(fd, &native) != 0)
+		return (-errno);
+
+	return (stat64_convert(p2, &native, fd));
+}
+
+/*
+ * Linux fstatat64(2): p1 is the directory descriptor, p2 the path, p3 the
+ * user address of the Linux stat64 buffer and p4 the Linux AT_* flags.
+ * Returns 0, -EINVAL for flags that cannot be translated, or a negated
+ * errno.
+ */
+int
+lx_fstatat64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+	struct stat64 native;
+	const char *pathname = (const char *)p2;
+	int dirfd = (int)p1;
+	int native_flag;
+
+	/* Map the Linux "current directory" marker to the native one. */
+	if (dirfd == LX_AT_FDCWD)
+		dirfd = AT_FDCWD;
+
+	native_flag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW);
+	if (native_flag < 0)
+		return (-EINVAL);
+
+	if (fstatat64(dirfd, pathname, &native, native_flag) != 0)
+		return (-errno);
+
+	return (stat64_convert(p3, &native, -1));
+}
+
+
+/*
+ * Linux lstat64(2): p1 is the path, p2 the user address of the Linux
+ * stat64 buffer.  Returns 0 or a negated errno.
+ */
+int
+lx_lstat64(uintptr_t p1, uintptr_t p2)
+{
+	struct stat64 native;
+	char *pathname = (char *)p1;
+
+	lx_debug("\tlstat64(%s, ...)", pathname);
+
+	if (lstat64(pathname, &native) != 0)
+		return (-errno);
+
+	return (stat64_convert(p2, &native, -1));
+}
+
+/*
+ * devt translator definitions
+ *
+ * these macros build table initializers. the solaris minor (mt_minor) and
+ * major (dt_major) fields are initialized to 0 here and filled in at
+ * runtime by lx_stat_init(). DEVT_TRANSLATOR stores its last argument
+ * through the first (uintptr_t) member of the dt_minor union, whether it
+ * is a minor translator list or a custom translator function.
+ */
+#define MINOR_TRANSLATOR(path, lx_major, lx_minor) \
+ { path, 0, lx_major, lx_minor }
+
+#define MINOR_TRANSLATOR_END \
+ { NULL, 0, 0, 0 }
+
+#define DEVT_TRANSLATOR(drv, flags, i) \
+ { drv, 0, flags, (uintptr_t)i }
+
+/*
+ * translators for devts
+ *
+ * each minor translator list names solaris device paths and the linux
+ * major/minor pair they should be reported as; each list ends with
+ * MINOR_TRANSLATOR_END.
+ */
+static minor_translator_t mtranslator_mm[] = {
+ MINOR_TRANSLATOR("/dev/null", 1, 3),
+ MINOR_TRANSLATOR("/dev/zero", 1, 5),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_random[] = {
+ MINOR_TRANSLATOR("/dev/random", 1, 8),
+ MINOR_TRANSLATOR("/dev/urandom", 1, 9),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_sy[] = {
+ MINOR_TRANSLATOR("/dev/tty", 5, 0),
+ MINOR_TRANSLATOR_END
+};
+static minor_translator_t mtranslator_zcons[] = {
+ MINOR_TRANSLATOR("/dev/console", 5, 1),
+ MINOR_TRANSLATOR_END
+};
+/* per-driver translator table; terminated by the entry with a NULL driver */
+static devt_translator_t devt_translators[] = {
+ DEVT_TRANSLATOR("mm", DTT_LIST, &mtranslator_mm),
+ DEVT_TRANSLATOR("random", DTT_LIST, &mtranslator_random),
+ DEVT_TRANSLATOR("sy", DTT_LIST, &mtranslator_sy),
+ DEVT_TRANSLATOR("zcons", DTT_LIST, &mtranslator_zcons),
+ DEVT_TRANSLATOR(LX_AUDIO_DRV, DTT_CUSTOM, audio_devt_translator),
+ DEVT_TRANSLATOR(LX_PTM_DRV, DTT_CUSTOM, ptm_devt_translator),
+ DEVT_TRANSLATOR("pts", DTT_CUSTOM, pts_devt_translator),
+ DEVT_TRANSLATOR(NULL, 0, 0)
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/statfs.c b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
new file mode 100644
index 0000000000..03e2563d70
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
@@ -0,0 +1,309 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/statvfs.h>
+#include <sys/param.h>
+
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_statfs.h>
+
+/*
+ * these defines must exist before we include regexp.h, see regexp(5)
+ *
+ * RE_SIZE is the size of each compiled expression buffer (lfp_re). the
+ * remaining macros read the pattern through the local pointer sp; RETURN
+ * yields NULL and ERROR yields the error code cast to a pointer, so
+ * callers of compile() treat NULL as success.
+ */
+#define RE_SIZE 1024
+#define INIT char *sp = instring;
+#define GETC() (*sp++)
+#define PEEKC() (*sp)
+#define UNGETC(c) (--sp)
+#define RETURN(c) return (NULL);
+#define ERROR(c) return ((char *)c);
+
+/*
+ * for regular expressions we're using regexp(5).
+ *
+ * we'd really prefer to use some other nicer regular expressions
+ * interfaces (like regcmp(3c), regcomp(3c), or re_comp(3c)) but we
+ * can't because all these other interfaces rely on the ability
+ * to allocate memory via libc malloc()/calloc() calls, which
+ * we can't really do here.
+ *
+ * we could optionally use regexpr(3gen) but we don't since the
+ * interfaces there are incredibly similar to the regexp(5)
+ * interfaces we're already using and we'd have the added
+ * requirement of linking against libgen.
+ *
+ * another option that was considered is fnmatch(3c) but the
+ * limited pattern expansion capability of this interface would
+ * force us to include more patterns to check against.
+ */
+#include <regexp.h>
+
+/*
+ * table mapping path patterns to linux filesystem magic numbers. lfp_path
+ * is the regular expression source, lfp_re holds its compiled form (filled
+ * in by lx_statfs_init()), lfp_magic is the value reported as f_type and
+ * lfp_magic_str is its name for debug output. the list ends with a NULL
+ * lfp_path.
+ */
+static struct lx_ftype_path {
+ char *lfp_path;
+ char lfp_re[RE_SIZE];
+ int lfp_magic;
+ char *lfp_magic_str;
+} ftype_path_list[] = {
+ { "^/dev/pts$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/[0-9][0-9]*$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { NULL, "",
+ 0, NULL }
+};
+
+/*
+ * table mapping solaris filesystem type names to linux filesystem magic
+ * numbers. lfn_name is compared against the native filesystem type name,
+ * lfn_magic is the value reported as f_type and lfn_magic_str is its name
+ * for debug output. the list ends with a NULL lfn_name.
+ *
+ * For lack of linux equivalents, we present lofs and zfs as being ufs.
+ */
+static struct lx_ftype_name {
+ const char *lfn_name;
+ int lfn_magic;
+ char *lfn_magic_str;
+} ftype_name_list[] = {
+ { "hsfs", LX_ISOFS_SUPER_MAGIC, "LX_ISOFS_SUPER_MAGIC" },
+ { "nfs", LX_NFS_SUPER_MAGIC, "LX_NFS_SUPER_MAGIC" },
+ { "pcfs", LX_MSDOS_SUPER_MAGIC, "LX_MSDOS_SUPER_MAGIC" },
+ { "lx_proc", LX_PROC_SUPER_MAGIC, "LX_PROC_SUPER_MAGIC" },
+ { "ufs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { "lofs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { "zfs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { NULL, 0, NULL }
+};
+
+/*
+ * Compile every path pattern in ftype_path_list[] into its lfp_re buffer.
+ * Returns 0 when all patterns compile and 1 as soon as one fails.
+ */
+int
+lx_statfs_init()
+{
+	struct lx_ftype_path *fp;
+
+	for (fp = ftype_path_list; fp->lfp_path != NULL; fp++) {
+		/* compile() hands back NULL on success. */
+		char *failed = compile(fp->lfp_path, fp->lfp_re,
+		    fp->lfp_re + RE_SIZE, '\0');
+
+		if (failed != NULL) {
+			lx_debug("lx_statfs_init compile(\"%s\") failed",
+			    fp->lfp_path);
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Work out the Linux f_type magic number for a filesystem.
+ *
+ * When path is non-NULL it is copied in and matched against the compiled
+ * patterns in ftype_path_list[]; the first match wins.  Otherwise (or when
+ * no pattern matches) the native filesystem type name is looked up in
+ * ftype_name_list[].  Returns the magic number, 0 if the type is unknown,
+ * or a negated errno if the path cannot be copied.
+ */
+static int
+stol_type(const char *path, const char *name)
+{
+	struct lx_ftype_path *fp;
+	struct lx_ftype_name *fn;
+
+	lx_debug("\tstol_type(\"%s\", \"%s\")\n", path == NULL ? "NULL" : path,
+	    name == NULL ? "NULL" : name);
+
+	if (path != NULL) {
+		char userpath[MAXPATHLEN];
+
+		if (uucopystr(path, userpath, MAXPATHLEN) == -1)
+			return (-errno);
+
+		for (fp = ftype_path_list; fp->lfp_path != NULL; fp++) {
+			if (step(userpath, fp->lfp_re) != 0) {
+				/* got a match on the fs path */
+				lx_debug("\ttranslated f_type to 0x%x - %s",
+				    fp->lfp_magic, fp->lfp_magic_str);
+				return (fp->lfp_magic);
+			}
+		}
+	}
+
+	assert(name != NULL);
+	for (fn = ftype_name_list; fn->lfn_name != NULL; fn++) {
+		if (strcmp(name, fn->lfn_name) != 0)
+			continue;
+
+		/* got a match on the fs name */
+		lx_debug("\ttranslated f_type to 0x%x - %s",
+		    fn->lfn_magic, fn->lfn_magic_str);
+		return (fn->lfn_magic);
+	}
+
+	/* we don't know what the fs type is so just set it to 0 */
+	return (0);
+}
+
+/*
+ * Fill in a Linux statfs structure from a native statvfs structure.
+ *
+ * The two are close relatives; the notable difference is that Linux
+ * identifies the filesystem with a numeric 'f_type' where Solaris has the
+ * 'f_basetype' string, so the type is translated via stol_type().  Returns
+ * 0 on success or the negative error from stol_type().
+ */
+static int
+stol_statfs(const char *path, struct lx_statfs *l, struct statvfs *s)
+{
+	int magic = stol_type(path, s->f_basetype);
+
+	if (magic < 0)
+		return (magic);
+
+	l->f_type = magic;
+
+	/* block accounting */
+	l->f_bsize = s->f_bsize;
+	l->f_frsize = s->f_frsize;
+	l->f_blocks = s->f_blocks;
+	l->f_bfree = s->f_bfree;
+	l->f_bavail = s->f_bavail;
+
+	/* inode accounting */
+	l->f_files = s->f_files;
+	l->f_ffree = s->f_ffree;
+
+	l->f_fsid = s->f_fsid;
+	l->f_namelen = s->f_namemax;
+
+	bzero(&(l->f_spare), sizeof (l->f_spare));
+
+	return (0);
+}
+
+/*
+ * Fill in a Linux statfs64 structure from a native statvfs64 structure,
+ * translating the filesystem type via stol_type().  Returns 0 on success
+ * or the negative error from stol_type().
+ */
+static int
+stol_statfs64(const char *path, struct lx_statfs64 *l, struct statvfs64 *s)
+{
+	int magic = stol_type(path, s->f_basetype);
+
+	if (magic < 0)
+		return (magic);
+
+	l->f_type = magic;
+
+	/* block accounting */
+	l->f_bsize = s->f_bsize;
+	l->f_frsize = s->f_frsize;
+	l->f_blocks = s->f_blocks;
+	l->f_bfree = s->f_bfree;
+	l->f_bavail = s->f_bavail;
+
+	/* inode accounting */
+	l->f_files = s->f_files;
+	l->f_ffree = s->f_ffree;
+
+	l->f_fsid = s->f_fsid;
+	l->f_namelen = s->f_namemax;
+
+	bzero(&(l->f_spare), sizeof (l->f_spare));
+
+	return (0);
+}
+
+/*
+ * Linux statfs(2): p1 is the path and p2 the user address of the Linux
+ * statfs buffer.  The native statvfs() result is converted and copied out.
+ * Returns 0 on success or a negative error value.
+ */
+int
+lx_statfs(uintptr_t p1, uintptr_t p2)
+{
+	const char *path = (const char *)p1;
+	struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2;
+	struct statvfs vfs;
+	int err;
+
+	/*
+	 * Trace the call that is actually made below (statvfs, by path),
+	 * so it can be told apart from lx_fstatfs() in debug output.
+	 */
+	lx_debug("\tstatvfs(%s, 0x%p)", path, fs);
+	if (statvfs(path, &vfs) != 0)
+		return (-errno);
+
+	if ((err = stol_statfs(path, &lxfs, &vfs)) != 0)
+		return (err);
+
+	if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux fstatfs(2): p1 is the descriptor and p2 the user address of the
+ * Linux statfs buffer.  Returns 0 on success or a negative error value.
+ */
+int
+lx_fstatfs(uintptr_t p1, uintptr_t p2)
+{
+	int fd = (int)p1;
+	struct lx_statfs *ubuf = (struct lx_statfs *)p2;
+	struct lx_statfs lx_result;
+	struct statvfs native;
+	char namebuf[MAXPATHLEN];
+	char *name;
+	int rv;
+
+	lx_debug("\tfstatvfs(%d, 0x%p)", fd, ubuf);
+
+	if (fstatvfs(fd, &native) != 0)
+		return (-errno);
+
+	/* The descriptor's path is passed on for the type translation. */
+	name = lx_fd_to_path(fd, namebuf, sizeof (namebuf));
+
+	rv = stol_statfs(name, &lx_result, &native);
+	if (rv != 0)
+		return (rv);
+
+	if (uucopy(&lx_result, ubuf, sizeof (struct lx_statfs)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux statfs64(2): p1 is the path, p2 is only traced, and p3 is the user
+ * address of the Linux statfs64 buffer.  Returns 0 on success or a
+ * negative error value.
+ */
+/* ARGSUSED */
+int
+lx_statfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	const char *pathname = (const char *)p1;
+	struct lx_statfs64 *ubuf = (struct lx_statfs64 *)p3;
+	struct lx_statfs64 lx_result;
+	struct statvfs64 native;
+	int rv;
+
+	lx_debug("\tstatvfs64(%s, %d, 0x%p)", pathname, p2, ubuf);
+
+	if (statvfs64(pathname, &native) != 0)
+		return (-errno);
+
+	rv = stol_statfs64(pathname, &lx_result, &native);
+	if (rv != 0)
+		return (rv);
+
+	if (uucopy(&lx_result, ubuf, sizeof (struct lx_statfs64)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * Linux fstatfs64(2): p1 is the descriptor, p2 is only traced, and p3 is
+ * the user address of the Linux statfs64 buffer.  Returns 0 on success or
+ * a negative error value.
+ */
+/* ARGSUSED */
+int
+lx_fstatfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	struct lx_statfs64 *ubuf = (struct lx_statfs64 *)p3;
+	struct lx_statfs64 lx_result;
+	struct statvfs64 native;
+	char namebuf[MAXPATHLEN];
+	char *name;
+	int rv;
+
+	lx_debug("\tfstatvfs64(%d, %d, 0x%p)", fd, p2, ubuf);
+
+	if (fstatvfs64(fd, &native) != 0)
+		return (-errno);
+
+	/* The descriptor's path is passed on for the type translation. */
+	name = lx_fd_to_path(fd, namebuf, sizeof (namebuf));
+
+	rv = stol_statfs64(name, &lx_result, &native);
+	if (rv != 0)
+		return (rv);
+
+	if (uucopy(&lx_result, ubuf, sizeof (struct lx_statfs64)) != 0)
+		return (-errno);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
new file mode 100644
index 0000000000..1cf4ca3ac1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
@@ -0,0 +1,138 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+
+/*
+ * sysctl() implementation. The full set of possible values is incredibly
+ * large; we only implement the bare minimum here, namely basic kernel
+ * information.
+ *
+ * For the moment, we also print out debugging messages if the application
+ * attempts to write or access any other values, so we can tell if we are not
+ * supporting something we should be.
+ */
+
+struct lx_sysctl_args {
+ int *name; /* user pointer to the name vector; lx_sysctl() reads two ints */
+ int nlen; /* number of names; lx_sysctl() only accepts 2 */
+ void *oldval; /* user buffer for the current value; may be NULL */
+ size_t *oldlenp; /* in: size of oldval; out: length of the value stored */
+ void *newval; /* must be NULL: lx_sysctl() rejects writes with EPERM */
+ size_t newlen; /* must be 0: lx_sysctl() rejects writes with EPERM */
+};
+
+#define LX_CTL_KERN 1
+
+#define LX_KERN_OSTYPE 1
+#define LX_KERN_OSRELEASE 2
+#define LX_KERN_OSREV 3
+#define LX_KERN_VERSION 4
+
+/*
+ * Emulate the Linux sysctl() call for the [ CTL_KERN, KERN_* ] string
+ * values that are supported here (ostype, osrelease and version).
+ *
+ * raw is the user address of a struct lx_sysctl_args.  On success the
+ * requested string, truncated to fit the caller's buffer if necessary, is
+ * stored in args.oldval together with its terminating NUL, and the string
+ * length (excluding the NUL) is stored through args.oldlenp.
+ *
+ * Returns 0 on success or a negated errno: EFAULT for bad user pointers or
+ * a zero-sized buffer, ENOTDIR for unsupported names, EPERM for any
+ * attempt to write a value, ENOMEM if the scratch buffer cannot be
+ * allocated.
+ */
+int
+lx_sysctl(uintptr_t raw)
+{
+	struct lx_sysctl_args args;
+	int name[2];
+	size_t oldlen;
+	char *namebuf;
+
+	if (uucopy((void *)raw, &args, sizeof (args)) < 0)
+		return (-EFAULT);
+
+	/*
+	 * We only allow [ CTL_KERN, KERN_* ] pairs, so reject anything that
+	 * doesn't have exactly two values starting with LX_CTL_KERN.
+	 */
+	if (args.nlen != 2)
+		return (-ENOTDIR);
+
+	if (uucopy(args.name, name, sizeof (name)) < 0)
+		return (-EFAULT);
+
+	if (name[0] != LX_CTL_KERN) {
+		lx_debug("sysctl: read of [%d, %d] unsupported",
+		    name[0], name[1]);
+		return (-ENOTDIR);
+	}
+
+	/* We don't support writing new sysctl values. */
+	if ((args.newval != NULL) || (args.newlen != 0)) {
+		lx_debug("sysctl: write of [%d, %d] unsupported",
+		    name[0], name[1]);
+		return (-EPERM);
+	}
+
+	/*
+	 * It may seem silly, but passing in a NULL oldval pointer and not
+	 * writing any new values is a perfectly legal thing to do and should
+	 * succeed.
+	 */
+	if (args.oldval == NULL)
+		return (0);
+
+	/*
+	 * Likewise, Linux specifies that setting a non-NULL oldval but a
+	 * zero *oldlenp should result in an errno of EFAULT.
+	 */
+	if ((uucopy(args.oldlenp, &oldlen, sizeof (oldlen)) < 0) ||
+	    (oldlen == 0))
+		return (-EFAULT);
+
+	namebuf = SAFE_ALLOCA(oldlen);
+	if (namebuf == NULL)
+		return (-ENOMEM);
+
+	switch (name[1]) {
+	case LX_KERN_OSTYPE:
+		(void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen);
+		break;
+	case LX_KERN_OSRELEASE:
+		(void) strlcpy(namebuf, lx_release, oldlen);
+		break;
+	case LX_KERN_VERSION:
+		(void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen);
+		break;
+	default:
+		lx_debug("sysctl: read of [CTL_KERN, %d] unsupported", name[1]);
+		return (-ENOTDIR);
+	}
+
+	/*
+	 * strlcpy() always NUL-terminates within the oldlen bytes it was
+	 * given, so strlen(namebuf) + 1 can never exceed the size of the
+	 * caller's buffer.  Copy the terminator out as well so the caller
+	 * receives a proper C string, but report only the string length.
+	 */
+	oldlen = strlen(namebuf);
+
+	if ((uucopy(namebuf, args.oldval, oldlen + 1) < 0) ||
+	    (uucopy(&oldlen, args.oldlenp, sizeof (oldlen)) < 0))
+		return (-EFAULT);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
new file mode 100644
index 0000000000..62efcdbe3d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
@@ -0,0 +1,893 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <unistd.h>
+#include <strings.h>
+#include <rctl.h>
+#include <alloca.h>
+#include <values.h>
+#include <sys/syscall.h>
+#include <sys/msg.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_types.h>
+#include <sys/lx_sysv_ipc.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#define SLOT_SEM 0
+#define SLOT_SHM 1
+#define SLOT_MSG 2
+
+/*
+ * Look up the first value of the named resource control, using rblk as
+ * the scratch block.  Returns the value, -errno if getrctl() fails, or
+ * -EOVERFLOW if the value is larger than MAXINT.
+ */
+static int
+get_rctlval(rctlblk_t *rblk, char *name)
+{
+	rctl_qty_t qty;
+
+	if (getrctl(name, NULL, rblk, RCTL_FIRST) == -1)
+		return (-errno);
+
+	qty = rctlblk_get_value(rblk);
+
+	return ((qty > MAXINT) ? -EOVERFLOW : (int)qty);
+}
+
+/*
+ * Translate a slot number into the IPC identifier stored at that position
+ * in the list returned by semids(), shmids() or msgids(), as selected by
+ * type (SLOT_SEM, SLOT_SHM or SLOT_MSG).
+ *
+ * The id list is fetched in a loop: the first call passes no buffer and
+ * only learns how many ids exist; a buffer of that size is then allocated
+ * and the call repeated until the reported count fits in the buffer.
+ *
+ * Returns the identifier on success, or a negated errno: EINVAL if the
+ * type is unknown, there are no ids at all, or slot does not index a
+ * valid entry; ENOMEM if the buffer cannot be allocated; otherwise the
+ * errno of the failing *ids() call.
+ */
+static int
+slot_to_id(int type, int slot)
+{
+	uint_t nids, max;
+	int *idbuf = NULL;
+	int r = 0;
+
+	/* A negative slot can never index the id array. */
+	if (slot < 0)
+		return (-EINVAL);
+
+	nids = 0;
+	for (;;) {
+		switch (type) {
+		case SLOT_SEM:
+			r = semids(idbuf, nids, &max);
+			break;
+		case SLOT_SHM:
+			r = shmids(idbuf, nids, &max);
+			break;
+		case SLOT_MSG:
+			r = msgids(idbuf, nids, &max);
+			break;
+		default:
+			/* Without this, max would be used uninitialised. */
+			return (-EINVAL);
+		}
+
+		if (r < 0)
+			return (-errno);
+
+		if (max == 0)
+			return (-EINVAL);
+
+		/*
+		 * The slot comes from the caller, so make sure it refers
+		 * to one of the max entries before idbuf is indexed.
+		 */
+		if ((uint_t)slot >= max)
+			return (-EINVAL);
+
+		if (max <= nids)
+			return (idbuf[slot]);
+
+		nids = max;
+		if ((idbuf = (int *)SAFE_ALLOCA(sizeof (int) * nids)) == NULL)
+			return (-ENOMEM);
+	}
+}
+
+/*
+ * Semaphore operations.
+ */
+/*
+ * semget() emulation: keep the permission bits of semflg, translate the
+ * Linux create/exclusive flags to their native counterparts and call
+ * semget().  Returns the semaphore id or -errno.
+ */
+static int
+lx_semget(key_t key, int nsems, int semflg)
+{
+	int native_flg = semflg & S_IAMB;
+	int id;
+
+	lx_debug("\nsemget(%d, %d, %d)\n", key, nsems, semflg);
+
+	if (semflg & LX_IPC_CREAT)
+		native_flg |= IPC_CREAT;
+	if (semflg & LX_IPC_EXCL)
+		native_flg |= IPC_EXCL;
+
+	id = semget(key, nsems, native_flg);
+	if (id < 0)
+		return (-errno);
+
+	return (id);
+}
+
+/*
+ * semop() emulation.  A request with no operations is rejected with
+ * -EINVAL before semop() is called; otherwise the result of semop() is
+ * returned, with failures reported as -errno.
+ */
+static int
+lx_semop(int semid, struct sembuf *sops, size_t nsops)
+{
+	int rc;
+
+	lx_debug("\nsemop(%d, 0x%p, %u)\n", semid, sops, nsops);
+
+	if (nsops == 0)
+		return (-EINVAL);
+
+	rc = semop(semid, sops, nsops);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * semctl(IPC_SET) emulation: read the Linux semid_ds from buf and apply
+ * its owner uid, gid and mode to the semaphore set.  Returns the result of
+ * semctl(), with failures reported as -errno.
+ */
+static int
+lx_semctl_ipcset(int semid, void *buf)
+{
+	struct lx_semid_ds lx_ds;
+	struct semid_ds native;
+	int rc;
+
+	if (uucopy(buf, &lx_ds, sizeof (lx_ds)))
+		return (-errno);
+
+	/* Only the three ownership/permission fields are passed on. */
+	bzero(&native, sizeof (native));
+	native.sem_perm.uid = lx_ds.sem_perm.uid;
+	native.sem_perm.gid = lx_ds.sem_perm.gid;
+	native.sem_perm.mode = lx_ds.sem_perm.mode;
+
+	rc = semctl(semid, 0, IPC_SET, &native);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * semctl(IPC_STAT) emulation: fetch the native semid_ds, convert it to the
+ * Linux layout and copy it out to buf.  Returns 0 or -errno.
+ */
+static int
+lx_semctl_ipcstat(int semid, void *buf)
+{
+	struct semid_ds native;
+	struct lx_semid_ds lx_ds;
+
+	if (semctl(semid, 0, IPC_STAT, &native) != 0)
+		return (-errno);
+
+	bzero(&lx_ds, sizeof (lx_ds));
+
+	/* Ownership and identity. */
+	lx_ds.sem_perm.key = native.sem_perm.key;
+	lx_ds.sem_perm.seq = native.sem_perm.seq;
+	lx_ds.sem_perm.uid = native.sem_perm.uid;
+	lx_ds.sem_perm.gid = native.sem_perm.gid;
+	lx_ds.sem_perm.cuid = native.sem_perm.cuid;
+	lx_ds.sem_perm.cgid = native.sem_perm.cgid;
+
+	/* Linux only uses the bottom 9 bits */
+	lx_ds.sem_perm.mode = native.sem_perm.mode & S_IAMB;
+
+	/* Timestamps and set size. */
+	lx_ds.sem_otime = native.sem_otime;
+	lx_ds.sem_ctime = native.sem_ctime;
+	lx_ds.sem_nsems = native.sem_nsems;
+
+	return (uucopy(&lx_ds, buf, sizeof (lx_ds)) ? -errno : 0);
+}
+
+/*
+ * semctl(IPC_INFO / SEM_INFO) emulation: build a Linux seminfo structure
+ * from the semaphore resource controls plus a handful of made-up values
+ * and copy it out to buf.  Returns the id count reported by semids() on
+ * success, or a negative error value.
+ */
+static int
+lx_semctl_ipcinfo(void *buf)
+{
+	struct lx_seminfo info;
+	rctlblk_t *blk;
+	int blksz;
+	uint_t inuse;
+	int scratch;
+
+	blksz = rctlblk_size();
+	blk = (rctlblk_t *)SAFE_ALLOCA(blksz);
+	if (blk == NULL)
+		return (-ENOMEM);
+
+	bzero(&info, sizeof (info));
+
+	/* Limits that map directly onto resource controls. */
+	info.semmni = get_rctlval(blk, "project.max-sem-ids");
+	if (info.semmni < 0)
+		return (info.semmni);
+
+	info.semmsl = get_rctlval(blk, "process.max-sem-nsems");
+	if (info.semmsl < 0)
+		return (info.semmsl);
+
+	info.semopm = get_rctlval(blk, "process.max-sem-ops");
+	if (info.semopm < 0)
+		return (info.semopm);
+
+	/*
+	 * No resource controls exist for the remaining fields.  They are
+	 * filled in following the formulas behind the defaults in the Linux
+	 * header file, so the invented numbers are at least consistent with
+	 * one another.
+	 */
+	info.semmap = info.semmni;
+	info.semmns = info.semmni * info.semmsl;
+	info.semmnu = INT_MAX;
+	info.semume = INT_MAX;
+	info.semvmx = LX_SEMVMX;
+
+	if (semids(&scratch, 0, &inuse) < 0)
+		return (-errno);
+
+	info.semusz = inuse;
+	info.semaem = INT_MAX;
+
+	if (uucopy(&info, buf, sizeof (info)) != 0)
+		return (-errno);
+
+	return (inuse);
+}
+
+/*
+ * semctl(SEM_STAT) emulation: resolve slot to a semaphore id, then report
+ * that set's status through lx_semctl_ipcstat().  Returns the semaphore id
+ * on success or a negative error value.
+ */
+static int
+lx_semctl_semstat(int slot, void *buf)
+{
+	int id, rc;
+
+	id = slot_to_id(SLOT_SEM, slot);
+	if (id < 0)
+		return (id);
+
+	rc = lx_semctl_ipcstat(id, buf);
+	if (rc < 0)
+		return (rc);
+
+	return (id);
+}
+
+/*
+ * semctl(SETALL) emulation.  Every value in the caller's array is checked
+ * against LX_SEMVMX before the array is handed to semctl(); a value above
+ * that limit fails the call with -ERANGE.
+ */
+static int
+lx_semctl_setall(int semid, union lx_semun *arg)
+{
+	struct semid_ds ds;
+	ushort_t *copy;
+	int idx, nbytes, rc;
+
+	/* The set's size tells us how many values the caller supplied. */
+	if (semctl(semid, 0, IPC_STAT, &ds) != 0)
+		return (-errno);
+
+	/* Take a private copy of the values so they can be inspected. */
+	nbytes = ds.sem_nsems * sizeof (ushort_t);
+	copy = SAFE_ALLOCA(nbytes);
+	if (copy == NULL)
+		return (-ENOMEM);
+
+	if (uucopy(arg->sems, copy, nbytes))
+		return (-errno);
+
+	for (idx = 0; idx < ds.sem_nsems; idx++) {
+		if (copy[idx] > LX_SEMVMX)
+			return (-ERANGE);
+	}
+
+	rc = semctl(semid, 0, SETALL, arg->sems);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * semctl() emulation.  The LX_IPC_64 bit is stripped from cmd and the
+ * remaining command is either passed to the native semctl() directly or
+ * handed to one of the lx_semctl_*() helpers.  Returns the command's
+ * result or a negative error value; unknown commands yield -EINVAL.
+ */
+static int
+lx_semctl(int semid, int semnum, int cmd, void *ptr)
+{
+	union lx_semun un;
+	int op = cmd & ~LX_IPC_64;
+	int res;
+
+	lx_debug("\nsemctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, ptr);
+
+	/*
+	 * ptr addresses a union.  The commands listed here take their
+	 * argument from inside that union, so it has to be copied in before
+	 * the command is dispatched; the others never look at it.
+	 */
+	switch (op) {
+	case LX_SETVAL:
+	case LX_SETALL:
+	case LX_GETALL:
+	case LX_IPC_SET:
+	case LX_IPC_STAT:
+	case LX_SEM_STAT:
+	case LX_IPC_INFO:
+	case LX_SEM_INFO:
+		if (uucopy(ptr, &un, sizeof (un)))
+			return (-errno);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Helpers already return a negative error value, so their result is
+	 * returned as is.  Direct semctl() calls fall out of the switch and
+	 * have a failure converted to -errno below.
+	 */
+	switch (op) {
+	case LX_GETVAL:
+		res = semctl(semid, semnum, GETVAL, NULL);
+		break;
+	case LX_SETVAL:
+		if (un.val > LX_SEMVMX)
+			return (-ERANGE);
+		res = semctl(semid, semnum, SETVAL, un.val);
+		break;
+	case LX_GETPID:
+		res = semctl(semid, semnum, GETPID, NULL);
+		break;
+	case LX_GETNCNT:
+		res = semctl(semid, semnum, GETNCNT, NULL);
+		break;
+	case LX_GETZCNT:
+		res = semctl(semid, semnum, GETZCNT, NULL);
+		break;
+	case LX_GETALL:
+		res = semctl(semid, semnum, GETALL, un.sems);
+		break;
+	case LX_IPC_RMID:
+		res = semctl(semid, semnum, IPC_RMID, NULL);
+		break;
+	case LX_SETALL:
+		return (lx_semctl_setall(semid, &un));
+	case LX_SEM_STAT:
+		return (lx_semctl_semstat(semid, un.semds));
+	case LX_IPC_STAT:
+		return (lx_semctl_ipcstat(semid, un.semds));
+	case LX_IPC_SET:
+		return (lx_semctl_ipcset(semid, un.semds));
+	case LX_IPC_INFO:
+	case LX_SEM_INFO:
+		return (lx_semctl_ipcinfo(un.semds));
+	default:
+		return (-EINVAL);
+	}
+
+	return ((res < 0) ? -errno : res);
+}
+
+/*
+ * msg operations.
+ */
+/*
+ * msgget() emulation: keep the permission bits of flag, translate the
+ * Linux create/exclusive flags to their native counterparts and call
+ * msgget().  Returns the queue id or -errno.
+ */
+static int
+lx_msgget(key_t key, int flag)
+{
+	int native_flg = flag & S_IAMB;
+	int id;
+
+	lx_debug("\tlx_msgget(%d, %d)\n", key, flag);
+
+	if (flag & LX_IPC_CREAT)
+		native_flg |= IPC_CREAT;
+	if (flag & LX_IPC_EXCL)
+		native_flg |= IPC_EXCL;
+
+	id = msgget(key, native_flg);
+	if (id < 0)
+		return (-errno);
+
+	return (id);
+}
+
+/*
+ * msgsnd() emulation.  A size that is negative when viewed as signed, or
+ * larger than LX_MSGMAX, is rejected with -EINVAL.  Of the Linux flags
+ * only LX_IPC_NOWAIT is translated.  Returns the result of msgsnd(), with
+ * failures reported as -errno.
+ */
+static int
+lx_msgsnd(int id, struct msgbuf *buf, size_t sz, int flag)
+{
+	int native_flg;
+	int rc;
+
+	lx_debug("\tlx_msgsnd(%d, 0x%p, %d, %d)\n", id, buf, sz, flag);
+
+	if ((ssize_t)sz < 0 || sz > LX_MSGMAX)
+		return (-EINVAL);
+
+	native_flg = (flag & LX_IPC_NOWAIT) ? IPC_NOWAIT : 0;
+
+	rc = msgsnd(id, buf, sz, native_flg);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * msgrcv() emulation.
+ *
+ * buf does not address the message itself: it addresses a two-member
+ * structure holding the message pointer and the requested message type,
+ * which is copied in first.  A size that is negative when viewed as signed
+ * is rejected with -EINVAL.  LX_MSG_NOERROR and LX_IPC_NOWAIT are
+ * translated to their native counterparts.
+ *
+ * Returns the result of msgrcv(), with failures reported as -errno.
+ */
+static int
+lx_msgrcv(int id, struct msgbuf *buf, size_t sz, int flag)
+{
+	int sol_flag = 0;
+	struct {
+		void *msgp;
+		long msgtype;
+	} args;
+	int r;
+
+	/*
+	 * Rather than passing 5 args into ipc(2) directly, glibc passes 4
+	 * args and uses the buf argument to point to a structure
+	 * containing two args: a pointer to the message and the message
+	 * type.
+	 */
+	if (uucopy(buf, &args, sizeof (args)))
+		return (-errno);
+
+	/*
+	 * One conversion per argument: the format used to carry an extra
+	 * %d with nothing to print, and paired %d with the long msgtype.
+	 */
+	lx_debug("\tlx_msgrcv(%d, 0x%p, %d, %ld, %d)\n",
+	    id, args.msgp, sz, args.msgtype, flag);
+
+	/*
+	 * Check for a negative sz parameter.
+	 *
+	 * Unlike msgsnd(2), the Linux man page does not specify that
+	 * msgrcv(2) should return EINVAL if (sz > MSGMAX), only if (sz < 0).
+	 */
+	if ((ssize_t)sz < 0)
+		return (-EINVAL);
+
+	if (flag & LX_MSG_NOERROR)
+		sol_flag |= MSG_NOERROR;
+	if (flag & LX_IPC_NOWAIT)
+		sol_flag |= IPC_NOWAIT;
+
+	r = msgrcv(id, args.msgp, sz, args.msgtype, sol_flag);
+	return (r < 0 ? -errno : r);
+}
+
+/*
+ * msgctl(IPC_STAT) emulation: fetch the native msqid_ds, convert it to the
+ * Linux layout and copy it out to buf.  Returns 0 or -errno.
+ */
+static int
+lx_msgctl_ipcstat(int msgid, void *buf)
+{
+	struct msqid_ds native;
+	struct lx_msqid_ds lx_ds;
+
+	if (msgctl(msgid, IPC_STAT, &native) < 0)
+		return (-errno);
+
+	bzero(&lx_ds, sizeof (lx_ds));
+
+	/* Ownership and identity. */
+	lx_ds.msg_perm.key = native.msg_perm.key;
+	lx_ds.msg_perm.seq = native.msg_perm.seq;
+	lx_ds.msg_perm.uid = native.msg_perm.uid;
+	lx_ds.msg_perm.gid = native.msg_perm.gid;
+	lx_ds.msg_perm.cuid = native.msg_perm.cuid;
+	lx_ds.msg_perm.cgid = native.msg_perm.cgid;
+
+	/* Linux only uses the bottom 9 bits */
+	lx_ds.msg_perm.mode = native.msg_perm.mode & S_IAMB;
+
+	/* Timestamps. */
+	lx_ds.msg_stime = native.msg_stime;
+	lx_ds.msg_rtime = native.msg_rtime;
+	lx_ds.msg_ctime = native.msg_ctime;
+
+	/* Queue usage and the last sender/receiver. */
+	lx_ds.msg_qbytes = native.msg_qbytes;
+	lx_ds.msg_cbytes = native.msg_cbytes;
+	lx_ds.msg_qnum = native.msg_qnum;
+	lx_ds.msg_lspid = native.msg_lspid;
+	lx_ds.msg_lrpid = native.msg_lrpid;
+
+	return (uucopy(&lx_ds, buf, sizeof (lx_ds)) ? -errno : 0);
+}
+
+/*
+ * msgctl(IPC_INFO / MSG_INFO) emulation: build a Linux msginfo structure
+ * and copy it out to buf.
+ *
+ * msgmni and msgmnb come from resource controls.  For IPC_INFO the other
+ * fields are derived from those limits and 0 is returned.  For any other
+ * command (MSG_INFO) msgpool carries the id count reported by msgids(),
+ * which is also the return value.  Errors are returned as negative values.
+ *
+ * cmd may still carry the LX_IPC_64 bit, so it is masked off before the
+ * comparison; an unmasked compare sends IPC_INFO requests made with that
+ * bit set down the MSG_INFO path.
+ */
+static int
+lx_msgctl_ipcinfo(int cmd, void *buf)
+{
+	struct lx_msginfo m;
+	rctlblk_t *rblk;
+	int idbuf, rblksz, maxmsgs;
+	long long pool, msgseg;
+	uint_t nids;
+	int rval;
+
+	rblksz = rctlblk_size();
+	if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+		return (-ENOMEM);
+
+	bzero(&m, sizeof (m));
+	if ((m.msgmni = get_rctlval(rblk, "project.max-msg-ids")) < 0)
+		return (m.msgmni);
+	if ((m.msgmnb = get_rctlval(rblk, "process.max-msg-qbytes")) < 0)
+		return (m.msgmnb);
+
+	/*
+	 * We don't have corresponding rctls for these fields. The values
+	 * are taken from the formulas used to derive the defaults listed
+	 * in the Linux header file. We're lying, but trying to be
+	 * coherent about it.
+	 *
+	 * msgmax has to be set before the IPC_INFO branch below, which uses
+	 * it to size msgpool; set any later, msgpool is computed from the
+	 * zero left behind by bzero().
+	 */
+	m.msgmax = m.msgmnb;
+	m.msgssz = 16;
+
+	if ((cmd & ~LX_IPC_64) == LX_IPC_INFO) {
+		if ((maxmsgs = get_rctlval(rblk,
+		    "process.max-msg-messages")) < 0)
+			return (maxmsgs);
+		m.msgtql = maxmsgs * m.msgmni;
+		m.msgmap = m.msgmnb;
+
+		/* Widen before multiplying and clamp to what an int holds. */
+		pool = (long long)m.msgmax * m.msgmnb;
+		m.msgpool = (pool > INT_MAX) ? INT_MAX : (int)pool;
+		rval = 0;
+	} else {
+		if (msgids(&idbuf, 0, &nids) < 0)
+			return (-errno);
+		m.msgpool = nids;
+
+		/*
+		 * For these fields, we can't even come up with a good fake
+		 * approximation. These are listed as 'obsolete' or
+		 * 'unused' in the header files, so hopefully nobody is
+		 * relying on them anyway.
+		 */
+		m.msgtql = INT_MAX;
+		m.msgmap = INT_MAX;
+		rval = nids;
+	}
+
+	/* Widened for the same reason: msgpool * 1024 may not fit an int. */
+	msgseg = ((long long)m.msgpool * 1024) / m.msgssz;
+	m.msgseg = (msgseg > 0xffff) ? 0xffff : (int)msgseg;
+
+	if (uucopy(&m, buf, sizeof (m)))
+		return (-errno);
+	return (rval);
+}
+
+/*
+ * msgctl(IPC_SET) emulation: read the Linux msqid_ds from buf and apply
+ * its owner uid, gid, mode and queue byte limit to the message queue.
+ * Returns the result of msgctl(), with failures reported as -errno.
+ */
+static int
+lx_msgctl_ipcset(int msgid, void *buf)
+{
+	struct lx_msqid_ds lx_ds;
+	struct msqid_ds native;
+	int rc;
+
+	if (uucopy(buf, &lx_ds, sizeof (lx_ds)))
+		return (-errno);
+
+	bzero(&native, sizeof (native));
+	native.msg_perm.uid = LX_UID16_TO_UID32(lx_ds.msg_perm.uid);
+	native.msg_perm.gid = LX_UID16_TO_UID32(lx_ds.msg_perm.gid);
+
+	/* Linux only uses the bottom 9 bits */
+	native.msg_perm.mode = lx_ds.msg_perm.mode & S_IAMB;
+	native.msg_qbytes = lx_ds.msg_qbytes;
+
+	rc = msgctl(msgid, IPC_SET, &native);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * msgctl(MSG_STAT) emulation: resolve slot to a message queue id, then
+ * report that queue's status through lx_msgctl_ipcstat().  Returns the
+ * queue id on success or a negative error value.
+ */
+static int
+lx_msgctl_msgstat(int slot, void *buf)
+{
+	int id, rc;
+
+	lx_debug("msgstat(%d, 0x%p)\n", slot, buf);
+
+	id = slot_to_id(SLOT_MSG, slot);
+	if (id < 0)
+		return (id);
+
+	rc = lx_msgctl_ipcstat(id, buf);
+	if (rc < 0)
+		return (rc);
+
+	return (id);
+}
+
+/*
+ * msgctl() emulation.  The LX_IPC_64 bit is stripped from cmd and the
+ * request is dispatched to the matching handler.  Note that the info
+ * handler receives the unmodified cmd.  Unknown commands yield -EINVAL.
+ */
+static int
+lx_msgctl(int msgid, int cmd, void *buf)
+{
+	int res;
+
+	lx_debug("\tlx_msgctl(%d, %d, 0x%p)\n", msgid, cmd, buf);
+
+	switch (cmd & ~LX_IPC_64) {
+	case LX_IPC_RMID:
+		res = msgctl(msgid, IPC_RMID, NULL);
+		return ((res < 0) ? -errno : res);
+
+	case LX_IPC_SET:
+		return (lx_msgctl_ipcset(msgid, buf));
+
+	case LX_IPC_STAT:
+		return (lx_msgctl_ipcstat(msgid, buf));
+
+	case LX_MSG_STAT:
+		return (lx_msgctl_msgstat(msgid, buf));
+
+	case LX_IPC_INFO:
+	case LX_MSG_INFO:
+		return (lx_msgctl_ipcinfo(cmd, buf));
+
+	default:
+		return (-EINVAL);
+	}
+}
+
+/*
+ * shm-related operations.
+ */
+/*
+ * shmget() emulation: keep the permission bits of flag, translate the
+ * Linux create/exclusive flags to their native counterparts and call
+ * shmget().  Returns the segment id or -errno.
+ */
+static int
+lx_shmget(key_t key, size_t size, int flag)
+{
+	int native_flg = flag & S_IAMB;
+	int id;
+
+	lx_debug("\tlx_shmget(%d, %d, %d)\n", key, size, flag);
+
+	if (flag & LX_IPC_CREAT)
+		native_flg |= IPC_CREAT;
+	if (flag & LX_IPC_EXCL)
+		native_flg |= IPC_EXCL;
+
+	id = shmget(key, size, native_flg);
+	if (id < 0)
+		return (-errno);
+
+	return (id);
+}
+
+/*
+ * shmat() emulation.
+ *
+ * LX_SHM_RDONLY and LX_SHM_RND are translated to their native
+ * counterparts.  LX_SHM_REMAP combined with a NULL address is rejected
+ * with -EINVAL.  The attach address is not the return value: it is
+ * stored through rval, which is written with uucopy().
+ *
+ * Returns 0 on success or -errno.
+ */
+static int
+lx_shmat(int shmid, void *addr, int flags, void **rval)
+{
+	int sol_flags;
+	void *ptr;
+
+	/*
+	 * One conversion per argument: the format used to carry a fourth
+	 * conversion with no matching argument.
+	 */
+	lx_debug("\tlx_shmat(%d, 0x%p, 0%o)\n", shmid, addr, flags);
+
+	sol_flags = 0;
+	if (flags & LX_SHM_RDONLY)
+		sol_flags |= SHM_RDONLY;
+	if (flags & LX_SHM_RND)
+		sol_flags |= SHM_RND;
+	if ((flags & LX_SHM_REMAP) && (addr == NULL))
+		return (-EINVAL);
+
+	ptr = shmat(shmid, addr, sol_flags);
+	if (ptr == (void *)-1)
+		return (-errno);
+	if (uucopy(&ptr, rval, sizeof (ptr)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * shmctl(IPC_INFO) emulation: build a Linux shminfo structure from the
+ * shared memory resource controls plus a few made-up values and copy it
+ * out to buf.  Returns 0 or a negative error value.
+ */
+static int
+lx_shmctl_ipcinfo(void *buf)
+{
+	struct lx_shminfo info;
+	rctlblk_t *blk;
+	int blksz;
+
+	blksz = rctlblk_size();
+	blk = (rctlblk_t *)SAFE_ALLOCA(blksz);
+	if (blk == NULL)
+		return (-ENOMEM);
+
+	bzero(&info, sizeof (info));
+
+	/* Limits that map directly onto resource controls. */
+	info.shmmni = get_rctlval(blk, "project.max-shm-ids");
+	if (info.shmmni < 0)
+		return (info.shmmni);
+
+	info.shmmax = get_rctlval(blk, "project.max-shm-memory");
+	if (info.shmmax < 0)
+		return (info.shmmax);
+
+	/*
+	 * No resource controls exist for the remaining fields.  They are
+	 * filled in following the formulas behind the defaults in the Linux
+	 * header file, so the invented numbers are at least consistent with
+	 * one another.
+	 */
+	info.shmmin = 1;
+	info.shmseg = INT_MAX;
+	info.shmall = info.shmmax / getpagesize();
+
+	return (uucopy(&info, buf, sizeof (info)) ? -errno : 0);
+}
+
+/*
+ * shmctl(IPC_STAT) emulation: fetch the native shmid_ds, convert it to the
+ * Linux layout and copy it out to buf.  Returns 0 or -errno.
+ */
+static int
+lx_shmctl_ipcstat(int shmid, void *buf)
+{
+	struct shmid_ds native;
+	struct lx_shmid_ds lx_ds;
+
+	if (shmctl(shmid, IPC_STAT, &native) != 0)
+		return (-errno);
+
+	bzero(&lx_ds, sizeof (lx_ds));
+
+	/* Ownership and identity. */
+	lx_ds.shm_perm.key = native.shm_perm.key;
+	lx_ds.shm_perm.seq = native.shm_perm.seq;
+	lx_ds.shm_perm.uid = native.shm_perm.uid;
+	lx_ds.shm_perm.gid = native.shm_perm.gid;
+	lx_ds.shm_perm.cuid = native.shm_perm.cuid;
+	lx_ds.shm_perm.cgid = native.shm_perm.cgid;
+
+	/* Permission bits, plus the locked flag when a lock count is held. */
+	lx_ds.shm_perm.mode = native.shm_perm.mode & S_IAMB;
+	if (native.shm_lkcnt > 0)
+		lx_ds.shm_perm.mode |= LX_SHM_LOCKED;
+
+	/* Size, timestamps, pids and attach count. */
+	lx_ds.shm_segsz = native.shm_segsz;
+	lx_ds.shm_atime = native.shm_atime;
+	lx_ds.shm_dtime = native.shm_dtime;
+	lx_ds.shm_ctime = native.shm_ctime;
+	lx_ds.shm_cpid = native.shm_cpid;
+	lx_ds.shm_lpid = native.shm_lpid;
+	lx_ds.shm_nattch = (ushort_t)native.shm_nattch;
+
+	return (uucopy(&lx_ds, buf, sizeof (lx_ds)) ? -errno : 0);
+}
+
+/*
+ * shmctl(IPC_SET) emulation: read the Linux shmid_ds from buf and apply
+ * its owner uid, gid and permission bits to the segment.  Returns the
+ * result of shmctl(), with failures reported as -errno.
+ */
+static int
+lx_shmctl_ipcset(int shmid, void *buf)
+{
+	struct lx_shmid_ds lx_ds;
+	struct shmid_ds native;
+	int rc;
+
+	if (uucopy(buf, &lx_ds, sizeof (lx_ds)))
+		return (-errno);
+
+	/* Only the three ownership/permission fields are passed on. */
+	bzero(&native, sizeof (native));
+	native.shm_perm.uid = lx_ds.shm_perm.uid;
+	native.shm_perm.gid = lx_ds.shm_perm.gid;
+	native.shm_perm.mode = lx_ds.shm_perm.mode & S_IAMB;
+
+	rc = shmctl(shmid, IPC_SET, &native);
+	if (rc < 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * shmctl(SHM_INFO) emulation.  Only used_ids is filled in, from the id
+ * count reported by shmids(); that is what ipcs needs, and the other
+ * fields are left zeroed because the data is not readily available.
+ * Returns the id count on success or -errno.
+ */
+static int
+lx_shmctl_shminfo(void *buf)
+{
+	struct lx_shm_info info;
+	uint_t inuse;
+	int scratch;
+
+	bzero(&info, sizeof (info));
+
+	if (shmids(&scratch, 0, &inuse) < 0)
+		return (-errno);
+
+	info.used_ids = inuse;
+
+	if (uucopy(&info, buf, sizeof (info)) != 0)
+		return (-errno);
+
+	return (inuse);
+}
+
+/*
+ * shmctl(SHM_STAT) emulation: resolve slot to a segment id, then report
+ * that segment's status through lx_shmctl_ipcstat().  Returns the segment
+ * id on success or a negative error value.
+ */
+static int
+lx_shmctl_shmstat(int slot, void *buf)
+{
+	int id, rc;
+
+	lx_debug("shmctl_shmstat(%d, 0x%p)\n", slot, buf);
+
+	id = slot_to_id(SLOT_SHM, slot);
+	if (id < 0)
+		return (id);
+
+	rc = lx_shmctl_ipcstat(id, buf);
+	if (rc < 0)
+		return (rc);
+
+	return (id);
+}
+
+/*
+ * shmctl() emulation.  The LX_IPC_64 bit is stripped from cmd and the
+ * request is either passed to the native shmctl() directly or handed to
+ * one of the lx_shmctl_*() helpers.  Returns the command's result or a
+ * negative error value; unknown commands yield -EINVAL.
+ */
+static int
+lx_shmctl(int shmid, int cmd, void *buf)
+{
+	int res;
+
+	lx_debug("\tlx_shmctl(%d, %d, 0x%p)\n", shmid, cmd, buf);
+
+	/*
+	 * Helpers already return a negative error value, so their result is
+	 * returned as is.  Direct shmctl() calls fall out of the switch and
+	 * have a failure converted to -errno below.
+	 */
+	switch (cmd & ~LX_IPC_64) {
+	case LX_IPC_RMID:
+		res = shmctl(shmid, IPC_RMID, NULL);
+		break;
+
+	case LX_SHM_LOCK:
+		res = shmctl(shmid, SHM_LOCK, NULL);
+		break;
+
+	case LX_SHM_UNLOCK:
+		res = shmctl(shmid, SHM_UNLOCK, NULL);
+		break;
+
+	case LX_IPC_SET:
+		return (lx_shmctl_ipcset(shmid, buf));
+
+	case LX_IPC_STAT:
+		return (lx_shmctl_ipcstat(shmid, buf));
+
+	case LX_IPC_INFO:
+		return (lx_shmctl_ipcinfo(buf));
+
+	case LX_SHM_INFO:
+		return (lx_shmctl_shminfo(buf));
+
+	case LX_SHM_STAT:
+		return (lx_shmctl_shmstat(shmid, buf));
+
+	default:
+		return (-EINVAL);
+	}
+
+	return ((res < 0) ? -errno : res);
+}
+
+/*
+ * Entry point for the Linux ipc(2) system call, through which glibc
+ * routes every SysV IPC operation.  cmd selects the operation and the
+ * remaining arguments are unpacked and forwarded to the matching
+ * message, semaphore or shared memory handler.  Unknown operations yield
+ * -EINVAL.
+ */
+int
+lx_ipc(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+    uintptr_t arg4)
+{
+	void *uptr = (void *)arg4;
+	int rc;
+
+	lx_debug("lx_ipc(%d, %d, %d, %d, 0x%p, %d)\n",
+	    cmd, arg1, arg2, arg3, uptr, arg4);
+
+	switch (cmd) {
+	/* Message queues. */
+	case LX_MSGGET:
+		return (lx_msgget((key_t)arg1, (int)arg2));
+	case LX_MSGSND:
+		return (lx_msgsnd((int)arg1, uptr, (size_t)arg2, (int)arg3));
+	case LX_MSGRCV:
+		return (lx_msgrcv((int)arg1, uptr, (size_t)arg2, (int)arg3));
+	case LX_MSGCTL:
+		return (lx_msgctl((int)arg1, (int)arg2, uptr));
+
+	/* Semaphores. */
+	case LX_SEMCTL:
+		return (lx_semctl((int)arg1, (size_t)arg2, (int)arg3, uptr));
+	case LX_SEMOP:
+		/*
+		 * 'struct sembuf' has the same layout on Linux and Solaris,
+		 * so the caller's array is handed over untouched.
+		 */
+		return (lx_semop((int)arg1, uptr, (size_t)arg2));
+	case LX_SEMGET:
+		return (lx_semget((int)arg1, (size_t)arg2, (int)arg3));
+
+	/* Shared memory. */
+	case LX_SHMAT:
+		return (lx_shmat((int)arg1, uptr, (size_t)arg2, (void *)arg3));
+	case LX_SHMDT:
+		rc = shmdt(uptr);
+		return ((rc < 0) ? -errno : rc);
+	case LX_SHMGET:
+		return (lx_shmget((int)arg1, (size_t)arg2, (int)arg3));
+	case LX_SHMCTL:
+		return (lx_shmctl((int)arg1, (int)arg2, uptr));
+
+	default:
+		return (-EINVAL);
+	}
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/time.c b/usr/src/lib/brand/lx/lx_brand/common/time.c
new file mode 100644
index 0000000000..16b883ec0a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/time.c
@@ -0,0 +1,184 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/times.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+
+/*
+ * time() - Not a straight passthrough: Linux reports EFAULT for a bad
+ * buffer, whereas Solaris documents the outcome as "undefined."
+ *
+ * (Currently Solaris' time(2) takes a segmentation fault, since it is
+ * just a libc wrapper over the time() syscall that dereferences the
+ * supplied pointer whenever it is non-zero.)
+ *
+ * The time is therefore fetched with a null argument and stored in the
+ * caller's buffer, if one was given, with uucopy().
+ */
+int
+lx_time(uintptr_t p1)
+{
+	time_t now = time((time_t *)0);
+
+	if (now == (time_t)-1)
+		return (-errno);
+
+	if (p1 != 0 && uucopy(&now, (time_t *)p1, sizeof (now)) != 0)
+		return (-errno);
+
+	return (now);
+}
+
+/*
+ * times() - Linux quietly skips the store when given a NULL buffer, but
+ * Solaris fails with EFAULT.  The values are therefore collected in a
+ * local structure and copied out only when a buffer was supplied.
+ */
+int
+lx_times(uintptr_t p1)
+{
+	struct tms local;
+	struct tms *ubuf = (struct tms *)p1;
+	clock_t ticks;
+
+	ticks = times(&local);
+	if (ticks == -1)
+		return (-errno);
+
+	if (ubuf != NULL && uucopy((void *)&local, ubuf, sizeof (local)) != 0)
+		return (-errno);
+
+	return (ticks);
+}
+
+/*
+ * setitimer() - Linux accepts tv_usec values of 1,000,000 or more, where
+ * Solaris would fail with EINVAL, so such values are normalized before
+ * the native call is made.
+ *
+ * One difference remains: Linux copes with a tv_sec value above
+ * 100,000,000 and Solaris does not.  That corresponds to an interval
+ * timer set to fire more than _three years_ from now, so nothing but
+ * Linux test suites is likely to notice.
+ */
+int
+lx_setitimer(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	struct itimerval local;
+	struct itimerval *newp = (struct itimerval *)p2;
+	struct timeval *fix[2];
+	int n;
+
+	if (newp != NULL) {
+		if (uucopy(newp, &local, sizeof (local)) != 0)
+			return (-errno);
+
+		/*
+		 * For both the interval and the value, fold any whole seconds
+		 * held in tv_usec into tv_sec and keep the remainder.
+		 */
+		fix[0] = &local.it_interval;
+		fix[1] = &local.it_value;
+		for (n = 0; n < 2; n++) {
+			if (fix[n]->tv_usec < MICROSEC)
+				continue;
+
+			fix[n]->tv_sec += fix[n]->tv_usec / MICROSEC;
+			fix[n]->tv_usec %= MICROSEC;
+		}
+
+		newp = &local;
+	}
+
+	if (setitimer((int)p1, newp, (struct itimerval *)p3) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * NOTE: The Linux man pages state this structure is obsolete and is
+ * unsupported, so it is declared here for sizing purposes only.
+ */
+struct lx_timezone {
+ int tz_minuteswest; /* minutes W of Greenwich; lx_gettimeofday() stores 0 */
+ int tz_dsttime; /* type of dst correction; lx_gettimeofday() stores 0 */
+};
+
+/*
+ * lx_gettimeofday() and lx_settimeofday() are implemented here rather than
+ * as pass-through calls to Solaris' libc due to the need to return EFAULT
+ * for a bad buffer rather than die with a segmentation fault.
+ */
+/*
+ * gettimeofday() emulation.  p1 and p2 are optional user pointers to a
+ * timeval and a timezone structure.  The timezone is always reported as
+ * all zeroes.  Returns 0 or -errno.
+ */
+int
+lx_gettimeofday(uintptr_t p1, uintptr_t p2)
+{
+	struct timeval now;
+	struct lx_timezone zone;
+
+	bzero(&zone, sizeof (zone));
+	(void) gettimeofday(&now, NULL);
+
+	if (p1 != NULL) {
+		if (uucopy(&now, (struct timeval *)p1, sizeof (now)) < 0)
+			return (-errno);
+	}
+
+	/*
+	 * Linux documents the second parameter as obsolete, yet a bad
+	 * non-NULL pointer there must still make gettimeofday(2) fail with
+	 * EFAULT, so the store is attempted anyway.
+	 */
+	if (p2 != NULL) {
+		if (uucopy(&zone, (struct lx_timezone *)p2, sizeof (zone)) < 0)
+			return (-errno);
+	}
+
+	return (0);
+}
+
+/*
+ * settimeofday() emulation.  p1 and p2 are optional user pointers to a
+ * timeval and a timezone structure.  Both are read before anything is
+ * changed; the timezone contents are otherwise ignored.  Returns 0 or
+ * -errno.
+ */
+int
+lx_settimeofday(uintptr_t p1, uintptr_t p2)
+{
+	struct timeval when;
+	struct lx_timezone zone;
+
+	if (p1 != NULL) {
+		if (uucopy((struct timeval *)p1, &when, sizeof (when)) < 0)
+			return (-errno);
+	}
+
+	/*
+	 * Linux documents the second parameter as obsolete, yet a bad
+	 * non-NULL pointer there must still make settimeofday(2) fail with
+	 * EFAULT, so the read is attempted anyway.
+	 */
+	if (p2 != NULL) {
+		if (uucopy((struct lx_timezone *)p2, &zone, sizeof (zone)) < 0)
+			return (-errno);
+	}
+
+	if (p1 != NULL) {
+		if (settimeofday(&when, NULL) < 0)
+			return (-errno);
+	}
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/truncate.c b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
new file mode 100644
index 0000000000..81fdfbac35
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+
+/*
+ * On Solaris, truncate() and ftruncate() are implemented in libc, so these are
+ * layered on those interfaces.
+ */
+
+/* truncate() emulation: returns 0 on success or -errno on failure. */
+int
+lx_truncate(uintptr_t path, uintptr_t length)
+{
+	if (truncate((const char *)path, (off_t)length) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/* ftruncate() emulation: returns 0 on success or -errno on failure. */
+int
+lx_ftruncate(uintptr_t fd, uintptr_t length)
+{
+	if (ftruncate((int)fd, (off_t)length) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * truncate64() emulation.  The length arrives as two arguments, which are
+ * combined with LX_32TO64().  Returns 0 on success or -errno on failure.
+ */
+int
+lx_truncate64(uintptr_t path, uintptr_t length_lo, uintptr_t length_hi)
+{
+	if (truncate64((const char *)path,
+	    LX_32TO64(length_lo, length_hi)) != 0)
+		return (-errno);
+
+	return (0);
+}
+
+/*
+ * ftruncate64() emulation.  The length arrives as two arguments, which are
+ * combined with LX_32TO64().  Returns 0 on success or -errno on failure.
+ */
+int
+lx_ftruncate64(uintptr_t fd, uintptr_t length_lo, uintptr_t length_hi)
+{
+	if (ftruncate64((int)fd, LX_32TO64(length_lo, length_hi)) != 0)
+		return (-errno);
+
+	return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c
new file mode 100644
index 0000000000..33b3d49923
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c
@@ -0,0 +1,288 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * wait() family of functions.
+ *
+ * The first minor difference between the Linux and Solaris family of wait()
+ * calls is that the values for WNOHANG and WUNTRACED are different. Solaris
+ * also has additional options (WCONTINUED, WNOWAIT) which should be flagged as
+ * invalid on Linux. Thankfully, the exit status values are identical between
+ * the two implementations.
+ *
+ * Things get very different and very complicated when we introduce the Linux
+ * threading model. Under Linux, both threads and child processes are
+ * represented as processes. However, the behavior of wait() with respect to
+ * each child varies according to the flags given to clone():
+ *
+ * SIGCHLD The SIGCHLD signal should be sent on termination
+ * CLONE_THREAD The child shares the same thread group as the parent
+ * CLONE_DETACHED The parent receives no notification when the child exits
+ *
+ * The following flags control the Linux behavior w.r.t. the above attributes:
+ *
+ * __WALL Wait on all children, regardless of type
+ * __WCLONE Wait only on non-SIGCHLD children
+ * __WNOTHREAD Don't wait on children of other threads in this group
+ *
+ * The following chart shows whether wait() returns when the child exits:
+ *
+ * default __WCLONE __WALL
+ * no SIGCHLD - X X
+ * SIGCHLD X - X
+ *
+ * The following chart shows whether wait() returns when the grandchild exits:
+ *
+ * default __WNOTHREAD
+ * no CLONE_THREAD - -
+ * CLONE_THREAD X -
+ *
+ * The CLONE_DETACHED flag is universal - when the child exits, no state is
+ * stored and wait() has no effect.
+ *
+ * XXX Support the above combination of options, or some reasonable subset that
+ * covers at least fork() and pthread_create().
+ */
+
+#include <errno.h>
+#include <sys/wait.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/times.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+
+/*
+ * Convert between Linux options and Solaris options, returning -1 if any
+ * invalid flags are found.
+ *
+ * (The sentence above describes ltos_options(), which follows these
+ * definitions.)  The LX_W* values are the Linux wait option bits and the
+ * LX_P_* values are the Linux waitid() idtype selectors.
+ */
+#define LX_WNOHANG 0x1
+#define LX_WUNTRACED 0x2
+
+#define LX_WNOTHREAD 0x20000000
+#define LX_WALL 0x40000000
+#define LX_WCLONE 0x80000000
+
+/*
+ * NOTE(review): on Linux the idtype value 2 is P_PGID (process group id),
+ * not a group id, despite the LX_P_GID name used here.
+ */
+#define LX_P_ALL 0x0
+#define LX_P_PID 0x1
+#define LX_P_GID 0x2
+
+/*
+ * Translate a Linux wait option mask into the native equivalent.  Only
+ * LX_WNOHANG and LX_WUNTRACED are carried across; the thread-related Linux
+ * flags are accepted but ignored.  Returns -1 if any unknown bit is set.
+ */
+static int
+ltos_options(uintptr_t options)
+{
+	const unsigned int lx_valid = LX_WNOHANG | LX_WUNTRACED |
+	    LX_WNOTHREAD | LX_WALL | LX_WCLONE;
+	int s_opts = 0;
+
+	if ((options & ~lx_valid) != 0)
+		return (-1);
+
+	/* XXX implement LX_WNOTHREAD, LX_WALL, LX_WCLONE */
+
+	s_opts |= (options & LX_WNOHANG) ? WNOHANG : 0;
+	s_opts |= (options & LX_WUNTRACED) ? WUNTRACED : 0;
+
+	return (s_opts);
+}
+
+/*
+ * Build a wait status word from a siginfo si_code/si_status pair.  For the
+ * signal-carrying codes the native signal number is first translated through
+ * stol_signo[].  An unrecognized code produces a status of 0.
+ */
+static int
+lx_wstat(int code, int status)
+{
+	int lx_sig;
+
+	/* Codes whose status is not a signal number. */
+	if (code == CLD_EXITED)
+		return (status << 8);
+	if (code == CLD_CONTINUED)
+		return (WCONTFLG);
+
+	/* Anything other than the signal-carrying codes yields 0. */
+	if (code != CLD_DUMPED && code != CLD_KILLED &&
+	    code != CLD_TRAPPED && code != CLD_STOPPED)
+		return (0);
+
+	lx_sig = stol_signo[status];
+	assert(lx_sig != -1);
+
+	if (code == CLD_DUMPED)
+		return (lx_sig | WCOREFLG);
+	if (code == CLD_KILLED)
+		return (lx_sig);
+
+	/* CLD_TRAPPED or CLD_STOPPED: signal goes in the second byte. */
+	return ((lx_sig << 8) | WSTOPFLG);
+}
+
+/*
+ * Wrapper that makes the native waitid() cooperate with ptrace: keep waiting
+ * until lx_ptrace_wait() accepts the result.  Returns 0 on success (including
+ * the WNOHANG "nothing ready" case) or a negated errno value.
+ */
+static int
+lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options)
+{
+	for (;;) {
+		/*
+		 * waitid() may fail with EINVAL when idtype is P_PID or
+		 * P_PGID and id is out of range for a pid or pgid, whereas
+		 * Linux expects ECHILD.  There is no good way to handle that
+		 * here, so the native error is passed through.
+		 */
+		if (waitid(idtype, id, info, options) < 0)
+			return (-errno);
+
+		/* WNOHANG was given and no child was found. */
+		if ((options & WNOHANG) && info->si_pid == 0)
+			return (0);
+
+		/*
+		 * A return of -1 here means the wakeup was a spurious one
+		 * for a child created by the ptrace subsystem; go around
+		 * again in that case.
+		 */
+		if (lx_ptrace_wait(info) != -1)
+			break;
+	}
+
+	return (0);
+}
+
+/*
+ * Emulate wait4().
+ *
+ * p1 is the pid selector, p2 the user address that receives the status word
+ * (may be NULL), p3 the Linux option flags and p4 the user address that
+ * receives the rusage data (may be NULL).
+ *
+ * Returns the pid reported by waitid(), 0 if WNOHANG was given and no child
+ * was ready, or a negated errno value on failure.
+ */
+int
+lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ siginfo_t info = { 0 };
+ struct rusage ru = { 0 };
+ idtype_t idtype;
+ id_t id;
+ int options, status = 0;
+ pid_t pid = (pid_t)p1;
+ int rval;
+
+ if ((options = ltos_options(p3)) == -1)
+ return (-EINVAL);
+
+ /*
+ * While not listed as a valid return code, Linux's wait4(2) does,
+ * in fact, get an EFAULT if either the status pointer or rusage
+ * pointer is invalid. Since a failed waitpid should leave child
+ * process in a state where a future wait4(2) will succeed, we
+ * check them by copying out the values their buffers originally
+ * contained. (We need to do this as a failed system call should
+ * never affect the contents of a passed buffer.)
+ *
+ * This will fail if the buffers in question are write-only.
+ */
+ if ((void *)p2 != NULL &&
+ ((uucopy((void *)p2, &status, sizeof (status)) != 0) ||
+ (uucopy(&status, (void *)p2, sizeof (status)) != 0)))
+ return (-EFAULT);
+
+ if ((void *)p4 != NULL) {
+ if ((uucopy((void *)p4, &ru, sizeof (ru)) != 0) ||
+ (uucopy(&ru, (void *)p4, sizeof (ru)) != 0))
+ return (-EFAULT);
+ }
+
+ /*
+ * Translate the wait4() pid selector into a waitid() idtype/id pair:
+ * below -1 selects process group -pid, -1 selects any child, 0 selects
+ * the caller's own process group and a positive value selects that pid.
+ */
+ if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ id = getpgrp();
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ /* Always ask for exited and trapped children as well. */
+ options |= WEXITED | WTRAPPED;
+
+ if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0)
+ return (rval);
+ /*
+ * If the WNOHANG flag was specified and no child was found return 0.
+ */
+ if ((options & WNOHANG) && info.si_pid == 0)
+ return (0);
+
+ status = lx_wstat(info.si_code, info.si_status);
+
+ /*
+ * Unfortunately if this attempt to copy out either the status or the
+ * rusage fails, the process will be in an inconsistent state as
+ * subsequent calls to wait for the same child will fail where they
+ * should succeed on a Linux system. This, however, is rather
+ * unlikely since we tested the validity of both above.
+ */
+ if (p2 != NULL && uucopy(&status, (void *)p2, sizeof (status)) != 0)
+ return (-EFAULT);
+
+ /*
+ * NOTE(review): the rusage handed back is whatever lx_getrusage()
+ * reports for LX_RUSAGE_CHILDREN, presumably a total over all
+ * waited-for children rather than this child alone; confirm.
+ */
+ if (p4 != NULL && (rval = lx_getrusage(LX_RUSAGE_CHILDREN, p4)) != 0)
+ return (rval);
+
+ return (info.si_pid);
+}
+
+/*
+ * waitpid() is wait4() without an rusage buffer.
+ */
+int
+lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int rval;
+
+	rval = lx_wait4(p1, p2, p3, NULL);
+
+	return (rval);
+}
+
+/*
+ * Emulate waitid().
+ *
+ * idtype and id select which children to wait for, infop is the user address
+ * of the Linux siginfo structure to fill in and opt holds the Linux option
+ * flags.
+ *
+ * Returns the result of stol_siginfo() on success, or a negated errno value:
+ * -EINVAL for unknown option bits or an unknown idtype, otherwise whatever
+ * lx_waitid_helper() reports.
+ */
+int
+lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
+{
+	int rval, options;
+	idtype_t s_idtype;
+	siginfo_t s_infop = {0};
+
+	/*
+	 * ltos_options() signals bad flags with a bare -1; turn that into a
+	 * proper negated errno, as lx_wait4() does, so the caller does not
+	 * see it as -EPERM.
+	 */
+	if ((options = ltos_options(opt)) == -1)
+		return (-EINVAL);
+
+	switch (idtype) {
+	case LX_P_ALL:
+		s_idtype = P_ALL;
+		break;
+	case LX_P_PID:
+		s_idtype = P_PID;
+		break;
+	case LX_P_GID:
+		/*
+		 * Linux idtype 2 selects a process group, so the native
+		 * equivalent is P_PGID; P_GID would name a group id instead.
+		 */
+		s_idtype = P_PGID;
+		break;
+	default:
+		return (-EINVAL);
+	}
+
+	rval = lx_waitid_helper(s_idtype, (id_t)id, &s_infop, options);
+	if (rval < 0)
+		return (rval);
+
+	return (stol_siginfo(&s_infop, (lx_siginfo_t *)infop));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/Makefile b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
new file mode 100644
index 0000000000..fd38a056f6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
@@ -0,0 +1,56 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# lib/brand/lx/lx_brand/i386/Makefile
+
+# ISA-specific sources live in this directory.
+ISASRCDIR=.
+
+# -D_ASM defines the _ASM symbol that lx_handler.s tests before pulling in
+# lx_signal.h and lx_syscall.h.  (-P presumably requests C preprocessing of
+# the assembly sources; confirm against the assembler in use.)
+ASFLAGS += -P -D_ASM
+
+# Shared definitions and rules; must come after the settings above.
+include ../Makefile.com
+
+# Message catalog built from the C sources.
+POFILE= lx_brand.po
+MSGFILES= $(CSRCS)
+
+# Objects whose assembly source includes the generated assym.h.
+ASSYMDEP_OBJS = lx_handler.o
+
+$(ASSYMDEP_OBJS:%=pics/%): assym.h
+
+OFFSETS = ../$(MACH)/offsets.in
+
+# Generate assym.h (structure size and member offset constants) from the
+# offsets template.
+assym.h: $(OFFSETS)
+ $(OFFSETS_CREATE) $(CTF_FLAGS) < $(OFFSETS) > $@
+
+# assym.h is a generated file, so remove it on clobber.
+CLOBBERFILES += assym.h
+
+install: all $(ROOTLIBS)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
new file mode 100644
index 0000000000..c457c1c209
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+/* Empty C stand-in so that lint sees a definition of _start(). */
+void
+_start(void)
+{
+}
+
+#else /* lint */
+
+ /*
+ * C language startup routine for the lx brand shared library.
+ *
+ * After the two pushes below, the frame looks like this relative to
+ * %ebp: 0 = NULL frame pointer, 4 = NULL return pc, 8 = argc,
+ * 12 = argv[0].
+ */
+ ENTRY_NP(_start)
+ pushl $0 / Build a stack frame. retpc = NULL
+ pushl $0 / fp = NULL
+ movl %esp, %ebp / first stack frame
+
+ /*
+ * Calculate the location of the envp array by adding the size of
+ * the argv array to the start of the argv array.
+ *
+ * The displacement of 16 is &argv[0] (12) plus one extra pointer
+ * slot, presumably argv's terminating NULL entry. The stack is
+ * rounded down to a 16-byte boundary before the three arguments to
+ * lx_init() are pushed.
+ */
+ movl 8(%ebp), %eax / argc in %eax
+ leal 16(%ebp,%eax,4), %edx / envp in %edx
+ andl $-16, %esp
+ pushl %edx / push envp
+ leal 12(%ebp),%edx / compute &argv[0]
+ pushl %edx / push argv
+ pushl %eax / push argc
+ call lx_init
+ /*
+ * lx_init will never return.
+ */
+ SET_SIZE(_start)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
new file mode 100644
index 0000000000..413ef9852d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
@@ -0,0 +1,377 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/segments.h>
+#include <sys/syscall.h>
+#include <sys/lx_brand.h>
+
+#if defined(_ASM)
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#endif /* _ASM */
+
+#include "assym.h"
+
+/*
+ * Load register r with the address of the global offset table: the
+ * call/popl pair fetches the address of local label 9 and the addl applies
+ * the _GLOBAL_OFFSET_TABLE_ displacement to it.
+ */
+#define PIC_SETUP(r) \
+ call 9f; \
+9: popl r; \
+ addl $_GLOBAL_OFFSET_TABLE_ + [. - 9b], r
+
+/*
+ * Each JMP must occupy 16 bytes
+ *
+ * A JMP entry pushes its own byte offset from the start of lx_handler_table
+ * and jumps to lx_handler; the trailing .align pads the entry out to the
+ * next 16-byte boundary. The JMPn macros simply repeat JMP n times.
+ */
+#define JMP \
+ pushl $_CONST(. - lx_handler_table); \
+ jmp lx_handler; \
+ .align 16;
+
+#define JMP4 JMP; JMP; JMP; JMP
+#define JMP16 JMP4; JMP4; JMP4; JMP4
+#define JMP64 JMP16; JMP16; JMP16; JMP16
+#define JMP256 JMP64; JMP64; JMP64; JMP64
+
+/*
+ * Alternate jump table that turns on lx_traceflag before proceeding with
+ * the normal emulation routine.
+ *
+ * A TJMP entry has the same shape as a JMP entry, but its offset is taken
+ * relative to lx_handler_trace_table and it jumps to lx_handler_trace.
+ */
+#define TJMP \
+ pushl $_CONST(. - lx_handler_trace_table); \
+ jmp lx_handler_trace; \
+ .align 16;
+
+#define TJMP4 TJMP; TJMP; TJMP; TJMP
+#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4
+#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16
+#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64
+
+
+#if defined(lint)
+
+#include <sys/types.h>
+#include <sys/regset.h>
+#include <sys/signal.h>
+
+/*
+ * Empty C definitions, compiled only under lint, that give lint a prototype
+ * for each routine implemented in assembly below.
+ */
+void
+lx_handler_table(void)
+{}
+
+void
+lx_handler(void)
+{}
+
+/* ARGSUSED */
+void
+lx_setup_clone(uintptr_t gs, void *retaddr, void *stk)
+{}
+
+/* ARGSUSED */
+void
+lx_sigdeliver(int sig, siginfo_t *sip, void *p, size_t stacksz,
+ void (*stack_frame_builder)(void), void (*lx_sighandler)(void),
+ uintptr_t gs)
+{}
+
+/* ARGSUSED */
+void
+lx_sigacthandler(int sig, siginfo_t *s, void *p)
+{}
+
+void
+lx_sigreturn_tramp(void)
+{}
+
+void
+lx_rt_sigreturn_tramp(void)
+{}
+
+/* ARGSUSED */
+void
+lx_sigreturn_tolibc(uintptr_t sp)
+{}
+
+#else /* lint */
+
+ /*
+ * On entry to this table, %eax will hold the return address. The
+ * location where we enter the table is a function of the system
+ * call number. The table needs the same alignment as the individual
+ * entries.
+ *
+ * Each table is built from a 256-entry and a 64-entry run, i.e. 320
+ * 16-byte entries.
+ */
+ .align 16
+ ENTRY_NP(lx_handler_trace_table)
+ TJMP256
+ TJMP64
+ SET_SIZE(lx_handler_trace_table)
+
+ .align 16
+ ENTRY_NP(lx_handler_table)
+ JMP256
+ JMP64
+ SET_SIZE(lx_handler_table)
+
+ /*
+ * Entry point used by lx_handler_trace_table: store 1 into
+ * lx_traceflag (located through the GOT) and continue in lx_handler.
+ * %esi is used as scratch and restored before leaving.
+ */
+ ENTRY_NP(lx_handler_trace)
+ pushl %esi
+ PIC_SETUP(%esi)
+ movl lx_traceflag@GOT(%esi), %esi
+ movl $1, (%esi)
+ popl %esi
+ /*
+ * While we could just fall through to lx_handler(), we "tail-call" it
+ * instead to make ourselves a little more comprehensible to trace
+ * tools.
+ */
+ jmp lx_handler
+ SET_SIZE(lx_handler_trace)
+
+ /*
+ * Common system call entry point, reached from an lx_handler_table
+ * entry (or via lx_handler_trace) with the table offset on top of the
+ * stack and the system call's return address in %eax.
+ */
+ ALTENTRY(lx_handler)
+ /*
+ * %ebp isn't always going to be a frame pointer on Linux, but when
+ * it is, saving it here lets us have a coherent stack backtrace.
+ */
+ pushl %ebp
+
+ /*
+ * Fill in a lx_regs_t structure on the stack.
+ */
+ subl $SIZEOF_LX_REGS_T, %esp
+
+ /*
+ * Save %ebp and then fill it with what would be its usual value as
+ * the frame pointer. The value we save for %esp needs to be the
+ * stack pointer at the time of the interrupt so we need to skip the
+ * saved %ebp and (what will be) the return address.
+ */
+ movl %ebp, LXR_EBP(%esp)
+ movl %esp, %ebp
+ addl $_CONST(SIZEOF_LX_REGS_T), %ebp
+ movl %ebp, LXR_ESP(%esp)
+ addl $_CONST(_MUL(CPTRSIZE, 2)), LXR_ESP(%esp)
+
+ /*
+ * The slot is zeroed first because the movw that follows stores only
+ * 16 bits of %gs.
+ */
+ movl $0, LXR_GS(%esp)
+ movw %gs, LXR_GS(%esp)
+ movl %edi, LXR_EDI(%esp)
+ movl %esi, LXR_ESI(%esp)
+ movl %ebx, LXR_EBX(%esp)
+ movl %edx, LXR_EDX(%esp)
+ movl %ecx, LXR_ECX(%esp)
+ movl %eax, LXR_EIP(%esp)
+
+ /*
+ * The kernel drops us into the middle of one of the tables above
+ * that then pushes that table offset onto the stack, and calls into
+ * lx_handler. That offset indicates the system call number while
+ * %eax holds the return address for the system call. We replace the
+ * value on the stack with the return address, and use the value to
+ * compute the system call number by dividing by the table entry size.
+ */
+ xchgl CPTRSIZE(%ebp), %eax
+ shrl $4, %eax
+ movl %eax, LXR_EAX(%esp)
+
+ /*
+ * Switch to the Solaris libc's %gs.
+ */
+ movl $LWPGS_SEL, %ebx
+ movw %bx, %gs
+
+ /*
+ * Call lx_emulate() whose only argument is a pointer to the
+ * lx_regs_t structure we've placed on the stack.
+ */
+ pushl %esp
+ call lx_emulate
+
+ /*
+ * We use this global symbol to identify this return site when
+ * walking the stack backtrace. It needs to remain immediately
+ * after the call to lx_emulate().
+ */
+ ALTENTRY(lx_emulate_done)
+
+ /*
+ * Clean up the argument to lx_emulate().
+ */
+ addl $4, %esp
+
+ /*
+ * Restore the saved register state; we get %ebp, %esp and %eip from
+ * the ordinary locations rather than the saved state.
+ */
+ movl LXR_EDI(%esp), %edi
+ movl LXR_ESI(%esp), %esi
+ movl LXR_EBX(%esp), %ebx
+ movl LXR_EDX(%esp), %edx
+ movl LXR_ECX(%esp), %ecx
+ movl LXR_EAX(%esp), %eax
+ movw LXR_GS(%esp), %gs
+
+ addl $SIZEOF_LX_REGS_T, %esp
+
+ /*
+ * Unwind the frame built on entry: %esp is reloaded from the frame
+ * pointer, the saved %ebp is popped, and ret consumes the return
+ * address that was swapped into the table-offset slot above.
+ */
+ movl %ebp, %esp
+ popl %ebp
+ ret
+ SET_SIZE(lx_handler)
+
+ /*
+ * lx_swap_gs(new_gs, old_gs_ptr)
+ *
+ * Store the current %gs through the pointer passed as the second
+ * argument, then load %gs from the first argument. %eax is preserved.
+ * The argument offsets are 8 and 0xc rather than 4 and 8 because of
+ * the push of %eax.
+ */
+ ENTRY_NP(lx_swap_gs)
+ push %eax /* save the current eax value */
+ movl 0xc(%esp),%eax /* 2nd param is a pointer */
+ movw %gs,(%eax) /* use the pointer to save current gs */
+ movl 0x8(%esp),%eax /* first parameter is the new gs value */
+ movw %ax, %gs /* switch to the new gs value */
+ pop %eax /* restore eax */
+ ret
+ SET_SIZE(lx_swap_gs)
+
+ /*
+ * lx_setup_clone(gs, retaddr, stk)
+ *
+ * Never returns to its caller: the three arguments are popped straight
+ * off the stack into %gs, %edx and %esp respectively, %eax is zeroed
+ * and control jumps to retaddr.
+ */
+ ENTRY_NP(lx_setup_clone)
+ xorl %ebp, %ebp /* terminating stack */
+ popl %edx /* eat the start_clone() return address */
+ popl %gs /* Switch back to the Linux libc's %gs */
+ popl %edx /* Linux clone() return address */
+ popl %esp /* New stack pointer */
+ xorl %eax, %eax /* child returns 0 to SYS_clone() */
+ jmp *%edx /* return to Linux app. */
+ SET_SIZE(lx_setup_clone)
+
+ /*
+ * lx_sigdeliver(sig, siginfo_t *, ucontext_t *, stack_size,
+ * stack_build_routine, signal_handler, glibc_gs)
+ *
+ * This routine allocates stack space for the Linux signal stack,
+ * calls a routine to build the signal stack and then calls the Linux
+ * signal handler. This is written in assembly because of the way
+ * we need to directly manipulate the stack and pass the resulting
+ * stack to the signal handler with the Linux signal stack on top.
+ *
+ * When the Linux signal handler is called, the stack will look
+ * like this:
+ *
+ * =================================================
+ * | Linux signal frame built by lx_stackbuilder() |
+ * =================================================
+ * | LX_SIGRT_MAGIC |
+ * =================================================
+ * | %ebp |
+ * =================================================
+ *
+ * Note that the code below also pushes the ucontext_t pointer between
+ * the saved %ebp and LX_SIGRT_MAGIC; that slot is not shown in the
+ * diagram above.
+ *
+ * Relative to %ebp after the prologue, the arguments are at: 8 = sig,
+ * 12 = siginfo_t pointer, 16 = ucontext_t pointer, 20 = stack size,
+ * 24 = stack build routine, 28 = Linux handler, 32 = %gs value.
+ */
+ ENTRY_NP(lx_sigdeliver)
+ pushl %ebp
+ movl %esp, %ebp
+ movl 16(%ebp), %edx /* pointer to Solaris ucontext_t */
+ pushl %edx /* save ucontext_t ptr for later */
+ pushl $LX_SIGRT_MAGIC /* marker value for lx_(rt)_sigreturn */
+
+ subl 20(%ebp), %esp /* create stack buffer */
+ pushl %esp /* push stack pointer */
+ pushl %edx /* push pointer to ucontext_t */
+ pushl 12(%ebp) /* push pointer to siginfo_t */
+ pushl 8(%ebp) /* push signal number */
+ call *24(%ebp) /* lx_stackbuilder(sig, sip, ucp, sp) */
+ add $16, %esp /* remove args from stack */
+ movw 32(%ebp), %gs /* only low 16 bits are used */
+
+ /*
+ * NOTE(review): given the pushl %ebp / movl %esp, %ebp prologue
+ * above, 0(%ebp) holds the caller's %ebp and 4(%ebp) holds this
+ * routine's return address. Confirm that 4(%ebp) is really the
+ * intended source for the "old %ebp" loaded below.
+ */
+ mov 4(%ebp),%eax /* fetch old %ebp from stack */
+ mov 28(%ebp), %edx /* get address of Linux handler */
+ mov %eax, %ebp /* restore old %ebp */
+ jmp *%edx /* jmp to the Linux signal handler */
+ SET_SIZE(lx_sigdeliver)
+
+ /*
+ * Due to the nature of signals, we need to be able to force the %gs
+ * value to that used by Solaris before running any Solaris code.
+ *
+ * This routine does that, then calls a C routine that will save the
+ * %gs value at the time of the signal off into a thread-specific data
+ * structure. Finally, we trampoline to the libc code that would
+ * normally interpose itself before calling a signal handler.
+ *
+ * The libc routine that calls user signal handlers ends with a
+ * setcontext, so we would never return here even if we used a call
+ * rather than a jmp.
+ *
+ * %esi is used for the PIC as it is guaranteed by the 386 ABI to
+ * survive the call to lx_sigsavegs. The downside is we must also
+ * preserve its value for our caller.
+ *
+ * Note that because lx_sigsavegs and libc_sigacthandler are externs,
+ * they need to be dereferenced via the GOT.
+ *
+ * IMPORTANT: Because libc apparently gets upset if extra data is
+ * left on its stack, this routine needs to be crafted
+ * in assembly so that the jmp to the libc interposer
+ * doesn't leave any cruft lying around.
+ *
+ * The Linux %gs pushed below is left on the stack across the call to
+ * lx_sigsavegs (presumably as its argument) and discarded afterwards.
+ * The final jmp goes through the value stored at libc_sigacthandler
+ * rather than to the symbol itself.
+ */
+ ENTRY_NP(lx_sigacthandler)
+ pushl %esi /* save %esi */
+ pushl %gs /* push the Linux %gs */
+ pushl $LWPGS_SEL
+ popl %gs /* install the Solaris %gs */
+
+ PIC_SETUP(%esi)
+ movl lx_sigsavegs@GOT(%esi), %eax
+ call *%eax /* save the Linux %gs */
+ movl libc_sigacthandler@GOT(%esi), %eax
+ add $4, %esp /* clear Linux %gs from stack */
+ popl %esi /* restore %esi */
+ jmp *(%eax) /* jmp to libc's interposer */
+ SET_SIZE(lx_sigacthandler)
+
+ /*
+ * Trampoline code is called by the return at the end of a Linux
+ * signal handler to return control to the interrupted application
+ * via the lx_sigreturn() or lx_rt_sigreturn() syscalls.
+ *
+ * (lx_sigreturn() is called for legacy signal handling, and
+ * lx_rt_sigreturn() is called for "new"-style signals.)
+ *
+ * These two routines must consist of the EXACT code sequences below
+ * as gdb looks at the sequence of instructions a routine will return
+ * to determine whether it is in a signal handler or not.
+ *
+ * Do not add, remove or reorder instructions here. Note that only the
+ * legacy trampoline begins with a popl (presumably discarding the
+ * signal number left on the stack); neither routine returns, since
+ * each ends in the int $0x80 system call.
+ */
+ ENTRY_NP(lx_sigreturn_tramp)
+ popl %eax
+ movl $LX_SYS_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_sigreturn_tramp)
+
+ ENTRY_NP(lx_rt_sigreturn_tramp)
+ movl $LX_SYS_rt_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_rt_sigreturn_tramp)
+
+ /*
+ * Manipulate the stack in the way necessary for it to appear to libc
+ * that the signal handler it invoked via call_user_handler() is
+ * returning.
+ *
+ * lx_sigreturn_tolibc(sp): the single argument replaces %esp; the
+ * words at that address are then consumed as a saved %ebp followed by
+ * a return address.
+ */
+ ENTRY_NP(lx_sigreturn_tolibc)
+ movl 4(%esp), %esp /* set %esp to passed value */
+ popl %ebp /* restore proper %ebp */
+ ret /* return to libc interposer */
+ SET_SIZE(lx_sigreturn_tolibc)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s
new file mode 100644
index 0000000000..28ec39938e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+/* Empty C stand-in so that lint sees a prototype for lx_runexe(). */
+/*ARGSUSED*/
+void
+lx_runexe(void *argv, int32_t entry)
+{
+}
+
+#else /* lint */
+
+ /*
+ * Set our stack pointer, clear the general registers,
+ * and jump to the brand linker's entry point.
+ *
+ * lx_runexe(argv, entry) does not return. The new %esp is argv - 4,
+ * which the code expects to be the word holding argc. %ebx is not
+ * cleared because it carries the jump target.
+ */
+ ENTRY_NP(lx_runexe)
+ movl 4(%esp), %eax / %eax = &argv[0]
+ movl 8(%esp), %ebx / Brand linker's entry point in %ebx
+ subl $4, %eax / Top of stack - must point at argc
+ movl %eax, %esp / Set %esp to what linkers expect
+
+ movl $0, %eax
+ movl $0, %ecx
+ movl $0, %edx
+ movl $0, %esi
+ movl $0, %edi
+ movl $0, %ebp
+
+ jmp *%ebx / And away we go...
+ SET_SIZE(lx_runexe)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/offsets.in b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in
new file mode 100644
index 0000000000..ac934ee76c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in
@@ -0,0 +1,40 @@
+\
+\ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+\ Use is subject to license terms.
+\
+\ CDDL HEADER START
+\
+\ The contents of this file are subject to the terms of the
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
+\
+\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+\ or http://www.opensolaris.org/os/licensing.
+\ See the License for the specific language governing permissions
+\ and limitations under the License.
+\
+\ When distributing Covered Code, include this CDDL HEADER in each
+\ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+\ If applicable, add the following below this CDDL HEADER, with the
+\ fields enclosed by brackets "[]" replaced with your own identifying
+\ information: Portions Copyright [yyyy] [name of copyright owner]
+\
+\ CDDL HEADER END
+\
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/lx_brand.h>
+
+\
+\ Input for the offsets generator that produces assym.h: request the size
+\ of lx_regs_t as SIZEOF_LX_REGS_T plus an offset constant for each member
+\ listed below (lx_handler.s refers to these as LXR_GS, LXR_EDI, and so on;
+\ the exact naming rule is presumably the generator's default; confirm).
+\
+lx_regs_t SIZEOF_LX_REGS_T
+ lxr_gs
+ lxr_edi
+ lxr_esi
+ lxr_ebp
+ lxr_esp
+ lxr_ebx
+ lxr_edx
+ lxr_ecx
+ lxr_eax
+ lxr_eip
+ lxr_orig_eax
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
new file mode 100644
index 0000000000..80fb579665
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_DEBUG_H
+#define _LX_DEBUG_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Interfaces to the lx brand library's debugging subsystem.
+ */
+
+/* initialize the debugging subsystem */
+extern void lx_debug_init(void);
+
+/* printf() style debug message functionality */
+extern void lx_debug(const char *, ...);
+
+/* set non-zero if the debugging subsystem is enabled */
+extern int lx_debug_enabled;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_DEBUG_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h
new file mode 100644
index 0000000000..997f1c4589
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h
@@ -0,0 +1,114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_FCNTL_H
+#define _SYS_LX_FCNTL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Lx open/fcntl flags
+ *
+ * Note that the LX_O_* values are written in octal.
+ */
+#define LX_O_RDONLY 00
+#define LX_O_WRONLY 01
+#define LX_O_RDWR 02
+#define LX_O_CREAT 0100
+#define LX_O_EXCL 0200
+#define LX_O_NOCTTY 0400
+#define LX_O_TRUNC 01000
+#define LX_O_APPEND 02000
+#define LX_O_NONBLOCK 04000
+#define LX_O_NDELAY LX_O_NONBLOCK
+#define LX_O_SYNC 010000
+#define LX_O_FSYNC LX_O_SYNC
+#define LX_O_ASYNC 020000
+#define LX_O_DIRECT 040000
+#define LX_O_LARGEFILE 0100000
+#define LX_O_DIRECTORY 0200000
+#define LX_O_NOFOLLOW 0400000
+
+/* Lx fcntl() command numbers */
+#define LX_F_DUPFD 0
+#define LX_F_GETFD 1
+#define LX_F_SETFD 2
+#define LX_F_GETFL 3
+#define LX_F_SETFL 4
+#define LX_F_GETLK 5
+#define LX_F_SETLK 6
+#define LX_F_SETLKW 7
+#define LX_F_SETOWN 8
+#define LX_F_GETOWN 9
+#define LX_F_SETSIG 10
+#define LX_F_GETSIG 11
+
+#define LX_F_GETLK64 12
+#define LX_F_SETLK64 13
+#define LX_F_SETLKW64 14
+
+#define LX_F_SETLEASE 1024
+#define LX_F_GETLEASE 1025
+#define LX_F_NOTIFY 1026
+
+/* Lx record lock types (presumably the values carried in l_type) */
+#define LX_F_RDLCK 0
+#define LX_F_WRLCK 1
+#define LX_F_UNLCK 2
+
+/*
+ * Lx name length limit and flock codes.
+ */
+#define LX_NAME_MAX 255
+#define LX_LOCK_SH 1 /* shared */
+#define LX_LOCK_EX 2 /* exclusive */
+#define LX_LOCK_NB 4 /* non-blocking */
+#define LX_LOCK_UN 8 /* unlock */
+
+/*
+ * Lx *at() constants. LX_AT_EACCESS and LX_AT_REMOVEDIR deliberately share
+ * the value 512; which one applies presumably depends on the system call
+ * the flag is passed to.
+ */
+#define LX_AT_FDCWD -100
+#define LX_AT_EACCESS 512
+#define LX_AT_REMOVEDIR 512
+#define LX_AT_SYMLINK_NOFOLLOW 256
+#define LX_AT_SYMLINK_FOLLOW 1024
+
+/*
+ * Record lock description using long-sized offsets. Presumably the layout
+ * used with LX_F_GETLK, LX_F_SETLK and LX_F_SETLKW; verify against the
+ * fcntl emulation.
+ */
+struct lx_flock {
+ short l_type;
+ short l_whence;
+ long l_start;
+ long l_len;
+ int l_pid;
+};
+
+/*
+ * Same members as struct lx_flock, but with long long offsets. Presumably
+ * the layout used with the LX_F_*LK64 commands; verify against the fcntl
+ * emulation.
+ */
+struct lx_flock64 {
+ short l_type;
+ short l_whence;
+ long long l_start;
+ long long l_len;
+ int l_pid;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FCNTL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h
new file mode 100644
index 0000000000..01bfb9499f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h
@@ -0,0 +1,382 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_IOCTL_H
+#define _SYS_LX_IOCTL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_ioctl_init(void);
+
+/*
+ * LX_NCC must be different from LX_NCCS since while the termio and termios
+ * structures may look similar they are fundamentally different sizes and
+ * have different members.
+ */
+#define LX_NCC 8 /* length of lx_termio.c_cc[] */
+#define LX_NCCS 19 /* length of lx_termios.c_cc[] */
+
+struct lx_termio {
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCC]; /* control characters, LX_NCC (8) entries */
+};
+
+struct lx_termios { /* NOTE(review): uint32_t must come from the includer */
+ uint32_t c_iflag; /* input mode flags */
+ uint32_t c_oflag; /* output mode flags */
+ uint32_t c_cflag; /* control mode flags */
+ uint32_t c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCCS]; /* control characters, LX_NCCS (19) entries */
+};
+
+/*
+ * c_cc characters which are valid for lx_termio and lx_termios
+ */
+#define LX_VINTR 0
+#define LX_VQUIT 1
+#define LX_VERASE 2
+#define LX_VKILL 3
+#define LX_VEOF 4
+#define LX_VTIME 5
+#define LX_VMIN 6
+#define LX_VSWTC 7 /* last index that fits in c_cc[LX_NCC] */
+
+/*
+ * c_cc characters which are valid for lx_termios
+ */
+#define LX_VSTART 8
+#define LX_VSTOP 9
+#define LX_VSUSP 10
+#define LX_VEOL 11
+#define LX_VREPRINT 12
+#define LX_VDISCARD 13
+#define LX_VWERASE 14
+#define LX_VLNEXT 15
+#define LX_VEOL2 16 /* indices 17 and 18 of c_cc[LX_NCCS] are unnamed */
+
+/*
+ * Sound formats.  Same bit values as the LX_OSS_AFMT_* set below, plus AC3.
+ */
+#define LX_AFMT_QUERY 0x00000000
+#define LX_AFMT_MU_LAW 0x00000001
+#define LX_AFMT_A_LAW 0x00000002
+#define LX_AFMT_IMA_ADPCM 0x00000004
+#define LX_AFMT_U8 0x00000008
+#define LX_AFMT_S16_LE 0x00000010
+#define LX_AFMT_S16_BE 0x00000020
+#define LX_AFMT_S8 0x00000040
+#define LX_AFMT_U16_LE 0x00000080
+#define LX_AFMT_U16_BE 0x00000100
+#define LX_AFMT_MPEG 0x00000200
+#define LX_AFMT_AC3 0x00000400 /* no LX_OSS_AFMT_ counterpart in this file */
+
+/*
+ * Supported ioctls
+ */
+#define LX_TCGETS 0x5401
+#define LX_TCSETS 0x5402
+#define LX_TCSETSW 0x5403
+#define LX_TCSETSF 0x5404
+#define LX_TCGETA 0x5405
+#define LX_TCSETA 0x5406
+#define LX_TCSETAW 0x5407
+#define LX_TCSETAF 0x5408
+#define LX_TCSBRK 0x5409
+#define LX_TCXONC 0x540a
+#define LX_TCFLSH 0x540b
+#define LX_TIOCEXCL 0x540c
+#define LX_TIOCNXCL 0x540d
+#define LX_TIOCSCTTY 0x540e
+#define LX_TIOCGPGRP 0x540f
+#define LX_TIOCSPGRP 0x5410
+#define LX_TIOCOUTQ 0x5411
+#define LX_TIOCSTI 0x5412
+#define LX_TIOCGWINSZ 0x5413
+#define LX_TIOCSWINSZ 0x5414
+#define LX_TIOCMGET 0x5415
+#define LX_TIOCMBIS 0x5416
+#define LX_TIOCMBIC 0x5417
+#define LX_TIOCMSET 0x5418
+#define LX_TIOCGSOFTCAR 0x5419
+#define LX_TIOCSSOFTCAR 0x541a
+#define LX_FIONREAD 0x541b /* 0x541c-0x541f are in the unsupported list */
+#define LX_TIOCPKT 0x5420
+#define LX_FIONBIO 0x5421
+#define LX_TIOCNOTTY 0x5422
+#define LX_TIOCSETD 0x5423
+#define LX_TIOCGETD 0x5424
+#define LX_TCSBRKP 0x5425
+#define LX_TIOCGSID 0x5429
+#define LX_TIOCGPTN 0x80045430 /* unlike its neighbours, upper 16 bits set */
+#define LX_TIOCSPTLCK 0x40045431 /* unlike its neighbours, upper 16 bits set */
+#define LX_FIONCLEX 0x5450
+#define LX_FIOCLEX 0x5451
+#define LX_FIOASYNC 0x5452
+#define LX_FIOSETOWN 0x8901
+#define LX_SIOCSPGRP 0x8902
+#define LX_FIOGETOWN 0x8903
+#define LX_SIOCGPGRP 0x8904
+#define LX_SIOCATMARK 0x8905
+#define LX_SIOCGIFCONF 0x8912
+#define LX_SIOCGIFFLAGS 0x8913
+#define LX_SIOCSIFFLAGS 0x8914
+#define LX_SIOCGIFADDR 0x8915
+#define LX_SIOCSIFADDR 0x8916
+#define LX_SIOCGIFDSTADDR 0x8917
+#define LX_SIOCSIFDSTADDR 0x8918
+#define LX_SIOCGIFBRDADDR 0x8919
+#define LX_SIOCSIFBRDADDR 0x891a
+#define LX_SIOCGIFNETMASK 0x891b
+#define LX_SIOCSIFNETMASK 0x891c
+#define LX_SIOCGIFMETRIC 0x891d
+#define LX_SIOCSIFMETRIC 0x891e
+#define LX_SIOCGIFMEM 0x891f
+#define LX_SIOCSIFMEM 0x8920
+#define LX_SIOCGIFMTU 0x8921
+#define LX_SIOCSIFMTU 0x8922
+#define LX_SIOCSIFHWADDR 0x8924 /* 0x8923 is LX_OLD_SIOCGIFHWADDR (unsupported) */
+#define LX_SIOCGIFHWADDR 0x8927
+
+/*
+ * /dev/dsp ioctls - supported
+ */
+#define LX_OSS_SNDCTL_DSP_RESET 0x5000
+#define LX_OSS_SNDCTL_DSP_SYNC 0x5001
+#define LX_OSS_SNDCTL_DSP_SPEED 0xc0045002
+#define LX_OSS_SNDCTL_DSP_STEREO 0xc0045003
+#define LX_OSS_SNDCTL_DSP_GETBLKSIZE 0xc0045004
+#define LX_OSS_SNDCTL_DSP_SETFMTS 0xc0045005
+#define LX_OSS_SNDCTL_DSP_CHANNELS 0xc0045006
+#define LX_OSS_SNDCTL_DSP_SETFRAGMENT 0xc004500a
+#define LX_OSS_SNDCTL_DSP_GETFMTS 0x8004500b
+#define LX_OSS_SNDCTL_DSP_GETOSPACE 0x8010500c
+#define LX_OSS_SNDCTL_DSP_GETCAPS 0x8004500f
+#define LX_OSS_SNDCTL_DSP_SETTRIGGER 0x40045010
+#define LX_OSS_SNDCTL_DSP_GETOPTR 0x800c5012
+#define LX_OSS_SNDCTL_DSP_GETISPACE 0x8010500d /* listed out of numeric order */
+
+/*
+ * support for /dev/dsp SNDCTL_DSP_GETFMTS and SNDCTL_DSP_SETFMTS
+ */
+#define LX_OSS_AFMT_QUERY 0x0000
+#define LX_OSS_AFMT_MU_LAW 0x0001
+#define LX_OSS_AFMT_A_LAW 0x0002
+#define LX_OSS_AFMT_IMA_ADPCM 0x0004
+#define LX_OSS_AFMT_U8 0x0008
+#define LX_OSS_AFMT_S16_LE 0x0010
+#define LX_OSS_AFMT_S16_BE 0x0020
+#define LX_OSS_AFMT_S8 0x0040
+#define LX_OSS_AFMT_U16_LE 0x0080
+#define LX_OSS_AFMT_U16_BE 0x0100
+#define LX_OSS_AFMT_MPEG 0x0200
+
+#ifdef _LITTLE_ENDIAN
+#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_LE /* _NE follows the build's endianness */
+#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_LE
+#elif defined(_BIG_ENDIAN)
+#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_BE
+#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_BE
+#else /* neither _LITTLE_ENDIAN nor _BIG_ENDIAN */
+#error NO ENDIAN defined.
+#endif /* _LITTLE_ENDIAN / _BIG_ENDIAN */
+
+/*
+ * support for /dev/dsp SNDCTL_DSP_GETISPACE and SNDCTL_DSP_GETOSPACE
+ */
+typedef struct lx_oss_audio_buf_info {
+ int fragments; /* fragments that can be rd/wr without blocking */
+ int fragstotal; /* total number of fragments allocated for buffering */
+ int fragsize; /* size of fragments, same as SNDCTL_DSP_GETBLKSIZE */
+ int bytes; /* what can be rd/wr immediately without blocking */
+} lx_oss_audio_buf_info_t;
+
+/*
+ * support for /dev/dsp SNDCTL_DSP_GETOPTR
+ */
+typedef struct lx_oss_count_info {
+ /* # of bytes processed since opening the device */
+ int bytes;
+
+ /*
+ * # of fragment transitions since last call to this function.
+ * only valid for mmap access mode.
+ */
+ int blocks;
+
+ /*
+ * byte offset of the current recording/playback position from
+ * the beginning of the audio buffer. only valid for mmap access
+ * mode.
+ */
+ int ptr;
+} lx_oss_count_info_t;
+
+/*
+ * support for /dev/dsp SNDCTL_DSP_GETCAPS
+ */
+#define LX_OSS_DSP_CAP_TRIGGER 0x1000
+#define LX_OSS_DSP_CAP_MMAP 0x2000
+
+/*
+ * support for /dev/dsp SNDCTL_DSP_SETTRIGGER
+ */
+#define LX_OSS_PCM_DISABLE_OUTPUT 0 /* no bits set */
+#define LX_OSS_PCM_ENABLE_OUTPUT 2 /* bit 1 */
+
+/*
+ * /dev/mixer ioctl macros
+ */
+#define LX_OSS_SM_NRDEVICES 25 /* channel numbers in this file run 0-24 */
+#define LX_OSS_SM_READ(x) (0x80044d00 | (x)) /* x: an LX_OSS_SM_* number */
+#define LX_OSS_SM_WRITE(x) (0xc0044d00 | (x)) /* x: an LX_OSS_SM_* number */
+
+/*
+ * /dev/mixer ioctls - supported (LX_OSS_SM_* are defined further down)
+ */
+#define LX_OSS_SOUND_MIXER_READ_VOLUME LX_OSS_SM_READ(LX_OSS_SM_VOLUME)
+#define LX_OSS_SOUND_MIXER_READ_PCM LX_OSS_SM_READ(LX_OSS_SM_PCM)
+#define LX_OSS_SOUND_MIXER_READ_MIC LX_OSS_SM_READ(LX_OSS_SM_MIC)
+#define LX_OSS_SOUND_MIXER_READ_IGAIN LX_OSS_SM_READ(LX_OSS_SM_IGAIN)
+#define LX_OSS_SOUND_MIXER_WRITE_VOLUME LX_OSS_SM_WRITE(LX_OSS_SM_VOLUME)
+#define LX_OSS_SOUND_MIXER_WRITE_PCM LX_OSS_SM_WRITE(LX_OSS_SM_PCM)
+#define LX_OSS_SOUND_MIXER_WRITE_MIC LX_OSS_SM_WRITE(LX_OSS_SM_MIC)
+#define LX_OSS_SOUND_MIXER_WRITE_IGAIN LX_OSS_SM_WRITE(LX_OSS_SM_IGAIN)
+#define LX_OSS_SOUND_MIXER_READ_STEREODEVS LX_OSS_SM_READ(LX_OSS_SM_STEREODEVS)
+#define LX_OSS_SOUND_MIXER_READ_RECMASK LX_OSS_SM_READ(LX_OSS_SM_RECMASK)
+#define LX_OSS_SOUND_MIXER_READ_DEVMASK LX_OSS_SM_READ(LX_OSS_SM_DEVMASK)
+#define LX_OSS_SOUND_MIXER_READ_RECSRC LX_OSS_SM_READ(LX_OSS_SM_RECSRC)
+
+/*
+ * /dev/mixer channels
+ */
+#define LX_OSS_SM_VOLUME 0
+#define LX_OSS_SM_BASS 1
+#define LX_OSS_SM_TREBLE 2
+#define LX_OSS_SM_SYNTH 3
+#define LX_OSS_SM_PCM 4
+#define LX_OSS_SM_SPEAKER 5
+#define LX_OSS_SM_LINE 6
+#define LX_OSS_SM_MIC 7
+#define LX_OSS_SM_CD 8
+#define LX_OSS_SM_MIX 9
+#define LX_OSS_SM_PCM2 10
+#define LX_OSS_SM_REC 11
+#define LX_OSS_SM_IGAIN 12
+#define LX_OSS_SM_OGAIN 13
+#define LX_OSS_SM_LINE1 14
+#define LX_OSS_SM_LINE2 15
+#define LX_OSS_SM_LINE3 16
+#define LX_OSS_SM_DIGITAL1 17
+#define LX_OSS_SM_DIGITAL2 18
+#define LX_OSS_SM_DIGITAL3 19
+#define LX_OSS_SM_PHONEIN 20
+#define LX_OSS_SM_PHONEOUT 21
+#define LX_OSS_SM_VIDEO 22
+#define LX_OSS_SM_RADIO 23
+#define LX_OSS_SM_MONITOR 24 /* highest channel; LX_OSS_SM_NRDEVICES is 25 */
+
+/*
+ * /dev/mixer operations
+ */
+#define LX_OSS_SM_STEREODEVS 251
+#define LX_OSS_SM_CAPS 252 /* no LX_OSS_SOUND_MIXER_* macro in this file uses it */
+#define LX_OSS_SM_RECMASK 253
+#define LX_OSS_SM_DEVMASK 254
+#define LX_OSS_SM_RECSRC 255
+
+/*
+ * /dev/mixer value conversion macros
+ *
+ * solaris expects gain level on a scale of 0 - 255
+ * oss expects gain level on a scale of 0 - 100
+ *
+ * oss also encodes multiple channels volume values in a single int,
+ * one channel value per byte.
+ */
+#define LX_OSS_S2L_GAIN(v) (((v) * 100) / 255) /* 0-255 scale -> 0-100 scale */
+#define LX_OSS_L2S_GAIN(v) (((v) * 255) / 100) /* 0-100 scale -> 0-255 scale */
+#define LX_OSS_MIXER_DEC1(v) ((v) & 0xff) /* lowest byte */
+#define LX_OSS_MIXER_DEC2(v) (((v) >> 8) & 0xff) /* second-lowest byte */
+#define LX_OSS_MIXER_ENC2(v1, v2) (((v2) << 8) | (v1)) /* inputs are not masked */
+
+/*
+ * /dev/mixer value verification macros (each channel byte must be 0 - 100)
+ */
+#define LX_OSS_MIXER_VCHECK(x) (((int)(x) >= 0) && ((int)(x) <= 100))
+#define LX_OSS_MIXER_1CH_OK(x) ((((x) & ~0xff) == 0) && \
+ LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x)))
+#define LX_OSS_MIXER_2CH_OK(x) ((((x) & ~0xffff) == 0) && \
+ LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x)) && \
+ LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC2(x)))
+
+/*
+ * Unsupported ioctls (NOT a comprehensive list)
+ */
+#define LX_TIOCLINUX 0x541c /* 0x541c-0x541f: absent from the supported list */
+#define LX_TIOCCONS 0x541d
+#define LX_TIOCGSERIAL 0x541e
+#define LX_TIOCSSERIAL 0x541f
+#define LX_TIOCTTYGSTRUCT 0x5426
+#define LX_TIOCSERCONFIG 0x5453
+#define LX_TIOCSERGWILD 0x5454
+#define LX_TIOCSERSWILD 0x5455
+#define LX_TIOCGLCKTRMIOS 0x5456
+#define LX_TIOCSLCKTRMIOS 0x5457
+#define LX_TIOCSERGSTRUCT 0x5458
+#define LX_TIOCSERGETLSR 0x5459
+#define LX_TIOCSERGETMULTI 0x545a
+#define LX_TIOCSERSETMULTI 0x545b
+#define LX_OLD_SIOCGIFHWADDR 0x8923 /* supported LX_SIOCGIFHWADDR is 0x8927 */
+#define LX_SIOCSIFENCAP 0x8926
+#define LX_SIOCGIFSLAVE 0x8929
+#define LX_SIOCSIFSLAVE 0x8930
+#define LX_SIOCADDMULTI 0x8931
+#define LX_SIOCDELMULTI 0x8932
+#define LX_SIOCADDRTOLD 0x8940
+#define LX_SIOCDELRTOLD 0x8941
+#define LX_SIOCGIFTXQLEN 0x8942
+#define LX_SIOCDARP 0x8950
+#define LX_SIOCGARP 0x8951
+#define LX_SIOCSARP 0x8952
+#define LX_SIOCDRARP 0x8960
+#define LX_SIOCGRARP 0x8961
+#define LX_SIOCSRARP 0x8962
+#define LX_SIOCGIFMAP 0x8970
+#define LX_SIOCSIFMAP 0x8971
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_IOCTL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
new file mode 100644
index 0000000000..307952656d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
@@ -0,0 +1,153 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_H
+#define _SYS_LX_H
+
+#include <stdio.h>
+#include <alloca.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lwp.h>
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern char lx_release[128];
+extern pid_t zoneinit_pid;
+
+/*
+ * Support for the unfortunate RPM race condition workaround.
+ */
+extern int lx_rpm_delay;
+extern boolean_t lx_is_rpm;
+
+/*
+ * Values Linux expects for init
+ */
+#define LX_INIT_PGID 0
+#define LX_INIT_SID 0
+#define LX_INIT_PID 1
+
+/*
+ * Codes to reboot(2).
+ */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 672274793 /* 0x28121969 */
+#define LINUX_REBOOT_MAGIC2A 85072278 /* 0x05121996 */
+#define LINUX_REBOOT_MAGIC2B 369367448 /* 0x16041998 */
+#define LINUX_REBOOT_MAGIC2C 537993216 /* 0x20112000 */
+
+/*
+ * Observed coming from Red Hat's init process.  NOTE(review): 0x28121969 is
+ * 672274793, i.e. the same value as LINUX_REBOOT_MAGIC2 written in hex.
+ */
+#define LINUX_REBOOT_MAGIC2D 0x28121969
+
+#define LINUX_REBOOT_CMD_RESTART 0x1234567
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART2 0xa1b2c3d4
+#define LINUX_REBOOT_CMD_CAD_ON 0x89abcdef
+#define LINUX_REBOOT_CMD_CAD_OFF 0
+
+/*
+ * the maximum length of messages to be output with lx_msg(), lx_err(),
+ * lx_debug(), or lx_unsupported().
+ */
+#define LX_MSG_MAXLEN (128 + MAXPATHLEN) /* MAXPATHLEN: presumably <sys/param.h> */
+
+/*
+ * Linux scheduler priority ranges.
+ */
+#define LX_SCHED_PRIORITY_MIN_OTHER 0 /* min == max: the range is just 0 */
+#define LX_SCHED_PRIORITY_MAX_OTHER 0
+#define LX_SCHED_PRIORITY_MIN_RRFIFO 1
+#define LX_SCHED_PRIORITY_MAX_RRFIFO 99
+
+/*
+ * Constants to indicate who getrusage() should return information about.
+ */
+#define LX_RUSAGE_SELF 0
+#define LX_RUSAGE_CHILDREN (-1)
+
+/*
+ * normally we never want to write to stderr or stdout because it's unsafe
+ * to make assumptions about the underlying file descriptors. to protect
+ * against writes to these file descriptors we go ahead and close them in
+ * our brand process initialization code. but there are still occasions
+ * where we are willing to make assumptions about our file descriptors
+ * and write to them. at these times we should use either lx_msg() or
+ * lx_err()
+ */
+extern void lx_msg(char *, ...);
+extern void lx_err(char *, ...);
+extern void lx_err_fatal(char *, ...);
+extern void lx_unsupported(char *, ...);
+
+struct ucontext;
+
+extern void lx_handler_table(void);
+extern void lx_handler_trace_table(void);
+extern void lx_emulate_done(void);
+extern lx_regs_t *lx_syscall_regs(void);
+
+extern char *lx_fd_to_path(int fd, char *buf, int buf_size);
+extern int lx_lpid_to_spair(pid_t, pid_t *, lwpid_t *);
+extern int lx_lpid_to_spid(pid_t, pid_t *);
+
+extern int lx_ptrace_wait(siginfo_t *);
+extern void lx_ptrace_fork(void);
+
+extern int lx_get_kern_version(void);
+
+extern int lx_check_alloca(size_t);
+#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) /* NULL if check fails; sz is expanded twice */
+
+extern int ltos_at_flag(int lflag, int allow);
+
+/*
+ * NO_UUCOPY disables calls to the uucopy* system calls to help with
+ * debugging brand library accesses to linux application memory.
+ */
+#ifdef NO_UUCOPY
+
+int uucopy_unsafe(const void *src, void *dst, size_t n);
+int uucopystr_unsafe(const void *src, void *dst, size_t n);
+
+#define uucopy(src, dst, n) uucopy_unsafe((src), (dst), (n))
+#define uucopystr(src, dst, n) uucopystr_unsafe((src), (dst), (n))
+
+#endif /* NO_UUCOPY */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
new file mode 100644
index 0000000000..41db8cca03
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
@@ -0,0 +1,139 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_MOUNT_H
+#define _LX_MOUNT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <rpc/rpc.h>
+#include <nfs/nfs.h>
+
+#ifdef __cplusplus
+extern "C" { /* opened after the #includes so they keep their own linkage */
+#endif
+
+/*
+ * mount() is significantly different between Linux and Solaris. The main
+ * difference is between the set of flags. Some flags on Linux can be
+ * translated to a Solaris equivalent, some are converted to a
+ * filesystem-specific option, while others have no equivalent whatsoever.
+ */
+#define LX_MS_MGC_VAL 0xC0ED0000
+#define LX_MS_RDONLY 0x00000001
+#define LX_MS_NOSUID 0x00000002
+#define LX_MS_NODEV 0x00000004
+#define LX_MS_NOEXEC 0x00000008
+#define LX_MS_SYNCHRONOUS 0x00000010 /* not part of LX_MS_SUPPORTED */
+#define LX_MS_REMOUNT 0x00000020
+#define LX_MS_MANDLOCK 0x00000040 /* not part of LX_MS_SUPPORTED */
+#define LX_MS_NOATIME 0x00000400
+#define LX_MS_NODIRATIME 0x00000800 /* not part of LX_MS_SUPPORTED */
+#define LX_MS_BIND 0x00001000
+#define LX_MS_SUPPORTED (LX_MS_MGC_VAL | \
+ LX_MS_RDONLY | LX_MS_NOSUID | \
+ LX_MS_NODEV | LX_MS_NOEXEC | \
+ LX_MS_REMOUNT | LX_MS_NOATIME | \
+ LX_MS_BIND)
+
+/*
+ * support for nfs mounts
+ */
+#define LX_NMD_MAXHOSTNAMELEN 256 /* size of nmd_hostname[] */
+
+#define LX_NFS_MOUNT_SOFT 0x00000001
+#define LX_NFS_MOUNT_INTR 0x00000002
+#define LX_NFS_MOUNT_SECURE 0x00000004 /* not part of LX_NFS_MOUNT_SUPPORTED */
+#define LX_NFS_MOUNT_POSIX 0x00000008
+#define LX_NFS_MOUNT_NOCTO 0x00000010
+#define LX_NFS_MOUNT_NOAC 0x00000020
+#define LX_NFS_MOUNT_TCP 0x00000040
+#define LX_NFS_MOUNT_VER3 0x00000080
+#define LX_NFS_MOUNT_KERBEROS 0x00000100 /* not part of LX_NFS_MOUNT_SUPPORTED */
+#define LX_NFS_MOUNT_NONLM 0x00000200
+#define LX_NFS_MOUNT_BROKEN_SUID 0x00000400 /* not part of LX_NFS_MOUNT_SUPPORTED */
+#define LX_NFS_MOUNT_SUPPORTED (LX_NFS_MOUNT_SOFT | \
+ LX_NFS_MOUNT_INTR | \
+ LX_NFS_MOUNT_POSIX | \
+ LX_NFS_MOUNT_NOCTO | \
+ LX_NFS_MOUNT_NOAC | \
+ LX_NFS_MOUNT_TCP | \
+ LX_NFS_MOUNT_VER3 | \
+ LX_NFS_MOUNT_NONLM)
+
+#define LX_NMD_DEFAULT_RSIZE 0
+#define LX_NMD_DEFAULT_WSIZE 0
+
+/*
+ * the nfs v3 file handle structure definitions are _almost_ the same
+ * on linux and solaris. the key differences are:
+ *
+ * 1) on linux fh3_length is an unsigned short where as on solaris it's
+ * an int.
+ *
+ * 2) on linux the file handle data doesn't have 32 bit members, so the
+ * structure is not 32 bit aligned. (where as on solaris it is.)
+ *
+ * so rather than defining a structure that would allow us to interpret
+ * all the contents of the nfs v3 file handle here, we decide to treat
+ * the file handle as an array of chars. this works just fine since it
+ * avoids the alignment issues and the actual file handle contents
+ * are defined by the nfs specification so they are common across solaris
+ * and linux. we do the same thing for nfs v2 file handles.
+ */
+struct lx_nfs_fh2 {
+ unsigned char lx_fh_data[NFS_FHSIZE];
+}; /* type only: a header must not define an lx_nfs_fh2 object */
+
+struct lx_nfs_fh3 {
+ unsigned short lx_fh3_length;
+ unsigned char lx_fh3_data[NFS3_FHSIZE];
+}; /* type only: a header must not define an lx_nfs_fh3 object */
+
+typedef struct lx_nfs_mount_data {
+ int nmd_version;
+ int nmd_fd;
+ struct lx_nfs_fh2 nmd_old_root; /* char-array handle of NFS_FHSIZE bytes */
+ int nmd_flags; /* presumably LX_NFS_MOUNT_* bits -- confirm */
+ int nmd_rsize; /* cf. LX_NMD_DEFAULT_RSIZE */
+ int nmd_wsize; /* cf. LX_NMD_DEFAULT_WSIZE */
+ int nmd_timeo;
+ int nmd_retrans;
+ int nmd_acregmin;
+ int nmd_acregmax;
+ int nmd_acdirmin;
+ int nmd_acdirmax;
+ struct sockaddr_in nmd_addr;
+ char nmd_hostname[LX_NMD_MAXHOSTNAMELEN];
+ int nmd_namlen;
+ uint_t nmd_bsize;
+ struct lx_nfs_fh3 nmd_root; /* length-prefixed handle, up to NFS3_FHSIZE bytes */
+} lx_nfs_mount_data_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_MOUNT_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
new file mode 100644
index 0000000000..cb5706fab2
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_POLL_H
+#define _SYS_LX_POLL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * These events are identical between Linux and Solaris
+ */
+#define LX_POLLIN 0x001
+#define LX_POLLPRI 0x002
+#define LX_POLLOUT 0x004
+#define LX_POLLERR 0x008
+#define LX_POLLHUP 0x010
+#define LX_POLLNVAL 0x020
+#define LX_POLLRDNORM 0x040
+#define LX_POLLRDBAND 0x080 /* the eight bits above OR together to 0x0ff */
+
+#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \
+ LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND)
+
+/*
+ * These events differ between Linux and Solaris
+ */
+#define LX_POLLWRNORM 0x100
+#define LX_POLLWRBAND 0x200 /* with the common events this totals 0x3ff */
+
+#define LX_POLL_SUPPORTED_EVENTS \
+ (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_POLL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
new file mode 100644
index 0000000000..b7609962ae
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
@@ -0,0 +1,299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_SIGNAL_H
+#define _SYS_LX_SIGNAL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#if !defined(_ASM)
+#include <sys/lx_types.h>
+#include <lx_signum.h>
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux sigaction flags
+ */
+#define LX_SA_NOCLDSTOP 0x00000001
+#define LX_SA_NOCLDWAIT 0x00000002
+#define LX_SA_SIGINFO 0x00000004
+#define LX_SA_RESTORER 0x04000000
+#define LX_SA_ONSTACK 0x08000000
+#define LX_SA_RESTART 0x10000000
+#define LX_SA_NODEFER 0x40000000
+#define LX_SA_RESETHAND 0x80000000 /* bit 31; note lxsa_flags is declared int */
+#define LX_SA_NOMASK LX_SA_NODEFER /* alias for LX_SA_NODEFER */
+#define LX_SA_ONESHOT LX_SA_RESETHAND /* alias for LX_SA_RESETHAND */
+
+#define LX_SIG_BLOCK 0
+#define LX_SIG_UNBLOCK 1
+#define LX_SIG_SETMASK 2
+
+#define LX_MINSIGSTKSZ 2048
+#define LX_SS_ONSTACK 1 /* presumably for lx_stack_t.ss_flags -- confirm */
+#define LX_SS_DISABLE 2
+
+#define LX_SIGRT_MAGIC 0xdeadf00d
+
+#if !defined(_ASM)
+
+/*
+ * NOTE: Linux uses different definitions for sigset_ts and sigaction_ts
+ * depending on whether the definition is for user space or the kernel.
+ * The definitions below MUST correspond to the Linux kernel versions,
+ * as glibc will do the necessary translation from the Linux user
+ * versions.  Signal n (1-based) lives in word lx_sigword(n) at bit
+ * lx_sigmask(n); both are derived from LX_NBITS so they always agree.
+ */
+typedef struct {
+ ulong_t __bits[LX_NSIG_WORDS];
+} lx_sigset_t;
+
+#define LX_NBITS (sizeof (ulong_t) * NBBY) /* bits per __bits[] word */
+#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS)) /* bit within the word */
+#define lx_sigword(n) (((ulong_t)((n) - 1)) / LX_NBITS) /* was >>5: 32-bit only */
+#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)])
+#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n))
+
+typedef struct lx_sigaction {
+ void (*lxsa_handler)(); /* declared with an unspecified parameter list */
+ int lxsa_flags; /* presumably LX_SA_* bits -- confirm */
+ void (*lxsa_restorer)(void);
+ lx_sigset_t lxsa_mask; /* mask comes last here; lx_osigaction differs */
+} lx_sigaction_t;
+
+typedef uint32_t lx_osigset_t; /* a single 32-bit word of signal bits */
+
+#define OSIGSET_NBITS (sizeof (lx_osigset_t) * NBBY)
+#define OSIGSET_BITSET(sig) (1U << (((sig) - 1) % OSIGSET_NBITS)) /* sig is 1-based */
+
+/*
+ * Flag settings to determine whether common routines should operate on
+ * lx_sigset_ts or lx_osigset_ts.
+ */
+#define USE_OSIGSET 0
+#define USE_SIGSET 1
+
+typedef struct lx_osigaction {
+ void (*lxsa_handler)();
+ lx_osigset_t lxsa_mask; /* mask is the second member, unlike lx_sigaction */
+ int lxsa_flags;
+ void (*lxsa_restorer)(void);
+} lx_osigaction_t;
+
+#define LX_SI_MAX_SIZE 128
+#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 3) /* 3: presumably the leading ints */
+
+typedef struct lx_siginfo {
+ int lsi_signo;
+ int lsi_errno;
+ int lsi_code; /* see the LX_SI_* lsi_code values */
+ union {
+ int _pad[LX_SI_PAD_SIZE]; /* LX_SI_PAD_SIZE ints of padding */
+
+ struct {
+ pid_t _pid;
+ lx_uid16_t _uid;
+ } _kill;
+
+ struct {
+ uint_t _timer1;
+ uint_t _timer2;
+ } _timer;
+
+ struct {
+ pid_t _pid; /* sender's pid */
+ lx_uid16_t _uid; /* sender's uid */
+ union sigval _sigval;
+ } _rt;
+
+ struct {
+ pid_t _pid; /* which child */
+ lx_uid16_t _uid; /* sender's uid */
+ int _status; /* exit code */
+ clock_t _utime;
+ clock_t _stime;
+ } _sigchld;
+
+ struct {
+ void *_addr; /* faulting insn/memory ref. */
+ } _sigfault;
+
+ struct {
+ int _band; /* POLL_IN,POLL_OUT,POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields; /* reached through the lsi_* accessor macros */
+} lx_siginfo_t;
+
+/*
+ * lx_siginfo_t lsi_code values
+ *
+ * LX_SI_ASYNCNL: Sent by asynch name lookup completion
+ * LX_SI_TKILL: Sent by tkill
+ * LX_SI_SIGIO: Sent by queued SIGIO
+ * LX_SI_ASYNCIO: Sent by asynchronous I/O completion
+ * LX_SI_MESGQ: Sent by real time message queue state change
+ * LX_SI_TIMER: Sent by timer expiration
+ * LX_SI_QUEUE: Sent by sigqueue
+ * LX_SI_USER: Sent by kill, sigsend, raise, etc.
+ * LX_SI_KERNEL: Sent by kernel
+ *
+ * At present, LX_SI_ASYNCNL and LX_SI_SIGIO are unused by BrandZ.
+ */
+#define LX_SI_ASYNCNL (-60)
+#define LX_SI_TKILL (-6)
+#define LX_SI_SIGIO (-5)
+#define LX_SI_ASYNCIO (-4)
+#define LX_SI_MESGQ (-3)
+#define LX_SI_TIMER (-2)
+#define LX_SI_QUEUE (-1)
+#define LX_SI_USER (0)
+#define LX_SI_KERNEL (0x80)
+
+typedef struct lx_sighandlers {
+ struct lx_sigaction lx_sa[LX_NSIG]; /* LX_NSIG: presumably <lx_signum.h> */
+} lx_sighandlers_t;
+
+typedef struct lx_sigaltstack {
+ void *ss_sp;
+ int ss_flags; /* presumably LX_SS_ONSTACK / LX_SS_DISABLE -- confirm */
+ size_t ss_size;
+} lx_stack_t; /* note: the typedef name differs from the struct tag */
+
+struct lx_fpreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+};
+
+struct lx_fpxreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+ ushort_t padding[3]; /* lx_fpreg's members plus three ushort_t of padding */
+};
+
+struct lx_xmmreg {
+ uint32_t element[4];
+};
+
+#define LX_X86_FXSR_MAGIC 0x0000
+#define LX_X86_FXSR_NONE 0xffff /* the 0xffff noted on lx_fpstate's magic member */
+
+typedef struct lx_fpstate {
+ /* Regular FPU environment */
+ ulong_t cw;
+ ulong_t sw;
+ ulong_t tag;
+ ulong_t ipoff;
+ ulong_t cssel;
+ ulong_t dataoff;
+ ulong_t datasel;
+ struct lx_fpreg _st[8];
+ ushort_t status;
+ ushort_t magic; /* 0xffff (LX_X86_FXSR_NONE) = regular FPU data */
+
+ /* FXSR FPU environment */
+ ulong_t _fxsr_env[6]; /* env is ignored */
+ ulong_t mxcsr;
+ ulong_t reserved;
+ struct lx_fpxreg _fxsr_st[8]; /* reg data is ignored */
+ struct lx_xmmreg _xmm[8];
+ ulong_t padding[56];
+} lx_fpstate_t;
+
+typedef struct lx_sigcontext {
+ ulong_t sc_gs; /* NOTE(review): names look like i386 registers -- confirm */
+ ulong_t sc_fs;
+ ulong_t sc_es;
+ ulong_t sc_ds;
+ ulong_t sc_edi;
+ ulong_t sc_esi;
+ ulong_t sc_ebp;
+ ulong_t sc_esp;
+ ulong_t sc_ebx;
+ ulong_t sc_edx;
+ ulong_t sc_ecx;
+ ulong_t sc_eax;
+ ulong_t sc_trapno;
+ ulong_t sc_err;
+ ulong_t sc_eip;
+ ulong_t sc_cs;
+ ulong_t sc_eflags;
+ ulong_t sc_esp_at_signal;
+ ulong_t sc_ss;
+ lx_fpstate_t *sc_fpstate; /* a pointer, not an embedded lx_fpstate_t */
+ ulong_t sc_mask;
+ ulong_t sc_cr2;
+} lx_sigcontext_t;
+
+typedef struct lx_ucontext {
+ ulong_t uc_flags;
+ struct lx_ucontext *uc_link; /* pointer to another lx_ucontext */
+ lx_stack_t uc_stack;
+ lx_sigcontext_t uc_sigcontext; /* embedded by value */
+ lx_sigset_t uc_sigmask;
+} lx_ucontext_t;
+
+/*
+ * Shorthand for lx_siginfo_t members; LX_SI_* sizes are defined with it.
+ */
+#define lsi_pid _sifields._kill._pid
+#define lsi_uid _sifields._kill._uid
+#define lsi_status _sifields._sigchld._status
+#define lsi_utime _sifields._sigchld._utime
+#define lsi_stime _sifields._sigchld._stime
+#define lsi_value _sifields._rt._sigval
+#define lsi_int _sifields._rt._sigval.sival_int /* union sigval member name */
+#define lsi_ptr _sifields._rt._sigval.sival_ptr /* union sigval member name */
+#define lsi_addr _sifields._sigfault._addr
+#define lsi_band _sifields._sigpoll._band
+#define lsi_fd _sifields._sigpoll._fd
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+extern void setsigacthandler(void (*)(int, siginfo_t *, void *),
+ void (**)(int, siginfo_t *, void *));
+
+extern int lx_siginit(void);
+
+extern void lx_sigreturn_tolibc(uintptr_t);
+extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(),
+ void (*)(), uintptr_t);
+
+extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop);
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SIGNAL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
new file mode 100644
index 0000000000..ce8d71c962
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
@@ -0,0 +1,258 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_SOCKET_H
+#define _SYS_LX_SOCKET_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_types.h>
+
+/*
+ * Linux address family definitions
+ * Some of these are not supported
+ */
+#define LX_AF_UNSPEC 0 /* Unspecified */
+#define LX_AF_UNIX 1 /* local file/pipe name */
+#define LX_AF_INET 2 /* IP protocol family */
+#define LX_AF_AX25 3 /* Amateur Radio AX.25 */
+#define LX_AF_IPX 4 /* Novell Internet Protocol */
+#define LX_AF_APPLETALK 5 /* Appletalk */
+#define LX_AF_NETROM 6 /* Amateur radio */
+#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */
+#define LX_AF_ATMPVC 8 /* ATM PVCs */
+#define LX_AF_X25 9 /* X.25 */
+#define LX_AF_INET6 10 /* IPV 6 */
+#define LX_AF_ROSE 11 /* Amateur Radio X.25 */
+#define LX_AF_DECnet 12 /* DECnet */
+#define LX_AF_NETBEUI 13 /* 802.2LLC */
+#define LX_AF_SECURITY 14 /* Security callback */
+#define LX_AF_KEY 15 /* key management */
+#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */
+#define LX_AF_PACKET 17 /* Packet family */
+#define LX_AF_ASH 18 /* Ash ? */
+#define LX_AF_ECONET 19 /* Acorn Econet */
+#define LX_AF_ATMSVC 20 /* ATM SVCs */
+#define LX_AF_SNA 22 /* Linux SNA */
+#define LX_AF_IRDA 23 /* IRDA sockets */
+#define LX_AF_PPPOX 24 /* PPPoX sockets */
+#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */
+#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define LX_AF_MAX 32 /* MAX socket type */
+
+#define AF_NOTSUPPORTED (-1) /* negative sentinel; parenthesized so it always expands as a single operand */
+#define AF_INVAL (-2) /* negative sentinel; parenthesized for the same reason */
+
+/*
+ * Linux ARP protocol hardware identifiers
+ */
+#define LX_ARPHRD_ETHER 1 /* Ethernet */
+#define LX_ARPHRD_LOOPBACK 772 /* Loopback */
+#define LX_ARPHRD_VOID 0xffff /* Unknown */
+
+/*
+ * Linux socket type definitions
+ */
+#define LX_SOCK_STREAM 1 /* Connection-based byte streams */
+#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */
+#define LX_SOCK_RAW 3 /* Raw protocol interface */
+#define LX_SOCK_RDM 4 /* Reliably-delivered message */
+#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */
+#define LX_SOCK_PACKET 10 /* Linux specific */
+#define LX_SOCK_MAX 11
+
+#define SOCK_NOTSUPPORTED (-1) /* negative sentinel; parenthesized so it always expands as a single operand */
+#define SOCK_INVAL (-2) /* negative sentinel; parenthesized for the same reason */
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * IPPROTO_IP
+ */
+#define LX_IP_TOS 1
+#define LX_IP_TTL 2
+#define LX_IP_HDRINCL 3
+#define LX_IP_OPTIONS 4
+#define LX_IP_ROUTER_ALERT 5
+#define LX_IP_RECVOPTS 6
+#define LX_IP_RETOPTS 7
+#define LX_IP_PKTINFO 8
+#define LX_IP_PKTOPTIONS 9
+#define LX_IP_MTU_DISCOVER 10
+#define LX_IP_RECVERR 11
+#define LX_IP_RECVTTL 12
+#define LX_IP_RECVTOS 13
+#define LX_IP_MTU 14
+#define LX_IP_FREEBIND 15
+#define LX_IP_MULTICAST_IF 32
+#define LX_IP_MULTICAST_TTL 33
+#define LX_IP_MULTICAST_LOOP 34
+#define LX_IP_ADD_MEMBERSHIP 35
+#define LX_IP_DROP_MEMBERSHIP 36
+
+/*
+ * Options for use with [gs]etsockopt at the TCP level.
+ * IPPROTO_TCP
+ */
+#define LX_TCP_NODELAY 1 /* Don't delay send to coalesce packets */
+#define LX_TCP_MAXSEG 2 /* Set maximum segment size */
+#define LX_TCP_CORK 3 /* Control sending of partial frames */
+#define LX_TCP_KEEPIDLE 4 /* Start keepalives after this period */
+#define LX_TCP_KEEPINTVL 5 /* Interval between keepalives */
+#define LX_TCP_KEEPCNT 6 /* Number of keepalives before death */
+#define LX_TCP_SYNCNT 7 /* Number of SYN retransmits */
+#define LX_TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */
+#define LX_TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */
+#define LX_TCP_WINDOW_CLAMP 10 /* Bound advertised window */
+#define LX_TCP_INFO 11 /* Information about this connection. */
+#define LX_TCP_QUICKACK 12 /* Block/reenable quick ACKs. */
+
+/*
+ * Options for use with [gs]etsockopt at the IGMP level.
+ * IPPROTO_IGMP
+ */
+#define LX_IGMP_MINLEN 8
+#define LX_IGMP_MAX_HOST_REPORT_DELAY 10
+#define LX_IGMP_HOST_MEMBERSHIP_QUERY 0x11
+#define LX_IGMP_HOST_MEMBERSHIP_REPORT 0x12
+#define LX_IGMP_DVMRP 0x13
+#define LX_IGMP_PIM 0x14
+#define LX_IGMP_TRACE 0x15
+#define LX_IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16
+#define LX_IGMP_HOST_LEAVE_MESSAGE 0x17
+#define LX_IGMP_MTRACE_RESP 0x1e
+#define LX_IGMP_MTRACE 0x1f
+
+/*
+ * Options for use with [gs]etsockopt at the SOL_SOCKET level.
+ */
+#define LX_SOL_SOCKET 1
+
+#define LX_SCM_RIGHTS 1
+#define LX_SCM_CRED 2
+
+#define LX_SO_DEBUG 1
+#define LX_SO_REUSEADDR 2
+#define LX_SO_TYPE 3
+#define LX_SO_ERROR 4
+#define LX_SO_DONTROUTE 5
+#define LX_SO_BROADCAST 6
+#define LX_SO_SNDBUF 7
+#define LX_SO_RCVBUF 8
+#define LX_SO_KEEPALIVE 9
+#define LX_SO_OOBINLINE 10
+#define LX_SO_NO_CHECK 11
+#define LX_SO_PRIORITY 12
+#define LX_SO_LINGER 13
+#define LX_SO_BSDCOMPAT 14
+/* To add :#define LX_SO_REUSEPORT 15 */
+#define LX_SO_PASSCRED 16
+#define LX_SO_PEERCRED 17
+#define LX_SO_RCVLOWAT 18
+#define LX_SO_SNDLOWAT 19
+#define LX_SO_RCVTIMEO 20
+#define LX_SO_SNDTIMEO 21
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define LX_SO_SECURITY_AUTHENTICATION 22
+#define LX_SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define LX_SO_SECURITY_ENCRYPTION_NETWORK 24
+#define LX_SO_BINDTODEVICE 25
+/* Socket filtering */
+#define LX_SO_ATTACH_FILTER 26
+#define LX_SO_DETACH_FILTER 27
+#define LX_SO_PEERNAME 28
+#define LX_SO_TIMESTAMP 29
+#define LX_SCM_TIMESTAMP LX_SO_TIMESTAMP
+#define LX_SO_ACCEPTCONN 30
+
+/*
+ * Linux socketcall indices.
+ * These constitute all 17 socket related system calls
+ *
+ * These system calls are all invoked via the single system call socketcall(),
+ * whose first argument is the index of the socket call being requested.
+ */
+#define LX_SOCKET 1
+#define LX_BIND 2
+#define LX_CONNECT 3
+#define LX_LISTEN 4
+#define LX_ACCEPT 5
+#define LX_GETSOCKNAME 6
+#define LX_GETPEERNAME 7
+#define LX_SOCKETPAIR 8
+#define LX_SEND 9
+#define LX_RECV 10
+#define LX_SENDTO 11
+#define LX_RECVFROM 12
+#define LX_SHUTDOWN 13
+#define LX_SETSOCKOPT 14
+#define LX_GETSOCKOPT 15
+#define LX_SENDMSG 16
+#define LX_RECVMSG 17
+
+/*
+ * Linux socket flags for use with recv(2)/send(2)/recvmsg(2)/sendmsg(2)
+ */
+#define LX_MSG_OOB 1
+#define LX_MSG_PEEK 2
+#define LX_MSG_DONTROUTE 4
+#define LX_MSG_CTRUNC 8
+#define LX_MSG_PROXY 0x10
+#define LX_MSG_TRUNC 0x20
+#define LX_MSG_DONTWAIT 0x40
+#define LX_MSG_EOR 0x80
+#define LX_MSG_WAITALL 0x100
+#define LX_MSG_FIN 0x200
+#define LX_MSG_SYN 0x400
+#define LX_MSG_CONFIRM 0x800
+#define LX_MSG_RST 0x1000
+#define LX_MSG_ERRQUEUE 0x2000
+#define LX_MSG_NOSIGNAL 0x4000
+#define LX_MSG_MORE 0x8000
+
+struct lx_msghdr { /* NOTE(review): presumably the Linux-side msghdr used by LX_SENDMSG/LX_RECVMSG -- confirm */
+ void *msg_name; /* optional address */
+ socklen_t msg_namelen; /* size of address */
+ struct iovec *msg_iov; /* scatter/gather array */
+ int msg_iovlen; /* # elements in msg_iov */
+ void *msg_control; /* ancillary data */
+ socklen_t msg_controllen; /* ancillary data buffer len */
+ int msg_flags; /* flags on received message */
+};
+
+struct lx_ucred { /* NOTE(review): presumably the credential record behind LX_SCM_CRED / LX_SO_PEERCRED -- confirm */
+ pid_t lxu_pid; /* native pid_t, unlike the lx_*-typed ids below */
+ lx_uid_t lxu_uid;
+ lx_gid_t lxu_gid;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SOCKET_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h
new file mode 100644
index 0000000000..67aa86bb1c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_STAT_H
+#define _SYS_LX_STAT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_types.h>
+#include <sys/stat.h>
+
+#define LX_MAJORSHIFT 8 /* the major number starts at bit 8 */
+#define LX_MINORMASK ((1 << LX_MAJORSHIFT) - 1) /* 0xff: the low 8 bits hold the minor number */
+#define LX_MAKEDEVICE(lx_maj, lx_min) \
+ ((lx_dev_t)((lx_maj) << LX_MAJORSHIFT | ((lx_min) & LX_MINORMASK))) /* << binds tighter than |; minor is truncated to 8 bits; the cast is applied after the shift */
+
+#define LX_GETMAJOR(lx_dev) ((lx_dev) >> LX_MAJORSHIFT) /* every bit above the minor field; no upper mask is applied */
+#define LX_GETMINOR(lx_dev) ((lx_dev) & LX_MINORMASK) /* low 8 bits only */
+
+#undef st_atime /* drop any macros of these names so they can be plain member names below -- presumably <sys/stat.h> defines them; confirm */
+#undef st_mtime
+#undef st_ctime
+
+struct lx_stat { /* stat layout built from the lx_*16_t (presumably 16-bit) id types plus explicit pad members */
+ lx_dev16_t st_dev;
+ uint16_t st_pad1;
+ lx_ino_t st_ino;
+ lx_mode16_t st_mode;
+ uint16_t st_nlink;
+ lx_uid16_t st_uid;
+ lx_gid16_t st_gid;
+ lx_dev16_t st_rdev;
+ uint16_t st_pad2;
+ lx_off_t st_size;
+ lx_blksize_t st_blksize;
+ lx_blkcnt_t st_blocks;
+ struct lx_timespec st_atime; /* full lx_timespec members, not scalar times */
+ struct lx_timespec st_mtime;
+ struct lx_timespec st_ctime;
+ uint32_t st_pad3; /* two trailing 32-bit pad words */
+ uint32_t st_pad4;
+};
+
+struct lx_stat64 { /* stat layout using lx_off64_t / lx_blkcnt64_t / lx_ino64_t for size, block count and inode */
+ lx_dev_t st_dev;
+ uint32_t st_pad1;
+ lx_ino_t st_small_ino; /* lx_ino_t-sized inode; the lx_ino64_t one is st_ino at the end */
+ lx_mode_t st_mode;
+ uint_t st_nlink;
+ lx_uid_t st_uid;
+ lx_gid_t st_gid;
+ lx_dev_t st_rdev;
+ uint32_t st_pad2;
+ lx_off64_t st_size;
+ lx_blksize_t st_blksize;
+ lx_blkcnt64_t st_blocks;
+ struct lx_timespec st_atime; /* uses the st_*time names as plain members */
+ struct lx_timespec st_mtime;
+ struct lx_timespec st_ctime;
+ lx_ino64_t st_ino; /* last member of the structure */
+};
+
+extern int lx_stat_init(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_STAT_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
new file mode 100644
index 0000000000..839d36d7ae
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_STATFS_H
+#define _LX_STATFS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_statfs_init(void);
+
+struct lx_statfs { /* statfs layout whose block and file counts are ulong_t (lx_statfs64 uses u_longlong_t) */
+ int f_type; /* NOTE(review): presumably one of the LX_*_MAGIC values defined later in this header -- confirm */
+ int f_bsize;
+ ulong_t f_blocks; /* NOTE(review): ulong_t/u_longlong_t are used but this header has no #include of its own -- presumably the includer supplies <sys/types.h>; confirm */
+ ulong_t f_bfree;
+ ulong_t f_bavail;
+ ulong_t f_files;
+ ulong_t f_ffree;
+ u_longlong_t f_fsid; /* u_longlong_t even in this ulong_t-based variant */
+ int f_namelen;
+ int f_frsize;
+ int f_spare[5];
+};
+
+struct lx_statfs64 { /* same member names and order as lx_statfs, with u_longlong_t block and file counts */
+ int f_type; /* NOTE(review): presumably one of the LX_*_MAGIC values defined later in this header -- confirm */
+ int f_bsize;
+ u_longlong_t f_blocks; /* f_blocks..f_ffree are u_longlong_t here, ulong_t in lx_statfs */
+ u_longlong_t f_bfree;
+ u_longlong_t f_bavail;
+ u_longlong_t f_files;
+ u_longlong_t f_ffree;
+ u_longlong_t f_fsid;
+ int f_namelen;
+ int f_frsize;
+ int f_spare[5];
+};
+
+/*
+ * These magic values are taken mostly from statfs(2).
+ */
+#define LX_ISOFS_SUPER_MAGIC 0x9660
+#define LX_NFS_SUPER_MAGIC 0x6969
+#define LX_MSDOS_SUPER_MAGIC 0x4d44
+#define LX_PROC_SUPER_MAGIC 0x9fa0
+#define LX_UFS_MAGIC 0x00011954
+#define LX_DEVPTS_SUPER_MAGIC 0x1cd1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_STATFS_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
new file mode 100644
index 0000000000..b0d4cf2eb6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
@@ -0,0 +1,556 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_SYSCALL_H
+#define _SYS_LX_SYSCALL_H
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+#include <sys/procset.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_install;
+
+extern int lx_openat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_mkdirat(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_mknodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fchownat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_futimesat(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fstatat64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_unlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_renameat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_linkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_symlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_readlinkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fchmodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_faccessat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_stat(uintptr_t, uintptr_t);
+extern int lx_fstat(uintptr_t, uintptr_t);
+extern int lx_lstat(uintptr_t, uintptr_t);
+extern int lx_stat64(uintptr_t, uintptr_t);
+extern int lx_fstat64(uintptr_t, uintptr_t);
+extern int lx_lstat64(uintptr_t, uintptr_t);
+extern int lx_fcntl(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fcntl64(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_flock(uintptr_t, uintptr_t);
+extern int lx_open(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_readdir(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getdents64(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getpid(void);
+extern int lx_execve(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_dup2(uintptr_t, uintptr_t);
+extern int lx_ioctl(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_vhangup(void);
+
+extern int lx_read(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_readv(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_writev(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_pread64(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_pwrite64(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_socketcall(uintptr_t, uintptr_t);
+extern int lx_select(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_poll(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_oldgetrlimit(uintptr_t, uintptr_t);
+extern int lx_getrlimit(uintptr_t, uintptr_t);
+extern int lx_setrlimit(uintptr_t, uintptr_t);
+extern int lx_gettimeofday(uintptr_t, uintptr_t);
+extern int lx_settimeofday(uintptr_t, uintptr_t);
+extern int lx_getrusage(uintptr_t, uintptr_t);
+extern int lx_mknod(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_getpgrp(void);
+extern int lx_getpgid(uintptr_t);
+extern int lx_setpgid(uintptr_t, uintptr_t);
+extern int lx_getsid(uintptr_t);
+extern int lx_setsid(void);
+extern int lx_setgroups(uintptr_t, uintptr_t);
+
+
+extern int lx_waitpid(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_waitid(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_getuid16(void);
+extern int lx_getgid16(void);
+extern int lx_geteuid16(void);
+extern int lx_getegid16(void);
+extern int lx_geteuid(void);
+extern int lx_getegid(void);
+extern int lx_getresuid16(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getresgid16(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getresuid(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getresgid(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_setuid16(uintptr_t);
+extern int lx_setreuid16(uintptr_t, uintptr_t);
+extern int lx_setregid16(uintptr_t, uintptr_t);
+extern int lx_setgid16(uintptr_t);
+extern int lx_setfsuid16(uintptr_t);
+extern int lx_setfsgid16(uintptr_t);
+
+extern int lx_setfsuid(uintptr_t);
+extern int lx_setfsgid(uintptr_t);
+
+extern int lx_clock_settime(int, struct timespec *);
+extern int lx_clock_gettime(int, struct timespec *);
+extern int lx_clock_getres(int, struct timespec *);
+extern int lx_clock_nanosleep(int, int, struct timespec *, /* 2nd arg: flags */
+ struct timespec *);
+
+extern int lx_truncate(uintptr_t, uintptr_t);
+extern int lx_ftruncate(uintptr_t, uintptr_t);
+extern int lx_truncate64(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_ftruncate64(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_sysctl(uintptr_t);
+extern int lx_fsync(uintptr_t);
+extern int lx_fdatasync(uintptr_t);
+extern int lx_pipe(uintptr_t);
+extern int lx_link(uintptr_t, uintptr_t);
+extern int lx_unlink(uintptr_t);
+extern int lx_rmdir(uintptr_t);
+extern int lx_chown16(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fchown16(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_lchown16(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_chown(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fchown(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_chmod(uintptr_t, uintptr_t);
+extern int lx_rename(uintptr_t, uintptr_t);
+extern int lx_utime(uintptr_t, uintptr_t);
+extern int lx_llseek(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_lseek(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sysfs(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_getcwd(uintptr_t, uintptr_t);
+extern int lx_uname(uintptr_t);
+extern int lx_reboot(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_getgroups16(uintptr_t, uintptr_t);
+extern int lx_setgroups16(uintptr_t, uintptr_t);
+extern int lx_personality(uintptr_t);
+
+extern int lx_query_module(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern int lx_time(uintptr_t);
+extern int lx_times(uintptr_t);
+extern int lx_setitimer(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_clone(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_exit(uintptr_t);
+extern int lx_group_exit(uintptr_t);
+
+extern int lx_mlock(uintptr_t, uintptr_t);
+extern int lx_mlockall(uintptr_t);
+extern int lx_munlock(uintptr_t, uintptr_t);
+extern int lx_munlockall(void);
+extern int lx_msync(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_madvise(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_mprotect(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_mmap(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+extern int lx_mmap2(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern int lx_mount(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_umount(uintptr_t);
+extern int lx_umount2(uintptr_t, uintptr_t);
+
+extern int lx_statfs(uintptr_t, uintptr_t);
+extern int lx_fstatfs(uintptr_t, uintptr_t);
+extern int lx_statfs64(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_fstatfs64(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_sigreturn(void);
+extern int lx_rt_sigreturn(void);
+extern int lx_signal(uintptr_t, uintptr_t);
+extern int lx_sigaction(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_rt_sigaction(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sigaltstack(uintptr_t, uintptr_t);
+extern int lx_sigpending(uintptr_t);
+extern int lx_rt_sigpending(uintptr_t, uintptr_t);
+extern int lx_sigprocmask(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_rt_sigprocmask(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sigsuspend(uintptr_t);
+extern int lx_rt_sigsuspend(uintptr_t, uintptr_t);
+extern int lx_sigwaitinfo(uintptr_t, uintptr_t);
+extern int lx_rt_sigwaitinfo(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sigtimedwait(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_sync(void);
+
+extern int lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern int lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+extern int lx_tgkill(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_sethostname(uintptr_t, uintptr_t);
+extern int lx_setdomainname(uintptr_t, uintptr_t);
+
+extern int lx_sendfile(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sendfile64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_fork(void);
+extern int lx_vfork(void);
+extern int lx_exec(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_getpriority(uintptr_t, uintptr_t);
+extern int lx_setpriority(uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern int lx_sched_getaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sched_setaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sched_getparam(uintptr_t, uintptr_t);
+extern int lx_sched_setparam(uintptr_t, uintptr_t);
+extern int lx_sched_rr_get_interval(uintptr_t, uintptr_t); /* 1st arg: pid */
+extern int lx_sched_getscheduler(uintptr_t);
+extern int lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_sched_get_priority_min(uintptr_t);
+extern int lx_sched_get_priority_max(uintptr_t);
+
+extern int lx_keyctl(void);
+
+extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+#endif /* !defined(_ASM) */
+
+#define EBP_HAS_ARG6 0x01
+
+/*
+ * Linux syscall numbers
+ */
+#define LX_SYS_exit 1
+#define LX_SYS_fork 2
+#define LX_SYS_read 3
+#define LX_SYS_write 4
+#define LX_SYS_open 5
+#define LX_SYS_close 6
+#define LX_SYS_waitpid 7
+#define LX_SYS_creat 8
+#define LX_SYS_link 9
+#define LX_SYS_unlink 10
+#define LX_SYS_execve 11
+#define LX_SYS_chdir 12
+#define LX_SYS_time 13
+#define LX_SYS_mknod 14
+#define LX_SYS_chmod 15
+#define LX_SYS_lchown 16
+#define LX_SYS_break 17
+#define LX_SYS_oldstat 18
+#define LX_SYS_lseek 19
+#define LX_SYS_getpid 20
+#define LX_SYS_mount 21
+#define LX_SYS_umount 22
+#define LX_SYS_setuid 23
+#define LX_SYS_getuid 24
+#define LX_SYS_stime 25
+#define LX_SYS_ptrace 26
+#define LX_SYS_alarm 27
+#define LX_SYS_oldfstat 28
+#define LX_SYS_pause 29
+#define LX_SYS_utime 30
+#define LX_SYS_stty 31
+#define LX_SYS_gtty 32
+#define LX_SYS_access 33
+#define LX_SYS_nice 34
+#define LX_SYS_ftime 35
+#define LX_SYS_sync 36
+#define LX_SYS_kill 37
+#define LX_SYS_rename 38
+#define LX_SYS_mkdir 39
+#define LX_SYS_rmdir 40
+#define LX_SYS_dup 41
+#define LX_SYS_pipe 42
+#define LX_SYS_times 43
+#define LX_SYS_prof 44
+#define LX_SYS_brk 45
+#define LX_SYS_setgid 46
+#define LX_SYS_getgid 47
+#define LX_SYS_signal 48
+#define LX_SYS_geteuid 49
+#define LX_SYS_getegid 50
+#define LX_SYS_acct 51
+#define LX_SYS_umount2 52
+#define LX_SYS_lock 53
+#define LX_SYS_ioctl 54
+#define LX_SYS_fcntl 55
+#define LX_SYS_mpx 56
+#define LX_SYS_setpgid 57
+#define LX_SYS_ulimit 58
+#define LX_SYS_oldolduname 59
+#define LX_SYS_umask 60
+#define LX_SYS_chroot 61
+#define LX_SYS_ustat 62
+#define LX_SYS_dup2 63
+#define LX_SYS_getppid 64
+#define LX_SYS_getpgrp 65
+#define LX_SYS_setsid 66
+#define LX_SYS_sigaction 67
+#define LX_SYS_sgetmask 68
+#define LX_SYS_ssetmask 69
+#define LX_SYS_setreuid 70
+#define LX_SYS_setregid 71
+#define LX_SYS_sigsuspend 72
+#define LX_SYS_sigpending 73
+#define LX_SYS_sethostname 74
+#define LX_SYS_setrlimit 75
+#define LX_SYS_getrlimit 76
+#define LX_SYS_getrusage 77
+#define LX_SYS_gettimeofday 78
+#define LX_SYS_settimeofday 79
+#define LX_SYS_getgroups 80
+#define LX_SYS_setgroups 81
+#define LX_SYS_select 82
+#define LX_SYS_symlink 83
+#define LX_SYS_oldlstat 84
+#define LX_SYS_readlink 85
+#define LX_SYS_uselib 86
+#define LX_SYS_swapon 87
+#define LX_SYS_reboot 88
+#define LX_SYS_readdir 89
+#define LX_SYS_mmap 90
+#define LX_SYS_munmap 91
+#define LX_SYS_truncate 92
+#define LX_SYS_ftruncate 93
+#define LX_SYS_fchmod 94
+#define LX_SYS_fchown 95
+#define LX_SYS_getpriority 96
+#define LX_SYS_setpriority 97
+#define LX_SYS_profil 98
+#define LX_SYS_statfs 99
+#define LX_SYS_fstatfs 100
+#define LX_SYS_ioperm 101
+#define LX_SYS_socketcall 102
+#define LX_SYS_syslog 103
+#define LX_SYS_setitimer 104
+#define LX_SYS_getitimer 105
+#define LX_SYS_stat 106
+#define LX_SYS_lstat 107
+#define LX_SYS_fstat 108
+#define LX_SYS_olduname 109
+#define LX_SYS_iopl 110
+#define LX_SYS_vhangup 111
+#define LX_SYS_idle 112
+#define LX_SYS_vm86old 113
+#define LX_SYS_wait4 114
+#define LX_SYS_swapoff 115
+#define LX_SYS_sysinfo 116
+#define LX_SYS_ipc 117
+#define LX_SYS_fsync 118
+#define LX_SYS_sigreturn 119
+#define LX_SYS_clone 120
+#define LX_SYS_setdomainname 121
+#define LX_SYS_uname 122
+#define LX_SYS_modify_ldt 123
+#define LX_SYS_adjtimex 124
+#define LX_SYS_mprotect 125
+#define LX_SYS_sigprocmask 126
+#define LX_SYS_create_module 127
+#define LX_SYS_init_module 128
+#define LX_SYS_delete_module 129
+#define LX_SYS_get_kernel_syms 130
+#define LX_SYS_quotactl 131
+#define LX_SYS_getpgid 132
+#define LX_SYS_fchdir 133
+#define LX_SYS_sysfs 135
+#define LX_SYS_setfsuid 138
+#define LX_SYS_setfsgid 139
+#define LX_SYS_llseek 140
+#define LX_SYS_getdents 141
+#define LX_SYS_newselect 142
+#define LX_SYS_flock 143
+#define LX_SYS_msync 144
+#define LX_SYS_readv 145
+#define LX_SYS_writev 146
+#define LX_SYS_getsid 147
+#define LX_SYS_fdatasync 148
+#define LX_SYS_sysctl 149
+#define LX_SYS_mlock 150
+#define LX_SYS_munlock 151
+#define LX_SYS_mlockall 152
+#define LX_SYS_munlockall 153
+#define LX_SYS_sched_setparam 154
+#define LX_SYS_sched_getparam 155
+#define LX_SYS_sched_setscheduler 156
+#define LX_SYS_sched_getscheduler 157
+#define LX_SYS_sched_yield 158
+#define LX_SYS_sched_get_priority_max 159
+#define LX_SYS_sched_get_priority_min 160
+#define LX_SYS_sched_rr_get_interval 161
+#define LX_SYS_nanosleep 162
+#define LX_SYS_mremap 163
+#define LX_SYS_setresuid 164
+#define LX_SYS_getresuid 165
+#define LX_SYS_poll 168
+#define LX_SYS_setresgid 170
+#define LX_SYS_getresgid 171
+#define LX_SYS_prctl 172
+#define LX_SYS_rt_sigreturn 173
+#define LX_SYS_rt_sigaction 174
+#define LX_SYS_rt_sigprocmask 175
+#define LX_SYS_rt_sigpending 176
+#define LX_SYS_rt_sigtimedwait 177
+#define LX_SYS_rt_sigqueueinfo 178
+#define LX_SYS_rt_sigsuspend 179
+#define LX_SYS_pread 180
+#define LX_SYS_pwrite 181
+#define LX_SYS_chown 182
+#define LX_SYS_getcwd 183
+#define LX_SYS_capget 184
+#define LX_SYS_capset 185
+#define LX_SYS_sigaltstack 186
+#define LX_SYS_sendfile 187
+#define LX_SYS_getpmsg 188
+#define LX_SYS_putpmsg 189
+#define LX_SYS_vfork 190
+#define LX_SYS_ugetrlimit 191
+#define LX_SYS_mmap2 192
+#define LX_SYS_truncate64 193
+#define LX_SYS_ftruncate64 194
+#define LX_SYS_stat64 195
+#define LX_SYS_lstat64 196
+#define LX_SYS_fstat64 197
+#define LX_SYS_lchown32 198
+#define LX_SYS_getuid32 199
+#define LX_SYS_getgid32 200
+#define LX_SYS_geteuid32 201
+#define LX_SYS_getegid32 202
+#define LX_SYS_setreuid32 203
+#define LX_SYS_setregid32 204
+#define LX_SYS_getgroups32 205
+#define LX_SYS_setgroups32 206
+#define LX_SYS_fchown32 207
+#define LX_SYS_setresuid32 208
+#define LX_SYS_getresuid32 209
+#define LX_SYS_setresgid32 210
+#define LX_SYS_getresgid32 211
+#define LX_SYS_chown32 212
+#define LX_SYS_setuid32 213
+#define LX_SYS_setgid32 214
+#define LX_SYS_setfsuid32 215
+#define LX_SYS_setfsgid32 216
+#define LX_SYS_mincore 218
+#define LX_SYS_madvise 219
+#define LX_SYS_getdents64 220
+#define LX_SYS_fcntl64 221
+#define LX_SYS_gettid 224
+#define LX_SYS_readahead 225
+#define LX_SYS_setxattr 226
+#define LX_SYS_lsetxattr 227
+#define LX_SYS_fsetxattr 228
+#define LX_SYS_getxattr 229
+#define LX_SYS_lgetxattr 230
+#define LX_SYS_fgetxattr 231
+#define LX_SYS_listxattr 232
+#define LX_SYS_llistxattr 233
+#define LX_SYS_flistxattr 234
+#define LX_SYS_removexattr 235
+#define LX_SYS_lremovexattr 236
+#define LX_SYS_fremovexattr 237
+#define LX_SYS_tkill 238
+#define LX_SYS_sendfile64 239
+#define LX_SYS_futex 240
+#define LX_SYS_sched_setaffinity 241
+#define LX_SYS_sched_getaffinity 242
+#define LX_SYS_set_thread_area 243
+#define LX_SYS_get_thread_area 244
+#define LX_SYS_fadvise64 250
+#define LX_SYS_exit_group 252
+#define LX_SYS_remap_file_pages 257
+#define LX_SYS_set_tid_address 258
+#define LX_SYS_timer_create 259
+#define LX_SYS_timer_settime 260
+#define LX_SYS_timer_gettime 261
+#define LX_SYS_timer_getoverrun 262
+#define LX_SYS_timer_delete 263
+#define LX_SYS_clock_settime 264
+#define LX_SYS_clock_gettime 265
+#define LX_SYS_clock_getres 266
+#define LX_SYS_clock_nanosleep 267
+#define LX_SYS_tgkill 270
+/* the following syscalls are for 2.6 and later kernels */
+#define LX_SYS_utimes 271
+#define LX_SYS_fadvise64_64 272
+#define LX_SYS_vserver 273
+#define LX_SYS_mbind 274
+#define LX_SYS_get_mempolicyd 275 /* NOTE(review): trailing 'd' looks like a typo for get_mempolicy (cf. LX_SYS_set_mempolicy, 276); name left as-is for existing users -- confirm */
+#define LX_SYS_set_mempolicy 276
+#define LX_SYS_mq_open 277
+#define LX_SYS_mq_unlink 278
+#define LX_SYS_mq_timedsend 279
+#define LX_SYS_mq_timedreceive 280
+#define LX_SYS_mq_notify 281
+#define LX_SYS_mq_getsetattr 282
+#define LX_SYS_kexec_load 283
+#define LX_SYS_waitid 284
+#define LX_SYS_setaltroot 285
+#define LX_SYS_add_key 286
+#define LX_SYS_request_key 287
+#define LX_SYS_keyctl 288
+#define LX_SYS_ioprio_set 289
+#define LX_SYS_ioprio_get 290
+#define LX_SYS_inotify_init 291
+#define LX_SYS_inotify_add_watch 292
+#define LX_SYS_inotify_rm_watch 293
+#define LX_SYS_migrate_pages 294
+#define LX_SYS_openat 295
+#define LX_SYS_mkdirat 296
+#define LX_SYS_mknodat 297
+#define LX_SYS_fchownat 298
+#define LX_SYS_futimesat 299
+#define LX_SYS_fstatat64 300
+#define LX_SYS_unlinkat 301
+#define LX_SYS_renameat 302
+#define LX_SYS_linkat 303
+#define LX_SYS_symlinkat 304
+#define LX_SYS_readlinkat 305
+#define LX_SYS_fchmodat 306
+#define LX_SYS_faccessat 307
+#define LX_SYS_pselect6 308
+#define LX_SYS_ppoll 309
+#define LX_SYS_unshare 310
+#define LX_SYS_set_robust_list 311
+#define LX_SYS_get_robust_list 312
+#define LX_SYS_splice 313
+#define LX_SYS_sync_file_range 314
+#define LX_SYS_tee 315
+#define LX_SYS_vmsplice 316
+#define LX_SYS_move_pages 317
+
+#if defined(__cplusplus) && !defined(_ASM) /* the matching extern "C" { is opened inside the !defined(_ASM) section, so close it under the same condition */
+}
+#endif
+
+#endif /* _SYS_LX_SYSCALL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
new file mode 100644
index 0000000000..08c824d5b4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
@@ -0,0 +1,211 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_SYSV_IPC_H
+#define _LX_SYSV_IPC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Generic IPC flags/commands and msg-related definitions.
+ *
+ * Values written with a leading 0 (LX_IPC_CREAT, LX_MSG_NOERROR, ...) are
+ * octal; LX_IPC_64 is hexadecimal.
+ */
+/* Flag bits (octal). */
+#define LX_IPC_CREAT 00001000
+#define LX_IPC_EXCL 00002000
+#define LX_IPC_NOWAIT 00004000
+
+/* Control commands shared by the msg, sem and shm families. */
+#define LX_IPC_RMID 0
+#define LX_IPC_SET 1
+#define LX_IPC_STAT 2
+#define LX_IPC_INFO 3
+
+/* NOTE(review): presumably OR'd into a command to select the "64" layouts - confirm. */
+#define LX_IPC_64 0x0100
+
+/*
+ * Operation selectors, grouped by family (sem 1-3, msg 11-14, shm 21-24).
+ * NOTE(review): presumably the call numbers of the Linux ipc() multiplexer
+ * syscall - confirm against the emulation code that consumes them.
+ */
+#define LX_SEMOP 1
+#define LX_SEMGET 2
+#define LX_SEMCTL 3
+#define LX_MSGSND 11
+#define LX_MSGRCV 12
+#define LX_MSGGET 13
+#define LX_MSGCTL 14
+#define LX_SHMAT 21
+#define LX_SHMDT 22
+#define LX_SHMGET 23
+#define LX_SHMCTL 24
+
+/* msg-specific control commands. */
+#define LX_MSG_STAT 11
+#define LX_MSG_INFO 12
+
+/* msg receive flag (octal). */
+#define LX_MSG_NOERROR 010000
+
+/*
+ * Linux hard codes the maximum msgbuf length to be 8192 bytes. Really.
+ */
+#define LX_MSGMAX 8192
+
+/*
+ * Ownership/permission record embedded as the first member of
+ * lx_msqid_ds, lx_semid_ds and lx_shmid_ds.
+ *
+ * The field order and the explicit _pad and _unused members fix the binary
+ * layout; do not reorder. Note that gid, cuid and cgid are all declared
+ * as uid_t here.
+ * NOTE(review): presumably mirrors Linux's struct ipc64_perm - confirm.
+ */
+struct lx_ipc_perm {
+ key_t key;
+ uid_t uid;
+ uid_t gid;
+ uid_t cuid;
+ uid_t cgid;
+ ushort_t mode;
+ ushort_t _pad1;
+ ushort_t seq;
+ ushort_t _pad2;
+ ulong_t _unused1;
+ ulong_t _unused2;
+};
+
+/*
+ * Message queue descriptor. Each time_t member is followed by an unused
+ * ulong_t slot, and two more unused slots close the structure; these are
+ * layout padding and must stay in place.
+ * NOTE(review): presumably the layout of Linux's struct msqid64_ds - confirm.
+ */
+struct lx_msqid_ds {
+ struct lx_ipc_perm msg_perm;
+ time_t msg_stime;
+ ulong_t _unused1;
+ time_t msg_rtime;
+ ulong_t _unused2;
+ time_t msg_ctime;
+ ulong_t _unused3;
+ ulong_t msg_cbytes;
+ ulong_t msg_qnum;
+ ulong_t msg_qbytes;
+ pid_t msg_lspid;
+ pid_t msg_lrpid;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+/*
+ * Message-queue limits/usage record: seven ints followed by one ushort_t.
+ * NOTE(review): presumably what LX_IPC_INFO / LX_MSG_INFO hand back to the
+ * Linux caller - confirm against the msgctl emulation.
+ */
+struct lx_msginfo {
+ int msgpool;
+ int msgmap;
+ int msgmax;
+ int msgmnb;
+ int msgmni;
+ int msgssz;
+ int msgtql;
+ ushort_t msgseg;
+};
+
+/*
+ * semaphore-related definitions.
+ *
+ * LX_GETPID through LX_SEM_INFO are consecutive command numbers (11-19).
+ * LX_SEM_UNDO is a flag bit, and LX_SEMVMX (32767) equals the largest
+ * positive value of a signed 16-bit integer.
+ */
+#define LX_GETPID 11
+#define LX_GETVAL 12
+#define LX_GETALL 13
+#define LX_GETNCNT 14
+#define LX_GETZCNT 15
+#define LX_SETVAL 16
+#define LX_SETALL 17
+#define LX_SEM_STAT 18
+#define LX_SEM_INFO 19
+#define LX_SEM_UNDO 0x1000
+#define LX_SEMVMX 32767
+
+/*
+ * Semaphore set descriptor. As in the other *_ds structures in this
+ * header, each time_t is followed by an unused ulong_t slot and the
+ * structure ends with two unused slots; keep the order intact.
+ * NOTE(review): presumably the layout of Linux's struct semid64_ds - confirm.
+ */
+struct lx_semid_ds {
+ struct lx_ipc_perm sem_perm;
+ time_t sem_otime;
+ ulong_t _unused1;
+ time_t sem_ctime;
+ ulong_t _unused2;
+ ulong_t sem_nsems;
+ ulong_t _unused3;
+ ulong_t _unused4;
+};
+
+/*
+ * Semaphore limits/usage record (ten ints).
+ * NOTE(review): presumably returned for LX_IPC_INFO / LX_SEM_INFO - confirm.
+ */
+struct lx_seminfo {
+ int semmap;
+ int semmni;
+ int semmns;
+ int semmnu;
+ int semmsl;
+ int semopm;
+ int semume;
+ int semusz;
+ int semvmx;
+ int semaem;
+};
+
+/*
+ * Overlay of the possible argument forms: an int value, a pointer to a
+ * descriptor, a pointer to an array of ushort_t, or a pointer to an info
+ * record. 'dummy' gives pointer-sized raw access to the same storage.
+ * NOTE(review): presumably the final argument of the semctl emulation;
+ * which member is valid would then depend on the command - confirm.
+ */
+union lx_semun {
+ int val;
+ struct lx_semid_ds *semds;
+ ushort_t *sems;
+ struct lx_seminfo *info;
+ uintptr_t dummy;
+};
+
+/*
+ * shm-related definitions
+ *
+ * The first group is flag bits written in octal; the second group is
+ * shm-specific control command numbers (11-14).
+ */
+#define LX_SHM_LOCKED 02000
+#define LX_SHM_RDONLY 010000
+#define LX_SHM_RND 020000
+#define LX_SHM_REMAP 040000
+
+#define LX_SHM_LOCK 11
+#define LX_SHM_UNLOCK 12
+#define LX_SHM_STAT 13
+#define LX_SHM_INFO 14
+
+/*
+ * Shared memory segment descriptor. Each time_t is followed by an unused
+ * ulong_t slot and the structure ends with two unused slots; keep the
+ * order intact.
+ * NOTE(review): presumably the layout of Linux's struct shmid64_ds. There
+ * shm_nattch is, I believe, an unsigned long; here it is a ushort_t, so
+ * the remaining bytes before _unused4 are compiler padding - confirm that
+ * whoever fills this structure zeroes it first.
+ */
+struct lx_shmid_ds {
+ struct lx_ipc_perm shm_perm;
+ size_t shm_segsz;
+ time_t shm_atime;
+ ulong_t _unused1;
+ time_t shm_dtime;
+ ulong_t _unused2;
+ time_t shm_ctime;
+ ulong_t _unused3;
+ pid_t shm_cpid;
+ pid_t shm_lpid;
+ ushort_t shm_nattch;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+/*
+ * Shared memory usage counters: one int followed by five ulong_t values.
+ * Not to be confused with struct lx_shminfo (no underscore), which holds
+ * the limits.
+ * NOTE(review): presumably returned for LX_SHM_INFO - confirm.
+ */
+struct lx_shm_info {
+ int used_ids;
+ ulong_t shm_tot;
+ ulong_t shm_rss;
+ ulong_t shm_swp;
+ ulong_t swap_attempts;
+ ulong_t swap_successes;
+};
+
+/*
+ * Shared memory limits (five ints).
+ * NOTE(review): presumably returned for LX_IPC_INFO on a shm id - confirm.
+ */
+struct lx_shminfo {
+ int shmmax;
+ int shmmin;
+ int shmmni;
+ int shmseg;
+ int shmall;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSV_IPC_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
new file mode 100644
index 0000000000..9c1952bfe7
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_THREAD_H
+#define _SYS_LX_THREAD_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <thread.h>
+
+/*
+ * Per-thread data block stored under lx_tsd_key.
+ * NOTE(review): field meanings below are inferred from the names only:
+ * lxtsd_gs looks like the saved Linux %gs value, and the lxtsd_exit*
+ * members look like exit bookkeeping (flag, status and a context to
+ * resume) - confirm against the code that sets them.
+ */
+typedef struct lx_tsd {
+ uintptr_t lxtsd_gs;
+ int lxtsd_exit;
+ int lxtsd_exit_status;
+ ucontext_t lxtsd_exit_context;
+} lx_tsd_t;
+
+extern thread_key_t lx_tsd_key; /* thread-specific Linux %gs value */
+
+/* Defined elsewhere; takes a long and a pointer to long. */
+extern void lx_swap_gs(long, long *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_THREAD_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h
new file mode 100644
index 0000000000..a56fe8eeb3
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_THUNK_SERVER_H
+#define _LX_THUNK_SERVER_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <netdb.h>
+#include <procfs.h>
+
+/*
+ * Binary that should be exec'd to start up the thunking server
+ */
+#define LXT_SERVER_BINARY "/native/usr/lib/brand/lx/lx_thunk"
+
+/*
+ * When the thunking server is started it will need to communicate
+ * to the client via two fifos. These fifos will be passed to the
+ * thunking server via the following file descriptors:
+ */
+#define LXT_SERVER_FIFO_RD_FD 3
+#define LXT_SERVER_FIFO_WR_FD 4
+
+/*
+ * Operations supported by the thunking server
+ *
+ * LXT_SERVER_OP_MIN equals the first operation (PING, 0), while
+ * LXT_SERVER_OP_MAX (8) is one greater than the last defined operation
+ * (CLOSELOG, 7), i.e. valid operations are MIN <= op < MAX.
+ */
+#define LXT_SERVER_OP_MIN 0
+#define LXT_SERVER_OP_PING 0
+#define LXT_SERVER_OP_NAME2HOST 1
+#define LXT_SERVER_OP_ADDR2HOST 2
+#define LXT_SERVER_OP_NAME2SERV 3
+#define LXT_SERVER_OP_PORT2SERV 4
+#define LXT_SERVER_OP_OPENLOG 5
+#define LXT_SERVER_OP_SYSLOG 6
+#define LXT_SERVER_OP_CLOSELOG 7
+#define LXT_SERVER_OP_MAX 8
+
+/*
+ * Macros used to translate pointers into offsets (and back) when they
+ * are being transmitted between the client and server processes.
+ *
+ * NOTE: We're going to add 1 to every offset value. The reason
+ * for this is that some of the pointers we're converting to offsets are
+ * stored in NULL terminated arrays, and if one of the members of
+ * one of these arrays happened to be at the beginning of the storage
+ * buffer it would have an offset of 0, and when the client tries to
+ * translate the offsets back into pointers it wouldn't be able
+ * to differentiate between that 0 offset and the end of the array.
+ *
+ * Both macros yield a void *; the two are exact inverses for the same
+ * 'base'.
+ */
+#define LXT_PTR_TO_OFFSET(ptr, base) \
+ ((void *)((uintptr_t)(ptr) - (uintptr_t)(base) + 1))
+#define LXT_OFFSET_TO_PTR(offset, base) \
+ ((void *)((uintptr_t)(offset) + (uintptr_t)(base) - 1))
+
+/*
+ * Structures passed to the thunking server via door calls
+ *
+ * NOTE(review): the comment on LXT_SERVER_FIFO_*_FD in this header talks
+ * about fifos rather than doors - confirm which transport carries these.
+ */
+
+/*
+ * Common request/reply envelope: the operation code (LXT_SERVER_OP_*),
+ * a success flag and an errno value, followed by the payload.
+ * NOTE(review): lxt_sa_data[1] is presumably a variable-length trailer
+ * holding one of the structures below - confirm.
+ */
+typedef struct lxt_server_arg {
+ int lxt_sa_op;
+ int lxt_sa_success;
+ int lxt_sa_errno;
+ char lxt_sa_data[1];
+} lxt_server_arg_t;
+
+/*
+ * Host lookup payload.
+ * NOTE(review): lxt_gh_storage[1] is presumably a variable-length trailer
+ * of lxt_gh_storage_len bytes, with the pointers in lxt_gh_result encoded
+ * via LXT_PTR_TO_OFFSET relative to it - confirm.
+ */
+typedef struct lxt_gethost_arg {
+ struct hostent lxt_gh_result;
+
+ int lxt_gh_h_errno;
+
+ int lxt_gh_type;
+ int lxt_gh_token_len;
+ int lxt_gh_buf_len;
+
+ int lxt_gh_storage_len;
+ char lxt_gh_storage[1];
+} lxt_gethost_arg_t;
+
+/*
+ * Service lookup payload. lxt_gs_proto holds at most 4 characters plus
+ * the terminating NUL.
+ */
+typedef struct lxt_getserv_arg {
+ struct servent lxt_gs_result;
+
+ int lxt_gs_token_len;
+ int lxt_gs_buf_len;
+ char lxt_gs_proto[5];
+
+ int lxt_gs_storage_len;
+ char lxt_gs_storage[1];
+} lxt_getserv_arg_t;
+
+/* openlog payload; the ident string is limited to a 128-byte buffer. */
+typedef struct lxt_openlog_arg {
+ int lxt_ol_logopt;
+ int lxt_ol_facility;
+ char lxt_ol_ident[128];
+} lxt_openlog_arg_t;
+
+/*
+ * syslog payload; the message is limited to a 1024-byte buffer and the
+ * program name to PRFNSZ bytes.
+ */
+typedef struct lxt_syslog_arg {
+ int lxt_sl_priority;
+ pid_t lxt_sl_pid;
+ char lxt_sl_progname[PRFNSZ];
+ char lxt_sl_message[1024];
+} lxt_syslog_arg_t;
+
+
+/*
+ * Functions called by the brand library to manage startup of the
+ * thunk server process.
+ *
+ * Only the prototypes are visible here; the definitions live elsewhere.
+ * NOTE(review): lxt_server_init() presumably receives argc/argv, and
+ * lxt_server_pid() presumably stores the server's pid through 'pid' and
+ * returns a status - confirm against the implementation.
+ */
+void lxt_server_init(int, char *[]);
+int lxt_server_pid(int *pid);
+void lxt_server_exec_check(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_THUNK_SERVER_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h
new file mode 100644
index 0000000000..6152634459
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_TYPES_H
+#define _SYS_LX_TYPES_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Integer limits. These are 32-bit values: LONG_MIN/LONG_MAX/ULONG_MAX
+ * equal INT_MIN/INT_MAX/UINT_MAX.
+ * NOTE(review): the names are the same as those in <limits.h> and are
+ * defined without #ifndef guards, so this header presumably must not be
+ * combined with <limits.h>, nor used in a 64-bit compilation where long
+ * is wider - confirm.
+ */
+#define SHRT_MIN (-32768) /* min value of a "short int" */
+#define SHRT_MAX 32767 /* max value of a "short int" */
+#define USHRT_MAX 65535 /* max of "unsigned short int" */
+#define INT_MIN (-2147483647-1) /* min value of an "int" */
+#define INT_MAX 2147483647 /* max value of an "int" */
+#define UINT_MAX 4294967295U /* max value of an "unsigned int" */
+#define LONG_MIN (-2147483647L-1L)
+ /* min value of a "long int" */
+#define LONG_MAX 2147483647L /* max value of a "long int" */
+#define ULONG_MAX 4294967295UL /* max of "unsigned long int" */
+
+/* Size in bytes of every string field in struct lx_utsname. */
+#define LX_SYS_UTS_LN 65
+
+/*
+ * System identification record: six fixed-size character arrays of
+ * LX_SYS_UTS_LN bytes each (390 bytes in total).
+ */
+struct lx_utsname {
+ char sysname[LX_SYS_UTS_LN];
+ char nodename[LX_SYS_UTS_LN];
+ char release[LX_SYS_UTS_LN];
+ char version[LX_SYS_UTS_LN];
+ char machine[LX_SYS_UTS_LN];
+ char domainname[LX_SYS_UTS_LN];
+};
+
+/*
+ * Fixed-width stand-ins for the Linux-side scalar types. Where two
+ * widths exist, the 16-bit variant carries a "16" suffix and the 64-bit
+ * variant a "64" suffix.
+ * NOTE(review): lx_off_t and lx_off64_t are unsigned here; confirm that no
+ * user relies on negative offsets without casting.
+ */
+typedef uint64_t lx_dev_t;
+typedef uint16_t lx_dev16_t;
+typedef uint32_t lx_ino_t;
+typedef uint64_t lx_ino64_t;
+typedef uint32_t lx_uid_t;
+typedef uint16_t lx_uid16_t;
+typedef uint32_t lx_gid_t;
+typedef uint16_t lx_gid16_t;
+typedef uint32_t lx_off_t;
+typedef uint64_t lx_off64_t;
+typedef uint32_t lx_blksize_t;
+typedef uint32_t lx_blkcnt_t;
+typedef uint64_t lx_blkcnt64_t;
+typedef ulong_t lx_mode_t;
+typedef uint16_t lx_mode16_t;
+
+/*
+ * Widen a 16-bit id to 32 bits. The 16-bit value -1 (0xffff) maps to the
+ * 32-bit value -1; every other value is zero-extended.
+ */
+#define LX_UID16_TO_UID32(uid16) \
+ (((uid16) == (lx_uid16_t)-1) ? ((lx_uid_t)-1) : (lx_uid_t)(uid16))
+
+#define LX_GID16_TO_GID32(gid16) \
+ (((gid16) == (lx_gid16_t)-1) ? ((lx_gid_t)-1) : (lx_gid_t)(gid16))
+
+/* Overflow values default to NFS nobody. */
+
+#define UID16_OVERFLOW ((lx_uid16_t)65534)
+#define GID16_OVERFLOW ((lx_gid16_t)65534)
+
+/*
+ * All IDs with high word non-zero are converted to default overflow values to
+ * avoid inadvertent truncation to zero (root) (!).
+ *
+ * The one exception is the 32-bit value -1, which narrows to the 16-bit
+ * value -1. IDs whose high word is zero are simply truncated. The
+ * argument is evaluated more than once, so it must be free of side effects.
+ */
+#define LX_UID32_TO_UID16(uid32) \
+ ((((uid32) & 0xffff0000) == 0) ? ((lx_uid16_t)(uid32)) : \
+ (((uid32) == ((lx_uid_t)-1)) ? ((lx_uid16_t)-1) : UID16_OVERFLOW))
+
+#define LX_GID32_TO_GID16(gid32) \
+ ((((gid32) & 0xffff0000) == 0) ? ((lx_gid16_t)(gid32)) : \
+ (((gid32) == ((lx_gid_t)-1)) ? ((lx_gid16_t)-1) : GID16_OVERFLOW))
+
+/* Seconds plus nanoseconds time value. */
+struct lx_timespec {
+ time_t ts_sec;
+ long ts_nsec;
+};
+
+/*
+ * Combine two 32-bit halves into one uint64_t: 'lo' supplies bits 0-31
+ * and 'hi' supplies bits 32-63. Both are widened before shifting, so the
+ * shift cannot overflow a 32-bit operand.
+ */
+#define LX_32TO64(lo, hi) \
+ ((uint64_t)((uint64_t)(lo) | ((uint64_t)(hi) << 32)))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_TYPES_H */
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile
new file mode 100644
index 0000000000..f69dcec561
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.lib
+
+# Per-architecture build directories. $(MACH) is always built; the
+# $(MACH64) directory is appended on the $(BUILD64)-prefixed lines.
+# NOTE(review): $(BUILD64) is defined outside this file; presumably it
+# expands to a comment character when 64-bit builds are disabled - confirm.
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS = $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+# Conditional (per-target) macro assignments: TARGET records which top-level
+# target was requested so that the $(SUBDIRS) rule below can pass it down.
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+# Descend into each architecture directory and build $(TARGET) there.
+# FRC has no prerequisites and no recipe, which makes this rule run on
+# every invocation.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com
new file mode 100644
index 0000000000..a0fd9da3fe
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# Common rules for lx_nametoaddr; included from the per-architecture
+# Makefiles (amd64/Makefile does "include ../Makefile.com"), so the
+# relative paths below are relative to those directories.
+LIBRARY = lx_nametoaddr.a
+VERS = .1
+
+COBJS = lx_nametoaddr.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+# Turn each mapfile into a -M<file> option.
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+# The pattern is "%o" -> "%c" (no dot), so for lx_nametoaddr.o the stem is
+# "lx_nametoaddr." and the result is ../common/lx_nametoaddr.c.
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+LX_THUNK = ../../lx_thunk
+
+ASFLAGS += -P -D_ASM
+LDLIBS += -lc -lnsl
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(LX_THUNK)
+# $$ORIGIN reaches the command line as a literal $ORIGIN; the single
+# quotes keep the shell from expanding it.
+DYNFLAGS += $(MAPOPTS) '-R$$ORIGIN'
+
+# Only the shared object is built.
+LIBS = $(DYNLIB)
+
+# Lint against the lx_thunk lint library of the matching architecture.
+LINTFLAGS += $(LX_THUNK)/$(MACH)/llib-llx_thunk.ln
+LINTFLAGS64 += $(LX_THUNK)/$(MACH64)/llib-llx_thunk.ln
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(DYNLIB)
+
+lint: lintcheck
+
+include ../../../../Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile
new file mode 100644
index 0000000000..a526d34834
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile
@@ -0,0 +1,35 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+# 64-bit build: the common rules plus the 64-bit library settings.
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+# Link directly against the 64-bit lx_thunk.so.1 from the lx_thunk build
+# directory (LX_THUNK is set in Makefile.com).
+DYNFLAGS += $(LX_THUNK)/$(MACH64)/lx_thunk.so.1
+# NOTE(review): plain "=" replaces any CLOBBERFILES value inherited from the
+# included makefiles - confirm that this is intended rather than "+=".
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c
new file mode 100644
index 0000000000..4b6a0532a9
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c
@@ -0,0 +1,479 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * BrandZ lx name services translation library.
+ *
+ * This library is specified as the default name services translation
+ * library in a custom netconfig(4) file that is only used when running
+ * native solaris processes in a Linux branded zone.
+ *
+ * What this means is that when a native solaris process runs in a
+ * Linux branded zone and issues a name service request to libnsl.so
+ * (either directly or indirectly via any libraries the program may
+ * be linked against) libnsl.so will dlopen(3c) this library and call
+ * into it to service these requests.
+ *
+ * This library is in turn linked against lx_thunk.so and will attempt
+ * to call interfaces in lx_thunk.so to resolve these requests. The
+ * functions that are called in lx_thunk.so are designed to have the
+ * same signature and behavior as the existing solaris name service
+ * interfaces. The name services interfaces we call are:
+ *
+ * Native Interface -> lx_thunk.so Interface
+ * ---------------- -> ---------------------
+ * gethostbyname_r -> lxt_gethostbyname_r
+ * gethostbyaddr_r -> lxt_gethostbyaddr_r
+ * getservbyname_r -> lxt_getservbyname_r
+ * getservbyport_r -> lxt_getservbyport_r
+ *
+ * This library also uses one additional interface from lx_thunk.so:
+ * lxt_debug
+ * Informational debugging messages are sent to lx_thunk.so via this
+ * interface and that library can decide if it wants to drop the
+ * messages or output them somewhere.
+ */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netdir.h>
+#include <nss_dbdefs.h>
+#include <rpc/clnt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/wait.h>
+#include <thread.h>
+#include <tiuser.h>
+#include <unistd.h>
+#include <sys/lx_thunk.h>
+
+
+/*
+ * Private nametoaddr library interfaces.
+ */
+/*
+ * Return 1 when the transport named by config->nc_netid is one we
+ * support ("tcp" or "udp"), and 0 otherwise.
+ */
+static int
+netconfig_is_ipv4(struct netconfig *config)
+{
+	/*
+	 * Looking at the rpc services registered on a Linux system (via
+	 * rpcinfo(1M)), on the loopback interface as well as on remote
+	 * interfaces, only tcp and udp registrations show up, so those
+	 * are the only transports accepted here.
+	 */
+	static const char *const supported[] = { "tcp", "udp" };
+	const char *netid = config->nc_netid;
+	size_t idx;
+
+	for (idx = 0; idx < sizeof (supported) / sizeof (supported[0]);
+	    idx++) {
+		if (strcmp(supported[idx], netid) == 0)
+			return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Public nametoaddr library interfaces.
+ *
+ * These are the functional entry points that libnsl will lookup (via
+ * the symbol names) when it loads this nametoaddr translation library.
+ */
+
+/*
+ * _netdir_getbyname() returns all of the addresses for
+ * a specified host and service.
+ *
+ * netconfigp selects the transport; only those accepted by
+ * netconfig_is_ipv4() are supported.  nd_hostservp carries the host name
+ * (HOST_SELF means "service lookup only") and an optional service name.
+ *
+ * On success a freshly allocated nd_addrlist is returned: one entry per
+ * host address found, or a single entry when only a service was looked
+ * up.  On failure NULL is returned, normally with _nderror set.  When
+ * neither lookup produced anything, NULL is returned without touching
+ * _nderror.
+ *
+ * The scratch buffers used for the lookups are released on every path,
+ * because everything that is returned has been copied out of them.
+ */
+struct nd_addrlist *
+_netdir_getbyname(struct netconfig *netconfigp,
+    struct nd_hostserv *nd_hostservp)
+{
+	struct nd_addrlist	*rp = NULL;
+	struct netbuf		*nbp = NULL;
+	struct sockaddr_in	*sap = NULL;
+	struct hostent		n2h_result;
+	struct servent		n2s_result;
+	char			*n2h_buf = NULL, *n2s_buf = NULL;
+	int			h_errno, i, host_self = 0, r_count;
+	int			n2h_count = 0, n2s_count = 0;
+
+	lxt_debug("_netdir_getbyname: request recieved\n");
+
+	/* Make sure this is an ipv4 request. */
+	if (!netconfig_is_ipv4(netconfigp)) {
+		_nderror = ND_BADARG;
+		goto fail;
+	}
+
+	/* Allocate memory for the queries. */
+	if (((n2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) ||
+	    ((n2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL))
+		goto malloc_fail;
+
+	/* Check if the host name specified is HOST_SELF. */
+	if (strcmp(nd_hostservp->h_host, HOST_SELF) == 0)
+		host_self = 1;
+
+	/*
+	 * If the hostname specified is HOST_SELF, then we're just
+	 * doing a service lookup so don't bother with trying
+	 * to lookup the host name.
+	 */
+	if (!host_self) {
+		/* Resolve the hostname. */
+		lxt_debug("_netdir_getbyname: "
+		    "resolving host name: %s\n", nd_hostservp->h_host);
+		if (lxt_gethostbyname_r(nd_hostservp->h_host, &n2h_result,
+		    n2h_buf, NSS_BUFLEN_HOSTS, &h_errno) == NULL) {
+			if (errno == ERANGE) {
+				_nderror = ND_SYSTEM;
+			} else if (h_errno == HOST_NOT_FOUND) {
+				_nderror = ND_NOHOST;
+			} else if (h_errno == TRY_AGAIN) {
+				_nderror = ND_TRY_AGAIN;
+			} else if (h_errno == NO_RECOVERY) {
+				_nderror = ND_NO_RECOVERY;
+			} else if (h_errno == NO_DATA) {
+				_nderror = ND_NO_DATA;
+			} else {
+				_nderror = ND_SYSTEM;
+			}
+			goto fail;
+		}
+
+		/* Count the addresses in the NULL terminated list. */
+		while (n2h_result.h_addr_list[n2h_count] != NULL)
+			n2h_count++;
+	}
+
+	if (nd_hostservp->h_serv != NULL) {
+		/* Resolve the service name */
+		lxt_debug("_netdir_getbyname: "
+		    "resolving service name: %s\n", nd_hostservp->h_serv);
+		if (lxt_getservbyname_r(nd_hostservp->h_serv,
+		    netconfigp->nc_proto, &n2s_result,
+		    n2s_buf, NSS_BUFLEN_SERVICES) == NULL) {
+			_nderror = ND_SYSTEM;
+			goto fail;
+		}
+		n2s_count = 1;
+	}
+
+	/* Make sure we got some results. */
+	if ((n2h_count + n2s_count) == 0) {
+		lxt_debug("_netdir_getbyname: no results!\n");
+		goto exit;
+	}
+	r_count = (n2h_count != 0) ? n2h_count : 1;
+
+	/*
+	 * Allocate the return buffers.  These buffers will be free'd
+	 * by libnsl`netdir_free(), so we need to allocate them in the
+	 * way that libnsl`netdir_free() expects.
+	 */
+	if (((rp = calloc(1, sizeof (struct nd_addrlist))) == NULL) ||
+	    ((nbp = calloc(r_count, sizeof (struct netbuf))) == NULL) ||
+	    ((sap = calloc(r_count, sizeof (struct sockaddr_in))) == NULL))
+		goto malloc_fail;
+
+	/* Initialize the structures we're going to return. */
+	rp->n_cnt = r_count;
+	rp->n_addrs = nbp;
+	for (i = 0; i < r_count; i++) {
+
+		/* Initialize the netbuf. */
+		nbp[i].maxlen = nbp[i].len = sizeof (struct sockaddr_in);
+		nbp[i].buf = (char *)&sap[i];
+
+		/* Initialize the sockaddr_in. */
+		sap[i].sin_family = AF_INET;
+
+		/*
+		 * If we looked up any host addresses copy them out.  Test
+		 * the count rather than host_self: a host that resolved
+		 * with an empty address list must not be dereferenced.
+		 */
+		if (n2h_count > 0)
+			bcopy(n2h_result.h_addr_list[i], &sap[i].sin_addr,
+			    sizeof (sap[i].sin_addr));
+
+		/* If we looked up any service ports copy them out. */
+		if (nd_hostservp->h_serv != NULL)
+			sap[i].sin_port = n2s_result.s_port;
+	}
+
+	/*
+	 * We're finally done.  Everything we return was copied out of the
+	 * lookup buffers, so release them before handing back the result.
+	 */
+	free(n2h_buf);
+	free(n2s_buf);
+	lxt_debug("_netdir_getbyname: success\n");
+	return (rp);
+
+malloc_fail:
+	_nderror = ND_NOMEM;
+
+fail:
+	lxt_debug("_netdir_getbyname: failed!\n");
+
+exit:
+	/* free(NULL) is a no-op, so anything not yet allocated is skipped. */
+	free(n2h_buf);
+	free(n2s_buf);
+	free(rp);
+	free(nbp);
+	free(sap);
+	return (NULL);
+}
+
+/*
+ * _netdir_getbyaddr() takes an address (hopefully obtained from
+ * someone doing a _netdir_getbyname()) and returns all hosts with
+ * that address.
+ *
+ * nbp must describe exactly one struct sockaddr_in of family AF_INET.
+ * A host lookup is done unless the address is INADDR_ANY, and a service
+ * lookup is done unless the port is 0.
+ *
+ * On success a freshly allocated nd_hostservlist is returned with one
+ * entry per host name (the canonical name followed by the aliases), or
+ * a single entry when only a service was looked up.  On failure NULL is
+ * returned, normally with _nderror set.  When neither lookup was
+ * attempted, NULL is returned without touching _nderror.
+ *
+ * The scratch buffers used for the lookups are released on every path,
+ * because every string that is returned has been strdup()'d out of them.
+ */
+struct nd_hostservlist *
+/*ARGSUSED*/
+_netdir_getbyaddr(struct netconfig *netconfigp, struct netbuf *nbp)
+{
+	struct nd_hostservlist	*rp = NULL;
+	struct nd_hostserv	*hsp = NULL;
+	struct sockaddr_in	*sap;
+	struct servent		p2s_result;
+	struct hostent		a2h_result;
+	char			*a2h_buf = NULL, *p2s_buf = NULL;
+	int			h_errno, i;
+	int			r_count = 0;
+	int			a2h_count = 0, p2s_count = 0;
+
+	lxt_debug("_netdir_getbyaddr: request recieved\n");
+
+	/* Make sure this is an ipv4 request. */
+	if (!netconfig_is_ipv4(netconfigp)) {
+		_nderror = ND_BADARG;
+		goto fail;
+	}
+
+	/*
+	 * Make sure the netbuf contains one struct sockaddr_in of
+	 * type AF_INET.
+	 *
+	 * NOTE(review): the second test rejects a netbuf whose maxlen is
+	 * larger than len, which looks backwards.  It is left as is because
+	 * callers that never set maxlen may rely on it - confirm.
+	 */
+	if ((nbp->len != sizeof (struct sockaddr_in)) ||
+	    (nbp->len < nbp->maxlen)) {
+		_nderror = ND_BADARG;
+		goto fail;
+	}
+	/*LINTED*/
+	sap = (struct sockaddr_in *)nbp->buf;
+	if (sap->sin_family != AF_INET) {
+		_nderror = ND_BADARG;
+		goto fail;
+	}
+
+	/* Allocate memory for the queries. */
+	if (((a2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) ||
+	    ((p2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL))
+		goto malloc_fail;
+
+	if (sap->sin_addr.s_addr != INADDR_ANY) {
+		lxt_debug("_netdir_getbyaddr: "
+		    "resolving host address: 0x%x\n", sap->sin_addr.s_addr);
+		if (lxt_gethostbyaddr_r((char *)&sap->sin_addr.s_addr,
+		    sizeof (sap->sin_addr.s_addr), AF_INET,
+		    &a2h_result, a2h_buf, NSS_BUFLEN_HOSTS,
+		    &h_errno) == NULL) {
+			if (errno == ERANGE) {
+				_nderror = ND_SYSTEM;
+			} else if (h_errno == HOST_NOT_FOUND) {
+				_nderror = ND_NOHOST;
+			} else if (h_errno == TRY_AGAIN) {
+				_nderror = ND_TRY_AGAIN;
+			} else if (h_errno == NO_RECOVERY) {
+				_nderror = ND_NO_RECOVERY;
+			} else if (h_errno == NO_DATA) {
+				_nderror = ND_NO_DATA;
+			} else {
+				_nderror = ND_SYSTEM;
+			}
+			goto fail;
+		}
+
+		/* Count the aliases in the NULL terminated list ... */
+		while (a2h_result.h_aliases[a2h_count] != NULL)
+			a2h_count++;
+		/*
+		 * ... and count a2h_result.h_name as one more valid name
+		 * for the address we just looked up.
+		 */
+		a2h_count++;
+	}
+
+	if (sap->sin_port != 0) {
+		lxt_debug("_netdir_getbyaddr: "
+		    "resolving service port: 0x%x\n", sap->sin_port);
+		if (lxt_getservbyport_r(sap->sin_port,
+		    netconfigp->nc_proto, &p2s_result,
+		    p2s_buf, NSS_BUFLEN_SERVICES) == NULL) {
+			_nderror = ND_SYSTEM;
+			goto fail;
+		}
+		p2s_count = 1;
+	}
+
+	/* Make sure we got some results. */
+	if ((a2h_count + p2s_count) == 0) {
+		lxt_debug("_netdir_getbyaddr: no results!\n");
+		goto exit;
+	}
+	r_count = (a2h_count != 0) ? a2h_count : 1;
+
+	/*
+	 * Allocate the return buffers.  These buffers will be free'd
+	 * by libnsl`netdir_free(), so we need to allocate them in the
+	 * way that libnsl`netdir_free() expects.
+	 */
+	if (((rp = calloc(1, sizeof (struct nd_hostservlist))) == NULL) ||
+	    ((hsp = calloc(r_count, sizeof (struct nd_hostserv))) == NULL))
+		goto malloc_fail;
+
+	lxt_debug("_netdir_getbyaddr: hahaha0 - %d\n", r_count);
+	rp->h_cnt = r_count;
+	rp->h_hostservs = hsp;
+	for (i = 0; i < r_count; i++) {
+		/* If we looked up any host names copy them out. */
+		lxt_debug("_netdir_getbyaddr: hahaha1 - %d\n", r_count);
+		if ((a2h_count > 0) && (i == 0) &&
+		    ((hsp[i].h_host = strdup(a2h_result.h_name)) == NULL))
+			goto malloc_fail;
+
+		/* Entry i (i > 0) holds alias i - 1. */
+		if ((a2h_count > 0) && (i > 0) &&
+		    ((hsp[i].h_host =
+		    strdup(a2h_result.h_aliases[i - 1])) == NULL))
+			goto malloc_fail;
+
+		lxt_debug("_netdir_getbyaddr: hahaha2 - %d\n", r_count);
+		/* If we looked up any service names copy them out. */
+		if ((p2s_count > 0) &&
+		    ((hsp[i].h_serv = strdup(p2s_result.s_name)) == NULL))
+			goto malloc_fail;
+		lxt_debug("_netdir_getbyaddr: hahaha3 - %d\n", r_count);
+	}
+
+	/*
+	 * We're finally done.  All returned strings are private copies,
+	 * so release the lookup buffers before handing back the result.
+	 */
+	free(a2h_buf);
+	free(p2s_buf);
+	lxt_debug("_netdir_getbyaddr: success\n");
+	return (rp);
+
+malloc_fail:
+	_nderror = ND_NOMEM;
+
+fail:
+	lxt_debug("_netdir_getbyaddr: failed!\n");
+
+exit:
+	/* free(NULL) is a no-op, so anything not yet allocated is skipped. */
+	free(a2h_buf);
+	free(p2s_buf);
+	free(rp);
+	if (hsp != NULL) {
+		/*
+		 * hsp came from calloc(), so entries that were never
+		 * filled in are NULL and are safely ignored by free().
+		 */
+		for (i = 0; i < r_count; i++) {
+			free(hsp[i].h_host);
+			free(hsp[i].h_serv);
+		}
+		free(hsp);
+	}
+	return (NULL);
+}
+
+/*
+ * _taddr2uaddr() converts the struct sockaddr_in held in nbp->buf into a
+ * universal address string: the dotted address followed by ".<hi>.<lo>",
+ * where hi and lo are the high and low bytes of the port in host byte
+ * order.  The string is returned in strdup()'d memory.  NULL is returned
+ * with _nderror set when an argument is missing (ND_BADARG) or when the
+ * protocol family is not NC_INET (ND_SYSTEM).
+ */
+char *
+/* ARGSUSED */
+_taddr2uaddr(struct netconfig *netconfigp, struct netbuf *nbp)
+{
+	extern char *inet_ntoa_r();
+
+	char			uaddr[RPC_INET6_MAXUADDRSIZE];
+	struct sockaddr_in	*sin;
+	unsigned short		port;
+	int			port_hi, port_lo;
+
+	if (netconfigp == NULL || nbp == NULL || nbp->buf == NULL) {
+		_nderror = ND_BADARG;
+		return (NULL);
+	}
+
+	/* Only inet address translation is supported. */
+	if (strcmp(netconfigp->nc_protofmly, NC_INET) != 0) {
+		assert(0);
+		_nderror = ND_SYSTEM;
+		return (NULL);
+	}
+
+	/* LINTED pointer cast */
+	sin = (struct sockaddr_in *)(nbp->buf);
+
+	/* Split the host-order port into its two bytes. */
+	port = ntohs(sin->sin_port);
+	port_hi = port >> 8;
+	port_lo = port & 255;
+
+	/* Dotted address first, then append the two port bytes. */
+	(void) inet_ntoa_r(sin->sin_addr, uaddr);
+	(void) sprintf(uaddr + strlen(uaddr), ".%d.%d", port_hi, port_lo);
+
+	/* Hand back a heap copy, never static data. */
+	return (strdup(uaddr));
+}
+
+/*
+ * _uaddr2taddr() is the entry point for translating a universal
+ * address back into a netbuf structure.
+ *
+ * This library does not implement that translation: the function trips
+ * assert(0) and, if assertions are compiled out, sets _nderror to
+ * ND_SYSTEM and returns NULL. Both arguments are ignored.
+ */
+struct netbuf *
+/* ARGSUSED */
+_uaddr2taddr(struct netconfig *netconfigp, char *uaddr)
+{
+ assert(0);
+ _nderror = ND_SYSTEM;
+ return (NULL);
+}
+
+/*
+ * _netdir_options() is the "catch-all" entry point for transport
+ * specific operations.
+ *
+ * This library does not implement any of them: the function trips
+ * assert(0) and, if assertions are compiled out, sets _nderror to
+ * ND_SYSTEM and returns 0. All arguments are ignored.
+ */
+int
+/* ARGSUSED */
+_netdir_options(struct netconfig *netconfigp, int option, int fd, void *par)
+{
+ assert(0);
+ _nderror = ND_SYSTEM;
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers
new file mode 100644
index 0000000000..3ed165195b
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers
@@ -0,0 +1,51 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.1 {
+ global:
+ _netdir_getbyname;
+ _netdir_getbyaddr;
+ _taddr2uaddr;
+ _uaddr2taddr;
+ _netdir_options;
+ # Every symbol not listed above is given local scope.
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
new file mode 100644
index 0000000000..67545e46cd
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+# Add the $(MACH) build of lx_thunk.so.1 to the flags used to link the library.
+DYNFLAGS += $(LX_THUNK)/$(MACH)/lx_thunk.so.1
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+# install depends on all, then on the files named by $(ROOTLIBS).
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_support/Makefile b/usr/src/lib/brand/lx/lx_support/Makefile
new file mode 100644
index 0000000000..e7c958e13a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/Makefile
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG = lx_support
+PROGS = $(PROG)
+OBJS = lx_support
+# 'all' is declared ahead of the includes, presumably so it is the default target.
+all: $(PROG)
+
+include ../Makefile.lx
+include $(SRC)/cmd/Makefile.cmd
+
+# override the install directory so the program lands in $(ROOTBRANDDIR)
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(OBJS) $(ROOTPROGS)
+
+UTSBASE = $(SRC)/uts
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx
+LDLIBS += -lzonecfg
+
+.KEEP_STATE:
+
+install: all $(ROOTPROGS)
+# NOTE(review): OBJS names the program itself (no .o suffix), so clean removes the binary - confirm intended.
+clean:
+ $(RM) $(PROG) $(OBJS)
+
+lint: lint_PROG
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c
new file mode 100644
index 0000000000..70de13dd71
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/lx_support.c
@@ -0,0 +1,578 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * lx_support is a small cli utility used to perform some brand-specific
+ * tasks when booting, halting, or verifying a zone. This utility is not
+ * intended to be called by users - it is intended to be invoked by the
+ * zones utilities.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stropts.h>
+#include <sys/ioccom.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <libzonecfg.h>
+#include <sys/lx_audio.h>
+#include <sys/lx_brand.h>
+
+static void lxs_err(char *msg, ...) __NORETURN;
+static void usage(void) __NORETURN;
+
+#define CP_CMD "/usr/bin/cp"
+#define MOUNT_CMD "/sbin/mount"
+
+#define LXA_AUDIO_DEV "/dev/brand/lx/audio_devctl"
+#define INTSTRLEN 32
+#define KVSTRLEN 10
+
+static char *bname = NULL;
+static char *zonename = NULL;
+static char *zoneroot = NULL;
+
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+/*
+ * Format a printf-style message, print it on stdout as
+ * "<program name> error: <message>", and exit with status 1.
+ * This function does not return.
+ */
+static void
+lxs_err(char *msg, ...)
+{
+	va_list args;
+	char text[1024];
+
+	va_start(args, msg);
+	/*LINTED*/
+	(void) vsnprintf(text, sizeof (text), msg, args);
+	va_end(args);
+
+	(void) fprintf(stdout, "%s error: %s\n", bname, text);
+
+	exit(1);
+	/*NOTREACHED*/
+}
+
+/*
+ * The Linux init(1M) command requires communication over the /dev/initctl
+ * FIFO. Since any attempt to create a file in /dev will fail, we must
+ * create it here.
+ *
+ * The FIFO is created as <zoneroot>/dev/initctl (unless a FIFO already
+ * exists there) and is then lofs mounted onto <zoneroot>/root/dev/initctl.
+ * Every failure is reported through lxs_err(), which exits.
+ */
+static void
+lxs_make_initctl()
+{
+	char cmdbuf[ARG_MAX];
+	char path[MAXPATHLEN];
+	char special[MAXPATHLEN];
+	struct stat buf;
+	int err;
+
+	if (snprintf(special, sizeof (special), "%s/dev/initctl", zoneroot) >=
+	    sizeof (special))
+		lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+		    gettext("zoneroot is too long"));
+
+	if (snprintf(path, sizeof (path), "%s/root/dev/initctl", zoneroot) >=
+	    sizeof (path))
+		lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+		    gettext("zoneroot is too long"));
+
+	/* create the actual fifo as <zoneroot>/dev/initctl */
+	if (stat(special, &buf) != 0) {
+		/* only "does not exist yet" is an acceptable stat failure */
+		err = errno;
+		if (err != ENOENT)
+			lxs_err("%s: %s",
+			    gettext("Failed to create /dev/initctl"),
+			    strerror(err));
+		if (mkfifo(special, 0644) < 0) {
+			err = errno;
+			lxs_err("%s: %s",
+			    gettext("Failed to create /dev/initctl"),
+			    strerror(err));
+		}
+	} else {
+		/*
+		 * Compare the whole file type with S_ISFIFO(): testing
+		 * only the S_IFIFO bit would also accept any other file
+		 * type whose type code happens to contain that bit.
+		 */
+		if (!S_ISFIFO(buf.st_mode))
+			lxs_err("%s: %s",
+			    gettext("Failed to create /dev/initctl"),
+			    gettext("It already exists, and is not a FIFO."));
+	}
+
+	/*
+	 * now lofs mount the <zoneroot>/dev/initctl fifo onto
+	 * <zoneroot>/root/dev/initctl
+	 */
+	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s -F lofs %s %s", MOUNT_CMD,
+	    special, path) >= sizeof (cmdbuf))
+		lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+		    gettext("zoneroot is too long"));
+
+	/*
+	 * NOTE(review): only a negative return from system() is treated
+	 * as an error; a non-zero exit status from the mount command
+	 * itself is not checked here - confirm that this is intended.
+	 */
+	if (system(cmdbuf) < 0) {
+		err = errno;
+		lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+		    strerror(err));
+	}
+}
+
+/*
+ * Remove <zoneroot>/root/.autofsck. fsck gets really confused when run
+ * inside a zone, and the absence of this file prevents it from running.
+ * A file that is already missing is not an error; any other failure is
+ * reported through lxs_err(), which exits.
+ */
+static void
+lxs_remove_autofsck()
+{
+	char target[MAXPATHLEN];
+	int len, saved;
+
+	len = snprintf(target, MAXPATHLEN, "%s/root/.autofsck", zoneroot);
+	if (len >= MAXPATHLEN)
+		lxs_err("%s: %s", gettext("Failed to remove /.autofsck"),
+		    gettext("zoneroot is too long"));
+
+	if (unlink(target) >= 0)
+		return;
+
+	/* capture errno before any other library call can disturb it */
+	saved = errno;
+	if (saved == ENOENT)
+		return;
+
+	lxs_err("%s: %s", gettext("Failed to remove /.autofsck"),
+	    strerror(saved));
+}
+
+/*
+ * Extract any lx-supported attributes from the zone configuration.
+ *
+ * On return *restart and *audio hold the boolean "init-restart" and
+ * "audio" attributes (B_FALSE when absent), *idev and *odev point to
+ * newly allocated strings holding "audio-inputdev" and "audio-outputdev"
+ * (empty when absent), and *kvers points to a newly allocated copy of
+ * "kernel-version", or is NULL when that attribute is absent or empty.
+ * Any failure is reported through lxs_err(), which exits.
+ */
+static void
+lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio,
+    char **idev, char **odev, char **kvers)
+{
+	struct zone_attrtab attr;
+	char *name;
+	int rc, bad;
+
+	/* position the attribute iterator at the first entry */
+	if (zonecfg_setattrent(zdh) != Z_OK) {
+		zonecfg_fini_handle(zdh);
+		lxs_err(gettext("error accessing zone configuration"));
+	}
+
+	/* zero-filled buffers double as empty strings */
+	*idev = (char *)calloc(1, INTSTRLEN);
+	*odev = (char *)calloc(1, INTSTRLEN);
+	*kvers = (char *)calloc(1, KVSTRLEN);
+	if (*idev == NULL || *odev == NULL || *kvers == NULL)
+		lxs_err(gettext("out of memory"));
+
+	*audio = B_FALSE;
+	*restart = B_FALSE;
+
+	for (;;) {
+		rc = zonecfg_getattrent(zdh, &attr);
+		if (rc != Z_OK)
+			break;
+
+		/* fetch the value of each attribute we know about */
+		name = attr.zone_attr_name;
+		if (strcmp(name, "init-restart") == 0)
+			bad = (zonecfg_get_attr_boolean(&attr,
+			    restart) != Z_OK);
+		else if (strcmp(name, "audio") == 0)
+			bad = (zonecfg_get_attr_boolean(&attr,
+			    audio) != Z_OK);
+		else if (strcmp(name, "audio-inputdev") == 0)
+			bad = (zonecfg_get_attr_string(&attr, *idev,
+			    INTSTRLEN) != Z_OK);
+		else if (strcmp(name, "audio-outputdev") == 0)
+			bad = (zonecfg_get_attr_string(&attr, *odev,
+			    INTSTRLEN) != Z_OK);
+		else if (strcmp(name, "kernel-version") == 0)
+			bad = (zonecfg_get_attr_string(&attr, *kvers,
+			    KVSTRLEN) != Z_OK);
+		else
+			bad = 0;
+
+		if (bad)
+			lxs_err(gettext("invalid type for zone attribute: %s"),
+			    attr.zone_attr_name);
+	}
+
+	/* an empty kernel version is reported to the caller as NULL */
+	if ((*kvers)[0] == '\0') {
+		free(*kvers);
+		*kvers = NULL;
+	}
+
+	/* the iteration must have stopped because it ran out of entries */
+	if (rc != Z_NO_ENTRY)
+		lxs_err(gettext("error accessing zone configuration"));
+}
+
+/*
+ * Check an audio device attribute value.
+ *
+ * Returns 1 if dev is the empty string, "default", "none", or consists
+ * solely of decimal digits; returns 0 otherwise.
+ */
+static int
+lxs_iodev_ok(char *dev)
+{
+	const char *p;
+
+	if (dev[0] == '\0' || strcmp(dev, "default") == 0 ||
+	    strcmp(dev, "none") == 0)
+		return (1);
+
+	for (p = dev; *p != '\0'; p++) {
+		/*
+		 * isdigit() is only defined for EOF and values that fit
+		 * in an unsigned char, so a plain char (which may be
+		 * negative) has to be converted first.
+		 */
+		if (!isdigit((unsigned char)*p))
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * The audio configuration settings are read from the zone configuration
+ * file. Audio configuration is specified via the following attributes
+ * (settable via zonecfg):
+ *	attr name:	audio
+ *	attr type:	boolean
+ *
+ *	attr name:	audio-inputdev
+ *	attr type:	string
+ *	attr values:	"default" | "none" | [0-9]+
+ *
+ *	attr name:	audio-outputdev
+ *	attr type:	string
+ *	attr values:	"default" | "none" | [0-9]+
+ *
+ * The user can enable the linux brand audio devices (i.e. /dev/dsp and
+ * /dev/mixer) for a zone by setting the "audio" attribute to true. (The
+ * absence of this attribute leads to an assumed value of false.)
+ *
+ * If the "audio" attribute is set to true and "audio-inputdev" and
+ * "audio-outputdev" are not set, then when a linux application accesses
+ * audio devices these accesses will be mapped to the system default
+ * audio device, i.e. /dev/audio and /dev/audioctl.
+ *
+ * If "audio-inputdev" is set to none, then audio input will be disabled.
+ * If "audio-inputdev" is set to an integer, then when a Linux application
+ * attempts to access audio devices these accesses will be mapped to
+ * /dev/sound/<audio-inputdev attribute value>. The same behavior will
+ * apply to the "audio-outputdev" attribute for linux audio output
+ * device accesses.
+ *
+ * If "audio-inputdev" or "audio-outputdev" exist but the audio attribute
+ * is missing (or set to false) audio will not be enabled for the zone.
+ *
+ * This function validates idev and odev, builds the registration request
+ * (an empty device name is replaced by "default") and issues it to the
+ * lx_audio control device. Any failure is reported through lxs_err(),
+ * which exits.
+ */
+static void
+lxs_init_audio(char *idev, char *odev)
+{
+	lxa_zone_reg_t req;
+	int ctlfd, rc;
+
+	/* reject malformed device properties before touching the driver */
+	if (!lxs_iodev_ok(idev))
+		lxs_err(gettext("invalid value for zone attribute: %s"),
+		    "audio-inputdev");
+
+	if (!lxs_iodev_ok(odev))
+		lxs_err(gettext("invalid value for zone attribute: %s"),
+		    "audio-outputdev");
+
+	/* start from an all-zero request and fill in the zone name */
+	(void) memset(&req, 0, sizeof (req));
+	(void) strlcpy(req.lxa_zr_zone_name, zonename,
+	    sizeof (req.lxa_zr_zone_name));
+
+	/* input device: fall back to "default" when none was given */
+	(void) strlcpy(req.lxa_zr_inputdev, idev,
+	    sizeof (req.lxa_zr_inputdev));
+	if (req.lxa_zr_inputdev[0] == '\0')
+		(void) strlcpy(req.lxa_zr_inputdev, "default",
+		    sizeof (req.lxa_zr_inputdev));
+
+	/* output device: fall back to "default" when none was given */
+	(void) strlcpy(req.lxa_zr_outputdev, odev,
+	    sizeof (req.lxa_zr_outputdev));
+	if (req.lxa_zr_outputdev[0] == '\0')
+		(void) strlcpy(req.lxa_zr_outputdev, "default",
+		    sizeof (req.lxa_zr_outputdev));
+
+	/* open the audio device control node */
+	ctlfd = open(LXA_AUDIO_DEV, O_RDWR);
+	if (ctlfd < 0)
+		lxs_err(gettext("error accessing lx_audio device"));
+
+	/* enable audio for this zone */
+	rc = ioctl(ctlfd, LXA_IOC_ZONE_REG, &req);
+	(void) close(ctlfd);
+	if (rc != 0)
+		lxs_err(gettext("error configuring lx_audio device"));
+}
+
+/*
+ * Boot-time setup for an lx zone: create the /dev/initctl FIFO, remove
+ * /.autofsck, read the zone's lx attributes, configure audio when it is
+ * enabled, and pass the init-restart and kernel-version settings to the
+ * kernel. Returns 0; every failure is reported through lxs_err(), which
+ * exits.
+ */
+static int
+lxs_boot()
+{
+	zone_dochandle_t cfg;
+	zoneid_t zid;
+	boolean_t want_audio, want_restart;
+	char *in_dev, *out_dev, *kern;
+	int kern_id;
+
+	lxs_make_initctl();
+	lxs_remove_autofsck();
+
+	cfg = zonecfg_init_handle();
+	if (cfg == NULL)
+		lxs_err(gettext("unable to initialize zone handle"));
+
+	if (zonecfg_get_handle((char *)zonename, cfg) != Z_OK) {
+		zonecfg_fini_handle(cfg);
+		lxs_err(gettext("unable to load zone configuration"));
+	}
+
+	/* Pull the attributes we care about, then drop the handle. */
+	lxs_getattrs(cfg, &want_restart, &want_audio, &in_dev, &out_dev,
+	    &kern);
+	zonecfg_fini_handle(cfg);
+
+	/* Configure the zone's audio support (if any). */
+	if (want_audio == B_TRUE)
+		lxs_init_audio(in_dev, out_dev);
+
+	/*
+	 * Let the kernel know whether or not this zone's init process
+	 * should be automatically restarted on its death.
+	 */
+	zid = getzoneidbyname(zonename);
+	if (zid < 0)
+		lxs_err(gettext("unable to get zoneid"));
+	if (zone_setattr(zid, LX_ATTR_RESTART_INIT, &want_restart,
+	    sizeof (boolean_t)) == -1)
+		lxs_err(gettext("error setting zone's restart_init property"));
+
+	/* Only an explicit "2.6" selects the 2.6 kernel personality. */
+	kern_id = (kern != NULL && strcmp(kern, "2.6") == 0) ?
+	    LX_KERN_2_6 : LX_KERN_2_4;
+
+	if (zone_setattr(zid, LX_KERN_VERSION_NUM, &kern_id,
+	    sizeof (int)) < 0)
+		lxs_err(gettext("unable to set kernel version"));
+
+	return (0);
+}
+
+/*
+ * Halt-time cleanup for an lx zone: ask the lx_audio control device to
+ * unregister the zone. Returns 0 on success, and also when the request
+ * fails with ENOENT; any other failure is reported through lxs_err(),
+ * which exits.
+ */
+static int
+lxs_halt()
+{
+	lxa_zone_reg_t lxa_zr;
+	int fd, rv, err;
+
+	/*
+	 * We don't bother to check if audio is configured for this zone
+	 * before issuing a request to unconfigure it. There's no real
+	 * reason to do this, it would require looking up the xml zone and
+	 * brand configuration information (which could have been changed
+	 * since the zone was booted), and it would involve more library
+	 * calls there by increasing chances for failure.
+	 */
+
+	/* initialize the zone name in the ioctl request */
+	bzero(&lxa_zr, sizeof (lxa_zr));
+	(void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename,
+	    sizeof (lxa_zr.lxa_zr_zone_name));
+
+	/* open the audio device control node */
+	if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0)
+		lxs_err(gettext("error accessing lx_audio device"));
+
+	/*
+	 * disable audio for this zone
+	 *
+	 * we ignore ENOENT errors here because it's possible that
+	 * audio is not configured for this zone. (either it was
+	 * already unconfigured or someone could have added the
+	 * audio resource to this zone after it was booted.)
+	 */
+	rv = ioctl(fd, LXA_IOC_ZONE_UNREG, &lxa_zr);
+
+	/* save the ioctl's errno: close() below is free to overwrite it */
+	err = errno;
+	(void) close(fd);
+
+	if ((rv == 0) || (err == ENOENT))
+		return (0);
+	lxs_err(gettext("error unconfiguring lx_audio device: %s"),
+	    strerror(err));
+	/*NOTREACHED*/
+	return (0);
+}
+
+static void lxs_verify_fail(zone_dochandle_t, char *) __NORETURN;
+
+/*
+ * Release the zone configuration handle, then report msg through
+ * lxs_err(), which exits.
+ */
+static void
+lxs_verify_fail(zone_dochandle_t handle, char *msg)
+{
+	zonecfg_fini_handle(handle);
+	lxs_err(msg);
+	/*NOTREACHED*/
+}
+
+/*
+ * Verify that the zone configuration in xmlfile is usable for an lx
+ * zone. The configuration is rejected if it contains ZFS datasets,
+ * added devices, an exclusive ip-type or hostid emulation, or if the
+ * audio device or kernel-version attributes hold unsupported values.
+ * Returns 0 when the configuration is acceptable; every rejection is
+ * reported through lxs_err(), which exits.
+ */
+static int
+lxs_verify(char *xmlfile)
+{
+	zone_dochandle_t cfg;
+	struct zone_dstab ds;
+	struct zone_devtab dev;
+	zone_iptype_t iptype;
+	boolean_t want_audio, want_restart;
+	char *in_dev, *out_dev, *kern;
+	char hostid[HW_HOSTID_LEN];
+
+	cfg = zonecfg_init_handle();
+	if (cfg == NULL)
+		lxs_err(gettext("internal libzonecfg.so.1 error"), 0);
+
+	if (zonecfg_get_xml_handle(xmlfile, cfg) != Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("zonecfg provided an invalid XML file"));
+
+	/* No ZFS datasets may be configured. */
+	if (zonecfg_setdsent(cfg) != Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("zonecfg provided an invalid XML file"));
+	if (zonecfg_getdsent(cfg, &ds) == Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("lx zones do not support ZFS datasets"));
+
+	/* No devices may be configured. */
+	if (zonecfg_setdevent(cfg) != Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("zonecfg provided an invalid XML file"));
+	if (zonecfg_getdevent(cfg, &dev) == Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("lx zones do not support added devices"));
+
+	/* The ip-type must not be exclusive. */
+	if (zonecfg_get_iptype(cfg, &iptype) != Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("zonecfg provided an invalid XML file"));
+	if (iptype == ZS_EXCLUSIVE)
+		lxs_verify_fail(cfg,
+		    gettext("lx zones do not support an 'exclusive' "
+		    "ip-type"));
+
+	/* Hostid emulation must not be enabled. */
+	if (zonecfg_get_hostid(cfg, hostid, sizeof (hostid)) == Z_OK)
+		lxs_verify_fail(cfg,
+		    gettext("lx zones do not support hostid emulation"));
+
+	/* Extract any relevant attributes from the config file. */
+	lxs_getattrs(cfg, &want_restart, &want_audio, &in_dev, &out_dev,
+	    &kern);
+	zonecfg_fini_handle(cfg);
+
+	/* The device properties only matter when audio is enabled. */
+	if (want_audio) {
+		if (!lxs_iodev_ok(in_dev))
+			lxs_err(gettext("invalid value for zone attribute: %s"),
+			    "audio-inputdev");
+
+		if (!lxs_iodev_ok(out_dev))
+			lxs_err(gettext("invalid value for zone attribute: %s"),
+			    "audio-outputdev");
+	}
+
+	/* A kernel version, when present, must be "2.4" or "2.6". */
+	if (kern != NULL && strcmp(kern, "2.4") != 0 &&
+	    strcmp(kern, "2.6") != 0)
+		lxs_err(gettext("invalid value for zone attribute: %s"),
+		    "kernel-version");
+
+	return (0);
+}
+
+static void
+usage()
+{
+ /*
+ * Print the synopsis of the boot, halt and verify subcommands on
+ * stderr, then exit with status 1; this function does not return.
+ */
+ (void) fprintf(stderr,
+ gettext("usage:\t%s boot <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s halt <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s verify <xml file>\n\n"), bname);
+ exit(1);
+}
+
+/*
+ * Entry point: dispatch to the boot, halt or verify subcommand.
+ *
+ *	boot <zoneroot> <zonename>
+ *	halt <zoneroot> <zonename>
+ *	verify <xml file>
+ *
+ * Returns the subcommand's result; an unknown subcommand or too few
+ * arguments prints the usage message and exits.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *cmd;
+	int is_boot;
+
+	(void) setlocale(LC_ALL, "");
+	(void) textdomain(TEXT_DOMAIN);
+
+	bname = basename(argv[0]);
+
+	if (argc < 3)
+		usage();
+
+	cmd = argv[1];
+
+	if (strcmp(cmd, "verify") == 0) {
+		if (argc != 3)
+			lxs_err(gettext("usage: %s verify <xml file>"),
+			    bname);
+		return (lxs_verify(argv[2]));
+	}
+
+	/* boot and halt take the same arguments */
+	is_boot = (strcmp(cmd, "boot") == 0);
+	if (is_boot || strcmp(cmd, "halt") == 0) {
+		if (argc != 4)
+			lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"),
+			    bname, cmd);
+		zoneroot = argv[2];
+		zonename = argv[3];
+		return (is_boot ? lxs_boot() : lxs_halt());
+	}
+
+	usage();
+	/*NOTREACHED*/
+}
diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile b/usr/src/lib/brand/lx/lx_thunk/Makefile
new file mode 100644
index 0000000000..f69dcec561
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/Makefile
@@ -0,0 +1,52 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.lib
+# Build in $(MACH) always; $(MACH64) is added through the $(BUILD64) prefix.
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS = $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+# Each top-level target records its own name in TARGET for the sub-make below.
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+# Descend into each subdirectory and run $(TARGET) there; FRC (no recipe) is its prerequisite.
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile.com b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
new file mode 100644
index 0000000000..75629a6d61
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
@@ -0,0 +1,74 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY = lx_thunk.a
+VERS = .1
+
+COBJS = lx_thunk.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+#
+# Since our name doesn't start with "lib", Makefile.lib incorrectly
+# calculates LIBNAME. Therefore, we set it here.
+#
+LIBNAME = lx_thunk
+
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+# Derive the ../common/*.c source names from the object list.
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+ASFLAGS += -P -D_ASM
+LDLIBS += -lc
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I ../../lx_brand \
+ -I$(UTSBASE)/common/brand/lx
+
+# lx_thunk.so.1 interposes on a number of libc.so.1 routines.
+DYNFLAGS += $(MAPOPTS) $(ZINTERPOSE)
+
+LIBS = $(DYNLIB)
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(DYNLIB)
+
+lint: $(LINTLIB) lintcheck
+
+include ../../../../Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
new file mode 100644
index 0000000000..dbb283dff1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+# clobber also removes the library and lint library under $(ROOTLIBDIR64).
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+# install depends on all, then on the files named by $(ROOTLIBS64).
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
new file mode 100644
index 0000000000..130f8fdc86
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
@@ -0,0 +1,1123 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The BrandZ Linux thunking library.
+ *
+ * The interfaces defined in this file form the client side of a bridge
+ * that allows native Solaris processes to access Linux services. Currently
+ * the Linux services that are made accessible by these interfaces
+ * are:
+ * - Linux host <-> address naming services
+ * - Linux service <-> port naming services
+ * - Linux syslog
+ *
+ * Currently, to use this library it must be LD_PRELOADed into the
+ * application that needs to access Linux services. Once loaded
+ * Linux services are accessed by the client application in two
+ * different ways:
+ *
+ * - Direct library calls:
+ * lxt_gethostbyname_r
+ * lxt_gethostbyaddr_r
+ * lxt_getservbyname_r
+ * lxt_getservbyport_r
+ * lxt_debug
+ *
+ * These library functions are used by the BrandZ lx name services
+ * translation library (lx_nametoaddr.so) to handle libnsl.so name
+ * service requests.
+ *
+ * - Intercepted library calls:
+ * openlog(3c)
+ * syslog(3c)
+ * vsyslog(3c)
+ * closelog(3c)
+ *
+ * Via the LD_PRELOAD mechanism this library interposes itself on
+ * these interfaces and when the application calls these interfaces
+ * (either directly or indirectly via any libraries the program may
+ * be linked against) this library intercepts the request and passes
+ * it onto a Linux process to handle the request.
+ *
+ * Once this library receives a request that needs to be serviced by a
+ * Linux process, it packs up that request and attempts to send it
+ * to a doors server. The door server interfaces are defined in
+ * lx_thunk_server.h. If the doors server is not running or not
+ * responding, this library will attempt to spawn a new doors server
+ * by forking and executing the following shell script (which runs as
+ * a native /bin/sh Linux process):
+ * /native/usr/lib/brand/lx/lx_thunk
+ *
+ * Notes:
+ * - This library also intercepts the following system calls:
+ * close(2) - We intercept close(2) to prevent the caller from
+ * accidentally closing any of the file descriptors we
+ * need to do our work.
+ *
+ * setppriv(2) - We intercept setppriv(2) to prevent a process
+ * from dropping any of the privileges we'll need to create
+ * a new lx_thunk server process and to deal with service
+ * requests.
+ *
+ * - To facilitate the running of native Solaris programs and libraries
+ * when this library is preloaded into an application it will chroot()
+ * into /native. This way the Solaris application and libraries can
+ * access files via their expected paths and we can avoid having to
+ * either do path mapping or modifying all libraries to make them
+ * aware of "/native" so that they can pre-pend it to all their
+ * filesystem operations.
+ *
+ * - This library can only be used with processes that are initially
+ * run by root in a zone. The reason is that we use the chroot()
+ * system call and this requires the PRIV_PROC_CHROOT privilege,
+ * which non-root users don't have.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <netdir.h>
+#include <priv.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <synch.h>
+#include <sys/brand.h>
+#include <sys/fcntl.h>
+#include <sys/lx_thunk_server.h>
+#include <sys/lx_thunk.h>
+#include <sys/mman.h>
+#include <sys/priv_impl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <thread.h>
+#include <unistd.h>
+#include <sys/varargs.h>
+
+/* Directory and name prefix passed to tempnam() when creating the fifos. */
+#define LXT_DOOR_DIR "/tmp"
+#define LXT_DOOR_PREFIX "lxt"
+/* Size (less the terminator) of the buffer lxt_vdebug() formats into. */
+#define LXT_MSG_MAXLEN (128 + MAXPATHLEN)
+
+/* Register init() as this object's initialization routine. */
+#pragma init(init)
+
+/* Generic signatures used to call the next "close" and "_setppriv". */
+typedef uintptr_t (*fp1_t)(uintptr_t);
+typedef uintptr_t (*fp3_t)(uintptr_t, uintptr_t, uintptr_t);
+
+static char *lxt_debug_path = NULL; /* debug output file path */
+/* Private copy of the debug path; lxt_debug_path points here once set. */
+static char lxt_debug_path_buf[MAXPATHLEN];
+/* Descriptor for the real "/" opened in init(), before the chroot(). */
+static int root_fd;
+/* Descriptor for the debug log, or -1 when debugging is disabled. */
+static int debug_fd = -1;
+
+void lxt_debug(const char *msg, ...);
+
+/*
+ * Open the debug log if the LX_DEBUG environment variable is set.
+ *
+ * The log path comes from LX_DEBUG_FILE and defaults to /dev/tty.  The
+ * descriptor is opened here and kept open because init() chroot()s right
+ * afterwards, after which the path could no longer be resolved.
+ */
+static void
+lxt_debug_open(void)
+{
+	const char *path;
+
+	if (getenv("LX_DEBUG") == NULL)
+		return;
+
+	/* Send all debugging output to /dev/tty unless a file was named. */
+	if ((path = getenv("LX_DEBUG_FILE")) == NULL)
+		path = "/dev/tty";
+
+	/* Keep our own copy of the path rather than the environment's. */
+	(void) strlcpy(lxt_debug_path_buf, path, sizeof (lxt_debug_path_buf));
+	lxt_debug_path = lxt_debug_path_buf;
+
+	debug_fd = open(lxt_debug_path,
+	    O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666);
+	if (debug_fd != -1)
+		(void) fchmod(debug_fd, 0666);
+}
+
+/*
+ * Library initialization routine (registered via #pragma init).
+ *
+ * Sets up optional debug logging, saves a descriptor for the real root
+ * directory in root_fd, and then moves the process into /native with
+ * chdir() and chroot().  Any failure terminates the process.
+ */
+void
+init(void)
+{
+	lxt_debug_open();
+	lxt_debug("lxt_init: executing native process");
+
+	/* Remember the real root so a child can fchroot() back to it. */
+	root_fd = open("/", O_RDONLY);
+	if (root_fd < 0) {
+		lxt_debug("lxt_init(): "
+		    "failed to open root directory: %s", strerror(errno));
+		exit(-1);
+	}
+
+	/* Entering /native avoids having to do any path mapping. */
+	if (chdir("/native") != 0) {
+		lxt_debug("lxt_init(): "
+		    "failed to chdir to /native: %s", strerror(errno));
+		exit(-1);
+	}
+
+	if (chroot("/native") != 0) {
+		lxt_debug("lxt_init(): "
+		    "failed to chroot to /native: %s", strerror(errno));
+		exit(-1);
+	}
+}
+
+/*
+ * Linux Thunking Interfaces - Client Side
+ */
+/* Guards lxt_door_fd; also held while the server is pinged and restarted. */
+static mutex_t lxt_door_lock = DEFAULTMUTEX;
+/* Descriptor for the thunk server door, or -1 when no door is open. */
+static int lxt_door_fd = -1;
+
+/*
+ * Exec the thunk server in the (forked) calling process.
+ *
+ * fifo_wr and fifo_rd are moved to LXT_SERVER_FIFO_WR_FD and
+ * LXT_SERVER_FIFO_RD_FD, LD_PRELOAD and LD_LIBRARY_PATH are removed from
+ * the environment, and LXT_SERVER_BINARY is exec'd via the brand system
+ * call. The function returns early if a dup()/dup2() fails; the caller
+ * treats any return from this function as a failure.
+ */
+static void
+lxt_server_exec(int fifo_wr, int fifo_rd)
+{
+ extern const char **environ;
+ char *nullist[] = { NULL };
+
+ lxt_debug("lxt_server_exec: server starting");
+
+ /*
+ * First we need to dup our fifos to the file descriptors
+ * the brand library is expecting them to be at.
+ */
+
+ /*
+ * Check if the write fifo needs to be moved aside: it currently
+ * occupies the slot that the read fifo is about to be dup2()ed onto.
+ */
+ if ((fifo_wr == LXT_SERVER_FIFO_RD_FD) &&
+ ((fifo_wr = dup(fifo_wr)) < 0))
+ return;
+
+ /* Likewise, check if the read fifo needs to be moved aside */
+ if ((fifo_rd == LXT_SERVER_FIFO_WR_FD) &&
+ ((fifo_rd = dup(fifo_rd)) < 0))
+ return;
+
+ /* Place each fifo on its expected descriptor unless already there. */
+ if ((fifo_wr != LXT_SERVER_FIFO_WR_FD) &&
+ (dup2(fifo_wr, LXT_SERVER_FIFO_WR_FD) < 0))
+ return;
+ if ((fifo_rd != LXT_SERVER_FIFO_RD_FD) &&
+ (dup2(fifo_rd, LXT_SERVER_FIFO_RD_FD) < 0))
+ return;
+ /* NOTE(review): the original descriptors are not closed after dup2(). */
+
+ /*
+ * We're about to execute a native Linux process.
+ * Since we've been loaded into a Solaris process with
+ * LD_PRELOAD and LD_LIBRARY_PATH we should clear these
+ * variables from the environment before calling exec.
+ */
+ (void) unsetenv("LD_PRELOAD");
+ (void) unsetenv("LD_LIBRARY_PATH");
+
+ /*
+ * Now we need to exec the thunk server process. This is a
+ * branded Linux process that will act as a doors server and
+ * service our requests to perform native Linux operations.
+ * Since we're currently running as a native Solaris process,
+ * to start up the server we'll use the brand system call to
+ * tell the kernel that the target of the exec will be a branded
+ * process.
+ */
+ lxt_debug("lxt_server_exec: execing as Linux process");
+ (void) syscall(SYS_brand, B_EXEC_BRAND,
+ LXT_SERVER_BINARY, nullist, environ);
+}
+
+
+/*
+ * Thread start routine that reaps the door server child process.
+ *
+ * arg carries the child's pid, cast to a pointer by the creator of the
+ * thread.  The exit status is collected and discarded.  Always returns
+ * NULL.
+ */
+static void *
+lxt_door_waitpid(void *arg)
+{
+	int	status;
+	pid_t	server_pid;
+
+	server_pid = (pid_t)(uintptr_t)arg;
+	(void) waitpid(server_pid, &status, 0);
+
+	return (NULL);
+}
+
+/*
+ * Create a uniquely named fifo under LXT_DOOR_DIR.
+ *
+ * Candidate names come from tempnam(); a name that already exists is
+ * discarded and another one is tried.  Returns the malloc'ed path of the
+ * new fifo (the caller must free() it), or NULL if no name could be
+ * generated or mkfifo() failed for a reason other than EEXIST.
+ */
+static char *
+lxt_door_mkfifo()
+{
+	char	*fifo_path;
+	int	mkfifo_errno;
+
+	while ((fifo_path = tempnam(LXT_DOOR_DIR, LXT_DOOR_PREFIX)) != NULL) {
+		/* We successfully created the fifo */
+		if (mkfifo(fifo_path, S_IWUSR | S_IRUSR) == 0)
+			return (fifo_path);
+
+		/* Sample errno before free() has any chance to disturb it. */
+		mkfifo_errno = errno;
+		free(fifo_path);
+
+		/* Only a name collision is worth retrying. */
+		if (mkfifo_errno != EEXIST)
+			return (NULL);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Start a new thunk server process and attach lxt_door_fd to its door.
+ *
+ * Two fifos are created.  fifo2 carries the door path to the child; fifo1
+ * lets the child signal that the door server is ready, and its path is
+ * reused as the door path.  The caller in this file holds lxt_door_lock.
+ *
+ * On success lxt_door_fd is replaced by the result of opening the door
+ * path (which may still be -1 if that open fails).  On failure every fifo
+ * created here is unlinked, its path string freed, and every descriptor
+ * opened here is closed; lxt_door_fd is left untouched.
+ */
+static void
+lxt_door_init()
+{
+	char		*fifo1_path = NULL, *fifo2_path = NULL;
+	char		fifo1_path_native[MAXPATHLEN];
+	int		fifo1_rd = -1, fifo1_wr = -1;
+	int		fifo2_rd = -1, fifo2_wr = -1;
+	int		junk;
+	pid_t		child_pid;
+	thread_t	tid;
+
+	lxt_debug("lxt_door_init: preparint to start server");
+
+	/* Create two new fifos. */
+	if (((fifo1_path = lxt_door_mkfifo()) == NULL) ||
+	    ((fifo2_path = lxt_door_mkfifo()) == NULL))
+		goto fail;
+
+	(void) snprintf(fifo1_path_native, sizeof (fifo1_path_native),
+	    "/native%s", fifo1_path);
+
+	/*
+	 * Open both fifos for reading and writing.  We have to open
+	 * the read side of the fifo first (because the write side will
+	 * fail to open if there is no reader) and we have to use the
+	 * O_NONBLOCK flag (because the read open will hang without it).
+	 */
+	if (((fifo1_rd = open(fifo1_path, O_RDONLY | O_NONBLOCK)) < 0) ||
+	    ((fifo1_wr = open(fifo1_path, O_WRONLY)) < 0) ||
+	    ((fifo2_rd = open(fifo2_path, O_RDONLY | O_NONBLOCK)) < 0) ||
+	    ((fifo2_wr = open(fifo2_path, O_WRONLY)) < 0))
+		goto fail;
+
+	/*
+	 * Now we have to close the read side of fifo1 and fifo2 and re-open
+	 * them without the O_NONBLOCK flag.  This is because we're using
+	 * the fifos for synchronization and when we actually try to read
+	 * from them we want to block.
+	 */
+	(void) close(fifo1_rd);
+	if ((fifo1_rd = open(fifo1_path, O_RDONLY)) < 0)
+		goto fail;
+	(void) close(fifo2_rd);
+	if ((fifo2_rd = open(fifo2_path, O_RDONLY)) < 0)
+		goto fail;
+
+	/*
+	 * Once fifo2 is opened no one will ever need to open it again
+	 * so delete it now.
+	 */
+	(void) unlink(fifo2_path);
+	free(fifo2_path);
+	fifo2_path = NULL;
+
+	/* Attempt to fork and start the door server */
+	lxt_debug("lxt_door_init: starting server");
+	switch (child_pid = fork1()) {
+	case -1:
+		/* fork1() failed. */
+		goto fail;
+	case 0:
+		/* Child process - new door server. */
+		(void) close(fifo1_rd);
+		(void) close(fifo2_wr);
+
+		/* Need to chroot back to the real root directory */
+		if (fchroot(root_fd) != 0) {
+			lxt_debug("lxt_server_exec: "
+			    "failed fchroot(\"/\"): %s", strerror(errno));
+			exit(-1);
+		}
+		(void) close(root_fd);
+
+		/* Start the server; this only returns on failure. */
+		lxt_server_exec(fifo1_wr, fifo2_rd);
+		lxt_debug("lxt_server_exec: server init failed");
+		exit(-1);
+		/*NOTREACHED*/
+	}
+	/* Parent process - door client. */
+
+	/*
+	 * fifo2 is used to send the door path to the child.
+	 * (We can't simply pass it via the address space since the
+	 * child will need to exec.)  We'll write the name of the door
+	 * file to fifo2 before we close the read end of the fifo2 so
+	 * that if the child has exited for some reason we won't get
+	 * a SIGPIPE.  Note that we're reusing the name of fifo1 as
+	 * the door path.  Also note that we've pre-pended /native
+	 * to the fifo/door path.  The reason is that we're chroot'ed
+	 * to /native, but when the thunking server executes it will
+	 * be chroot'ed back to the real root directory.
+	 */
+	(void) write(fifo2_wr,
+	    fifo1_path_native, strlen(fifo1_path_native) + 1);
+	(void) close(fifo2_wr);
+	(void) close(fifo2_rd);
+
+	/*
+	 * Start up a thread that will perform a waitpid() on the child
+	 * door server process.  We do this because if the calling
+	 * application that is using our interfaces is forking its own
+	 * children and using wait(), then it won't expect to see our
+	 * children.  We take advantage of the fact that if there are
+	 * wait() and waitpid() calls in progress at the same time
+	 * when a child exits, preference will be given to any
+	 * waitpid() calls that are explicitly waiting for that child.
+	 * There is of course a window of time where the child could
+	 * exit after we've forked it but before we've called waitpid()
+	 * where another wait() in this process could collect the result.
+	 * There's nothing we can really do to prevent this short of
+	 * stopping all the other threads in this process.
+	 */
+	(void) thr_create(NULL, 0,
+	    lxt_door_waitpid, (void *)(uintptr_t)child_pid, THR_DAEMON, &tid);
+
+	/*
+	 * fifo1 is used for the child process to signal us that the
+	 * door server is ready to take requests.
+	 */
+	(void) close(fifo1_wr);
+	(void) read(fifo1_rd, &junk, 1);
+	(void) close(fifo1_rd);
+
+	/* If there was a door that was open, close it now. */
+	if (lxt_door_fd >= 0)
+		(void) close(lxt_door_fd);
+
+	/*
+	 * The server should be started up by now and have fattach()ed the
+	 * door server to the fifo/door path, so if we re-open that path now
+	 * we should get a fd to the door server.
+	 */
+	lxt_door_fd = open(fifo1_path, O_RDWR);
+
+	lxt_debug("lxt_door_init: new server door = %d", lxt_door_fd);
+
+	/* We don't need the fifo/door anymore so delete it. */
+	(void) unlink(fifo1_path);
+	free(fifo1_path);
+	return;
+
+fail:
+	/*
+	 * Remove any fifo we created and release its tempnam() string;
+	 * previously the strings were leaked on this path.
+	 */
+	if (fifo1_path != NULL) {
+		(void) unlink(fifo1_path);
+		free(fifo1_path);
+	}
+	if (fifo2_path != NULL) {
+		(void) unlink(fifo2_path);
+		free(fifo2_path);
+	}
+	if (fifo1_rd != -1)
+		(void) close(fifo1_rd);
+	if (fifo1_wr != -1)
+		(void) close(fifo1_wr);
+	if (fifo2_rd != -1)
+		(void) close(fifo2_rd);
+	if (fifo2_wr != -1)
+		(void) close(fifo2_wr);
+}
+
+/*
+ * Issue a single door_call() on the current server door.
+ *
+ * door_arg	door arguments; the reply is returned through rbuf/rsize
+ * lock_held	non-zero if the caller already holds lxt_door_lock
+ *
+ * Returns 0 if the call succeeded and produced a reply buffer, or -1 if
+ * there is no door, the call failed, or the reply buffer is NULL.
+ */
+static int
+lxt_door_call(door_arg_t *door_arg, int lock_held)
+{
+	int door;
+
+	/* Take a snapshot of lxt_door_fd, locking only if we have to. */
+	if (lock_held) {
+		door = lxt_door_fd;
+	} else {
+		(void) mutex_lock(&lxt_door_lock);
+		door = lxt_door_fd;
+		(void) mutex_unlock(&lxt_door_lock);
+	}
+
+	if (door == -1) {
+		lxt_debug("lxt_door_call: no door available");
+		return (-1);
+	}
+
+	if (door_call(door, door_arg) != 0) {
+		lxt_debug("lxt_door_call: call failed");
+		return (-1);
+	}
+
+	if (door_arg->rbuf == NULL) {
+		lxt_debug("lxt_door_call: call returned NULL");
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Send a request to the thunk server, restarting the server if needed.
+ *
+ * The request is tried once.  If that fails the server is pinged while
+ * holding lxt_door_lock; when the ping fails or is not acknowledged a
+ * new server is started with lxt_door_init().  The request is then tried
+ * one more time.
+ *
+ * Returns 0 on success or -1 if the retry failed as well.
+ */
+static int
+lxt_door_request(door_arg_t *door_arg)
+{
+	lxt_server_arg_t	ping_msg, *ping_reply;
+	door_arg_t		ping_arg;
+	int			alive = 0;
+
+	/* First just try the door call. */
+	lxt_debug("lxt_door_request: calling server");
+	if (lxt_door_call(door_arg, 0) == 0)
+		return (0);
+
+	/* Build the ping message and the door arguments carrying it. */
+	bzero(&ping_msg, sizeof (ping_msg));
+	ping_msg.lxt_sa_op = LXT_SERVER_OP_PING;
+
+	bzero(&ping_arg, sizeof (ping_arg));
+	ping_arg.data_ptr = (char *)&ping_msg;
+	ping_arg.data_size = sizeof (ping_msg);
+
+	(void) mutex_lock(&lxt_door_lock);
+
+	/* Ping the doors server. */
+	lxt_debug("lxt_door_request: pinging server");
+	if (lxt_door_call(&ping_arg, 1) == 0) {
+		/*LINTED*/
+		ping_reply = (lxt_server_arg_t *)ping_arg.rbuf;
+		alive = ping_reply->lxt_sa_success;
+		(void) munmap(ping_arg.rbuf, ping_arg.rsize);
+	}
+
+	/* The server is not responding so start up a new one. */
+	if (!alive)
+		lxt_door_init();
+
+	(void) mutex_unlock(&lxt_door_lock);
+
+	/* Retry the original request */
+	lxt_debug("lxt_door_request: calling server, retry");
+	return (lxt_door_call(door_arg, 0));
+}
+
+/*
+ * Forward a host lookup to the thunk server and unpack the reply.
+ *
+ * op		server operation code placed in the request
+ * token	lookup key, token_len bytes long
+ * type		value passed to the server as lxt_gh_type
+ * result	caller-supplied hostent that receives the answer
+ * buf		caller-supplied storage (buf_len bytes) for the answer's data
+ * h_errnop	receives the host error code on failure
+ *
+ * Returns result on success. Returns NULL on failure with *h_errnop set;
+ * errno is cleared for local door failures and set to the server-reported
+ * value when the remote call itself failed.
+ */
+static struct hostent *
+lxt_gethost(int op, const char *token, int token_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ door_arg_t door_arg;
+ lxt_gethost_arg_t *data;
+ lxt_server_arg_t *request;
+ int request_size, errno_tmp, i;
+
+ lxt_debug("lxt_gethost: request caught");
+
+ /*
+ * The request carries the token followed by room for buf_len bytes
+ * of results. The "- 1" presumably accounts for a one-byte
+ * placeholder array in the argument structures -- TODO confirm.
+ */
+ request_size = sizeof (*request) + sizeof (*data) +
+ token_len + buf_len - 1;
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_gethost: calloc() failed");
+ *h_errnop = TRY_AGAIN;
+ return (NULL);
+ }
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = op;
+ data->lxt_gh_type = type;
+ data->lxt_gh_token_len = token_len;
+ data->lxt_gh_buf_len = buf_len;
+ /*
+ * NOTE(review): the storage area allocated above is
+ * token_len + buf_len bytes, yet token_len + token_len is recorded
+ * here. This looks like a typo -- verify against the server side.
+ */
+ data->lxt_gh_storage_len = token_len + token_len;
+ bcopy(token, &data->lxt_gh_storage[0], token_len);
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_gethost: door_call() failed");
+ /* Don't know what caused the error so clear errno. */
+ errno = 0;
+ *h_errnop = ND_SYSTEM;
+ free(request);
+ return (NULL);
+ }
+
+ free(request);
+
+ /*
+ * Defensive re-check: lxt_door_call() already reports failure when
+ * the reply buffer is NULL.
+ */
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_gethost: door_call() returned NULL");
+ /* Don't know what caused the error so clear errno. */
+ errno = 0;
+ *h_errnop = ND_SYSTEM;
+ return (NULL);
+ }
+
+ /* From here on "request" and "data" refer to the server's reply. */
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+ /*LINTED*/
+ data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_gethost: remote function call failed");
+ /* Save the error codes before the reply is unmapped. */
+ errno_tmp = request->lxt_sa_errno;
+ *h_errnop = data->lxt_gh_h_errno;
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+ errno = errno_tmp;
+ return (NULL);
+ }
+
+ /* Copy out the results and output buffer. */
+ bcopy(&data->lxt_gh_result, result, sizeof (*result));
+ bcopy(&data->lxt_gh_storage[token_len], buf, buf_len);
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+
+ /*
+ * Now go through the results and convert all offsets to pointers.
+ * The pointer fields arrive as values that LXT_OFFSET_TO_PTR()
+ * resolves against buf; both lists are walked until a NULL entry.
+ */
+ result->h_name = LXT_OFFSET_TO_PTR(result->h_name, buf);
+ result->h_aliases = LXT_OFFSET_TO_PTR(result->h_aliases, buf);
+ result->h_addr_list = LXT_OFFSET_TO_PTR(result->h_addr_list, buf);
+ for (i = 0; result->h_aliases[i] != NULL; i++) {
+ result->h_aliases[i] =
+ LXT_OFFSET_TO_PTR(result->h_aliases[i], buf);
+ }
+ for (i = 0; result->h_addr_list[i] != NULL; i++) {
+ result->h_addr_list[i] =
+ LXT_OFFSET_TO_PTR(result->h_addr_list[i], buf);
+ }
+
+ return (result);
+}
+
+/*
+ * Forward a service lookup to the thunk server and unpack the reply.
+ *
+ * op		server operation code placed in the request
+ * token	lookup key, token_len bytes long
+ * proto	optional protocol name; may be NULL
+ * result	caller-supplied servent that receives the answer
+ * buf		caller-supplied storage (buf_len bytes) for the answer's data
+ *
+ * Returns result on success or NULL on failure. errno is cleared for
+ * local door failures and set to the server-reported value when the
+ * remote call itself failed.
+ */
+static struct servent *
+lxt_getserv(int op, const char *token, const int token_len, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ door_arg_t door_arg;
+ lxt_getserv_arg_t *data;
+ lxt_server_arg_t *request;
+ int request_size, errno_tmp, i;
+
+ lxt_debug("lxt_getserv: request caught");
+
+ /*
+ * The request carries the token followed by room for buf_len bytes
+ * of results. The "- 1" presumably accounts for a one-byte
+ * placeholder array in the argument structures -- TODO confirm.
+ */
+ request_size = sizeof (*request) + sizeof (*data) +
+ token_len + buf_len - 1;
+ if ((request = calloc(1, request_size)) == NULL) {
+ lxt_debug("lxt_getserv: calloc() failed");
+ return (NULL);
+ }
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+ /* Initialize the server request. */
+ request->lxt_sa_op = op;
+ data->lxt_gs_token_len = token_len;
+ data->lxt_gs_buf_len = buf_len;
+ /*
+ * NOTE(review): the storage area allocated above is
+ * token_len + buf_len bytes, yet token_len + token_len is recorded
+ * here. This looks like a typo -- verify against the server side.
+ */
+ data->lxt_gs_storage_len = token_len + token_len;
+ bcopy(token, &data->lxt_gs_storage[0], token_len);
+
+ /*
+ * NOTE(review): strncpy() leaves lxt_gs_proto unterminated if proto
+ * fills the whole field -- confirm the server tolerates that.
+ */
+ bzero(data->lxt_gs_proto, sizeof (data->lxt_gs_proto));
+ if (proto != NULL)
+ (void) strncpy(data->lxt_gs_proto, proto,
+ sizeof (data->lxt_gs_proto));
+
+ /* Initialize door_call() arguments. */
+ bzero(&door_arg, sizeof (door_arg));
+ door_arg.data_ptr = (char *)request;
+ door_arg.data_size = request_size;
+
+ /* Call the doors server */
+ if (lxt_door_request(&door_arg) != 0) {
+ lxt_debug("lxt_getserv: door_call() failed");
+ /* Don't know what caused the error so clear errno */
+ errno = 0;
+ free(request);
+ return (NULL);
+ }
+ free(request);
+
+ /*
+ * Defensive re-check: lxt_door_call() already reports failure when
+ * the reply buffer is NULL.
+ */
+ if (door_arg.rbuf == NULL) {
+ lxt_debug("lxt_getserv: door_call() returned NULL");
+ /* Don't know what caused the error so clear errno */
+ errno = 0;
+ return (NULL);
+ }
+ /* From here on "request" and "data" refer to the server's reply. */
+ /*LINTED*/
+ request = (lxt_server_arg_t *)door_arg.rbuf;
+ /*LINTED*/
+ data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+ /* Check if the remote procedure call failed */
+ if (!request->lxt_sa_success) {
+ lxt_debug("lxt_getserv: remote function call failed");
+ /* Save the error code before the reply is unmapped. */
+ errno_tmp = request->lxt_sa_errno;
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+ errno = errno_tmp;
+ return (NULL);
+ }
+
+ /* Copy out the results and output buffer. */
+ bcopy(&data->lxt_gs_result, result, sizeof (*result));
+ bcopy(&data->lxt_gs_storage[token_len], buf, buf_len);
+ (void) munmap(door_arg.rbuf, door_arg.rsize);
+
+ /*
+ * Now go through the results and convert all offsets to pointers.
+ * See the comments in lxt_server_getserv() for why we need
+ * to subtract 1 from each offset.
+ */
+ result->s_name = LXT_OFFSET_TO_PTR(result->s_name, buf);
+ result->s_proto = LXT_OFFSET_TO_PTR(result->s_proto, buf);
+ result->s_aliases = LXT_OFFSET_TO_PTR(result->s_aliases, buf);
+ for (i = 0; result->s_aliases[i] != NULL; i++) {
+ result->s_aliases[i] =
+ LXT_OFFSET_TO_PTR(result->s_aliases[i], buf);
+ }
+
+ return (result);
+}
+
+/*
+ * Forward an openlog() request to the thunk server.
+ *
+ * ident	identification string for later messages; may be NULL, in
+ *		which case an empty identifier is sent
+ * logopt	openlog() option flags, passed through unchanged
+ * facility	default facility, passed through unchanged
+ *
+ * Failures are reported through lxt_debug() only; nothing is returned.
+ */
+static void
+lxt_openlog(const char *ident, int logopt, int facility)
+{
+	door_arg_t		door_arg;
+	lxt_openlog_arg_t	*data;
+	lxt_server_arg_t	*request;
+	int			request_size;
+
+	request_size = sizeof (*request) + sizeof (*data);
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_openlog: calloc() failed");
+		return;
+	}
+	/*LINTED*/
+	data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = LXT_SERVER_OP_OPENLOG;
+	data->lxt_ol_facility = facility;
+	data->lxt_ol_logopt = logopt;
+
+	/*
+	 * Callers may pass a NULL ident; strlcpy() would dereference it.
+	 * The request was zeroed by calloc(), so skipping the copy leaves
+	 * an empty identifier in place.
+	 */
+	if (ident != NULL) {
+		(void) strlcpy(data->lxt_ol_ident, ident,
+		    sizeof (data->lxt_ol_ident));
+	}
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_openlog: door_call() failed");
+		free(request);
+		return;
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_openlog: door_call() returned NULL");
+		return;
+	}
+
+	/* From here on "request" refers to the server's reply. */
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_openlog: remote function call failed");
+	}
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+/*
+ * Format a syslog message locally and forward it to the thunk server.
+ *
+ * priority	syslog priority, passed through unchanged
+ * message	printf-style format that may also contain syslog's "%m"
+ * va		arguments consumed by the format
+ *
+ * The request also carries our pid and, when /proc/<pid>/psinfo can be
+ * read, the program name found there.  Failures are reported through
+ * lxt_debug() only; nothing is returned.
+ */
+static void
+lxt_vsyslog(int priority, const char *message, va_list va)
+{
+	door_arg_t		door_arg;
+	lxt_syslog_arg_t	*data;
+	lxt_server_arg_t	*request;
+	psinfo_t		p;
+	char			procfile[64], *buf = NULL, *estr;
+	int			buf_len, buf_i, estr_len, request_size, procfd;
+	int			i, key, err_count = 0, tok_count = 0;
+	int			errno_backup = errno;
+
+	/*
+	 * Here we're going to use vsnprintf() to expand the message
+	 * string passed in before we hand it off to a Linux process.
+	 * Before we can call vsnprintf() we'll need to modify the
+	 * string to deal with certain special tokens.
+	 *
+	 * syslog() supports a special '%m' format token that expands to
+	 * the error message string associated with the current value
+	 * of errno.  Unfortunately if we pass this token to vsnprintf()
+	 * it will choke so we need to expand that token manually here.
+	 *
+	 * We also need to expand any "%%" characters into "%%%%".
+	 * The reason is that we'll be calling vsnprintf() which will
+	 * translate "%%%%" back to "%%", which is safe to pass to the
+	 * Linux version of syslog.  If we didn't do this then vsnprintf()
+	 * would translate "%%" to "%" and then the Linux syslog would
+	 * attempt to interpret "%" and whatever character follows it
+	 * as a printf format style token.
+	 */
+	for (key = i = 0; message[i] != '\0'; i++) {
+		if (!key && message[i] == '%') {
+			key = 1;
+			continue;
+		}
+		if (key && message[i] == '%')
+			tok_count++;
+		if (key && message[i] == 'm')
+			err_count++;
+		key = 0;
+	}
+
+	/* We found some tokens that we need to expand. */
+	if (err_count || tok_count) {
+		estr = strerror(errno_backup);
+		estr_len = strlen(estr);
+		assert(estr_len >= 2);
+
+		/*
+		 * Allocate a buffer to hold the expanded string.  Each
+		 * "%%" grows by two bytes and each "%m" is replaced by
+		 * estr_len bytes.
+		 */
+		buf_len = i + 1 +
+		    (tok_count * 2) + (err_count * (estr_len - 2));
+		if ((buf = calloc(1, buf_len)) == NULL) {
+			lxt_debug("lxt_vsyslog: calloc() failed");
+			return;
+		}
+
+		/* Finally, expand %% and %m. */
+		for (key = buf_i = i = 0; message[i] != '\0'; i++) {
+			assert(buf_i < buf_len);
+			if (!key && message[i] == '%') {
+				buf[buf_i++] = '%';
+				key = 1;
+				continue;
+			}
+			/*
+			 * For the two special tokens the copy starts at
+			 * buf_i - 1 so that it overwrites the '%' that
+			 * was emitted when the token was opened.
+			 */
+			if (key && message[i] == 'm') {
+				(void) bcopy(estr, &buf[buf_i - 1], estr_len);
+				buf_i += estr_len - 1;
+			} else if (key && message[i] == '%') {
+				(void) bcopy("%%%%", &buf[buf_i - 1], 4);
+				buf_i += 4 - 1;
+			} else {
+				buf[buf_i++] = message[i];
+			}
+			key = 0;
+		}
+		assert(buf[buf_i] == '\0');
+		assert(buf_i == (buf_len - 1));
+
+		/* Use the expanded buffer as our format string. */
+		message = buf;
+	}
+
+	/* Allocate the request we're going to send to the server */
+	request_size = sizeof (*request) + sizeof (*data);
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_vsyslog: calloc() failed");
+		/* Don't leak the expanded format string (NULL if unused). */
+		free(buf);
+		return;
+	}
+
+	/*LINTED*/
+	data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = LXT_SERVER_OP_SYSLOG;
+	data->lxt_sl_priority = priority;
+	data->lxt_sl_pid = getpid();
+	(void) vsnprintf(data->lxt_sl_message, sizeof (data->lxt_sl_message),
+	    message, va);
+
+	/* The intermediate buffer is no longer needed (NULL if unused). */
+	free(buf);
+
+	/*
+	 * Add the current program name into the request.  The path buffer
+	 * is sized for any pid and filled with snprintf(); the previous
+	 * PRFNSZ-sized buffer was too small for "/proc/<pid>/psinfo" once
+	 * the pid had more than two digits.
+	 */
+	(void) snprintf(procfile, sizeof (procfile), "/proc/%d/psinfo",
+	    (int)getpid());
+	if ((procfd = open(procfile, O_RDONLY)) >= 0) {
+		/* Only trust pr_fname if the whole structure was read. */
+		if (read(procfd, &p, sizeof (p)) == (ssize_t)sizeof (p)) {
+			(void) strncpy(data->lxt_sl_progname, p.pr_fname,
+			    sizeof (data->lxt_sl_progname));
+		}
+		(void) close(procfd);
+	}
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_vsyslog: door_call() failed");
+		free(request);
+		return;
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_vsyslog: door_call() returned NULL");
+		return;
+	}
+
+	/* From here on "request" refers to the server's reply. */
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_vsyslog: remote function call failed");
+	}
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+/*
+ * Forward a closelog() request to the thunk server.
+ *
+ * The request consists of nothing but the operation code.  Failures are
+ * reported through lxt_debug() only; nothing is returned.
+ */
+static void
+lxt_closelog(void)
+{
+	door_arg_t		args;
+	lxt_server_arg_t	*msg, *reply;
+	int			msg_size = sizeof (*msg);
+	int			rv;
+
+	if ((msg = calloc(1, msg_size)) == NULL) {
+		lxt_debug("lxt_closelog: calloc() failed");
+		return;
+	}
+	msg->lxt_sa_op = LXT_SERVER_OP_CLOSELOG;
+
+	/* Describe the outgoing message for door_call(). */
+	bzero(&args, sizeof (args));
+	args.data_ptr = (char *)msg;
+	args.data_size = msg_size;
+
+	/* The outgoing message is finished with whether or not this works. */
+	rv = lxt_door_request(&args);
+	if (rv != 0)
+		lxt_debug("lxt_closelog: door_call() failed");
+	free(msg);
+	if (rv != 0)
+		return;
+
+	if (args.rbuf == NULL) {
+		lxt_debug("lxt_closelog: door_call() returned NULL");
+		return;
+	}
+
+	/* Inspect the server's verdict, then release the reply mapping. */
+	/*LINTED*/
+	reply = (lxt_server_arg_t *)args.rbuf;
+	if (!reply->lxt_sa_success)
+		lxt_debug("lxt_closelog: remote function call failed");
+
+	(void) munmap(args.rbuf, args.rsize);
+}
+
+/*
+ * Adjust a privilege set so that applying it cannot remove "priv".
+ *
+ * op	operation the caller is about to perform with pset
+ * type	name of the target privilege set (used for the debug message)
+ * pset	set to adjust in place
+ * priv	privilege that has to be retained
+ *
+ * For PRIV_OFF the privilege is taken out of the set being turned off;
+ * for PRIV_SET it is added to the set being assigned.  Every other
+ * combination leaves pset alone.
+ */
+static void
+lxt_pset_keep(priv_op_t op, priv_ptype_t type, priv_set_t *pset,
+    const char *priv)
+{
+	int listed = (priv_ismember(pset, priv) == B_TRUE);
+
+	if (listed && (op == PRIV_OFF)) {
+		/* The caller wants to turn this privilege off; refuse. */
+		(void) priv_delset(pset, priv);
+		lxt_debug("lxt_pset_keep: "
+		    "preventing drop of \"%s\" from \"%s\" set",
+		    priv, type);
+	} else if (!listed && (op == PRIV_SET)) {
+		/* The new set would omit this privilege; put it back. */
+		(void) priv_addset(pset, priv);
+		lxt_debug("lxt_pset_keep: "
+		    "preventing drop of \"%s\" from \"%s\" set",
+		    priv, type);
+	}
+}
+
+/*
+ * Public interfaces - used by lx_nametoaddr
+ */
+/*
+ * Write one debug message to the debug log.
+ *
+ * msg	printf-style format string
+ * va	arguments consumed by the format
+ *
+ * Does nothing when debugging is disabled (debug_fd == -1).  Each message
+ * is prefixed with "pid/tid: " and ends in a newline.  A message too long
+ * for the buffer is truncated rather than dropped.
+ */
+void
+lxt_vdebug(const char *msg, va_list va)
+{
+	char	buf[LXT_MSG_MAXLEN + 1];
+	size_t	len;
+	int	rv, n;
+
+	if (debug_fd == -1)
+		return;
+
+	/* Prefix the message with pid/tid. */
+	n = snprintf(buf, sizeof (buf), "%u/%u: ", getpid(), thr_self());
+	if ((n < 0) || ((size_t)n >= sizeof (buf)))
+		return;
+
+	/* Format the message. */
+	if (vsnprintf(&buf[n], sizeof (buf) - n, msg, va) < 0)
+		return;
+
+	/*
+	 * Add a newline if there isn't one already.  If the message
+	 * filled the buffer, sacrifice its last character so the line is
+	 * still terminated.  len is at least n here, so len - 1 is valid.
+	 */
+	len = strlen(buf);
+	if (buf[len - 1] != '\n') {
+		if (len == sizeof (buf) - 1) {
+			buf[len - 1] = '\n';
+		} else {
+			buf[len++] = '\n';
+			buf[len] = '\0';
+		}
+	}
+
+	/* We retry in case of EINTR */
+	do {
+		rv = write(debug_fd, buf, len);
+	} while ((rv == -1) && (errno == EINTR));
+}
+
+/*
+ * printf-style front end to lxt_vdebug().
+ *
+ * Does nothing when debugging is disabled.  errno is preserved across
+ * the call so that callers can log freely from error paths.
+ */
+void
+lxt_debug(const char *msg, ...)
+{
+	va_list	ap;
+	int	saved_errno;
+
+	if (debug_fd != -1) {
+		saved_errno = errno;
+
+		va_start(ap, msg);
+		lxt_vdebug(msg, ap);
+		va_end(ap);
+
+		errno = saved_errno;
+	}
+}
+
+/*
+ * Address-to-host lookup serviced by the thunk server.
+ * The address bytes and their length are passed to lxt_gethost() as the
+ * lookup token; returns whatever lxt_gethost() returns.
+ */
+struct hostent *
+lxt_gethostbyaddr_r(const char *addr, int addr_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ lxt_debug("lxt_gethostbyaddr_r: request recieved");
+ return (lxt_gethost(LXT_SERVER_OP_ADDR2HOST,
+ addr, addr_len, type, result, buf, buf_len, h_errnop));
+}
+
+/*
+ * Name-to-host lookup serviced by the thunk server.
+ * The name, including its terminating NUL, is passed to lxt_gethost() as
+ * the lookup token with a type of 0; returns whatever lxt_gethost() returns.
+ */
+struct hostent *
+lxt_gethostbyname_r(const char *name,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+ lxt_debug("lxt_gethostbyname_r: request recieved");
+ return (lxt_gethost(LXT_SERVER_OP_NAME2HOST,
+ name, strlen(name) + 1, 0, result, buf, buf_len, h_errnop));
+}
+
+/*
+ * Port-to-service lookup serviced by the thunk server.
+ * The raw bytes of the int "port" are passed to lxt_getserv() as the
+ * lookup token; returns whatever lxt_getserv() returns.
+ */
+struct servent *
+lxt_getservbyport_r(int port, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ lxt_debug("lxt_getservbyport_r: request recieved");
+ return (lxt_getserv(LXT_SERVER_OP_PORT2SERV,
+ (const char *)&port, sizeof (int), proto, result, buf, buf_len));
+}
+
+/*
+ * Name-to-service lookup serviced by the thunk server.
+ * The name, including its terminating NUL, is passed to lxt_getserv() as
+ * the lookup token; returns whatever lxt_getserv() returns.
+ */
+struct servent *
+lxt_getservbyname_r(const char *name, const char *proto,
+ struct servent *result, char *buf, int buf_len)
+{
+ lxt_debug("lxt_getservbyname_r: request recieved");
+ return (lxt_getserv(LXT_SERVER_OP_NAME2SERV,
+ name, strlen(name) + 1, proto, result, buf, buf_len));
+}
+
+/*
+ * "Public" interfaces - used to override public existing interfaces
+ */
+#pragma weak _close = close
+/*
+ * Interposed close(): protect the descriptors this library depends on.
+ *
+ * Requests to close root_fd or an open debug_fd are ignored and reported
+ * as successful.  Every other descriptor is handed to the next "close"
+ * found by dlsym(RTLD_NEXT).
+ */
+int
+close(int fd)
+{
+	static fp1_t fp = NULL;
+
+	/*
+	 * Don't let the process close our file descriptor that points
+	 * back to the root directory.
+	 */
+	if (fd == root_fd)
+		return (0);
+
+	/*
+	 * Likewise for the debug log, but only when one is actually open:
+	 * debug_fd is -1 when debugging is disabled, and close(-1) has to
+	 * keep failing instead of being reported as a success.
+	 */
+	if ((debug_fd != -1) && (fd == debug_fd))
+		return (0);
+
+	if (fp == NULL)
+		fp = (fp1_t)dlsym(RTLD_NEXT, "close");
+	return (fp((uintptr_t)fd));
+}
+
+/*
+ * Interposed _setppriv(): stop the process from dropping the privileges
+ * this library relies on.
+ *
+ * The caller's set is copied, adjusted with lxt_pset_keep() for each
+ * privilege we need, and the adjusted copy is passed to the next
+ * "_setppriv" found by dlsym(RTLD_NEXT).  Returns that function's result.
+ */
+int
+_setppriv(priv_op_t op, priv_ptype_t type, const priv_set_t *pset)
+{
+	static fp3_t	next_setppriv = NULL;
+	const char	*needed[] = {
+		PRIV_PROC_EXEC,
+		PRIV_PROC_FORK,
+		PRIV_PROC_CHROOT,
+		PRIV_FILE_DAC_READ,
+		PRIV_FILE_DAC_WRITE,
+		PRIV_FILE_DAC_SEARCH
+	};
+	priv_set_t	*adjusted;
+	int		idx, result;
+
+	lxt_debug("_setppriv: request caught");
+
+	if (next_setppriv == NULL)
+		next_setppriv = (fp3_t)dlsym(RTLD_NEXT, "_setppriv");
+
+	/* Keep trying until a scratch privilege set can be allocated. */
+	for (;;) {
+		adjusted = priv_allocset();
+		if (adjusted != NULL)
+			break;
+		(void) sleep(1);
+	}
+
+	/* Work on a copy; the caller's set is const. */
+	priv_copyset(pset, adjusted);
+	for (idx = 0; idx < sizeof (needed) / sizeof (needed[0]); idx++)
+		lxt_pset_keep(op, type, adjusted, needed[idx]);
+
+	result = next_setppriv(op, (uintptr_t)type, (uintptr_t)adjusted);
+	priv_freeset(adjusted);
+
+	return (result);
+}
+
+/*
+ * Interposed openlog(): forwards the request to the thunk server via
+ * lxt_openlog().
+ */
+void
+openlog(const char *ident, int logopt, int facility)
+{
+ lxt_debug("openlog: request caught");
+ lxt_openlog(ident, logopt, facility);
+}
+
+/*
+ * Interposed syslog(): collects the variable arguments and forwards the
+ * message to the thunk server via lxt_vsyslog().
+ */
+void
+syslog(int priority, const char *message, ...)
+{
+ va_list va;
+
+ lxt_debug("syslog: request caught");
+ va_start(va, message);
+ lxt_vsyslog(priority, message, va);
+ va_end(va);
+}
+
+/*
+ * Interposed vsyslog(): forwards the message to the thunk server via
+ * lxt_vsyslog().
+ */
+void
+vsyslog(int priority, const char *message, va_list va)
+{
+ lxt_debug("vsyslog: request caught");
+ lxt_vsyslog(priority, message, va);
+}
+
+/*
+ * Interposed closelog(): forwards the request to the thunk server via
+ * lxt_closelog().
+ */
+void
+closelog(void)
+{
+ lxt_debug("closelog: request caught");
+ lxt_closelog();
+}
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
new file mode 100644
index 0000000000..a898b55613
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+SUNWprivate_1.1 {
+ global:
+ lxt_vdebug;
+ lxt_debug;
+ lxt_gethostbyaddr_r;
+ lxt_gethostbyname_r;
+ lxt_getservbyport_r;
+ lxt_getservbyname_r;
+ _close;
+ _setppriv;
+ openlog;
+ syslog;
+ vsyslog;
+ closelog;
+
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_thunk/i386/Makefile b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile
new file mode 100644
index 0000000000..c4b6c71027
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h
new file mode 100644
index 0000000000..b19c91873a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_THUNK_H
+#define _LX_THUNK_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct hostent *lxt_gethostbyaddr_r(const char *addr, int addr_len, int type,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop);
+struct hostent *lxt_gethostbyname_r(const char *name,
+ struct hostent *result, char *buf, int buf_len, int *h_errnop);
+struct servent *lxt_getservbyport_r(int port, const char *proto,
+ struct servent *result, char *buf, int buf_len);
+struct servent *lxt_getservbyname_r(const char *name, const char *proto,
+ struct servent *result, char *buf, int buf_len);
+
+void openlog(const char *ident, int logopt, int facility);
+void syslog(int priority, const char *message, ...);
+void closelog(void);
+
+void lxt_debug(const char *msg, ...);
+void lxt_vdebug(const char *msg, va_list va);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_THUNK_H */
diff --git a/usr/src/lib/brand/lx/netfiles/Makefile b/usr/src/lib/brand/lx/netfiles/Makefile
new file mode 100644
index 0000000000..47be18db0f
--- /dev/null
+++ b/usr/src/lib/brand/lx/netfiles/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+TXTS = etc_netconfig etc_default_nfs
+NFS_DFL = ../../../../cmd/fs.d/nfs/etc/nfs.dfl
+
+all: $(TXTS)
+
+include ../Makefile.lx
+
+lint:
+
+install: $(ROOTTXTS)
+
+clean:
+ -$(RM) etc_default_nfs
+
+clobber: clean
+ -$(RM) $(ROOTXMLDOCS) $(ROOTTXTS)
+
+etc_default_nfs: $(NFS_DFL)
+ $(RM) $@
+ $(CP) $(NFS_DFL) $@
diff --git a/usr/src/lib/brand/lx/netfiles/etc_netconfig b/usr/src/lib/brand/lx/netfiles/etc_netconfig
new file mode 100644
index 0000000000..56222abf56
--- /dev/null
+++ b/usr/src/lib/brand/lx/netfiles/etc_netconfig
@@ -0,0 +1,38 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# The "Network Configuration" File.
+#
+# Each entry is of the form:
+#
+# <network_id> <semantics> <flags> <protofamily> <protoname> \
+# <device> <nametoaddr_libs>
+#
+# For running solaris daemons in a linux zone we use this non-default
+# /etc/netconfig. The reason is that all name resolution has to be
+# done via linux name service interfaces. To do this we specify a custom
+# nametoaddr library that libnsl will invoke to do name service lookups.
+#
+udp tpi_clts v inet udp /dev/udp lx_nametoaddr.so.1
+tcp tpi_cots_ord v inet tcp /dev/tcp lx_nametoaddr.so.1
diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile
new file mode 100644
index 0000000000..4ae4f128b5
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/Makefile
@@ -0,0 +1,67 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+PROGS = lx_install lx_distro_install lx_init_zone
+SUBDIRS = distros
+XMLDOCS = config.xml platform.xml
+TEMPLATES = SUNWlx.xml SUNWlx26.xml
+
+all: $(PROGS)
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.lx
+
+all := TARGET= all
+install := TARGET= install
+clobber := TARGET= clobber
+
+POFILES= $(PROGS:%=%.po)
+POFILE= lx_zone.po
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(BUILDPO.pofiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+install: $(PROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) $(ROOTPROGS) $(SUBDIRS)
+
+lint:
+
+clean:
+ -$(RM) $(PROGS)
+
+clobber: clean $(SUBDIRS)
+ -$(RM) $(ROOTXMLDOCS) $(ROOTPROGS) $(ROOTTEMPLATES)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx.xml b/usr/src/lib/brand/lx/zone/SUNWlx.xml
new file mode 100644
index 0000000000..04c38873de
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx26.xml b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
new file mode 100644
index 0000000000..9bd8af4d92
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+ <attr name="kernel-version" type="string" value="2.6"/>
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml
new file mode 100644
index 0000000000..b28fbcd2c2
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/config.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN"
+ "file:///usr/share/lib/xml/dtd/brand.dtd.1">
+
+<brand name="lx">
+ <modname>lx_brand</modname>
+
+ <initname>/sbin/init</initname>
+ <login_cmd>/bin/login -h zone:%Z %u</login_cmd>
+ <forcedlogin_cmd>/bin/login -h zone:%Z -f %u</forcedlogin_cmd>
+ <user_cmd>/usr/bin/getent passwd %u</user_cmd>
+
+ <install>/usr/lib/brand/lx/lx_install %z %R</install>
+ <installopts>d:hsvX</installopts>
+ <boot>/usr/lib/brand/lx/lx_support boot %R %z</boot>
+ <halt>/usr/lib/brand/lx/lx_support halt %R %z</halt>
+ <verify_cfg>/usr/lib/brand/lx/lx_support verify</verify_cfg>
+ <verify_adm></verify_adm>
+ <postclone></postclone>
+ <postinstall></postinstall>
+
+ <privilege set="default" name="contract_event" />
+ <privilege set="default" name="contract_identity" />
+ <privilege set="default" name="contract_observer" />
+ <privilege set="default" name="file_chown" />
+ <privilege set="default" name="file_chown_self" />
+ <privilege set="default" name="file_dac_execute" />
+ <privilege set="default" name="file_dac_read" />
+ <privilege set="default" name="file_dac_search" />
+ <privilege set="default" name="file_dac_write" />
+ <privilege set="default" name="file_owner" />
+ <privilege set="default" name="file_setid" />
+ <privilege set="default" name="ipc_dac_read" />
+ <privilege set="default" name="ipc_dac_write" />
+ <privilege set="default" name="ipc_owner" />
+ <privilege set="default" name="net_bindmlp" />
+ <privilege set="default" name="net_icmpaccess" />
+ <privilege set="default" name="net_mac_aware" />
+ <privilege set="default" name="net_privaddr" />
+ <privilege set="default" name="proc_chroot" />
+ <privilege set="default" name="sys_audit" />
+ <privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
+ <privilege set="default" name="proc_owner" />
+ <privilege set="default" name="proc_setid" />
+ <privilege set="default" name="proc_taskid" />
+ <privilege set="default" name="sys_acct" />
+ <privilege set="default" name="sys_admin" />
+ <privilege set="default" name="sys_mount" />
+ <privilege set="default" name="sys_nfs" />
+ <privilege set="default" name="sys_resource" />
+
+ <privilege set="prohibited" name="dtrace_kernel" />
+ <privilege set="prohibited" name="proc_zone" />
+ <privilege set="prohibited" name="sys_config" />
+ <privilege set="prohibited" name="sys_devices" />
+ <privilege set="prohibited" name="sys_ip_config" />
+ <privilege set="prohibited" name="sys_linkdir" />
+ <privilege set="prohibited" name="sys_net_config" />
+ <privilege set="prohibited" name="sys_res_config" />
+ <privilege set="prohibited" name="sys_suser_compat" />
+ <privilege set="prohibited" name="xvm_control" />
+ <privilege set="prohibited" name="virt_manage" />
+
+ <privilege set="required" name="proc_exec" />
+ <privilege set="required" name="proc_fork" />
+ <privilege set="required" name="sys_mount" />
+</brand>
diff --git a/usr/src/lib/brand/lx/zone/distros/Makefile b/usr/src/lib/brand/lx/zone/distros/Makefile
new file mode 100644
index 0000000000..7b5a600c94
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../../Makefile.lx
+
+DISTROS = centos35.distro centos36.distro centos37.distro \
+ centos38.distro rhel35.distro rhel36.distro rhel37.distro \
+ rhel38.distro rhel_centos_common
+
+ROOTDISTRODIR= $(ROOTBRANDDIR)/distros
+ROOTDISTROS= $(DISTROS:%=$(ROOTDISTRODIR)/%)
+
+$(ROOTDISTROS) := FILEMODE = 444
+
+$(ROOTDISTRODIR):
+ $(INS.dir)
+
+$(ROOTDISTRODIR)/%: %
+ $(INS.file)
+
+install: $(ROOTDISTROS)
+
+lint clean all:
+
+clobber:
+ -$(RM) $(ROOTDISTROS)
+
diff --git a/usr/src/lib/brand/lx/zone/distros/centos35.distro b/usr/src/lib/brand/lx/zone/distros/centos35.distro
new file mode 100644
index 0000000000..cb5c2add9f
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos35.distro
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.5 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1118161135.08
+distro_version="3.5"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.5 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms=$delta_desktop_rpms
+delta_all_rpms=$delta_developer_rpms
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos36.distro b/usr/src/lib/brand/lx/zone/distros/centos36.distro
new file mode 100644
index 0000000000..8dbc4307ac
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos36.distro
@@ -0,0 +1,66 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.6 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1130453594.8
+distro_version="3.6"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.6 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms="$delta_desktop_rpms gd-progs"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos37.distro b/usr/src/lib/brand/lx/zone/distros/centos37.distro
new file mode 100644
index 0000000000..f8ac5e0fb1
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos37.distro
@@ -0,0 +1,65 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.7 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1144177644.47
+distro_version="3.7"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.7 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss"
+delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/centos38.distro b/usr/src/lib/brand/lx/zone/distros/centos38.distro
new file mode 100644
index 0000000000..22ae2e43b2
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/centos38.distro
@@ -0,0 +1,79 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the CentOS 3.8 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1155307611.42
+distro_version="3.8"
+set -A distro_cdorder 1 2 3
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the CentOS 3.8 deltas from the common cluster lists
+delta_miniroot_rpms=centos-release
+delta_core_rpms="centos-yumconf centos-yumcache yum"
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ expectk \
+ seamonkey \
+ seamonkey-chat \
+ seamonkey-mail \
+ seamonkey-nspr \
+ seamonkey-nss \
+ tcl-html \
+ tcllib"
+delta_developer_rpms="$delta_desktop_rpms \
+ gd-progs \
+ freetype-demos \
+ freetype-utils \
+ glibc-debug \
+ irb \
+ python-docs \
+ ruby-docs \
+ ruby-tcltk \
+ seamonkey-dom-inspector \
+ seamonkey-js-debugger \
+ seamonkey-devel \
+ seamonkey-nspr-devel \
+ seamonkey-nss-devel"
+delta_all_rpms="$delta_developer_rpms emacs-nox"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel35.distro b/usr/src/lib/brand/lx/zone/distros/rhel35.distro
new file mode 100644
index 0000000000..0b6b23ae52
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel35.distro
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 5 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order CDs holding the distribution must be installed in
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1115874580.003298
+distro_version="Update 5"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions.
+. ${distro_dir}/rhel_centos_common
+
+# Define the RHEL 3.5 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms=$delta_desktop_rpms
+delta_all_rpms="$delta_developer_rpms comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ krb5-server \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel36.distro b/usr/src/lib/brand/lx/zone/distros/rhel36.distro
new file mode 100644
index 0000000000..51c80832ff
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel36.distro
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 6 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order in which the CDs holding the distribution must be installed
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1127323691.616555
+distro_version="Update 6"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions (path quoted against splitting).
+. "${distro_dir}/rhel_centos_common"
+
+# Define the RHEL 3.6 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms=$delta_core_rpms
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss \
+ openoffice.org-style-gnome"
+delta_developer_rpms="$delta_desktop_rpms gd-progs"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel37.distro b/usr/src/lib/brand/lx/zone/distros/rhel37.distro
new file mode 100644
index 0000000000..2c3b81d82b
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel37.distro
@@ -0,0 +1,96 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 7 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order in which the CDs holding the distribution must be installed
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1141679045.364586
+distro_version="Update 7"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions (path quoted against splitting).
+. "${distro_dir}/rhel_centos_common"
+
+# Define the RHEL 3.7 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ mozilla \
+ mozilla-chat \
+ mozilla-dom-inspector \
+ mozilla-js-debugger \
+ mozilla-mail \
+ mozilla-nspr \
+ mozilla-nss"
+delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel38.distro b/usr/src/lib/brand/lx/zone/distros/rhel38.distro
new file mode 100644
index 0000000000..5255ac206a
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel38.distro
@@ -0,0 +1,109 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Installation information for the RHEL 3 Update 8 distribution disc set:
+#
+# + Serial number (as found in the disc set's .discinfo file)
+# + Version Name
+# + Order in which the CDs holding the distribution must be installed
+# + MB of disk space required to hold a full install of the distribution
+#
+distro_serial=1152738297.776178
+distro_version="Update 8"
+set -A distro_cdorder 2 3 4 1
+
+distro_mb_required=500
+
+# Include the common_<cluster>_* definitions (path quoted against splitting).
+. "${distro_dir}/rhel_centos_common"
+
+# Define the RHEL 3.8 deltas from the common cluster lists
+delta_miniroot_rpms=redhat-release
+delta_core_rpms=""
+delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config"
+delta_desktop_rpms="$delta_server_rpms \
+ seamonkey \
+ seamonkey-chat \
+ seamonkey-mail \
+ seamonkey-nspr \
+ seamonkey-nss"
+delta_developer_rpms="$delta_desktop_rpms \
+ gd-progs \
+ irb \
+ ruby-docs \
+ ruby-tcltk \
+ seamonkey-dom-inspector \
+ seamonkey-js-debugger \
+ seamonkey-devel \
+ seamonkey-nspr-devel \
+ seamonkey-nss-devel"
+delta_all_rpms="$delta_developer_rpms emacs-nox comps"
+
+# Define the final cluster lists for the installer
+distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms"
+distro_core_rpms="$common_core_rpms $delta_core_rpms"
+distro_server_rpms="$common_server_rpms $delta_server_rpms"
+distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms"
+distro_developer_rpms="$common_developer_rpms $delta_developer_rpms"
+distro_all_rpms="$common_all_rpms $delta_all_rpms"
+
+#
+# List of packages missing from the "WS" personality of this distribution
+# as compared to the "AS" personality.
+#
+distro_WS_missing="amanda-server \
+ caching-nameserver \
+ finger-server \
+ freeradius \
+ inews \
+ inn \
+ netdump-server \
+ openldap-servers \
+ pxe \
+ quagga \
+ radvd \
+ redhat-config-bind \
+ samba-swat \
+ tftp-server \
+ tux \
+ vsftpd \
+ ypserv \
+ arptables_jf \
+ mtx \
+ redhat-config-netboot"
+
+#
+# No packages are missing from the "ES" personality as compared to the "AS"
+# personality.
+#
+unset distro_ES_missing
+
+#
+# Identify the packages that need to be set aside for installation after
+# all the other packages are installed.
+#
+deferred_rpms="openoffice.org openoffice.org-i18n openoffice.org-libs"
diff --git a/usr/src/lib/brand/lx/zone/distros/rhel_centos_common b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common
new file mode 100644
index 0000000000..583264b723
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common
@@ -0,0 +1,1016 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This file contains the basic cluster contents shared by all of the
+# Linux distros we support. Each distro has its own .distro file that
+# expands on the basic cluster lists provided here.
+#
+
+#
+# Required packages for the install miniroot: the minimum set of packages a
+# system must have installed in order to run rpm (which is then used from
+# within the zone to perform the balance of the installation).
+#
+common_miniroot_rpms="SysVinit \
+ basesystem \
+ bash \
+ beecrypt \
+ bzip2-libs \
+ coreutils \
+ elfutils \
+ elfutils-libelf \
+ filesystem \
+ glibc \
+ glibc-common \
+ gpm \
+ initscripts \
+ iptables \
+ iptables-ipv6 \
+ kernel-utils \
+ laus-libs \
+ libacl \
+ libattr \
+ libgcc \
+ libtermcap \
+ ncurses \
+ pam \
+ popt \
+ rpm \
+ rpm-libs \
+ setup \
+ termcap \
+ zlib"
+
+#
+# This starts a listing of RPMs comprising a variety of install package options
+# for a distribution.
+#
+# The supported package clusters are:
+#
+# + core
+# + server
+# + desktop
+# + developer
+# + all
+#
+# The RPMs needed to install each cluster are listed in the shell variable
+#
+# distro_<level>_rpms
+#
+# This file provides "common_<level>_rpms", which are lists of the packages
+# in each cluster that are common to all distros.
+#
+# The package names are listed alphabetically for readability. rpm will
+# reorder the list to ensure that each package's dependencies are installed
+# before it is.
+#
+# Note: Since the distro_install script uses a regular expression to expand
+# RPM package names to filenames, there may be some tweaking required to
+# guarantee a unique match between a package name and a corresponding RPM
+# file on the install media.
+#
+# One such example below is the package "XFree86-4." The official name of
+# the package is "XFree86," but the regular expression in the script
+# matches that package name to the XFree86-100dpi-fonts and
+# XFree86-75dpi-fonts package RPMs in addition to the proper XFree86 RPM.
+# Therefore the "XFree86" package name was modified to be "XFree86-4",
+# which does result in a unique package name to RPM file match.
+#
+common_core_rpms="GConf2 \
+ Glide3 \
+ ORBit \
+ ORBit2 \
+ XFree86-Mesa-libGL \
+ XFree86-Mesa-libGLU \
+ XFree86-libs \
+ XFree86-libs-data \
+ Xaw3d \
+ ash \
+ at \
+ atk \
+ audiofile \
+ autofs \
+ bc \
+ binutils \
+ bonobo-activation \
+ bzip2 \
+ chkconfig \
+ compat-pwdb \
+ cpio \
+ cpp \
+ cracklib \
+ cracklib-dicts \
+ crontabs \
+ cups-libs \
+ cyrus-sasl \
+ cyrus-sasl-md5 \
+ db4 \
+ desktop-file-utils \
+ dev \
+ diffutils \
+ diskdumputils \
+ e2fsprogs \
+ ed \
+ ethtool \
+ expat \
+ file \
+ findutils \
+ finger \
+ fontconfig \
+ freetype \
+ ftp \
+ gail \
+ gawk \
+ gdbm \
+ gdk-pixbuf \
+ gettext \
+ glib \
+ glib2 \
+ glibc-headers \
+ glibc-kernheaders \
+ gmp \
+ gnupg \
+ grep \
+ groff \
+ gtk+ \
+ gtk2 \
+ gzip \
+ hesiod \
+ hwdata \
+ indexhtml \
+ info \
+ iproute \
+ iputils \
+ kernel \
+ kernel-BOOT \
+ krb5-libs \
+ krb5-workstation \
+ kudzu \
+ laus \
+ less \
+ libaio \
+ libart_lgpl \
+ libbonobo \
+ libcap \
+ libgcj \
+ libgcj-ssa \
+ libglade2 \
+ libgnomecanvas \
+ libjpeg \
+ libmng \
+ libogg \
+ libpng \
+ libpng10 \
+ libstdc++ \
+ libtiff \
+ libtool-libs \
+ libungif \
+ libusb \
+ libuser \
+ libvorbis \
+ libwnck \
+ libxml \
+ libxml2 \
+ libxml2-python \
+ libxslt \
+ linc \
+ lockdev \
+ logrotate \
+ losetup \
+ lsof \
+ lvm \
+ lynx \
+ m4 \
+ mailcap \
+ make \
+ man \
+ man-pages \
+ mingetty \
+ mkinitrd \
+ mkisofs \
+ mktemp \
+ modutils \
+ mount \
+ mtools \
+ nc \
+ net-snmp \
+ net-snmp-libs \
+ net-tools \
+ netdump \
+ newt \
+ nfs-utils \
+ nscd \
+ nss_db \
+ nss_ldap \
+ ntp \
+ ntsysv \
+ openldap \
+ openssh \
+ openssh-clients \
+ openssh-server \
+ openssl \
+ pango \
+ passwd \
+ patch \
+ pax \
+ pcre \
+ pdksh \
+ perl \
+ perl-CGI \
+ perl-DateManip \
+ perl-Filter \
+ perl-HTML-Parser \
+ perl-HTML-Tagset \
+ perl-Parse-Yapp \
+ perl-URI \
+ perl-XML-Dumper \
+ perl-XML-Encoding \
+ perl-XML-Grove \
+ perl-XML-Parser \
+ perl-XML-Twig \
+ perl-libwww-perl \
+ perl-libxml-enno \
+ perl-libxml-perl \
+ portmap \
+ procmail \
+ procps \
+ psacct \
+ psmisc \
+ pspell \
+ pygtk2 \
+ pygtk2-libglade \
+ python \
+ pyxf86config \
+ readline \
+ redhat-logos \
+ redhat-menus \
+ rhpl \
+ rpm-python \
+ rpmdb-redhat \
+ rsh \
+ rsync \
+ rusers \
+ rwho \
+ sed \
+ setarch \
+ sgml-common \
+ shadow-utils \
+ slang \
+ startup-notification \
+ sudo \
+ sysklogd \
+ syslinux \
+ tar \
+ tcl \
+ tcp_wrappers \
+ tcsh \
+ telnet \
+ time \
+ traceroute \
+ ttmkfdir \
+ tzdata \
+ units \
+ unix2dos \
+ unzip \
+ usermode \
+ utempter \
+ util-linux \
+ vim-common \
+ vim-minimal \
+ vixie-cron \
+ wget \
+ which \
+ words \
+ xinetd \
+ xml-common \
+ yp-tools \
+ ypbind \
+ zip"
+
+# Server cluster: everything in the core cluster plus the packages below.
+common_server_rpms="$common_core_rpms \
+ 4Suite \
+ MyODBC \
+ MySQL-python \
+ Omni \
+ Omni-foomatic \
+ PyXML \
+ VFlib2 \
+ XFree86-4 \
+ XFree86-base-fonts \
+ XFree86-font-utils \
+ XFree86-truetype-fonts \
+ XFree86-xauth \
+ XFree86-xdm \
+ XFree86-xfs \
+ acl \
+ alchemist \
+ amanda \
+ amanda-server \
+ arts \
+ aspell \
+ aspell-config \
+ at-spi \
+ authd \
+ bcel \
+ bind \
+ bind-chroot \
+ bind-libs \
+ bind-utils \
+ bitmap-fonts \
+ caching-nameserver \
+ chkfontpath \
+ commons-beanutils \
+ commons-collections \
+ commons-digester \
+ commons-logging \
+ commons-modeler \
+ compat-db \
+ compat-libstdc++ \
+ crypto-utils \
+ cup-v10k \
+ cups \
+ curl \
+ cyrus-sasl-gssapi \
+ cyrus-sasl-plain \
+ dhcp \
+ distcache \
+ distcache-devel \
+ esound \
+ expect \
+ fam \
+ finger-server \
+ foomatic \
+ freeradius \
+ gd \
+ ghostscript \
+ ghostscript-fonts \
+ gimp-print \
+ gnome-libs \
+ gnome-mime-data \
+ gnome-python2 \
+ gnome-python2-bonobo \
+ gnome-python2-canvas \
+ gnome-python2-gtkhtml2 \
+ gnome-vfs2 \
+ gnuplot \
+ gtkhtml2 \
+ htmlview \
+ httpd \
+ hwcrypto \
+ imap \
+ imap-utils \
+ imlib \
+ inews \
+ inn \
+ jakarta-regexp \
+ krb5-server \
+ krbafs \
+ libIDL \
+ libbonoboui \
+ libdbi \
+ libdbi-dbd-mysql \
+ libgnome \
+ libgnomeprint22 \
+ libgnomeprintui22 \
+ libgnomeui \
+ libgsf \
+ libole2 \
+ logwatch \
+ mailman \
+ mailx \
+ mod_auth_mysql \
+ mod_auth_pgsql \
+ mod_authz_ldap \
+ mod_perl \
+ mod_python \
+ mod_ssl \
+ mpage \
+ mtr \
+ mx \
+ mx4j \
+ mysql \
+ mysql-bench \
+ mysql-devel \
+ net-snmp-utils \
+ netdump-server \
+ newt-perl \
+ openldap-servers \
+ openssl-perl \
+ pam_krb5 \
+ perl-DBD-MySQL \
+ perl-DBD-Pg \
+ perl-DBI \
+ perl-DB_File \
+ perl-Digest-HMAC \
+ perl-Digest-SHA1 \
+ perl-Net-DNS \
+ perl-Time-HiRes \
+ php \
+ php-imap \
+ php-ldap \
+ php-mysql \
+ php-odbc \
+ php-pgsql \
+ pnm2ppa \
+ postfix \
+ postgresql-odbc \
+ pxe \
+ pyorbit \
+ qt \
+ qt-MySQL \
+ qt-ODBC \
+ quagga \
+ radvd \
+ rdist \
+ redhat-config-bind \
+ redhat-config-httpd \
+ redhat-config-printer \
+ redhat-config-printer-gui \
+ redhat-config-samba \
+ redhat-config-securitylevel \
+ redhat-config-securitylevel-tui \
+ redhat-config-services \
+ redhat-java-rpm-scripts \
+ redhat-switch-mail \
+ redhat-switch-mail-gnome \
+ rh-postgresql \
+ rh-postgresql-contrib \
+ rh-postgresql-docs \
+ rh-postgresql-jdbc \
+ rh-postgresql-libs \
+ rh-postgresql-python \
+ rh-postgresql-server \
+ rh-postgresql-tcl \
+ rh-postgresql-test \
+ rhdb-utils \
+ rsh-server \
+ rusers-server \
+ samba \
+ samba-client \
+ samba-common \
+ samba-swat \
+ sendmail \
+ sendmail-cf \
+ slocate \
+ spamassassin \
+ squid \
+ squirrelmail \
+ switchdesk \
+ sysreport \
+ telnet-server \
+ tftp-server \
+ tmpwatch \
+ tux \
+ unixODBC \
+ unixODBC-kde \
+ urw-fonts \
+ usermode-gtk \
+ vsftpd \
+ webalizer \
+ xalan-j \
+ xerces-j \
+ xinitrc \
+ ypserv"
+
+# Desktop cluster: everything in the server cluster plus the packages below.
+common_desktop_rpms="$common_server_rpms \
+ Canna-libs \
+ FreeWnn-libs \
+ Gtk-Perl \
+ ImageMagick \
+ ImageMagick-perl \
+ SDL \
+ XFree86-100dpi-fonts \
+ XFree86-75dpi-fonts \
+ XFree86-Xnest \
+ XFree86-Xvfb \
+ XFree86-doc \
+ XFree86-tools \
+ XFree86-twm \
+ a2ps \
+ am-utils \
+ amanda-client \
+ anacron \
+ apel-xemacs \
+ aumix \
+ authconfig \
+ authconfig-gtk \
+ autorun \
+ cdparanoia-alpha9.8 \
+ cdparanoia-libs-alpha9.8 \
+ cdrecord \
+ cipe \
+ ckermit \
+ comps-extras \
+ control-center \
+ ctags \
+ desktop-backgrounds-basic \
+ desktop-printing \
+ dialog \
+ docbook-dtds \
+ docbook-style-dsssl \
+ docbook-style-xsl \
+ docbook-utils \
+ docbook-utils-pdf \
+ dtach \
+ dvd+rw-tools \
+ dvdrecord \
+ eel2 \
+ elinks \
+ enscript \
+ eog \
+ evolution \
+ evolution-connector \
+ fetchmail \
+ file-roller \
+ firstboot \
+ fontilus \
+ gaim \
+ gconf-editor \
+ gdm \
+ gedit \
+ gftp \
+ ggv \
+ gimp \
+ gimp-data-extras \
+ gimp-perl \
+ gimp-print-cups \
+ gimp-print-plugin \
+ gimp-print-utils \
+ gnome-applets \
+ gnome-audio \
+ gnome-desktop \
+ gnome-games \
+ gnome-icon-theme \
+ gnome-media \
+ gnome-panel \
+ gnome-pilot \
+ gnome-python2-applet \
+ gnome-session \
+ gnome-spell \
+ gnome-system-monitor \
+ gnome-terminal \
+ gnome-themes \
+ gnome-user-docs \
+ gnome-utils \
+ gnome-vfs2-extras \
+ gnomemeeting \
+ gphoto2 \
+ gsl \
+ gstreamer \
+ gstreamer-plugins \
+ gstreamer-tools \
+ gtk-engines \
+ gtk2-engines \
+ gtkam \
+ gtkam-gimp \
+ gtkglarea \
+ gtkhtml3 \
+ guile \
+ hotplug \
+ hpijs \
+ hpoj \
+ htdig \
+ hwbrowser \
+ intltool \
+ itcl \
+ jadetex \
+ kdeaddons \
+ kdeartwork \
+ kdebase \
+ kdegames \
+ kdegraphics \
+ kdelibs \
+ kdemultimedia \
+ kdenetwork \
+ kdepim \
+ kdeutils \
+ lftp \
+ libao \
+ libf2c \
+ libgail-gnome \
+ libgal2 \
+ libghttp \
+ libglade \
+ libgtop2 \
+ libmrproject \
+ libpcap \
+ libraw1394 \
+ librsvg2 \
+ libsoup \
+ linuxdoc-tools \
+ lm_sensors \
+ magicdev \
+ metacity \
+ mikmod \
+ mrproject \
+ mrtg \
+ mutt \
+ nautilus \
+ nautilus-cd-burner \
+ nautilus-media \
+ netpbm \
+ netpbm-progs \
+ open \
+ openh323 \
+ openjade \
+ openldap-clients \
+ openmotif \
+ openmotif21 \
+ openoffice.org \
+ openoffice.org-i18n \
+ openoffice.org-libs \
+ openssh-askpass \
+ openssh-askpass-gnome \
+ parted \
+ passivetex \
+ perl-PDL \
+ perl-SGMLSpm \
+ perl-suidperl \
+ pilot-link \
+ printman \
+ psutils \
+ pwlib \
+ pyOpenSSL \
+ python-optik \
+ redhat-artwork \
+ redhat-config-date \
+ redhat-config-keyboard \
+ redhat-config-kickstart \
+ redhat-config-language \
+ redhat-config-mouse \
+ redhat-config-network \
+ redhat-config-network-tui \
+ redhat-config-nfs \
+ redhat-config-packages \
+ redhat-config-proc \
+ redhat-config-rootpassword \
+ redhat-config-soundcard \
+ redhat-config-users \
+ redhat-config-xfree86 \
+ redhat-logviewer \
+ rhn-applet \
+ rhnlib \
+ sane-backends \
+ sane-frontends \
+ screen \
+ scrollkeeper \
+ shapecfg \
+ sharutils \
+ sox \
+ star \
+ switchdesk-gnome \
+ switchdesk-kde \
+ sysstat \
+ talk \
+ tclx \
+ tetex \
+ tetex-afm \
+ tetex-dvips \
+ tetex-fonts \
+ tetex-latex \
+ tetex-xdvi \
+ tix \
+ tk \
+ tkinter \
+ transfig \
+ ttfprint \
+ umb-scheme \
+ up2date \
+ up2date-gnome \
+ usbutils \
+ uucp \
+ vim-enhanced \
+ vlock \
+ vnc \
+ vnc-server \
+ vorbis-tools \
+ vte \
+ w3c-libwww \
+ xchat \
+ xdelta \
+ xemacs \
+ xemacs-el \
+ xemacs-info \
+ xfig \
+ xhtml1-dtds \
+ xloadimage \
+ xmltex \
+ xmlto \
+ xmms \
+ xpdf \
+ xsane \
+ xsane-gimp \
+ xscreensaver \
+ xsri \
+ xterm \
+ yelp \
+ zsh"
+
+# Developer cluster: everything in the desktop cluster plus the packages below.
+common_developer_rpms="$common_desktop_rpms \
+ ElectricFence \
+ GConf2-devel \
+ ORBit-devel \
+ ORBit2-devel \
+ SDL-devel \
+ XFree86-devel \
+ ant \
+ ant-libs \
+ arts-devel \
+ at-spi-devel \
+ atk-devel \
+ audiofile-devel \
+ autoconf \
+ autoconf213 \
+ automake \
+ automake14 \
+ automake15 \
+ bison \
+ blas \
+ bonobo-activation-devel \
+ bug-buddy \
+ byacc \
+ cdecl \
+ cproto \
+ crash \
+ cscope \
+ cups-devel \
+ cvs \
+ ddd \
+ dejagnu \
+ dev86 \
+ diffstat \
+ doxygen \
+ eel2-devel \
+ emacs \
+ emacs-el \
+ emacs-leim \
+ esound-devel \
+ flex \
+ fontconfig-devel \
+ freetype-devel \
+ gail-devel \
+ gcc \
+ gcc-c++ \
+ gcc-c++-ssa \
+ gcc-g77 \
+ gcc-g77-ssa \
+ gcc-gnat \
+ gcc-java \
+ gcc-java-ssa \
+ gcc-objc \
+ gcc-objc-ssa \
+ gcc-ssa \
+ gd-devel \
+ gdb \
+ gdk-pixbuf-devel \
+ gdk-pixbuf-gnome \
+ glade2 \
+ glib-devel \
+ glib2-devel \
+ glibc-devel \
+ glibc-profile \
+ glibc-utils \
+ gnome-desktop-devel \
+ gnome-libs-devel \
+ gnome-vfs2-devel \
+ gperf \
+ gtk+-devel \
+ gtk-doc \
+ gtk2-devel \
+ gtkhtml2-devel \
+ httpd-devel \
+ im-sdk \
+ imlib-devel \
+ indent \
+ jaf \
+ javamail \
+ joe \
+ jpackage-utils \
+ junit \
+ kdebase-devel \
+ kdegraphics-devel \
+ kdelibs-devel \
+ kdenetwork-devel \
+ kdepim-devel \
+ kdesdk \
+ kdesdk-devel \
+ kdeutils-devel \
+ kdevelop \
+ kdoc \
+ kernel-doc \
+ kernel-source \
+ lam \
+ lapack \
+ lha \
+ libIDL-devel \
+ libacl-devel \
+ libart_lgpl-devel \
+ libattr-devel \
+ libbonobo-devel \
+ libbonoboui-devel \
+ libgcc-ssa \
+ libgcj-devel \
+ libgcj-ssa-devel \
+ libglade2-devel \
+ libgnat \
+ libgnome-devel \
+ libgnomecanvas-devel \
+ libgnomeprint22-devel \
+ libgnomeprintui22-devel \
+ libgnomeui-devel \
+ libjpeg-devel \
+ libmng-devel \
+ libmudflap \
+ libmudflap-devel \
+ libobjc \
+ libole2-devel \
+ libpng-devel \
+ librsvg2-devel \
+ libstdc++-devel \
+ libstdc++-ssa \
+ libstdc++-ssa-devel \
+ libtiff-devel \
+ libtool \
+ libungif-devel \
+ libxml2-devel \
+ libxslt-devel \
+ linc-devel \
+ ltrace \
+ memprof \
+ nasm \
+ ncurses-devel \
+ nedit \
+ netpbm-devel \
+ openmotif-devel \
+ oprofile \
+ pango-devel \
+ patchutils \
+ pcre-devel \
+ perl-CPAN \
+ perl-Crypt-SSLeay \
+ pilot-link-devel \
+ pkgconfig \
+ pstack \
+ pygtk2-devel \
+ python-devel \
+ python-tools \
+ qt-designer \
+ qt-devel \
+ rcs \
+ redhat-rpm-config \
+ rpm-build \
+ ruby \
+ ruby-libs \
+ ruby-mode \
+ sane-backends-devel \
+ sip \
+ sip-devel \
+ splint \
+ startup-notification-devel \
+ strace \
+ swig \
+ texinfo \
+ tora \
+ vim-X11 \
+ vte-devel \
+ zlib-devel"
+
+# "all" cluster: everything in the developer cluster plus the packages below.
+# Every entry but the last ends in a backslash so the value stays on one line.
+common_all_rpms="$common_developer_rpms \
+ Canna \
+ FreeWnn \
+ ImageMagick-c++-5.5.6 \
+ Wnn6-SDK \
+ ami \
+ amtu \
+ anaconda \
+ anaconda-help \
+ anaconda-images \
+ anaconda-product \
+ anaconda-runtime \
+ apmd \
+ arptables_jf \
+ attr \
+ bg5ps \
+ bitmap-fonts-cjk \
+ bogl \
+ bogl-bterm \
+ bootparamd \
+ booty \
+ bridge-utils \
+ busybox \
+ busybox-anaconda \
+ compat-gcc \
+ compat-gcc-c++ \
+ compat-glibc-7.x \
+ compat-libstdc++-devel \
+ compat-slang \
+ db4-java \
+ db4-utils \
+ dbskkd-cdb \
+ desktop-backgrounds-extra \
+ devlabel \
+ dhclient \
+ dietlibc \
+ dos2unix \
+ dosfstools \
+ dump \
+ eject \
+ emacspeak \
+ ethereal \
+ ethereal-gnome \
+ fbset \
+ festival \
+ grub \
+ h2ps \
+ hdparm \
+ ipsec-tools \
+ irda-utils \
+ iscsi-initiator-utils \
+ isdn4k-utils \
+ jfsutils \
+ jisksp14 \
+ jisksp16 \
+ jwhois \
+ kappa20 \
+ kbd \
+ kernel-pcmcia-cs \
+ knm_new \
+ kon2 \
+ kon2-fonts \
+ libtabe \
+ libwvstreams \
+ lilo \
+ linuxwacom \
+ lslk \
+ mdadm \
+ mgetty \
+ minicom \
+ mkbootdisk \
+ mt-st \
+ mtx \
+ nano \
+ ncompress \
+ net-snmp-perl \
+ netconfig \
+ nhpf \
+ nmap \
+ octave \
+ openssl096b \
+ pam_passwdqc \
+ pam_smb \
+ pinfo \
+ ppp \
+ prelink \
+ psgml \
+ pvm \
+ quota \
+ rdate \
+ rdesktop \
+ redhat-config-netboot \
+ rhgb \
+ rmt \
+ rootfiles \
+ rp-pppoe \
+ schedutils \
+ setserial \
+ setuptool \
+ sg3_utils \
+ skkdic \
+ skkinput \
+ specspo \
+ stunnel \
+ tcpdump \
+ tftp \
+ tn5250 \
+ tsclient \
+ vconfig \
+ wireless-tools \
+ wvdial \
+ x3270 \
+ x3270-text \
+ x3270-x11 \
+ xcin"
diff --git a/usr/src/lib/brand/lx/zone/lx_distro_install.ksh b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh
new file mode 100644
index 0000000000..7c4e89dd25
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh
@@ -0,0 +1,2772 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This script is called from /usr/lib/brand/lx/lx_install.
+#
+# options passed down from lx_install:
+# -z $ZONENAME
+# -r $LINUX_ROOT
+#
+# options passed down from zoneadm -z <zone-name> install
+# -d <Linux-archives-dir>
+# [core | server | desktop | development | all]
+#
+# The desktop cluster will be installed by default.
+#
+
+# Only search /bin, /usr/bin and /usr/sbin for commands.
+export PATH=/bin:/usr/bin:/usr/sbin
+
+# Text domain exported for the gettext calls that build the messages below.
+export TEXTDOMAIN="SUNW_OST_OSCMD"
+
+# Echo the passed arguments to file descriptor 2, but only when the shell
+# variable "logfile" is set; otherwise do nothing and return non-zero.
+log()
+{
+	[[ -n $logfile ]] || return 1
+	echo "$@" >&2
+}
+
+#
+# printf()-style message helper.  The first argument is the format string
+# and the remaining arguments are its parameters.  The formatted message is
+# always written to stdout and, when "logfile" is set, to file descriptor 2
+# as well.
+#
+screenlog()
+{
+	typeset fmt="$1"
+	shift
+
+	printf "$fmt\n" "$@"
+
+	[[ -n $logfile ]] || return 1
+	printf "$fmt\n" "$@" >&2
+}
+
+# Echo the passed text only when "verbose_mode" is set; in that case it is
+# also echoed to file descriptor 2 if "logfile" is set.
+verbose()
+{
+	[[ -n $verbose_mode ]] || return 1
+
+	echo "$@"
+	[[ -n $logfile ]] && echo "$@" >&2
+}
+
+#
+# Echo the passed text to the screen only when "verbose_mode" is set, but
+# echo it to file descriptor 2 whenever "logfile" is set.
+#
+verboselog()
+{
+	if [[ -n $verbose_mode ]]; then
+		echo "$@"
+	fi
+
+	[[ -n $logfile ]] || return 1
+	echo "$@" >&2
+}
+
+#
+# Localized message templates.
+#
+# Each variable below is assigned once from gettext and holds a
+# printf()-style format string; the "%s" placeholders are filled in when the
+# template is used as a format argument (e.g. screenlog "$no_space" "$dir").
+# Where an assignment is split with a trailing backslash, the continuation
+# line must start in the first column so no whitespace follows the "=".
+#
+bad_rpmdir=$(gettext "'%s' is not a valid RPM directory!")
+
+mb_req=$(gettext "(%s MB required, %s MB available)")
+no_space=$(gettext "Not enough free space available in '%s'")
+
+inst_clust=$(gettext "Installing cluster '%s'")
+unknown_clust=$(gettext "ERROR: Unknown cluster name: '%s'")
+
+unknown_media=$(gettext "Unknown or unreadable media loaded in %s")
+
+eject_fail=$(gettext "Attempt to eject '%s' failed.")
+
+lofi_failed=$(gettext "Attempt to add '%s' as lofi device FAILED.")
+lofs_failed=$(gettext "Attempt to lofs mount '%s' on '%s' FAILED.")
+
+media_spec=$(gettext "the provided media (%s)")
+
+distro_mediafail=\
+$(gettext "Attempt to determine Linux distribution from\n %s FAILED.")
+
+mini_bootfail=$(gettext "Attempt to boot miniroot for zone '%s' FAILED.")
+mini_copyfail=$(gettext "Attempt to copy miniroot for zone '%s' FAILED.")
+mini_initfail=$(gettext "Attempt to initialize miniroot for zone '%s' FAILED.")
+mini_instfail=$(gettext "Attempt to install RPM '%s' to miniroot FAILED.")
+mini_mediafail=$(gettext "Install of zone '%s' miniroot from\n %s FAILED.")
+mini_setfail=$(gettext "Attempt to setup miniroot for zone '%s' FAILED.")
+
+mini_mntfsfail=\
+$(gettext "Attempt to mount miniroot filesystems for zone '%s' FAILED.")
+
+rpm_initfail=\
+$(gettext "Attempt to initialize RPM database for zone '%s' FAILED.")
+
+symlink_failed=$(gettext "Attempt to symbolically link '%s' to '%s' FAILED.")
+
+discinfo_nofile=$(gettext "ERROR: Discinfo file '%s' not found!")
+discinfo_notreadable=$(gettext "ERROR: Discinfo file '%s': not readable!")
+discinfo_wrongarch=\
+$(gettext "ERROR: '%s': disc architecture is '%s'; install requires 'i386'!")
+
+wrong_serial=$(gettext "Incorrect serial number found on provided %s.")
+wrong_ser_expect=$(gettext " (found #%s, expected #%s)")
+
+wrong_cd=$(gettext "Incorrect CD inserted (found %s, wanted %s)")
+
+zone_initrootfail=\
+$(gettext "Attempt to initialize root filesystem for zone '%s' FAILED.")
+
+zone_haltfail=$(gettext "Unable to halt zone '%s'!")
+zone_instfail=$(gettext "Install of zone '%s' from '%s' FAILED '%s'.")
+zone_mediafail=$(gettext "Install of zone '%s' from\n %s FAILED.")
+
+zone_rootfail=\
+$(gettext "ERROR: The specified zone root directory '%s' could not be created.")
+zone_rootsub=\
+$(gettext "ERROR: The specified zone root subdirectory '%s' does not exist.")
+
+mk_mntfail=$(gettext "Could not create the mount directory '%s'")
+mountfail=$(gettext "Mount of '%s' on '%s' FAILED.")
+
+insert_discmsg=\
+$(gettext "Please insert %s, or a\n %s DVD in the removable media")
+
+mount_proper_iso1=$(gettext "Please mount the ISO for %s or a")
+mount_proper_iso2=$(gettext "%s DVD on device '%s'")
+
+silent_nodisc=$(gettext "ERROR: Cannot install from CDs in silent mode.")
+silent_nolofi=\
+$(gettext "ERROR: Cannot install from lofi-based CD ISOs in silent mode.")
+
+install_msg=$(gettext "Installing zone '%s' from\n %s.")
+install_ndiscs=\
+$(gettext "You will need CDs 1 - %s (or the equivalent DVD) to")
+install_nisos=\
+$(gettext "You will need ISO images representing CDs 1 - %s (or the equivalent")
+
+locate_npkgs=$(gettext "Attempting to locate %s packages...")
+
+install_one_rpm=$(gettext "Installing 1 %spackage.")
+install_nrpms_few=\
+$(gettext "Installing %s %spackages; this may take a few minutes...")
+install_nrpms_several=\
+$(gettext "Installing %s %spackages; this may take several minutes...")
+
+install_longwait=\
+$(gettext "NOTE: There may be a long delay before you see further output.")
+
+install_defmkfail=$(gettext "Could not create the temporary directory '%s'")
+install_defcpfail=$(gettext "Could not make a local copy of deferred RPM '%s'")
+install_dist=$(gettext "Installing distribution '%s'...")
+install_zonefail=$(gettext "Attempt to install zone '%s' FAILED.")
+
+no_distropath=$(gettext "ERROR: Distribution path '%s' doesn't exist.")
+
+install_done=$(gettext "Installation of %s to zone\n '%s' completed %s.")
+install_failed=$(gettext "Installation of %s to zone\n '%s' FAILED %s.")
+
+eject_final_msg=\
+$(gettext "Would you like the system to eject the %sinstall %s when")
+eject_final_prompt=$(gettext "installation of '%s' is complete? (%s)")
+eject_final_status=$(gettext "The %sinstall %s %s be ejected.")
+
+#
+# Get the device underlying a specified mounted file system and return it in
+# the shell variable "mount_dev"
+#
+# Returns 0 on success, 1 on failure.
+#
+get_mountdev()
+{
+	typeset mount_dir="$1"
+	typeset device
+	unset mount_dev
+
+	#
+	# Keep only the df output lines that begin with "/"; if there are
+	# none (or df fails) there is nothing to report.
+	#
+	device=$( { df -k "$mount_dir" | egrep "^/" ; } 2>/dev/null ) ||
+	    return 1
+
+	# The first whitespace-separated field is taken as the device.
+	mount_dev=$(echo $device | awk -e '{print $1}' 2>/dev/null)
+
+	# Anything that does not start with "/" is rejected.
+	if [[ "$mount_dev" != /* ]]; then
+		unset mount_dev
+		return 1
+	fi
+
+	return 0
+}
+
+#
+# Get the directory name a specified device is mounted as and return it in
+# the shell variable "mount_dir"
+#
+# Returns 0 on success, 1 on failure.
+#
+get_mountdir()
+{
+	typeset mount_dev="$1"
+	typeset dir
+	unset mount_dir
+
+	# Only block devices are looked up.
+	if [[ ! -b "$mount_dev" ]]; then
+		return 1
+	fi
+
+	#
+	# Keep only the df output lines that begin with "/"; if there are
+	# none (or df fails) there is nothing to report.
+	#
+	dir=$( { df -k "$mount_dev" | egrep "^/" ; } 2>/dev/null ) ||
+	    return 1
+
+	# The sixth whitespace-separated field is taken as the mount point.
+	mount_dir=$(echo $dir | awk -e '{print $6}' 2>/dev/null)
+
+	# Anything that does not start with "/" is rejected.
+	if [[ "$mount_dir" != /* ]]; then
+		unset mount_dir
+		return 1
+	fi
+
+	return 0
+}
+
+#
+# Check the free disk space of the passed filesystem against the passed
+# argument.
+#
+# Returns 0 on success, 1 on failure.
+#
+check_mbfree()
+{
+	typeset dir="$1"
+	typeset mb_required=$2
+	typeset mbfree
+
+	#
+	# Fetch the df data line(s) for the partition containing "$dir".
+	#
+	# The assignment is kept separate from the typeset above: the exit
+	# status of "typeset var=$(cmd)" is that of typeset, which would hide
+	# a df failure from the "|| return 1" check.
+	#
+	mbfree=$( { LC_ALL=C df -k "$dir" | \
+	    egrep -v Filesystem ; } 2>/dev/null ) || return 1
+
+	# The fourth df -k field is the available space in KB.
+	mbfree=$(echo $mbfree | awk -e '{print $4}' 2>/dev/null)
+
+	# Convert KB to MB before comparing against the requirement.
+	((mbfree /= 1024))
+	if ((mbfree < mb_required)); then
+		#
+		# NOTE(review): the message names $zoneroot rather than the
+		# "$dir" that was actually checked -- confirm callers always
+		# pass a path inside the zone root.
+		#
+		screenlog "$no_space" "$zoneroot"
+		screenlog "$mb_req" "$mb_required" "$mbfree"
+		return 1
+	fi
+	return 0
+}
+
+#
+# Find packages by attempting to expand passed RPM names to their full filenames
+# in the passed RPM directory.
+#
+# Arguments:
+#
+# Argument 1: Path to mounted install media
+# Arguments [2 - n]: RPM names to process
+#
+# The expanded filenames are returned in the shell array "rpms_found."
+#
+# For example:
+#
+# find_packages /mnt/iso dev kernel tetex redhat-menus
+#
+# would return something like:
+#
+# rpms_found[0]: dev-3.3.12.3-1.centos.0.i386.rpm
+# rpms_found[1]: kernel-2.4.21-32.EL.i586.rpm
+# rpms_found[2]: tetex-1.0.7-67.7.i386.rpm
+# rpms_found[3]: redhat-menus-0.39-1.noarch.rpm
+#
+# The routine returns 0 on success, 1 on an error.
+#
+find_packages()
+{
+	typeset found=0
+	typeset left=0
+
+	typeset rpmdir="$1/$rd_rpmdir"
+	typeset curdir=${PWD:=$(pwd)}
+
+	typeset arch
+	typeset procinfo
+	typeset rpmglob
+	typeset rpmfile
+
+	# Results: names that were matched, and names that were not.
+	unset rpms_found
+	unset rpms_left
+
+	shift
+
+	#
+	# All globbing below is relative to the RPM directory.  If we cannot
+	# get there, bail out rather than matching "*.rpm" against whatever
+	# directory the caller happened to be in.
+	#
+	if ! cd "$rpmdir"; then
+		screenlog "$bad_rpmdir" "$rpmdir"
+		return 1
+	fi
+
+	# An unexpanded glob means the directory holds no RPMs at all.
+	typeset rpmcheck="$(echo *.rpm)"
+
+	if [[ "$rpmcheck" = "*.rpm" ]]; then
+		screenlog "$bad_rpmdir" "$rpmdir"
+		cd "$curdir"
+		return 1
+	fi
+
+	#
+	# If the miniroot is booted, and the archs list isn't already set,
+	# ask the zone's rpm command for the list of compatible architectures.
+	#
+	if [[ -n $miniroot_booted && -z $archs ]]; then
+		procinfo=$(zlogin "$zonename" /bin/rpm --showrc | \
+		    grep "^compatible archs")
+
+		[[ $? -eq 0 ]] &&
+		    archs=$(echo $procinfo | sed 's/^compatible archs : //')
+
+		[[ -n $archs ]] &&
+		    log "RPM-reported compatible architectures: $archs"
+	fi
+
+	#
+	# Either the miniroot isn't booted or asking rpm for the information
+	# failed for some reason, so make some reasonable assumptions.
+	#
+	if [[ -z $archs ]]; then
+		procinfo=$(LC_ALL=C psrinfo -vp | grep family)
+
+		#
+		# Check for additional processor capabilities
+		#
+		if [[ "$procinfo" = *" family 6 "* ||
+		    "$procinfo" = *" family 15 "* ||
+		    "$procinfo" = *" family 16 "* ||
+		    "$procinfo" = *" family 17 "* ]]; then
+			if [[ "$procinfo" = *AuthenticAMD* ]]; then
+				#
+				# Linux gives "athlon" packages precedence
+				# over "i686" packages, so duplicate that
+				# here.
+				#
+				archs="athlon i686"
+			else
+				archs="i686"
+			fi
+		fi
+
+		archs="$archs i586 i486 i386 noarch"
+
+		log "Derived compatible architectures: $archs"
+	fi
+
+	verboselog "RPM source directory:\n \"$rpmdir\"\n"
+
+	if [[ $# -eq 1 ]]; then
+		msg=$(gettext "Attempting to locate 1 package...")
+		screenlog "$msg"
+	else
+		screenlog "$locate_npkgs" "$#"
+	fi
+
+	for rpm in "$@"; do
+		#
+		# Search for the appropriate RPM, walking the compatible
+		# architecture list in "archs" in order and stopping at the
+		# first match.
+		#
+		# For example, with archs "i686 i586 i486 i386 noarch" and
+		# the rpm "glibc", the script will look for (in order):
+		#
+		#	glibc[.-][0-9]*.i686.rpm
+		#	glibc[.-][0-9]*.i586.rpm
+		#	glibc[.-][0-9]*.i486.rpm
+		#	glibc[.-][0-9]*.i386.rpm
+		#	glibc[.-][0-9]*.noarch.rpm
+		#
+		# TODO: Once the miniroot is booted, we should verify that
+		# the rpm name has been expanded to "$rpmfile" properly
+		# by comparing "$rpm" and the output of:
+		#
+		#	zlogin -z <zone> /bin/rpm --qf '%{NAME}' -qp $rpmfile
+		#
+		for arch in $archs; do
+			#
+			# Let the shell expand the glob via echo; if no file
+			# matches, echo simply returns the pattern itself.
+			#
+			# NOTE(review): if the glob matches several files,
+			# all of them land in one space-separated value and
+			# hence one rpms_found element -- confirm that
+			# cannot happen with real media.
+			#
+			rpmglob="$rpm[.-][0-9]*.$arch.rpm"
+			rpmfile="$(echo $rpmglob)"
+
+			[[ "$rpmfile" != "$rpmglob" ]] && break
+
+			unset rpmfile
+		done
+
+		if [[ -z $rpmfile ]]; then
+			rpms_left[$left]="$rpm"
+			((left += 1))
+		else
+			rpms_found[$found]="$rpmfile"
+			((found += 1))
+		fi
+	done
+
+	cd "$curdir"
+	log "\"$rpmdir\": matched $found of $# packages."
+	log "\"$rpmdir\": $left RPMs remaining."
+	return 0
+}
+
+#
+# Build the rpm lists used to install a machine.
+#
+# The single, optional, argument is the name of the metacluster to install
+# (core, desktop, server, development or all); desktop is the default.
+#
+# The variable "distro_rpms" is built from the per-cluster RPM lists
+# read in from an individual distribution definition file.
+#
+build_rpm_list()
+{
+	# With no argument, a desktop installation is assumed.
+	typeset cluster=desktop
+	typeset cnt=0
+	typeset pkgs
+
+	# Map each (possibly abbreviated) argument to a cluster name.
+	for clust in "$@"; do
+		((cnt += 1))
+		case $clust in
+		core)
+			cluster=core
+			;;
+		desk*)
+			cluster=desktop
+			;;
+		serv*)
+			cluster=server
+			;;
+		dev*)
+			cluster=developer
+			;;
+		all)
+			# "all" ends argument processing immediately.
+			cluster=all
+			break
+			;;
+		*)
+			screenlog "$unknown_clust" "$clust"
+			exit $ZONE_SUBPROC_USAGE
+			;;
+		esac
+	done
+
+	# Only a single cluster may be requested.
+	if ((cnt > 1)); then
+		msg=$(gettext "Too many install clusters specified")
+		screenlog "$msg"
+		exit $ZONE_SUBPROC_USAGE
+	fi
+
+	screenlog "$inst_clust" $cluster
+
+	# Select the package list that belongs to the chosen cluster.
+	if [[ $cluster = core ]]; then
+		distro_rpms=$distro_core_rpms
+	elif [[ $cluster = desktop ]]; then
+		distro_rpms=$distro_desktop_rpms
+	elif [[ $cluster = server ]]; then
+		distro_rpms=$distro_server_rpms
+	elif [[ $cluster = developer ]]; then
+		distro_rpms=$distro_developer_rpms
+	elif [[ $cluster = all ]]; then
+		distro_rpms=$distro_all_rpms
+	fi
+
+	# The miniroot RPMs are always installed ahead of the cluster's own.
+	distro_rpms="$distro_miniroot_rpms $distro_rpms"
+}
+
+#
+# Install the "miniroot" minimal Linux environment that is booted single-user
+# to complete the install.
+#
+# This works by feeding the RPM list needed for the installation one
+# by one to rpm2cpio(1).
+#
+# Usage:
+# install_miniroot <mounted media dir> <names of RPMS to install>
+#
+#
+install_miniroot()
+{
+	typeset mediadir="$1"
+	typeset rpm
+
+	shift
+
+	#
+	# Our version of ksh sometimes resets the shell's trap handler.
+	# RPM operations are the longest part of an install, so re-arm the
+	# handler here to make sure an interrupt still brings the miniroot
+	# down and cleans up the interrupted install.
+	#
+	trap trap_cleanup INT
+
+	# Pick the singular or plural form of the progress message.
+	case $# in
+	1)	msg=$(gettext "Installing %s miniroot package...") ;;
+	*)	msg=$(gettext "Installing %s miniroot packages...") ;;
+	esac
+
+	screenlog "\n$msg" "$#"
+
+	for rpm in "$@"; do
+		verboselog "\nInstalling \"$rpm\" to miniroot at\n" \
+		    " \"$zoneroot\"..."
+
+		#
+		# Unpack the RPM payload directly into the root directory;
+		# cpio's chatter is sent to file descriptor 2.
+		#
+		if ! rpm2cpio "$mediadir/$rd_rpmdir/$rpm" |
+		    ( cd "$rootdir" && cpio -idu ) 1>&2; then
+			screenlog "$mini_instfail" "$rpm"
+			return 1
+		fi
+	done
+
+	screenlog ""
+	return 0
+}
+
+#
+# Install the zone from the mounted disc image by feeding a list of RPMs to
+# install from this image to RPM running on the zone via zlogin(1).
+#
+# Usage:
+# install_zone <path to mounted install media> [<names of RPMS to install>]
+#
+# If the caller doesn't supply a list of RPMs to install, we install any
+# we previously stashed away in the deferred RPMs directory.
+#
+install_zone()
+{
+	#
+	# Convert the passed install media pathname to a zone-relative path
+	# by stripping $rootdir from the head of the path.
+	#
+	typeset zonerpmdir="${1##$rootdir}/$rd_rpmdir"
+
+	typeset defdir="$rootdir/var/lx_install/deferred_rpms"
+	typeset mounted_root="$1"
+	typeset rpmopts="-i"
+
+	typeset defer
+	typeset deferred_found
+	typeset install_rpms
+	typeset nrpms
+	typeset rpm
+	typeset rpmerr
+
+	shift
+
+	#
+	# If the caller provided a list of RPMs, determine which of them
+	# should be installed now, and which should be deferred until
+	# later.
+	#
+	if [[ $# -gt 0 ]]; then
+		if [[ -n $deferred_rpms ]]; then
+			# Report the directory we actually failed to create.
+			[[ -d $defdir ]] || if ! mkdir -p $defdir; then
+				screenlog "$install_defmkfail" "$defdir"
+				return 1
+			fi
+
+			msg=$(gettext "Checking for deferred packages...")
+			screenlog "$msg"
+
+			find_packages "$mounted_root" $deferred_rpms
+			deferred_found="${rpms_found[@]}"
+			numdeferred=${#rpms_found[@]}
+		else
+			deferred_found=""
+		fi
+
+		install_rpms="$@"
+		nrpms=$#
+
+		#
+		# If this distro has any deferred RPMs, we want to simply
+		# copy them into the zone instead of installing them. We
+		# then remove them from the list of RPMs to be installed on
+		# this pass.
+		#
+		for rpm in $deferred_found; do
+			if echo "$install_rpms" | egrep -s "$rpm"; then
+				verboselog "Deferring installation of \"$rpm\""
+
+				#
+				# Remove the RPM from the install_rpms list
+				# and append it to the deferred_saved array
+				#
+				install_rpms=$(echo "$install_rpms " |
+				    sed "s/ $rpm / /g")
+
+				# remove trailing spaces, if any
+				install_rpms=${install_rpms%%+( )}
+
+				deferred_saved[${#deferred_saved[@]}]="$rpm"
+
+				if ! cp "$mounted_root/$rd_rpmdir/$rpm" \
+				    "$defdir"; then
+					screenlog "$install_defcpfail" "$rpm"
+					return 1
+				fi
+			fi
+
+			#
+			# If we've deferred the installation of EVERYTHING,
+			# simply return success
+			#
+			[[ -z $install_rpms ]] && return 0
+		done
+
+		#
+		# Emit the blank separator line only when something was
+		# deferred ("&&", not a background "&").
+		#
+		[[ -n $deferred_found ]] && verbose ""
+	elif [[ -z $deferred_saved ]]; then
+		# There are no deferred RPMs to install, so we're done.
+		return 0
+	else
+		# Install the RPMs listed in the deferred_saved array
+		install_rpms=${deferred_saved[@]}
+		nrpms=${#deferred_saved[@]}
+		zonerpmdir=/var/lx_install/deferred_rpms
+		defer="deferred "
+	fi
+
+	#
+	# There's a quirk in our version of ksh that sometimes resets the
+	# trap handler for the shell. Since RPM operations will be the
+	# longest part of any given install, make sure that an interrupt while
+	# the command is running will bring the miniroot down and clean up
+	# the interrupted install.
+	#
+	trap trap_cleanup INT
+
+	#
+	# Print a message depending on how many RPMS we have to install.
+	#
+	# 25 RPMS seems like a reasonable boundary between when an install may
+	# take a "few" or "several" minutes; this may be tuned if needed.
+	#
+	screenlog ""
+
+	if [[ $nrpms -eq 1 ]]; then
+		screenlog "$install_one_rpm" "$defer"
+	elif [[ $nrpms -lt 25 ]]; then
+		screenlog "$install_nrpms_few" "$nrpms" "$defer"
+	else
+		screenlog "$install_nrpms_several" "$nrpms" "$defer"
+
+		#
+		# For installs of over 600 packages or so, it can take rpm a
+		# really, REALLY long time to output anything, even when
+		# running in verbose mode: rpm silently checks dependencies
+		# for the entire package list before printing its
+		# "Processing..." message, which may take TEN MINUTES or
+		# MORE from a DVD or DVD ISO.
+		#
+		# Since the user might otherwise think the install was hung
+		# when running in verbose mode, warn them that it could be
+		# quite a while before they see any further output from the
+		# installer.
+		#
+		[[ $nrpms -gt 600 ]] && verbose "$install_longwait"
+	fi
+
+	log ""
+	log "Installing: $install_rpms"
+	log ""
+	log "NOTE: Any messages appearing below prefixed with \"warning:\""
+	log " and/or that do not cause the installer to abort the"
+	log " installation process may safely be ignored."
+	log ""
+
+	echo
+
+	# If verbose mode is selected, run rpm in verbose mode as well.
+	[[ -n $verbose_mode ]] && rpmopts="-ivh"
+
+	#
+	# LX_INSTALL must be defined when running this command in order to
+	# enable switches built into various emulated system calls to allow
+	# the dev package (which may not actually write to /dev) to function.
+	#
+	zlogin "$zonename" "( cd "$zonerpmdir" ; LX_INSTALL=1 \
+	    /bin/rpm $rpmopts --force --aid --nosignature --root /a \
+	    $install_rpms )"
+
+	rpmerr=$?
+
+	if [[ $rpmerr -ne 0 ]]; then
+		log ""
+		log "Zone rpm install command exited abnormally, code $rpmerr"
+		log ""
+
+		screenlog "$zone_instfail" "$zonename" "$zonerpmdir" "$rpmerr"
+		return 1
+	fi
+
+	log ""
+	log "$nrpms package(s) installed."
+
+	return 0
+}
+
+#
+# Attempt to unmount all file systems passed on the command line
+#
+# Returns 0 if all umounts succeeded, otherwise the number of umount failures
+#
+umount_list()
+{
+	typeset failures=0
+	typeset mounted
+
+	# Collects the file systems that could not be unmounted.
+	unset umount_failures
+
+	for mounted in "$@"; do
+		umount "$mounted" && continue
+
+		umount_failures="$umount_failures $mounted"
+		((failures += 1))
+	done
+
+	return $failures
+}
+
+#
+#
+# Set up lofs mounts required for chroot(1M) to work on a new root directory
+# located in /a within a zone.
+#
+newroot_lofimnt()
+{
+	typeset dev
+	typeset target
+
+	#
+	# Space-separated list of everything mounted so far.  It is left set
+	# on success and unset again on every failure path, after the mounts
+	# it names have been handed to umount_list.
+	#
+	unset newroot_mounted
+
+	#
+	# /usr and /lib get lofs mounted in the zone on /native read-only
+	#
+	# $zoneroot/dev gets lofs mounted on /native/dev read/write to allow
+	# the use of native devices.
+	#
+	mount -F lofs -r /lib "$rootdir/a/native/lib" || return 1
+	newroot_mounted="$rootdir/a/native/lib"
+
+	if ! mount -F lofs -r /usr "$rootdir/a/native/usr"; then
+		umount "$rootdir/a/native/lib"
+		unset newroot_mounted
+		return 1
+	fi
+
+	newroot_mounted="$newroot_mounted $rootdir/a/native/usr"
+
+	if ! mount -F lofs "$zoneroot/root/native/dev" \
+	    "$rootdir/a/native/dev"; then
+		umount_list $newroot_mounted
+		unset newroot_mounted
+		return 1
+	fi
+
+	newroot_mounted="$newroot_mounted $rootdir/a/native/dev"
+
+	#
+	# This is a bit ugly; to provide device access within the chrooted
+	# environment RPM will use for its install, we will create the same
+	# symlinks "$rootdir/dev" contains in the new dev directory, and will
+	# lofs mount the balance of "$rootdir/dev" into the same locations in
+	# /dev in the new filesystem we're installing to.
+	#
+	for dev in "$zoneroot"/root/dev/*
+	do
+		# An unexpanded glob means the directory is empty.
+		if [[ "$dev" = "$zoneroot/root/dev/*" ]]; then
+			log "ERROR: No files found in $zoneroot/root/dev"
+			umount_list $newroot_mounted
+
+			#
+			# As on the other failure paths, forget the list so
+			# the caller does not try to unmount it again.
+			#
+			unset newroot_mounted
+			return 1
+		fi
+
+		target="$rootdir/a/dev/$(basename "$dev")"
+
+		#
+		# If the device file is a symbolic link, create a new link
+		# in the target directory with the same source.
+		#
+		# If the device file is any other file or directory, lofs
+		# mount it from the device directory into the target directory.
+		#
+		if [[ -h $dev ]]; then
+			typeset source=$(LC_ALL=C file -h "$dev")
+
+			#
+			# Remove extraneous text from the output of file(1) so
+			# we're left only with the target path of the symbolic
+			# link.
+			#
+			source="${source##*link to }"
+
+			[[ -a "$target" ]] && /bin/rm -f "$target"
+
+			if ! ln -s "$source" "$target"; then
+				screenlog "$symlink_failed" "$source" "$target"
+				umount_list $newroot_mounted
+				unset newroot_mounted
+				return 1
+			fi
+		else
+			# A lofs mount needs an existing mount point.
+			[[ ! -a "$target" ]] && touch "$target"
+
+			if ! mount -F lofs "$dev" "$target"; then
+				screenlog "$lofs_failed" "$dev" "$target"
+				umount_list $newroot_mounted
+				unset newroot_mounted
+				return 1
+			fi
+
+			newroot_mounted="$newroot_mounted $target"
+		fi
+
+	done
+
+	return 0
+}
+
+#
+# Replace the root directory of a zone with the duplicate previously created
+# in the zone's /a directory.
+#
+replace_miniroot()
+{
+	#
+	# Halting the zone unmounts every lofs file system mounted in it, so
+	# there is no need to unmount them one at a time here.
+	#
+	zoneadm -z "$zonename" halt || {
+		screenlog "$zone_haltfail" "$zonename"
+		return 1
+	}
+
+	unset miniroot_booted
+	unset newroot_mounted
+
+	# Clear out leftovers from any earlier attempt.
+	if [[ -d "$zoneroot/a" ]]; then
+		rm -rf "$zoneroot/a"
+	fi
+	if [[ -d "$zoneroot/oldroot" ]]; then
+		rm -rf "$zoneroot/oldroot"
+	fi
+
+	#
+	# Carry the install log over into the new root, or its details would
+	# be lost: the log's path relative to "$rootdir" is reused beneath
+	# "$rootdir/a".
+	#
+	if [[ -n $logfile && -f "$logfile" ]]; then
+		cp "$logfile" "$rootdir/a${logfile##$rootdir}"
+	fi
+
+	# Swap the new root into place, parking the miniroot as "oldroot".
+	if ! mv -f "$rootdir/a" "$zoneroot/a"; then
+		return 1
+	fi
+	if ! mv -f "$rootdir" "$zoneroot/oldroot"; then
+		return 1
+	fi
+	if ! mv -f "$zoneroot/a" "$rootdir"; then
+		return 1
+	fi
+
+	#
+	# The copied logfile now sits where the one we were writing to used
+	# to be.  Reopen it, or the shell would keep writing to the old
+	# file's inode and everything logged from here on would be lost.
+	#
+	if [[ -n $logfile ]]; then
+		exec 2>>"$logfile"
+	fi
+
+	rm -rf "$zoneroot/oldroot"
+
+	#
+	# Remove the contents of the /dev directory created by the install.
+	#
+	# This is not strictly needed: the zone infrastructure mounts
+	# $zoneroot/dev atop $rootdir/dev anyway, hiding its contents, but we
+	# may as well clean up after ourselves.
+	#
+	# The checks are basic paranoia given how dangerous this command is;
+	# they are not intended to catch all malicious cases.
+	#
+	if [[ "$rootdir" != "" && "$rootdir" != "/" ]]; then
+		rm -rf "$rootdir"/dev/*
+	fi
+
+	return 0
+}
+
+#
+# Initialize, duplicate and boot the zone's miniroot, mount the file systems
+# needed to install into the new root, and initialize its RPM database.
+#
+# Sets "miniroot_booted" once the zone is booted.
+#
+# Returns 0 on success, 1 on failure.
+#
+setup_miniroot()
+{
+	unset miniroot_booted
+
+	"$cwd/lx_init_zone" "$rootdir" mini || {
+		screenlog "$mini_initfail" "$zonename"
+		return 1
+	}
+
+	copy_miniroot || {
+		screenlog "$mini_copyfail" "$zonename"
+		return 1
+	}
+
+	#
+	# zoneadm objects to a zone root directory that is group or world
+	# readable or executable, so tighten its mode before booting.
+	#
+	chmod 0700 "$zoneroot"
+
+	msg=$(gettext "Booting zone miniroot...")
+	screenlog "$msg"
+
+	zoneadm -z "$zonename" boot -f || {
+		screenlog "$mini_bootfail" "$zonename"
+		return 1
+	}
+
+	miniroot_booted=1
+
+	#
+	# With the miniroot up, drop the compatible architecture list that
+	# find_packages used for the miniroot so that it asks rpm for the
+	# list when doing the full install.
+	#
+	unset archs
+
+	#
+	# Mount every file system needed to install the new root directory,
+	# unmounting whatever was left mounted if that fails.
+	#
+	newroot_lofimnt || {
+		screenlog "$mini_mntfsfail" "$zonename"
+
+		if [[ -n $newroot_mounted ]]; then
+			umount_list $newroot_mounted
+			unset newroot_mounted
+		fi
+		return 1
+	}
+
+	# Try to initialize the RPM database for the new zone.
+	zlogin "$zonename" /bin/rpm --initdb --root /a || {
+		screenlog "$rpm_initfail" "$zonename"
+		return 1
+	}
+
+	msg=$(gettext "Miniroot zone setup complete.")
+	screenlog "$msg"
+	return 0
+}
+
+#
+# Perform the last cleanup tasks on the newly installed zone, replace the
+# miniroot with the installed root and run lx_init_zone on the result.
+#
+# Returns 0 on success, 1 on failure.
+#
+finish_install()
+{
+	#
+	# Note that the zlogin commands aren't checked for errors, as the
+	# newly installed zone will still boot even if the commands fail.
+	#
+	typeset file
+
+	typeset defdir=$rootdir/var/lx_install/deferred_rpms
+
+	msg=$(gettext "Completing installation; this may take a few minutes.")
+	screenlog "$msg"
+
+	# The stashed copies of the deferred RPMs are no longer needed.
+	if [[ -d $defdir ]]; then
+		rm -f "$defdir"/*.rpm
+		rmdir "$defdir"
+	fi
+
+	# Run ldconfig in the new root
+	zlogin "$zonename" /usr/sbin/chroot /a \
+	    /sbin/ldconfig -f /etc/ld.so.conf
+
+	#
+	# Create the /etc/shadow and /etc/gshadow files if they don't already
+	# exist
+	#
+	[[ -a "$rootdir/a/etc/shadow" ]] ||
+	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/pwconv
+
+	[[ -a "$rootdir/a/etc/gshadow" ]] ||
+	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/grpconv
+
+	#
+	# Make sure all init.d and rc[0-6].d links are set up properly.
+	#
+	for file in `ls "$rootdir/a/etc/init.d"`; do
+		zlogin "$zonename" /usr/sbin/chroot /a \
+		    /sbin/chkconfig --del $file > /dev/null 2>&1
+
+		zlogin "$zonename" /usr/sbin/chroot /a \
+		    /sbin/chkconfig --add $file > /dev/null 2>&1
+	done
+
+	#
+	# Unlike the best-effort zlogin commands above, a failure here means
+	# the installed root was not moved into place; do not carry on and
+	# report success with the miniroot still in place.
+	#
+	replace_miniroot || return 1
+
+	rmdir -ps "$media_mntdir"
+
+	if ! "$cwd/lx_init_zone" "$rootdir"; then
+		screenlog "$zone_initrootfail" "$zonename"
+		return 1
+	fi
+
+	return 0
+}
+
+#
+# Duplicate the installed "miniroot" image in a subdirectory of the base
+# directory of the zone.
+#
+# This is done so that a new root directory can be created that will be used
+# as the root of a chrooted directory that RPM running on the zone will install
+# into.
+#
+copy_miniroot()
+{
+	#
+	# Create the directory $zoneroot/a if it doesn't already exist
+	#
+	[[ -d "$zoneroot/a" ]] ||
+	    { mkdir -p "$zoneroot/a" || return 1 ; }
+
+	msg=$(gettext "Duplicating miniroot; this may take a few minutes...")
+	screenlog "$msg"
+
+	#
+	# Duplicate the miniroot to /a, but don't copy over any /etc/rc.d or
+	# lxsave_ files.
+	#
+	# The subshell gives up if it cannot enter "$rootdir"; otherwise find
+	# would copy whatever directory we happened to be in.
+	#
+	# NOTE(review): "../a" is only "$zoneroot/a" if "$rootdir" is a direct
+	# subdirectory of "$zoneroot" -- confirm against the caller.
+	#
+	( cd "$rootdir" || exit 1
+	    find . -print | egrep -v "/etc/rc\.d|lxsave_" | \
+	    cpio -pdm ../a )
+
+	# Move the finished copy to its final place beneath the miniroot.
+	[[ -d "$rootdir/a" ]] && rm -rf "$rootdir/a" 2>/dev/null
+	mv -f "$zoneroot/a" "$rootdir/a" || return 1
+
+	return 0
+}
+
+#
+# Read the first six lines of the .discinfo file from the root of the passed
+# disc directory (which should either be a mounted disc or ISO file.)
+#
+# The read lines will be used to set appropriate shell variables on success:
+#
+# rd_line[0]: Disc Set Serial Number (sets rd_serial)
+# rd_line[1]: Distribution Release Name (sets rd_release)
+# rd_line[2]: Distribution Architecture (sets rd_arch)
+# rd_line[3]: Disc Number[s] in Distribution (sets rd_cdnum)
+# rd_line[4]: "base" directory for disc (currently unused)
+# rd_line[5]: RPM directory for disc (sets rd_rpmdir)
+#
+# Returns 0 on success, 1 on failure.
+#
+read_discinfo()
+{
+	typeset rd_file="$1/.discinfo"
+
+	# Start from a clean slate so stale values never survive a failure.
+	unset rd_arch
+	unset rd_cdnum
+	unset rd_disctype
+	unset rd_pers
+	unset rd_release
+	unset rd_rpmdir
+	unset rd_serial
+
+	#
+	# If more than one argument was passed to read_discinfo, the second
+	# is a flag meaning that we should NOT print a warning message if
+	# we don't find a .discinfo file, as this is just a test to see if
+	# a distribution ISO is already mounted on the passed mount point.
+	#
+	if [[ ! -f "$rd_file" ]]; then
+		[[ $# -eq 1 ]] &&
+		    screenlog "$discinfo_nofile" "$rd_file"
+		return 1
+	fi
+
+	verbose "Attempting to read \"$rd_file\"..."
+
+	if [[ ! -r "$rd_file" ]]; then
+		screenlog "$discinfo_notreadable" "$rd_file"
+		return 1
+	fi
+
+	typeset rd_line
+	typeset linenum=0
+
+	while read -r rd_line[$linenum]; do
+		#
+		# If .discinfo architecture isn't "i386," fail here as
+		# we only support i386 distros at this time.
+		#
+		if [[ $linenum = 2 && "${rd_line[2]}" != "i386" ]]; then
+			screenlog "$discinfo_wrongarch" "$rd_file" \
+			    "${rd_line[2]}"
+			return 1
+		fi
+
+		#
+		# We've successfully read the first six lines of .discinfo
+		# into $rd_line, so do the appropriate shell variable munging.
+		#
+		if ((linenum == 5)); then
+			rd_serial=${rd_line[0]}
+			rd_release=${rd_line[1]}
+
+			#
+			# Publish the architecture too; it has already been
+			# verified above.
+			#
+			rd_arch=${rd_line[2]}
+
+			# CentOS names their releases "final"
+			[[ "$rd_release" = "final" ]] && rd_release="CentOS"
+
+			#
+			# Line four of the .discinfo file contains either a
+			# single disc number for a CD or a comma delimited list
+			# representing the CDs contained on a particular DVD.
+			#
+			rd_cdnum=${rd_line[3]}
+
+			if [[ "$rd_cdnum" = *,* ]]; then
+				rd_disctype="DVD"
+			else
+				rd_disctype="CD"
+			fi
+
+			rd_rpmdir=${rd_line[5]}
+
+			#
+			# If the specified RPM directory doesn't exist, this is
+			# not a valid binary RPM disc (it's most likely a
+			# source RPM disc), so don't add it to the list of
+			# valid ISO files.
+			#
+			[[ ! -d "$1/$rd_rpmdir" ]] && return 1
+
+			if [[ "$rd_cdnum" = "1" &&
+			    "$rd_release" = "Red Hat"* ]]; then
+				typeset rh_glob
+
+				#
+				# If this is a Red Hat release, get its
+				# personality name from the name of the
+				# redhat-release RPM package.
+				#
+				# Look for "redhat-release-*.rpm" in the
+				# RedHat/RPMS directory of the media, using
+				# echo to expand the glob; if no file
+				# matches, echo returns the pattern itself.
+				#
+				rh_glob="$1/RedHat/RPMS/redhat-release-*.rpm"
+				rd_pers="$(echo $rh_glob)"
+
+				if [[ "$rd_pers" != "$rh_glob" ]]; then
+					#
+					# A file was found, so extract the
+					# personality type from its name.
+					# For example:
+					#
+					#   redhat-release-3WS-13.5.1.i386.rpm
+					#
+					# indicates a "WS" personality CD or
+					# installation DVD.
+					#
+					# First delete everything up to the
+					# personality type...
+					#
+					rh_glob="*/redhat-release-[0-9]"
+					rd_pers="${rd_pers##$rh_glob}"
+
+					#
+					# ...then the trailing portion, to
+					# leave only the personality type,
+					# such as "WS" or "ES."
+					#
+					rd_pers="${rd_pers%%-*\.rpm}"
+				else
+					unset rd_pers
+				fi
+			fi
+
+			return 0
+		fi
+
+		((linenum += 1))
+	done < "$rd_file"
+
+	#
+	# The file didn't have at least six lines, so indicate that parsing
+	# failed.
+	#
+	return 1
+}
+
+#
+# Mount install media within the zone.
+#
+# The media will be mounted at $zoneroot/root/media, either via a loopback
+# mount (if it's a managed removable disc) or directly (if the media is an ISO
+# file or if the specified filename is a block device.)
+#
+# Returns 0 on success, 1 on failure, 2 if no disc was available
+#
+mount_install_media()
+{
+	typeset device="$1"
+	typeset mount_err
+
+	# Forget any state left over from a previous mount attempt.
+	unset removable
+	unset zone_mounted
+
+	[[ -z $mntdir ]] && return 1
+
+	#
+	# Create the mount point if it doesn't exist yet.  The path is quoted
+	# so that a mount point containing whitespace is created as a single
+	# directory rather than being split into several words.
+	#
+	[[ -d $mntdir ]] || if ! mkdir -p "$mntdir"; then
+		screenlog "$mk_mntfail" "$mntdir"
+		unset mntdir
+		return 1
+	fi
+
+	if [[ "$install_media" = "disc" && "$managed_removable" = "1" ]]; then
+		#
+		# The removable disc device is an automatically managed one,
+		# so just wait for the device mounter to notice a disc has been
+		# inserted into the drive and for the disc to appear at the
+		# mount point.
+		#
+		typeset mount_interval=2
+		typeset mount_timeout=10
+		typeset mount_timer=0
+
+		typeset nickname="$(basename "$device")"
+
+		eject -q "$nickname" > /dev/null 2>&1 || return 2
+		removable="$nickname"
+
+		#
+		# Double check that the device was mounted.  If it wasn't, that
+		# usually means the disc in the drive isn't in a format we can
+		# read or the physical disc is unreadable in some way.
+		#
+		# The mount_timer loop is needed because the "eject -q" above
+		# may report a disc is available before the mounter associated
+		# with the drive actually gets around to mounting the device,
+		# so we need to give it a chance to do so.  The mount_interval
+		# allows us to short-circuit the timer loop as soon as the
+		# device is mounted.
+		#
+		while ((mount_timer < mount_timeout)); do
+			[[ -d "$device" ]] && break
+
+			sleep $mount_interval
+			((mount_timer += mount_interval))
+		done
+
+		if [[ ! -d "$device" ]]; then
+			screenlog "\n$unknown_media" "$device"
+			return 2
+		fi
+
+		mount -F lofs -r "$device" "$mntdir"
+		mount_err=$?
+	else
+		#
+		# Attempt to mount the media manually.
+		#
+		# First, make sure the passed device name really IS a device.
+		#
+		[[ -b "$device" ]] || return 2
+
+		#
+		# Now check to see if the device is already mounted and lofs
+		# (loopback) mount the existing mount point into the zone if
+		# it is.
+		#
+		if get_mountdir "$device"; then
+			mount -F lofs -r "$mount_dir" "$mntdir"
+			mount_err=$?
+		else
+			[[ "$install_media" = "disc" ]] && removable="$device"
+
+			# It wasn't mounted, so go ahead and try to do so.
+			mount -F hsfs -r "$device" "$mntdir"
+			mount_err=$?
+		fi
+
+		# A mount_err of 33 means no suitable media was found
+		((mount_err == 33)) && return 2
+	fi
+
+	if ((mount_err != 0)); then
+		screenlog "$mountfail" "$device" "$mntdir"
+		unset mntdir
+		return 1
+	fi
+
+	# Record the mount so cleanup code knows what to unmount later.
+	zone_mounted="$mntdir"
+	verbose "Mount of \"$device\" on \"$mntdir\" succeeded."
+	return 0
+}
+
+#
+# Eject the removable disc named by the global variable "removable".
+#
+# Takes no arguments. If the media is still mounted in the zone (i.e.
+# "zone_mounted" is set) it is unmounted first. "removable" is unset before
+# returning, whether or not the eject succeeded.
+#
+eject_removable_disc()
+{
+ screenlog ""
+ verbose " (Attempting to eject '$removable'... \c"
+
+ # The disc can't be ejected while it is still mounted in the zone.
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ if ! eject "$removable"; then
+ verbose "failed.)\n"
+ screenlog "$eject_fail" "$removable"
+
+ msg=$(gettext "Please eject the disc manually.")
+ screenlog "$msg"
+ else
+ verbose "done.)\n"
+ fi
+
+ unset removable
+}
+
+#
+# Ask for the user to provide a disc or ISO.
+#
+# Returns 0 on success, 1 on failure.
+#
+prompt_for_media()
+{
+ # No prompting is allowed in silent mode.
+ if [[ -n $silent_mode ]]; then
+ log "$silent_err_msg"
+ return 1
+ fi
+
+ # $1, if given, is the number of the specific CD being requested.
+ if [[ "$1" != "" ]]; then
+ msg="$release_name, CD $1"
+ else
+ typeset disc=$(gettext "disc")
+
+ msg=$(gettext "any")
+ msg="$msg $release_name $disc"
+ fi
+
+ if [[ "$install_media" = "disc" ]]; then
+ screenlog "$insert_discmsg" "$msg" "$release_name"
+
+ msg=$(gettext "drive and press <RETURN>.")
+ screenlog " $msg"
+
+ # Eject whatever disc is currently in the drive, if any.
+ [[ -n $removable ]] && eject_removable_disc
+ else
+ if [[ -n $zone_mounted ]]; then
+ umount "$mntdir"
+ unset zone_mounted
+ fi
+
+ #
+ # This is only printed in the case of a user
+ # specifying a device name as an install medium.
+ # This is handy for testing the installer or if the user
+ # has ISOs stored in some strange way that somehow
+ # breaks the "install from ISO" mechanism, as ISOs
+ # can be manually added using the lofiadm(1M) command and
+ # the resulting lofi device name passed to the
+ # installer.
+ #
+ # NOTE(review): "mntdev" is not set in this function; it is
+ # presumably inherited from the caller (get_cd declares it)
+ # -- confirm.
+ #
+ screenlog "$mount_proper_iso1" "$msg"
+ screenlog " $mount_proper_iso2" "$release_name" "$mntdev"
+
+ msg=$(gettext "and press <RETURN>.")
+ screenlog " $msg"
+ fi
+
+ # Wait for <RETURN>; a failed read (e.g. EOF) is reported as failure.
+ read && return 0
+
+ return 1
+}
+
+#
+# Get a particular CD of a multi-disc set.
+#
+# This basically works by doing the following:
+#
+# 1) Mount the disc
+# 2) Read the disc's .discinfo file to see which CD it is or represents
+# 3) If it doesn't contain the desired CD, ask the user for a disc
+# containing the CD we wanted.
+#
+# Returns 0 on success, 1 on failure.
+#
+get_cd()
+{
+	typeset mntdev="$1"
+
+	typeset cdnum
+	typeset discname
+	typeset enter
+	typeset mount_err
+	typeset prompted
+
+
+	if [[ $# -eq 2 ]]; then
+		# Caller specified a particular CD to look for
+		cdnum="$2"
+		discname="$release_name, CD $cdnum"
+	else
+		# Caller wanted any disc
+		discname="a $release_name disc"
+	fi
+
+	verboselog "\nChecking for $discname on device"
+	verboselog " \"$mntdev\"\n"
+
+	while :; do
+		# Check to see if a distro disc is already mounted
+		mntdir="$media_mntdir"
+
+		unset rd_disctype
+		if ! read_discinfo "$mntdir" "test"; then
+			mount_install_media "$mntdev"
+			mount_err=$?
+
+			#
+			# If the mount succeeded, continue on in the main
+			# script
+			#
+			if ((mount_err == 0)); then
+				read_discinfo "$mntdir"
+			elif ((mount_err == 2)); then
+				# No medium was found, so prompt for one.
+				prompt_for_media "$cdnum" && prompted=1 &&
+				    continue
+
+				unset mntdir
+				return 1
+			else
+				# mount failed
+				unset mntdir
+				return 1
+			fi
+		fi
+
+		if [[ -n $distro_serial &&
+		    "$rd_serial" != "$distro_serial" ]]; then
+			screenlog "$wrong_serial" "$install_disctype"
+			screenlog " $wrong_ser_expect" "$rd_serial" \
+			    "$distro_serial"
+
+			#
+			# If we're installing from ISOs, don't prompt the user
+			# if the wrong serial number is present, as there's
+			# nothing they can do about it.
+			#
+			[[ "$install_media" = "ISO" ]] && return 1
+
+			prompt_for_media "$cdnum" && continue
+
+			#
+			# The media is being unmounted here, so clear
+			# "zone_mounted" (the variable mount_install_media
+			# sets) to keep later cleanup code from trying to
+			# unmount it a second time.
+			#
+			umount "$mntdir"
+			unset zone_mounted
+			return 1
+		fi
+
+		#
+		# Make sure that the mounted media is CD $cdnum.
+		#
+		# If it is, return to the caller, otherwise eject the
+		# disc and try again.
+		#
+		if [[ "$rd_disctype" = "CD" ]]; then
+			verboselog "Found CD #$rd_cdnum," \
+			    "Serial #$rd_serial"
+			verboselog "Release Name \"$rd_release\""
+
+			[[ -n $rd_pers ]] &&
+			    verboselog "Detected RedHat Personality" \
+			    "\"$rd_pers\""
+
+			verboselog ""
+
+			# If we didn't care which CD it was, return success
+			[[ "$cdnum" = "" ]] && return 0
+
+			# Return if the CD number read is a match
+			[[ "$rd_cdnum" = "$cdnum" ]] && return 0
+		else
+			verboselog "\nFound DVD (representing CDs" \
+			    "$rd_cdnum), Serial #$rd_serial"
+			verboselog "Release Name \"$rd_release\"\n"
+
+			[[ -n $rd_pers ]] &&
+			    verboselog "Detected RedHat Personality" \
+			    "\"$rd_pers\""
+
+			verboselog ""
+
+			# If we didn't care which CD it was, return success
+			[[ "$cdnum" = "" ]] && return 0
+
+			#
+			# Since a DVD represents multiple CDs, make sure the
+			# DVD inserted represents the CD we want.
+			#
+			# Both strings are wrapped in commas so that only a
+			# whole list element can match; otherwise CD "1"
+			# would also be found in a list such as "11,12".
+			#
+			[[ ",$rd_cdnum," = *",$cdnum,"* ]] && return 0
+		fi
+
+		if [[ -n $prompted ]]; then
+			if [[ "$rd_disctype" = "CD" ]]; then
+				screenlog "$wrong_cd" "$rd_cdnum" "$cdnum"
+			else
+				msg=$(gettext "Incorrect DVD inserted.")
+				screenlog "$msg"
+
+				log "(DVD represented CDs $rd_cdnum," \
+				    " wanted CD $cdnum)"
+			fi
+		fi
+
+		#
+		# If we're installing from ISOs, don't prompt the user if the
+		# wrong CD is mounted, as there's nothing they can do about it.
+		#
+		[[ "$install_media" = "ISO" ]] && return 1
+
+		prompt_for_media "$cdnum" && prompted=1 && continue
+
+		# See above: clear the real mount-state variable.
+		umount "$mntdir"
+		unset zone_mounted
+		return 1
+	done
+}
+
+#
+# Find out which distro the mounted disc belongs to by comparing the
+# mounted disc's serial number against those contained in the various
+# distro files.
+#
+# When a match is found, the shell variable "distro_file" will be set to
+# the name of the matching file. Since that will have been the last file
+# sourced by the shell, there's no need for the caller to do it again; the
+# variable is only set in case it's of some use later.
+#
+# Returns 0 on success, 1 on failure.
+#
+get_disc_distro()
+{
+	typeset distro
+	typeset distro_files="$(echo $distro_dir/*.distro)"
+
+	unset distro_file
+
+	# If the glob came back unexpanded, no distro files exist at all.
+	if [[ "$distro_files" = "$distro_dir/*.distro" ]]; then
+		return 1
+	fi
+
+	for distro in $distro_files; do
+		if [[ -f "$distro" ]]; then
+			verbose "Checking for disc distro \"$distro\"..."
+
+			# Source the distro file to pick up its definitions.
+			. "$distro" > /dev/null
+
+			if [[ "$rd_serial" = "$distro_serial" ]]; then
+				# The disc's serial number matches this distro.
+				distro_file="$distro"
+				release_name="$rd_release $distro_version"
+				distro_ncds=${#distro_cdorder[@]}
+
+				return 0
+			fi
+		fi
+	done
+
+	# No distro file carried the serial number read from the disc.
+	return 1
+}
+
+#
+# Iterate through the install media to install the miniroot and full zone
+#
+# The install media may be physical discs, a lofi mounted ISO file, or
+# iso files located in a directory specified by the user.
+#
+# All installations, regardless of media type, use a CD as their basic media
+# unit. DVDs or ISOs representing DVDs actually contain multiple "CDs" of
+# installation packages.
+#
+# The variable "distro_ncds," as set elsewhere, represents the number
+# of CDs required to install the distribution. Whether the installation
+# actually requires multiple physical discs or ISOs depends upon their content.
+#
+# Returns 0 on success, 1 on failure.
+#
+iterate_media()
+{
+	typeset cdnum=1
+	typeset cds
+	typeset disc_rpms
+	typeset err_media
+	typeset err_msg
+	typeset install_type="$1"
+	typeset ldevs
+	typeset mountdev
+	typeset rh_pers
+
+	# Drop the install type; what remains is the list of devices.
+	shift
+
+	if [[ "$install_type" = "miniroot" ]]; then
+		typeset i
+
+		disc_rpms=$distro_miniroot_rpms
+		err_msg="$mini_mediafail"
+
+		# For miniroot installs, ask for CDs in numerical order
+		cds[0]="zero_pad"
+
+		for i in ${distro_cdorder[@]}; do
+			cds[$cdnum]=$cdnum
+			((cdnum += 1))
+		done
+
+		cdnum=1
+	else
+		disc_rpms=$distro_rpms
+		err_msg="$zone_mediafail"
+
+		#
+		# For full zone installs, ask for CDs in the order RPM needs
+		# to find the packages.
+		#
+		set -A cds "zero_pad" ${distro_cdorder[@]}
+	fi
+
+	if [[ "$install_media" = "ISO" ]]; then
+		set -A ldevs "zero_pad" "$@"
+	else
+		mountdev="$1"
+		err_media="$release_name, CD ${cds[$cdnum]} (or DVD)"
+	fi
+
+	unset rpms_left_save
+
+	while ((cdnum <= distro_ncds)); do
+		# Skip CDs that have already been installed from.
+		[[ -z ${cds[$cdnum]} ]] && ((cdnum += 1)) && continue
+
+		if [[ "$install_media" = "ISO" ]]; then
+			typeset isonum="${cds[$cdnum]}"
+
+			#
+			# If this routine was called with a single ISO device
+			# name, it must be a DVD, so refer to that one lofi
+			# device (and associated ISO pathname)
+			#
+			[[ $# -eq 1 ]] && isonum=1
+
+			err_media="ISO \"${iso_pathnames[$isonum]}\""
+			mountdev="${ldevs[$isonum]}"
+		fi
+
+		#
+		# If the disc needed in the install order isn't the one in
+		# the drive, ask for the correct one.
+		#
+		if ! get_cd "$mountdev" "${cds[$cdnum]}"; then
+			screenlog "$err_msg" "$zonename" "$err_media"
+			return 1
+		fi
+
+		# set the RedHat personality type, if applicable
+		[[ -n $rd_pers && -z $rh_pers ]] && rh_pers=$rd_pers
+
+		#
+		# We now know the actual type of media being used, so
+		# modify the "err_media" string accordingly.
+		#
+		if [[ "$install_media" = "disc" ]]; then
+			if [[ "$rd_disctype" = "DVD" ]]; then
+				err_media="$release_name DVD"
+			else
+				err_media="$release_name, CD ${cds[$cdnum]}"
+			fi
+		fi
+
+		find_packages "$mntdir" $disc_rpms
+
+		#
+		# Save a copy of $rpms_left.  Other functions clobber it.
+		#
+		rpms_left_save="${rpms_left[@]}"
+
+		if [[ -n $rpms_found ]]; then
+			if [[ "$install_type" = "miniroot" ]]; then
+				verboselog "\nInstalling miniroot from"
+				verboselog " $err_media...\n"
+
+				if ! install_miniroot "$mntdir" \
+				    "${rpms_found[@]}"; then
+					screenlog "$err_msg" "$zonename" \
+					    "$err_media"
+					return 1
+				fi
+			else
+				screenlog "\n$install_msg\n" "$zonename" \
+				    "$err_media"
+
+				if ! install_zone "$mntdir" \
+				    ${rpms_found[@]}; then
+					screenlog "$err_msg" "$zonename" \
+					    "$err_media"
+					return 1
+				fi
+			fi
+
+			#
+			# Mark installation from this CD (or ISO representing
+			# this CD) as completed.
+			#
+			if [[ "$rd_disctype" = "CD" ]]; then
+				unset cds[$cdnum]
+			fi
+		fi
+
+		# A DVD install takes a single disc, so stop iterating
+		[[ "$rd_disctype" = "DVD" ]] && break
+
+		# If there are no RPMs left, we're done.
+		[[ -z $rpms_left_save ]] && break
+
+		disc_rpms="$rpms_left_save"
+		((cdnum += 1))
+
+		if [[ "$install_media" != "ISO" ]]; then
+			#
+			# modify the err_media variable to reflect the next
+			# CD in the sequence
+			#
+			err_media="$release_name, CD ${cds[$cdnum]}"
+		else
+			# Unmount the last used ISO if appropriate
+			if [[ -n $zone_mounted ]]; then
+				umount "$zone_mounted"
+				unset zone_mounted
+			fi
+		fi
+	done
+
+	if [[ -n $zone_mounted ]]; then
+		umount "$zone_mounted"
+		unset zone_mounted
+	fi
+
+	if [[ -n $rpms_left_save ]]; then
+		#
+		# Uh oh - there were RPMS we couldn't locate.  This COULD
+		# indicate a failed installation, but we need to check for
+		# a RedHat personality "missing" list first.
+		#
+		if [[ -n $rh_pers && "$rh_pers" != "AS" ]]; then
+			typeset missing
+
+			if [[ $rh_pers = "WS" ]]; then
+				missing="$distro_WS_missing"
+			elif [[ $rh_pers = "ES" ]]; then
+				missing="$distro_ES_missing"
+			fi
+
+			#
+			# If any packages left in "rpms_left_save" appear in
+			# the list of packages expected to be missing from
+			# this personality, remove them from the
+			# "rpms_left_save" list.
+			#
+			if [[ -n $missing ]]; then
+				typeset pkg
+
+				for pkg in $missing
+				do
+					#
+					# The list is padded with a blank on
+					# both sides and the pattern anchored
+					# on blanks so only a whole package
+					# name is removed, never the tail of
+					# a longer name.
+					#
+					rpms_left_save=$(echo \
+					    " $rpms_left_save " |
+					    sed "s/ $pkg / /g")
+
+					#
+					# If all of the packages in
+					# "rpms_left_save" appeared in this
+					# personality's list of "expected
+					# missing" packages, then the
+					# installation completed successfully.
+					#
+					[[ -z ${rpms_left_save%%+( )} ]] &&
+					    return 0
+				done
+			fi
+		fi
+
+		log "\nERROR: Unable to locate some needed packages:\n" \
+		    " ${rpms_left_save%%+( )}\n"
+		screenlog "$err_msg" "$zonename"
+		return 1
+	fi
+
+	return 0
+}
+
+#
+# Install a zone from installation media
+#
+# Returns 0 on success, 1 on failure
+#
+install_from_media()
+{
+	msg=$(gettext "Installing miniroot for zone '%s'.")
+	screenlog "$msg" "$zonename"
+
+	#
+	# Pass the device list through as "$@" so each device name reaches
+	# iterate_media as a single word, even if it contains whitespace
+	# (e.g. a managed removable media path named after a volume label).
+	#
+	iterate_media "miniroot" "$@" || return 1
+
+	if ! setup_miniroot; then
+		screenlog "$mini_setfail" "$zonename"
+		return 1
+	fi
+
+	msg=$(gettext "Performing full install for zone '%s'.")
+
+	screenlog "\n$msg" "$zonename"
+
+	iterate_media "full" "$@" || return 1
+
+	#
+	# Attempt to install deferred RPMS, if any
+	#
+	if [[ -n $deferred_rpms ]]; then
+		if ! install_zone ""; then
+			return 1
+		fi
+	fi
+
+	# The exit status of finish_install is the status of the install.
+	finish_install
+	return $?
+}
+
+#
+# Add an entry to the valid distro list.
+#
+# The passed argument is the ISO type ("CD Set" or "DVD")
+#
+add_to_distro_list()
+{
+ typeset name
+
+ #
+ # Each of the arrays below is appended to by indexing it with its own
+ # current element count. The values recorded come from variables set
+ # by the caller (distro, release_name, redhat_pers and iso_names).
+ #
+ distro_file[${#distro_file[@]}]="$distro"
+
+ # The menu name includes the RedHat personality when one is set.
+ name="$release_name"
+ [[ -n $redhat_pers ]] && name="$name $redhat_pers"
+
+ select_name[${#select_name[@]}]="$name ($1)"
+ release[${#release[@]}]="$release_name"
+ # All ISO names for the set are stored as one blank-separated string.
+ iso_set[${#iso_set[@]}]="${iso_names[@]}"
+ verboselog "Distro \"$name\" ($1) found."
+}
+
+#
+# Find out which distros we have ISO files to support
+#
+# Do this by cycling through the distro directory and reading each distro
+# file in turn looking for:
+#
+# 1) The number of discs in a distribution
+# 2) The serial number of the distribution
+# 3) The name of the distribution
+#
+# Based on this, we can determine based on the ISO files available which
+# distributions, if any, we have a complete set of files to support.
+#
+# The function returns the supported isos in the array "iso_set."
+#
+validate_iso_distros()
+{
+	typeset cd
+	typeset disctype
+	typeset index
+	typeset iso
+	typeset matched_release
+	typeset ncds
+	typeset pers
+	typeset pers_cd
+	typeset pers_index
+	typeset serial
+
+	typeset distro_files="$(echo $distro_dir/*.distro)"
+	typeset nisos=${#iso_filename[@]}
+
+	unset distro_file
+	unset iso_set
+	unset release
+	unset select_name
+
+	# If the glob came back unexpanded, there are no distro files.
+	if [[ "$distro_files" = "$distro_dir/*.distro" ]]; then
+		msg=$(gettext "Unable to find any distro files!")
+		screenlog "$msg"
+		return
+	fi
+
+	for distro in $distro_files; do
+		#
+		# We're done if we've already processed all available ISO files
+		# or if there were none in the first place.
+		#
+		((${#iso_filename[@]} == 0)) && break
+
+		[[ ! -f $distro ]] && continue
+
+		. "$distro" > /dev/null
+		ncds=${#distro_cdorder[@]}
+
+		unset iso_names
+		unset matched_release
+		unset pers
+		unset pers_cd
+
+		verbose "\nChecking ISOs against distro file \"$distro\"..."
+
+		index=0
+
+		while ((index < nisos)); do
+			#
+			# If the filename has been nulled out, it's already
+			# been found as part of a distro, so continue to the
+			# next one.
+			#
+			if [[ -z ${iso_filename[$index]} ]]; then
+				((index += 1))
+				continue
+			fi
+
+			iso="${iso_filename[$index]}"
+			serial="${iso_serial[$index]}"
+			release_name="${iso_release[$index]}"
+			redhat_pers="${iso_pers[$index]}"
+
+			verbose " ISO \"$iso\":"
+
+			#
+			# If the serial number doesn't match that for
+			# this distro, check other ISOs
+			#
+			if [[ "$serial" != "$distro_serial" ]]; then
+				((index += 1))
+				continue
+			fi
+
+			#
+			# Remember the release name of an ISO that really
+			# belongs to this distro.  "release_name" itself is
+			# overwritten by every ISO examined, including ones
+			# whose serial number does not match.
+			#
+			matched_release="$release_name"
+
+			verbose " Serial #$serial"
+			verbose " Release Name \"$release_name\""
+
+			[[ -n ${iso_pers[$index]} ]] &&
+			    verbose " RedHat Personality \"$redhat_pers\""
+
+			if [[ "${iso_disctype[$index]}" = "CD" ]]; then
+				disctype="CD #"
+				cd="${iso_cdnum[$index]}"
+			else
+				disctype="DVD, representing CDs #"
+				cd=0
+			fi
+
+			verbose " ${disctype}${iso_cdnum[$index]}\n"
+
+			#
+			# Once we've matched a particular distro, don't check
+			# this ISO to see if it's part of any other.
+			#
+			unset iso_filename[$index]
+
+			iso_names[$cd]="$iso"
+
+			#
+			# A DVD-based distro consists of one and ONLY one disc,
+			# so process it now.
+			#
+			if [[ "${iso_disctype[$index]}" = "DVD" ]]; then
+				typeset dvd_discs=",${iso_cdnum[$index]}"
+
+				cd=1
+				while ((cd <= ncds)); do
+					dvd_discs=$(echo "$dvd_discs" |
+					    sed "s/,$cd//")
+					((cd += 1))
+				done
+
+				#
+				# If no CDs are left in $dvd_discs, the DVD
+				# was a complete distribution, so add it to
+				# the valid distro list.
+				#
+				# The DVD's name was stored in slot 0 above;
+				# remove it from there ("cd" has since been
+				# advanced past ncds by the loop) so it is not
+				# counted as part of a CD set below.
+				#
+				if [[ -z $dvd_discs ]]; then
+					add_to_distro_list "DVD"
+					unset iso_names[0]
+				fi
+			elif [[ -n ${iso_pers[$index]} ]]; then
+				#
+				# If this is a RedHat personality CD, save off
+				# some extra information about it so we can
+				# discern between multiple personality discs
+				# later, if needed.
+				#
+				pers[${#pers[@]}]=${iso_pers[$index]}
+				pers_cd[${#pers_cd[@]}]="$iso"
+			fi
+
+			((index += 1))
+		done
+
+		#
+		# Check to see if we have ISOs representing a full CD set.
+		# If we don't, don't mark this as an available distro.
+		#
+		(( ${#iso_names[@]} != $ncds )) && continue
+
+		release_name="$matched_release $distro_version"
+
+		if [[ -z ${pers[@]} ]]; then
+			#
+			# If there were no personality discs, just add this
+			# ISO set to the distro list.
+			#
+			unset redhat_pers
+			add_to_distro_list "CD Set"
+		else
+			#
+			# If a valid CD-based distro was found and there are
+			# RedHat personality discs for that distro present,
+			# create entries for each personality in the available
+			# distro list.
+			#
+			pers_index=0
+
+			while ((pers_index < ${#pers[@]})); do
+				redhat_pers=${pers[$pers_index]}
+
+				if [[ -n ${pers_cd[$pers_index]} ]]; then
+					#
+					# RedHat personality discs are always
+					# disc 1 of a CD set, so if we found a
+					# valid personality disc for this set,
+					# set the disc 1 entry for this distro
+					# to the ISO for the proper personality
+					# disc.
+					#
+					iso_names[1]="${pers_cd[$pers_index]}"
+					add_to_distro_list "CD Set"
+				fi
+
+				((pers_index += 1))
+			done
+		fi
+	done
+}
+
+#
+# Do a lofi add for the passed filename and set lofi_dev to the lofi
+# device name lofiadm created for it (e.g. "/dev/lofi/1".)
+#
+# If the passed filename already has a lofi device name, simply set lofi_dev
+# to the existing device name.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_add()
+{
+	typeset filename="$1"
+
+	# Ask lofiadm for a device already associated with the file...
+	if lofi_dev=$(lofiadm "$filename" 2>/dev/null); then
+		return 0
+	fi
+
+	# ...and create a new association if there wasn't one.
+	if lofi_dev=$(lofiadm -a "$filename"); then
+		return 0
+	fi
+
+	screenlog "$lofi_failed" "$filename"
+	return 1
+}
+
+#
+# Delete the lofi device name passed in.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_del()
+{
+	typeset dev="$1"
+
+	# Only names under /dev/lofi are accepted.
+	if [[ "$dev" != /dev/lofi/* ]]; then
+		return 1
+	fi
+
+	# Remove the association; any lofiadm error output is discarded.
+	lofiadm -d "$dev" 2>/dev/null || return 1
+
+	[[ -n $lofi_dev ]] && unset lofi_dev
+	return 0
+}
+
+#
+# Mount the lofi device name passed in.
+#
+# Set the variable mntdir to the directory on which the lofi device is
+# mounted.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_mount()
+{
+ # $1 is the lofi device to mount, $2 the directory to mount it on.
+ typeset lofidev="$1"
+ typeset mntpoint="$2"
+
+ #
+ # Check to see if the lofi device is already mounted and return
+ # the existing mount point if it is.
+ #
+ get_mountdir "$lofidev" && { mntdir="$mount_dir" ; return 0 ; }
+
+ unset mntdir
+ if [[ ! -d "$mntpoint" ]]; then
+ if ! mkdir -p "$mntpoint"; then
+ log "Could not create mountpoint \"$mntpoint\"!\n"
+ return 1
+ fi
+ # Remember that the directory was created here so it can be removed.
+ lofi_created="$mntpoint"
+ fi
+
+ verbose "Attempting mount of device \"$lofidev\""
+ verbose " on directory \"$mntpoint\"... \c"
+
+ if ! mount -F hsfs -r "$lofidev" "$mntpoint" 2>/dev/null; then
+ verbose "FAILED."
+ # Remove the mount point again if it was recorded as created.
+ [[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
+ unset lofi_created
+ return 1
+ fi
+
+ mntdir="$mntpoint"
+ verbose "succeeded."
+ return 0
+}
+
+#
+# Unmount the lofi device name passed in, and remove the device mount point
+# after unmounting the device.
+#
+# NOTE(review): nothing in the body below removes the mount point; the only
+# action taken is the umount. Confirm whether removal is expected to happen
+# here or elsewhere.
+#
+# Returns 0 on success, 1 on failure.
+#
+lofi_umount()
+{
+ typeset mntdev="$1"
+
+ #
+ # If the directory name passed wasn't mounted to begin with,
+ # just return success.
+ #
+ get_mountdir "$mntdev" || return 0
+
+ verbose "Unmounting device \"$mntdev\"... \c"
+
+ if ! umount "$mntdev" ; then
+ verbose "FAILED."
+ return 1
+ fi
+
+ verbose "succeeded."
+ return 0
+}
+
+#
+# Scan the passed list of ISOs.
+#
+# Each argument is added as a lofi device, mounted on /tmp/lxiso and examined
+# with read_discinfo. For every ISO read_discinfo accepts, one entry is
+# recorded at the same index in each of the arrays iso_release, iso_serial,
+# iso_cdnum, iso_disctype and iso_filename (and iso_pers, when a personality
+# was detected). The lofi device is unmounted and deleted again afterwards.
+#
+scan_isos()
+{
+ typeset iso
+ typeset index=0
+
+ # Start from empty result arrays.
+ unset iso_serial
+ unset iso_release
+ unset iso_cdnum
+ unset iso_disctype
+ unset iso_filename
+ unset iso_pers
+
+ for iso in "$@"; do
+ verbose "Checking possible ISO\n \"$iso\"..."
+
+ if lofi_add "$iso"; then
+ verbose " added as lofi device \"$lofi_dev\""
+ if lofi_mount "$lofi_dev" "/tmp/lxiso"; then
+ if read_discinfo "$mntdir"; then
+ iso_release[$index]="$rd_release"
+ iso_serial[$index]="$rd_serial"
+ iso_cdnum[$index]="$rd_cdnum"
+ iso_disctype[$index]="$rd_disctype"
+
+ # NOTE(review): relies on read_discinfo clearing rd_pers for
+ # discs without a personality -- confirm.
+ [[ -n $rd_pers ]] &&
+ iso_pers[$index]="$rd_pers"
+
+ iso_filename[$index]="$iso"
+ ((index += 1))
+ fi
+ lofi_umount "$lofi_dev"
+ else
+ verbose " not a usable ISO image."
+ log "Unable to mount \"$lofi_dev\" (\"$iso\")"
+ fi
+
+ lofi_del "$lofi_dev"
+ else
+ verbose " not a valid ISO image."
+ fi
+ done
+}
+
+#
+# Prompt the user with the first argument, then make a menu selection
+# from the balance.
+#
+# This is effectively similar to the ksh "select" function, except it
+# outputs to stdout.
+#
+# Shell variables set:
+# choice - set to the menu number selected
+# selection - set to the menu text selected
+#
+pick_one()
+{
+	typeset menu_items
+	typeset menu_index
+	typeset reply
+	typeset item
+
+	typeset prompt="$1"
+	shift
+
+	unset choice
+
+	set -A menu_items "$@"
+
+	until [[ -n $choice ]]; do
+		menu_index=1
+
+		echo "\n$prompt\n"
+
+		for item in "${menu_items[@]}"; do
+			echo "$menu_index) $item"
+			((menu_index += 1))
+		done
+
+		echo "\n$(gettext "Please select") (1-$#): " "\c"
+
+		#
+		# If the read fails (EOF on standard input) give up and
+		# return with "choice" unset.  Simply looping would redisplay
+		# the menu forever, since every later read would fail too.
+		#
+		read reply || return 1
+		echo
+
+		[[ -z $reply ]] && echo && continue
+
+		#
+		# Reprint menu selections if the answer was not a number in
+		# range of the menu items available
+		#
+		[[ $reply != +([0-9]) ]] && continue
+		((reply < 1)) || ((reply > $#)) && continue
+
+		# Menu numbers start at 1, array indices at 0.
+		choice=$reply
+		selection=${menu_items[((choice - 1))]}
+	done
+}
+
+#
+# Select a distribution to install from the arguments passed and set
+# "ndistro" to the value chosen - 1 (so it may be used as an array index.)
+#
+# The routine will automatically return with ndistro set to 0 if only one
+# argument is passed.
+#
+select_distro()
+{
+	unset choice
+	unset ndistro
+
+	if (($# > 1)); then
+		# A choice has to be made, which silent mode cannot do.
+		if [[ -n $silent_mode ]]; then
+			typeset dist
+
+			log "ERROR: multiple distrubutions present in ISO" \
+			    "directory but silent install"
+			log " mode specified. Distros available:"
+			for dist in "$@"; do
+				log " \"$dist\""
+			done
+			return 1
+		fi
+
+		pick_one \
+		    "$(gettext "Which distro would you like to install?")" \
+		    "$@"
+	fi
+
+	if [[ -n $choice ]]; then
+		# The user picked an entry from the menu.
+		screenlog "$install_dist" "$selection"
+		ndistro=$((choice - 1))
+	else
+		#
+		# No choice was made: either only one distro name was passed
+		# in, or an EOF was sent to the distribution selection prompt.
+		# Use the first distro in both cases.
+		#
+		screenlog "$install_dist" "$1"
+		ndistro=0
+	fi
+
+	return 0
+}
+
+#
+# Install a zone from discs or manually lofi-mounted ISOs.
+#
+# Return 0 on success, 1 on failure
+#
+do_disc_install()
+{
+ # $1 is the device (or managed media path) to install from.
+ typeset path="$1"
+
+ typeset eject_final="N"
+ typeset install_status
+
+ #
+ # Get a disc, it doesn't matter which one.
+ #
+ # We don't know which distro this may be yet, so we can't yet
+ # ask for the first disc in the install order.
+ #
+ if ! get_cd "$path"; then
+ if [[ -z $silent_mode ]]; then
+ typeset distro_disc=\
+ $(gettext "a supported Linux distribution disc")
+
+ screenlog "\n$distro_mediafail" "$distro_disc ($path)"
+ fi
+ return 1
+ fi
+
+ # A CD in silent mode is rejected with the silent-mode error message.
+ if [[ -n $silent_mode && "$rd_disctype" = "CD" ]]; then
+ log "$silent_err_msg"
+ return 1
+ fi
+
+ # Identify the distro from the serial number read off the disc.
+ if ! get_disc_distro "$mntdir"; then
+ msg=$(gettext "Unable to find a supported Linux release on")
+ screenlog "$msg"
+ screenlog " $media_spec" "$path"
+ umount "$mntdir" > /dev/null 2>&1
+ return 1
+ fi
+
+ check_mbfree $zoneroot $distro_mb_required || return 1
+ build_rpm_list $install_packages
+
+ echo
+
+ if [[ "$install_media" = "disc" ]]; then
+ #
+ # If we're in interactive mode, ask the user if they want the
+ # disc ejected when the installation is complete.
+ #
+ # Silent mode installs will require the user to manually run
+ # eject(1).
+ #
+ if [[ -n $removable && -z $silent_mode ]]; then
+ typeset ans
+ typeset disc
+ typeset status
+ typeset which=""
+
+ disc="$rd_disctype"
+ [[ "$disc" = "CD" ]] && which=$(gettext "final ")
+
+ #
+ # Ask the user if they want the install disc ejected
+ # when the installation is complete. Any answer but
+ # "n" or "N" is taken to mean yes, eject it.
+ #
+ eject_final="Y"
+ status=$(gettext "WILL")
+
+ screenlog "$eject_final_msg" "$which" "$disc"
+ screenlog " $eject_final_prompt" "$zonename" "[y]/n"
+
+ read ans && [[ "$ans" = [Nn]* ]] && eject_final="N" &&
+ status=$(gettext "will NOT")
+
+ screenlog "\n$eject_final_status\n" "$which" "$disc" \
+ "$status"
+ fi
+
+ screenlog "$install_ndiscs" "$distro_ncds"
+
+ msg=$(gettext "install %s.")
+ screenlog "$msg" "$release_name"
+ else
+ screenlog "$install_nisos" "$distro_ncds"
+
+ msg=$(gettext "DVD) to install %s.")
+ screenlog "$msg" "$release_name"
+ fi
+
+ install_from_media "$path"
+ install_status=$?
+
+ # Eject only after the install has finished, successfully or not.
+ [[ "$eject_final" = "Y" ]] && eject_removable_disc
+
+ return $install_status
+}
+
+#
+# Install a zone using the list of ISO files passed as arguments to this
+# function.
+#
+# Return 0 on success, 1 on failure.
+#
+do_iso_install()
+{
+ typeset install_status
+ typeset iso_path
+ typeset ldev
+
+ msg=$(gettext "Checking for valid Linux distribution ISO images...")
+ screenlog "\n$msg"
+
+ # Read the disc information of every candidate ISO file.
+ scan_isos "$@"
+
+ if [[ -z ${iso_filename[@]} ]]; then
+ msg=$(gettext "No valid ISO images available or mountable.")
+ screenlog "\n$msg"
+ return 1
+ fi
+
+ # Work out which distros the scanned ISOs form a complete set for.
+ validate_iso_distros
+
+ if [[ -z ${release[@]} ]]; then
+ msg=$(gettext "No supported Linux distributions found.")
+ screenlog "\n$msg"
+ return 1
+ fi
+
+ # Sets "ndistro" to the index of the distro to install.
+ select_distro "${select_name[@]}" || return 1
+ unset select_name
+
+ . ${distro_file[$ndistro]} > /dev/null
+ distro_ncds=${#distro_cdorder[@]}
+
+ check_mbfree $zoneroot $distro_mb_required || return 1
+ build_rpm_list $install_packages
+
+ unset lofi_devs
+
+ verboselog ""
+ for iso_path in ${iso_set[$ndistro]}; do
+ if ! lofi_add "$iso_path"; then
+ # Undo the lofi devices added so far before giving up.
+ for ldev in $lofi_devs; do
+ lofi_del "$ldev"
+ done
+ return 1
+ fi
+
+ verboselog "Added \"$iso_path\""
+ verboselog " as \"$lofi_dev\""
+ lofi_devs="$lofi_devs $lofi_dev"
+ done
+
+ release_name="${release[$ndistro]}"
+
+ # Index 0 is padding so that ISO pathnames are indexed from 1.
+ set -A iso_pathnames "zero_pad" ${iso_set[$ndistro]}
+ install_from_media $lofi_devs
+ install_status=$?
+
+ for ldev in $lofi_devs; do
+ lofi_del "$ldev"
+ done
+
+ unset lofi_devs
+ return $install_status
+}
+
+#
+# Clean up on interrupt
+#
+# Halts the zone if the miniroot was booted, unmounts whatever the state
+# variables say is still mounted, deletes any lofi devices recorded in
+# lofi_dev and lofi_devs, removes a mount point recorded in lofi_created and
+# then exits with ZONE_SUBPROC_FATAL. This function does not return.
+#
+trap_cleanup()
+{
+ cd "$cwd"
+
+ msg=$(gettext "Interrupt received, cleaning up partial install...")
+ screenlog "$msg"
+
+ [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+ unset miniroot_booted && unset newroot_mounted
+
+ #
+ # OK, why a sync here? Because certain commands may have written data
+ # to mounted file systems before the interrupt, and given just the right
+ # timing there may be buffered data not yet sent to the disk or the
+ # system may still be writing data to the disk. Either way, the umount
+ # will then fail because the system will still see the mounted
+ # filesystems as busy.
+ #
+ sync
+
+ if [[ -n $newroot_mounted ]]; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ fi
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ #
+ # Normally, this isn't needed but there is a window where mntdir is set
+ # before zone_mounted, so account for that case.
+ #
+ if [[ -n $mntdir ]]; then
+ umount "$mntdir"
+ unset mntdir
+ fi
+
+ # A single lofi device may be outstanding (see scan_isos)...
+ [[ -n $lofi_dev ]] && lofi_del "$lofi_dev"
+
+ # ...as well as the list built up by do_iso_install.
+ if [[ -n $lofi_devs ]]; then
+ typeset ldev
+
+ for ldev in $lofi_devs
+ do
+ lofi_del "$ldev"
+ done
+
+ unset lofi_devs
+ fi
+
+ [[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
+ unset lofi_created
+
+ msg=$(gettext "Installation aborted.")
+ screenlog "$msg"
+ exit $ZONE_SUBPROC_FATAL
+}
+
+#
+# Start of main script
+#
+# The distro definition files live in a "distros" directory next to this script.
+cwd=$(dirname "$0")
+distro_dir="$cwd/distros"
+
+# Make sure none of the state variables below are inherited from the environment.
+unset deferred_saved
+unset distro_path
+unset logfile
+unset msg
+unset newroot_mounted
+unset silent_err_msg
+unset silent_mode
+unset verbose_mode
+unset zone_mounted
+unset zoneroot
+unset zonename
+
+#
+# Exit values used by the script, as #defined in <sys/zone.h>
+#
+# ZONE_SUBPROC_OK
+# ===============
+# Installation was successful
+#
+# ZONE_SUBPROC_USAGE
+# ==================
+# Improper arguments were passed, so print a usage message before exiting
+#
+# ZONE_SUBPROC_NOTCOMPLETE
+# ========================
+# Installation did not complete, but another installation attempt can be
+# made without an uninstall
+#
+# ZONE_SUBPROC_FATAL
+# ==================
+# Installation failed and an uninstall will be required before another
+# install can be attempted
+#
+# NOTE(review): ZONE_SUBPROC_USAGE is not referenced in the remainder of the
+# main script below -- confirm whether it is used elsewhere.
+#
+ZONE_SUBPROC_OK=0
+ZONE_SUBPROC_USAGE=253
+ZONE_SUBPROC_NOTCOMPLETE=254
+ZONE_SUBPROC_FATAL=255
+
+#
+# Process and set up various global option variables:
+#
+# distro_path - Path containing files that make up the distribution
+# (e.g. a directory containing ISO files or a disc device)
+# logfile - Name (if any) of the install log file
+# zoneroot - Root directory for the zone to install
+# zonename - Name of the zone to install
+#
+# -s and -v are mutually exclusive; whichever is given last wins. -x turns on
+# shell command tracing.
+#
+while getopts 'svxd:l:r:z:' opt; do
+ case $opt in
+ s) silent_mode=1; unset verbose_mode;;
+ v) verbose_mode=1; unset silent_mode;;
+ x) set -x;;
+ d) distro_path="$OPTARG";;
+ l) logfile="$OPTARG";;
+ r) zoneroot="$OPTARG";;
+ z) zonename="$OPTARG";;
+ esac
+done
+shift OPTIND-1
+
+# Fall back to /cdrom/cdrom0 when no -d option was given.
+distro_path=${distro_path:=/cdrom/cdrom0}
+
+# Any remaining arguments are kept as the list of packages to install.
+install_packages="$@"
+
+# In silent mode nothing is written to standard output.
+[[ -n $silent_mode ]] && exec 1>/dev/null
+
+# A zone name and a zone root are both mandatory.
+if [[ -z $zonename ]]; then
+ msg=$(gettext "ERROR: Cannot install - no zone name was specified")
+ screenlog "$msg"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+if [[ -z $zoneroot ]]; then
+ msg=$(gettext "ERROR: Cannot install - no zone root directory was")
+ screenlog "$msg"
+
+ msg=$(gettext "specified.")
+ screenlog " $msg"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+# Make sure the specified zone root directory exists
+[[ -d "$zoneroot" ]] || mkdir -m 0700 -p "$zoneroot"
+
+if [[ ! -d "$zoneroot" ]]; then
+ screenlog "$zone_rootfail" "$zoneroot"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+rootdir="$zoneroot/root"
+
+# Make sure the specified zone root subdirectory exists
+[[ -d "$rootdir" ]] || mkdir -p "$rootdir"
+
+if [[ ! -d "$rootdir" ]]; then
+ screenlog "$zone_rootsub" "$rootdir"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+# Install media is mounted on this directory inside the zone root (see get_cd).
+media_mntdir="$rootdir/media"
+
+if [[ -n $logfile ]]; then
+ # If a log file was specified, log information regarding the install
+ log "\nInstallation started `date`"
+ log "Installing from path \"$distro_path\""
+else
+ # Redirect stderr to /dev/null if silent mode is specified.
+ [[ -n $silent_mode ]] && exec 2>/dev/null
+fi
+
+# NOTE(review): distro_path was already defaulted to /cdrom/cdrom0 above, so
+# this assignment appears to have no effect -- confirm before removing.
+distro_path=${distro_path:=$default_distro_path}
+
+# From this point on, call trap_cleanup() on interrupt (^C)
+trap trap_cleanup INT
+
+verbose "Installing zone \"$zonename\" at root \"$zoneroot\""
+release_name="supported Linux distribution"
+
+#
+# Based on the pathname, attempt to determine whether this will be a disc or
+# lofi-based install or one using ISOs.
+#
+# Paths under /cdrom, /media, /dev/dsk and /dev/lofi are installed from via
+# do_disc_install; any other path is treated as a directory of ISO files.
+#
+if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ||
+ "$distro_path" = /dev/dsk/* || "$distro_path" = /dev/lofi/* ]]; then
+ if [[ "$distro_path" = /dev/lofi/* ]]; then
+ silent_err_msg="$silent_nolofi"
+ install_media="lofi"
+ else
+ silent_err_msg="$silent_nodisc"
+ install_media="disc"
+ fi
+
+ # Only paths under /cdrom and /media are flagged as managed removable media.
+ if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ]]; then
+ managed_removable=1
+ else
+ managed_removable=0
+ fi
+
+ log "Installing zone \"$zonename\" at root \"$zoneroot\""
+ verboselog " Attempting ${install_media}-based install via:"
+ verboselog " \"$distro_path\""
+
+ do_disc_install "$distro_path"
+else
+ typeset dir_start
+ typeset dir_file
+
+ # Turn a relative ISO directory path into an absolute one.
+ dir_start=$(dirname "$distro_path" | cut -c 1)
+
+ [[ "$dir_start" != "/" ]] && distro_path="${PWD:=$(pwd)}/$distro_path"
+
+ if [[ ! -d "$distro_path" ]]; then
+ screenlog "$no_distropath" "$distro_path"
+ echo
+ exit $ZONE_SUBPROC_NOTCOMPLETE
+ fi
+
+ log "Installing zone \"$zonename\" at root \"$zoneroot\""
+ verboselog " Attempting ISO-based install from directory:"
+ verboselog " \"$distro_path\""
+
+ unset iso_files
+
+ for dir_file in $distro_path/*; do
+ #
+ # Skip this file if it's not a regular file or isn't readable
+ #
+ [[ ! -f $dir_file || ! -r $dir_file ]] && continue
+
+ #
+ # If it's an hsfs file, it's an ISO, so add it to the possible
+ # distro ISO list
+ #
+ filetype=$(LC_ALL=C fstyp $dir_file 2>/dev/null) &&
+ [[ "$filetype" = "hsfs" ]] &&
+ iso_files="$iso_files $dir_file"
+ done
+
+ install_media="ISO"
+ do_iso_install $iso_files
+fi
+
+# $? here is the exit status of do_disc_install or do_iso_install above.
+if [[ $? -ne 0 ]]; then
+ cd "$cwd"
+
+ [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+ unset miniroot_booted && unset newroot_mounted
+
+ if [[ -n $zone_mounted ]]; then
+ umount "$zone_mounted"
+ unset zone_mounted
+ fi
+
+ if [[ -n $newroot_mounted ]]; then
+ umount_list $newroot_mounted
+ unset newroot_mounted
+ fi
+
+ screenlog "\n$install_failed\n" "$release_name" "$zonename" "`date`"
+
+ msg=$(gettext "Cleaning up after failed install...")
+ screenlog "$msg"
+
+ #
+ # The extra checks are some basic paranoia due to the potentially
+ # dangerous nature of these commands but are not intended to catch all
+ # malicious cases.
+ #
+ [[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
+
+ # A failed install always exits with the "uninstall required" status.
+ exit $ZONE_SUBPROC_FATAL
+fi
+
+screenlog "$install_done" "$release_name" "$zonename" "`date`"
+
+exit $ZONE_SUBPROC_OK
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
new file mode 100644
index 0000000000..fe2a7ec047
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
@@ -0,0 +1,686 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# This script contains various routines used to post-process a zone for use
+# with BrandZ after it has been installed from RPM media or a tar image.
+#
+# Briefly, there are three main jobs we need to do:
+#
+# 1) Create any needed directories and symlinks BrandZ needs but that the
+# Linux install may not create
+#
+# 2) Modify rc scripts to shut off services that don't apply to a zone
+# or that wish to access hardware directly
+#
+# 3) Modify various Linux system files for use within a zone environment
+#
+
+#
+# Restrict executables to /bin and /usr/bin
+#
+PATH=/bin:/usr/bin
+export PATH
+
+#
+# log <text>...
+#
+# Write the arguments to stderr.  The caller of this script is expected to
+# have redirected stderr to the install logfile already, so no file is
+# opened here.
+#
+log()
+{
+	echo "$@" 1>&2
+}
+
+#
+# Setup i18n output
+#
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+cmd_failed=$(gettext "%s failed! Aborting installation...")
+cmd2_failed=$(gettext "%s of '%s' to '%s' failed!")
+create_failed=$(gettext "Could not create new file '%s'!")
+disable_failed=$(gettext "Attempt to disable entries in '%s' failed!")
+install_aborted=$(gettext "Aborting installation...")
+install_noroot=$(gettext "Installation root directory '%s' does not exist.")
+ln_fail=$(gettext "Unable to symlink '%s' to '%s'!")
+mkdir_fail=$(gettext "Unable to create the directory '%s'")
+mod_failed=$(gettext -n "Attempt to modify entries in '%s' failed!")
+
+usage=$(gettext "usage: %s <install_root> [mini]")
+
+#
+# i18n_echo <format> [args]...
+#
+# Print an internationalized printf(1)-style format string, with any
+# arguments substituted, followed by a newline.
+#
+i18n_echo()
+{
+	typeset template="$1"
+
+	shift
+	printf "${template}\n" "$@"
+}
+
+#
+# fullpath <path>
+#
+# Print <path> as an absolute path.  A path that already begins with "/" is
+# printed unchanged; anything else is prefixed with the current working
+# directory (taken from $PWD, falling back to pwd(1) if $PWD is unset or
+# empty).
+#
+# Every expansion is quoted so that a path containing whitespace or shell
+# pattern characters is passed through intact instead of being word-split
+# or globbed, and the leading "/" test is done in the shell rather than by
+# forking egrep.
+#
+fullpath()
+{
+	typeset path="$1"
+
+	[[ "$path" != /* ]] && path="${PWD:=$(pwd)}/$path"
+	echo "$path"
+}
+
+#
+# makedir <dir> [mode]
+#
+# Make sure <dir> exists, creating missing parent directories as needed.
+# When <mode> is given it is passed to mkdir(1) with -m.  A directory that
+# already exists is left untouched.  If the directory cannot be created the
+# failure is logged and reported and the whole script exits with status 1.
+#
+makedir()
+{
+	typeset target=$(fullpath "$1")
+	typeset mode_opt=""
+
+	if [[ $# -eq 2 ]]; then
+		mode_opt="-m $2"
+	fi
+
+	# Nothing to do when the directory is already present.
+	if [[ -d "$target" ]]; then
+		return
+	fi
+
+	# $mode_opt is left unquoted on purpose: it is either empty or
+	# the two words "-m <mode>".
+	mkdir $mode_opt -p "$target" && return
+
+	log "Unable to create the directory \"$target\"!"
+	i18n_echo "$mkdir_fail" "$target"
+	echo $(gettext "Aborting installation...")
+	exit 1
+}
+
+#
+# symlink <source> <link>
+#
+# Create <link> as a symbolic link pointing at <source>.  Anything already
+# present at <link>, including a dangling symlink, is removed first.  If
+# the link cannot be created the failure is logged and reported and the
+# script exits with status 1.
+#
+symlink()
+{
+	typeset link_target="$1"
+	typeset link_name=$(fullpath "$2")
+
+	if [[ -e "$link_name" || -h "$link_name" ]]; then
+		rm -f "$link_name"
+	fi
+
+	ln -s "$link_target" "$link_name" && return
+
+	log "Unable to symlink \"$link_target\" to \"$link_name\"!"
+	i18n_echo "$ln_fail" "$link_target" "$link_name"
+	echo $(gettext "Aborting installation...")
+	exit 1
+}
+
+#
+# install_ln <source> <target>
+#
+# Install <target> as a symbolic link to <source>.  An existing <target> is
+# first moved aside to "<target>.$tag"; a failure of that move (for example
+# because there was nothing to move) is ignored.
+#
+# Returns 0 on success, 1 if the link could not be created.
+#
+install_ln()
+{
+	typeset link_src="$1"
+	typeset link_dst=$(fullpath "$2")
+
+	log " Installing \"$link_dst\""
+
+	# Best effort only: there may be no previous file to preserve.
+	mv -f "$link_dst" "$link_dst.$tag" 2>/dev/null
+
+	if ln -s "$link_src" "$link_dst"; then
+		return 0
+	fi
+
+	log ""
+	log "Attempt to install $link_dst FAILED."
+	return 1
+}
+
+
+#
+# enable_nfs_services
+#
+# Enable NFS servers and the NFS lock daemon for a particular zone.
+#
+# Uses the script-level $install_root and $tag, and relative paths that
+# assume the current directory is the root of the zone being modified.
+#
+# Returns 1 if the rpc.lockd or rpc.statd link cannot be installed, so that
+# a caller written as "enable_nfs_services || ..." is able to report the
+# failure.  Otherwise returns the status of the final pipeline that rewrites
+# etc/init.d/nfslock.
+#
+enable_nfs_services()
+{
+	log "Non-miniroot install; enabing NFS servers and NFS lock daemon"
+
+	#
+	# Setup files required for NFS:
+	#
+	#	/native/etc/netconfig
+	#	/native/etc/default/nfs
+	#
+	# These two files are treated as read-only in lx branded zones.
+	# To enforce this restriction we will read-only lofs mount them
+	# into the zone from the global zone.  For these lofs mounts to
+	# work we'll need to create empty directories now that will serve
+	# as mount points later.
+	#
+	#	/sbin/rpc.statd
+	#	/sbin/rpc.lockd
+	#
+	# These files are symlinks to scripts supplied by the lx brand
+	# that will start up the solaris nfs daemons.
+	#
+	if { ! makedir native/etc/netconfig ||
+	    ! makedir native/etc/default/nfs ; }; then
+		log "Aborting NFS setup..."
+		log ""
+		return 1
+	fi
+
+	if { ! install_ln ../native/usr/lib/brand/lx/lx_lockd sbin/rpc.lockd ||
+	    ! install_ln ../native/usr/lib/brand/lx/lx_statd \
+	    sbin/rpc.statd ; }; then
+		log "Aborting NFS setup..."
+		log ""
+		return 1
+	fi
+
+	#
+	# update /etc/services for NFS
+	#
+	log ""
+	log "Adding lockd entry to \"$install_root/etc/services\"..."
+
+	cp -p "$install_root/etc/services" "$install_root/etc/services.$tag"
+
+	#
+	# Brackets in the sed script below contain a space followed by a tab
+	#
+	cat "$install_root/etc/services.$tag" |
+	    sed 's:\(111\/..p[ 	][ 	]*\):\1rpcbind :' |
+	    cat > "$install_root/etc/services"
+
+	cat >> "$install_root/etc/services" <<-EOF
+	lockd 4045/udp # NFS lock daemon/manager
+	lockd 4045/tcp # NFS lock daemon/manager
+	EOF
+
+	#
+	# Modify /etc/init.d/nfslock to enable the USERLAND_LOCKD option and to
+	# find some commands in alternate locations.
+	#
+	log ""
+	log "Modifying \"$install_root/etc/init.d/nfslock\"..."
+	cp -p etc/init.d/nfslock etc/init.d/nfslock.$tag
+	cat etc/init.d/nfslock.$tag |
+	    sed '
+		s/USERLAND_LOCKD=$/USERLAND_LOCKD="yes"/
+		s/killproc rpc.statd/killproc statd/
+		s/status rpc.statd/status statd/
+		s/pidof rpc.statd/pidof statd/
+	    ' |
+	    cat > etc/init.d/nfslock
+}
+
+#
+# The main script starts here.
+#
+# The syntax is:
+#
+# lx_init_zone <rootdir> [mini]
+#
+# Where:
+# <rootdir> is the root of the zone directory to be modified
+#
+# [mini] is an optional second argument that signifies whether this is
+# to be a miniroot install; if it is, NFS services are not enabled
+# in the processed zone
+#
+unset is_miniroot
+unset install_root
+
+# Check the argument count before any argument is used.
+if (($# < 1 || $# > 2)); then
+	i18n_echo "$usage" "$0"
+	exit 1
+fi
+
+install_root="$1"
+
+# The presence of a second argument (whatever its value) selects miniroot mode.
+(($# == 2)) && is_miniroot=1
+
+# Suffix appended to every file this script moves aside.
+tag="lxsave_$(date +%m.%d.%Y@%T)"
+
+if [[ ! -d "$install_root" ]]; then
+	i18n_echo "$install_noroot" "$install_root"
+	echo $(gettext "** Installation aborted **")
+	exit 1
+fi
+
+#
+# Everything below uses paths relative to the install root, so carrying on
+# after a failed cd would modify files under whatever directory the caller
+# happened to be in.  Treat a failed cd as fatal.
+#
+if ! cd "$install_root"; then
+	log "Unable to change directory to \"$install_root\"!"
+	i18n_echo "$cmd_failed" "cd"
+	exit 1
+fi
+
+log ""
+log "Initial lx_brand environment modification started `date`"
+log "Making needed directories in \"$install_root\"."
+echo $(gettext "Setting up the initial lx brand environment.")
+
+#
+# Directories under /native that are needed to boot an lx branded zone.
+#
+for native_dir in dev etc/default etc/svc/volatile lib proc; do
+	makedir "native/$native_dir"
+done
+makedir native/tmp 1777
+makedir native/usr
+makedir native/var
+
+#
+# Other directories needed for the lx brand, created without an explicit
+# mode.
+#
+for brand_dir in mnt opt usr/local/bin usr/local/include usr/local/lib \
+    usr/local/sbin usr/local/share usr/local/src; do
+	makedir "$brand_dir"
+done
+
+#
+# Top-level directories for which a specific mode is requested when they
+# have to be created.
+#
+makedir dev 0755
+makedir tmp 1777
+makedir proc 0555
+makedir boot 0755
+
+#
+# zlogin requires that these utilities live in places other than their
+# Linux defaults, so create appropriate links for them here.
+#
+# XX - The need for these links may go away in the future if zlogin is
+# appropriately modified
+#
+symlink /bin/sh sbin/sh
+symlink /bin/su usr/bin/su
+symlink /native/usr/lib/ld.so.1 usr/lib/ld.so.1
+
+#
+# Find the versioned PAM libraries by letting the shell expand a glob
+# relative to the install root (the current directory).
+#
+# NOTE(review): if a pattern matches nothing, echo prints the pattern itself
+# and that literal text becomes the link target; if it matches several files
+# the target is the space-separated list.  Confirm neither case can occur
+# for the supported distributions.
+#
+libpam_so="$(echo lib/libpam.so.0.*)"
+libpam_misc="$(echo lib/libpam_misc.so.0.*)"
+libpamc_so="$(echo lib/libpamc.so.0.*)"
+
+# The unversioned ".so.0" names are pointed at the absolute in-zone paths.
+symlink "/$libpam_so" lib/libpam.so.0
+symlink "/$libpam_misc" lib/libpam_misc.so.0
+symlink "/$libpamc_so" lib/libpamc.so.0
+
+log ""
+log "Modifying system configuration in \"$install_root\""
+
+#
+# Build var/ld/ld.config with crle(1) so that /native/lib and
+# /native/usr/lib are searched for our Solaris libraries.  A crle failure
+# is fatal.
+#
+log "Creating \"$install_root/var/ld/ld.config\"..."
+
+makedir var/ld
+
+crle -c var/ld/ld.config -l /native/lib:/native/usr/lib \
+    -s /native/lib/secure:/native/usr/lib/secure || {
+	log "\tCreation of \"$install_root/var/ld/ld.config\" failed!"
+	i18n_echo "$cmd_failed" "crle"
+	exit 1
+}
+
+#
+# Replace etc/fstab with a two-entry table (/ and /proc).
+#
+log ""
+log "Modifying \"$install_root/etc/fstab\"..."
+
+# Keep any existing fstab as etc/fstab.$tag; a missing file is not an error.
+mv -f etc/fstab etc/fstab.$tag 2>/dev/null
+
+cat > etc/fstab <<- EOF
+ none / ufs defaults 1 1
+ none /proc proc defaults 0 0
+EOF
+
+# $? is the exit status of the cat that wrote the here-document above.
+if [[ $? -ne 0 ]]; then
+ log "Could not create new \"$install_root/etc/fstab\"!"
+ i18n_echo "$create_failed" "$install_root/etc/fstab"
+ exit 1
+fi
+
+#
+# The default /etc/inittab spawns mingetty on each of the virtual consoles
+# as well as xdm on the X console.  Since we don't have virtual consoles nor
+# an X console, spawn a single mingetty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/inittab; then
+	log "Modifying: \"$install_root/etc/inittab\"..."
+
+	tmpfile=/tmp/inittab.$$
+
+	#
+	# Comment out the entries whose id is a single digit 1-6 and change a
+	# default runlevel of 5 to 3, writing the result to a scratch file.
+	#
+	sed -e 's/^[1-6]:/# Disabled by lx brand: &/' \
+	    -e 's/^id:5:initdefault:/id:3:initdefault: # Modified by lx brand: &/' \
+	    etc/inittab > $tmpfile
+
+	#
+	# Don't bother with further alterations if the sed above failed...
+	#
+	if [[ $? -ne 0 ]]; then
+		log "Attempt to disable entries in" \
+		    "\"$install_root/etc/inittab\" failed!"
+		i18n_echo "$disable_failed" "$install_root/etc/inittab"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+
+	if egrep -s "console login for lx brand" etc/inittab; then
+		#
+		# The console entry is already present, so the existing
+		# inittab is kept as it is; just discard the scratch copy
+		# instead of leaving it behind in /tmp.
+		#
+		rm -f $tmpfile
+	else
+		cat >> $tmpfile <<- EOF
+
+		#
+		# console login for lx brand
+		#
+		1:2345:respawn:/sbin/mingetty console
+		EOF
+
+		#
+		# Only install the new inittab if the append
+		# above succeeded.
+		#
+		if [[ $? -eq 0 ]]; then
+			#
+			# Attempt to save off the original inittab
+			# before moving over the modified version.
+			#
+			mv -f etc/inittab etc/inittab.$tag 2>/dev/null
+
+			if ! mv -f $tmpfile etc/inittab; then
+				log "mv of \"$tmpfile\" to" \
+				    "\"$install_root/etc/inittab\"" \
+				    "failed!"
+				i18n_echo "$cmd2_failed" "mv" \
+				    "$tmpfile" \
+				    "$install_root/etc/inittab"
+				i18n_echo "$install_aborted"
+				exit 1
+			fi
+
+			chmod 644 etc/inittab
+		fi
+	fi
+fi
+
+#
+# Create a hosts file containing only the localhost entry, but only when
+# the install root does not already have one; an existing file is left
+# untouched.  The result of the write is not checked.
+#
+if [[ ! -e "$install_root/etc/hosts" ]]; then
+ log ""
+ log "Creating: \"$install_root/etc/hosts\"..."
+
+ cat > "$install_root/etc/hosts" <<-_EOF_
+ 127.0.0.1 localhost
+ _EOF_
+fi
+
+#
+# User must configure various brand-specific items to enable networking, so
+# boot the system non-networked.
+#
+log ""
+log "Modifying: \"$install_root/etc/sysconfig/network\"..."
+
+# Keep any existing file as network.$tag; a missing file is not an error.
+mv -f etc/sysconfig/network etc/sysconfig/network.$tag 2>/dev/null
+
+# Everything up to the EOF line, "#" lines included, is written to the file.
+cat > etc/sysconfig/network <<- EOF
+ NETWORKING="no"
+ #
+ # To enable networking, change the "no" above to "yes" and
+ # uncomment and fill in the following parameters.
+ #
+ # If you are specifying a hostname by name rather than by IP address,
+ # be sure the system can resolve the name properly via the use of a
+ # name service and/or the proper name files, as specified by
+ # nsswitch.conf. See nsswitch.conf(5) for further details.
+ #
+ # HOSTNAME=your_hostname_here
+ #
+EOF
+
+# $? is the exit status of the cat that wrote the here-document above.
+if [[ $? -ne 0 ]]; then
+ log "Could not create new \"$install_root/etc/sysconfig/network\"!"
+ i18n_echo "$create_failed" "$install_root/etc/sysconfig/network"
+ i18n_echo "$install_aborted"
+ exit 1
+fi
+
+if [[ -a etc/sysconfig/syslog ]]; then
+	#
+	# By default, syslogd will attempt to create a socket in /dev/log, but
+	# /dev is not writable.  Instead, modify /etc/sysconfig/syslog to
+	# tell it to use /var/run/syslog instead.
+	#
+	# NOTE(review): the original comment also says /dev/log is made a
+	# symlink to /var/run/syslog; nothing in this block creates that link.
+	#
+	log ""
+	log "Modifying: \"$install_root/etc/sysconfig/syslog\"..."
+
+	tmpfile=/tmp/lx_sc.syslog.$$
+
+	sed 's@\(SYSLOGD_OPTIONS="-m 0\)"@\1 -p /var/run/syslog"@' \
+	    etc/sysconfig/syslog > $tmpfile
+
+	#
+	# Only install the new sysconfig/syslog if the edit above succeeded.
+	#
+	if [[ $? -ne 0 ]]; then
+		log "Attempt to modify entries in" \
+		    "\"$install_root/etc/sysconfig/syslog\" failed!"
+		i18n_echo "$mod_failed" "$install_root/etc/sysconfig/syslog"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+
+	#
+	# Attempt to save off the original syslog before moving over
+	# the modified version.
+	#
+	mv -f etc/sysconfig/syslog etc/sysconfig/syslog.$tag 2>/dev/null
+
+	if ! mv -f $tmpfile etc/sysconfig/syslog; then
+		log "mv of \"$tmpfile\" to" \
+		    "\"$install_root/etc/sysconfig/syslog\" failed!"
+		i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+		    "$install_root/etc/sysconfig/syslog"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+
+	#
+	# A chmod failure still aborts the installation, as it did before,
+	# but is now reported as a chmod failure rather than as a failure
+	# to create the file.
+	#
+	if ! chmod 755 etc/sysconfig/syslog; then
+		log "chmod of \"$install_root/etc/sysconfig/syslog\" failed!"
+		i18n_echo "$cmd_failed" "chmod"
+		exit 1
+	fi
+fi
+
+#
+# Move two sysconfig files out of the way (failures are ignored, since the
+# files may not exist):
+#
+# keyboard - /etc/rc.d/init.d/keytable tries to load a physical keyboard
+#	map, which won't work in a zone.  Without this file it won't try
+#	at all.
+#
+# mouse - /etc/rc.d/init.d/gpm tries to configure the console mouse for
+#	cut-and-paste text operations, which we don't support.  Removing
+#	this file disables the mouse configuration.
+#
+for hw_config in keyboard mouse; do
+	mv -f etc/sysconfig/$hw_config etc/sysconfig/$hw_config.$tag \
+	    2>/dev/null
+done
+
+#
+# The services listed below try to start daemons or configure the system in
+# ways that are incompatible with zones.  For each one, move its init script
+# aside and delete its rc[0-6].d start/kill entries so it is not run at boot
+# time.
+#
+log ""
+log "Modifying \"$install_root/etc/rc.d/init.d\" to disable any"
+log " services not supported by BrandZ:"
+unsupported_services="
+	kudzu microcode_ctl network random pcmcia isdn iptables ip6tables
+	iscsi psacct gpm irda smartd rawdevices netdump hpoj mdmonitor
+	mdmpd irqbalance
+"
+
+for service in $unsupported_services; do
+	init_script="etc/rc.d/init.d/$service"
+
+	if [[ -a "$init_script" ]]; then
+
+		if mv -f "$init_script" "$init_script.$tag"; then
+			log " + Moved script \"$init_script\" to"
+			log " \"$init_script.$tag\""
+		fi
+	fi
+
+	rc_pattern="etc/rc.d/rc[0-6].d/[SK]+([0-9])$service"
+	rc_files="$(echo etc/rc.d/rc[0-6].d/[SK]+([0-9])$service)"
+
+	# An unexpanded pattern means no rc entry exists for this service.
+	[[ "$rc_files" = "$rc_pattern" ]] && continue
+
+	for rc_entry in $rc_files; do
+		# The kind must be determined before the entry is removed.
+		if [[ -h "$rc_entry" ]]; then
+			entry_kind="symbolic link"
+		else
+			entry_kind="script"
+		fi
+
+		rm -f "$rc_entry" &&
+		    log " + Removed $entry_kind \"$rc_entry\""
+	done
+done
+
+#
+# Much of what the standard halt and reboot scripts do has no business
+# running in a zone.  Fortunately, the part to skip is one contiguous
+# chunk: from the "# Save mixer" line up to (not including) the line that
+# mentions halt.local.  Each non-empty line in that range is commented out;
+# empty lines are dropped from the whole file.
+#
+# The file is left alone if it looks like it was already modified.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/init.d/halt; then
+	halt_tmp=/tmp/halt.$$
+
+	log ""
+	log "Modifying \"$install_root/etc/rc.d/init.d/halt\" for operation"
+	log " within a zone..."
+
+	if awk 'BEGIN {skip = ""}
+	    /^# Save mixer/ {skip = "# Disabled by lx brand: "}
+	    /halt.local/ {skip = ""}
+	    /./ {print skip $0}' etc/rc.d/init.d/halt > $halt_tmp; then
+		# Keep the original as halt.$tag, then install the new copy.
+		mv -f etc/rc.d/init.d/halt etc/rc.d/init.d/halt.$tag 2>/dev/null
+		mv -f $halt_tmp etc/rc.d/init.d/halt
+		chmod 755 etc/rc.d/init.d/halt
+	else
+		# Not fatal: the rest of the zone setup carries on.
+		log "Attempt to modify \"$install_root/etc/rc.d/init.d/halt\"" \
+		    "FAILED"
+		log "Continuing with balance of zone setup..."
+	fi
+fi
+
+#
+# Fix up /etc/rc.d/rc.sysinit:
+#
+# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support.
+#    Since the hardware clock cannot be set from within a zone, we comment out
+#    the line.
+#
+# 2) Disable dmesg commands, since we don't implement klogctl
+#
+# 3) Disable initlog and the mount of /dev/pts
+#
+# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't
+#    work from within a zone.
+#
+# 5) Don't try to check the root filesystem (/) as there is no associated
+#    physical device, and any attempt to run fsck will fail.
+#
+# The sed script below also rewrites the HOSTTYPE= assignment to "s390"
+# (NOTE(review): the reason for this spoof is not documented here).
+#
+# Don't modify the rc.sysinit file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/rc.sysinit; then
+	log ""
+	log "Modifying: \"$install_root/etc/rc.d/rc.sysinit\"..."
+	log ""
+
+	tmpfile=/tmp/lx_rc.sysinit.$$
+
+	sed 's@^/sbin/hwclock@# Disabled by lx brand: &@
+	    s@^HOSTTYPE=@HOSTTYPE=\"s390\" # Spoofed for lx brand: &@
+	    s@/bin/dmesg -n@: # Disabled by lx brand: &@
+	    s@^dmesg -s@# Disabled by lx brand: &@
+	    s@initlog -c \"fsck@: # Disabled by lx brand: &@
+	    s@^.*mount .* /dev/pts$@# Disabled by lx brand: &@' \
+	    etc/rc.d/rc.sysinit > $tmpfile
+
+	#
+	# Only install the new rc.sysinit if the edit above succeeded.
+	#
+	if [[ $? -ne 0 ]]; then
+		log "Attempt to modify entries in" \
+		    "\"$install_root/etc/rc.d/rc.sysinit\" failed!"
+		i18n_echo "$mod_failed" "$install_root/etc/rc.d/rc.sysinit"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+
+	#
+	# Attempt to save off the original rc.sysinit
+	# before moving over the modified version.
+	#
+	mv -f etc/rc.d/rc.sysinit etc/rc.d/rc.sysinit.$tag 2>/dev/null
+
+	if ! mv -f $tmpfile etc/rc.d/rc.sysinit; then
+		log "mv of \"$tmpfile\" to" \
+		    "\"$install_root/etc/rc.d/rc.sysinit\" failed!"
+		i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+		    "$install_root/etc/rc.d/rc.sysinit"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+
+	chmod 755 etc/rc.d/rc.sysinit
+fi
+
+#
+# NFS services are only set up for a full (non-miniroot) install.  A failure
+# here is logged but does not fail the script.
+#
+if [[ -z $is_miniroot ]]; then
+	enable_nfs_services || log "NFS services were not properly enabled."
+fi
+
+log ""
+log "System configuration modifications complete `date`"
+log ""
+
+#
+# Pass the user-visible completion message through gettext, as is done for
+# the other messages this script prints to stdout.
+#
+echo $(gettext "System configuration modifications complete.")
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/lx_install.ksh b/usr/src/lib/brand/lx/zone/lx_install.ksh
new file mode 100644
index 0000000000..d2ad32fc42
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_install.ksh
@@ -0,0 +1,579 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Restrict executables to /bin, /usr/bin, /usr/sbin and /usr/sfw/bin
+PATH=/bin:/usr/bin:/usr/sbin:/usr/sfw/bin
+
+export PATH
+
+# Setup i18n output
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+#
+# Log passed arguments to file descriptor 2
+#
+# Nothing is written until $logfile has been set.  While it is unset the
+# function returns non-zero (the status of the failed test), so its return
+# value should not be used as an error indication.
+#
+log()
+{
+ [[ -n $logfile ]] && echo "$@" >&2
+}
+
+#
+# Send the provided printf()-style arguments to the screen and to the
+# logfile.
+#
+# $1 is the format string and the remaining arguments are its operands; a
+# newline is appended.  The copy for the logfile goes to file descriptor 2
+# and is only written once $logfile has been set.
+#
+screenlog()
+{
+ typeset fmt="$1"
+ shift
+
+ printf "$fmt\n" "$@"
+ [[ -n $logfile ]] && printf "$fmt\n" "$@" >&2
+}
+
+#
+# Print and log provided text if the shell variable "verbose_mode" is set
+#
+# The text goes to stdout, and additionally to file descriptor 2 when
+# $logfile has also been set.
+#
+verbose()
+{
+ [[ -n $verbose_mode ]] && echo "$@"
+ [[ -n $logfile ]] && [[ -n $verbose_mode ]] && echo "$@" >&2
+}
+
+unsupported_cpu=\
+$(gettext "ERROR: Cannot install branded zone: processor must be %s-compatible")
+
+cmd_not_found=$(gettext "Required command '%s' cannot be found!")
+cmd_not_exec=$(gettext "Required command '%s' not executable!")
+zone_initfail=$(gettext "Attempt to initialize zone '%s' FAILED.")
+path_abs=$(gettext "Pathname specified to -d '%s' must be absolute.")
+
+cmd_h=$(gettext "%s -z <zone name> %s -h")
+cmd_full=\
+$(gettext "%s -z <zone name> %s [-v | -s] [-d <dir>|<device>] [<cluster> ... ]")
+
+both_modes=$(gettext "%s: error: cannot select both silent and verbose modes")
+
+not_found=$(gettext "%s: error: file or directory not found.")
+
+wrong_type=\
+$(gettext "%s: error: must be a gzip, bzip2, .Z or uncompressed tar archive.")
+
+not_readable=$(gettext "Cannot read file '%s'")
+
+no_install=$(gettext "Could not create install directory '%s'")
+no_log=$(gettext "Could not create log directory '%s'")
+no_logfile=$(gettext "Could not create log file '%s'")
+
+root_full=$(gettext "Zonepath root %s exists and contains data; remove or move aside prior to install.")
+
+install_zone=$(gettext "Installing zone '%s' at root directory '%s'")
+install_from=$(gettext "from archive '%s'")
+
+install_fail=$(gettext "Installation of zone '%s' FAILED.")
+see_log=$(gettext "See the log file:\n '%s'\nfor details.")
+
+install_abort=$(gettext "Installation of zone '%s' aborted.")
+install_good=$(gettext "Installation of zone '%s' completed successfully.")
+
+#
+# check_cmd <path>...
+#
+# Verify that every path given is a regular file and is executable.  The
+# first one that is not is reported, and the script exits with
+# ZONE_SUBPROC_NOTCOMPLETE.
+#
+check_cmd()
+{
+	for cmd in "$@"; do
+		if [[ -f $cmd ]]; then
+			# Present: carry on with the next one if executable.
+			[[ -x $cmd ]] && continue
+			screenlog "$cmd_not_exec" "$cmd"
+		else
+			screenlog "$cmd_not_found" "$cmd"
+		fi
+
+		screenlog "$install_abort" "$zonename"
+		exit $ZONE_SUBPROC_NOTCOMPLETE
+	done
+}
+
+#
+# init_tarzone <rootdir>
+#
+# Post-process a tarball-installed zone for use by BrandZ by running
+# lx_init_zone from the brand directory on <rootdir>.
+#
+# Returns 0 on success; on failure reports the error and returns 1.
+#
+init_tarzone()
+{
+	typeset zone_rootdir="$1"
+
+	$branddir/lx_init_zone "$zone_rootdir" && return 0
+
+	screenlog "$zone_initfail" "$zonename"
+	return 1
+}
+
+#
+# Interrupt handler: report the cancellation and exit with whatever
+# $int_code holds at the time the interrupt arrives.
+#
+trap_cleanup()
+{
+	screenlog "$(gettext "Installation cancelled due to interrupt.")"
+	exit $int_code
+}
+
+#
+# Output the usage message.
+#
+# This is done this way due to limitations in the way gettext strings are
+# extracted from shell scripts and processed. Use of this somewhat awkward
+# syntax allows us to produce longer lines of text than otherwise would be
+# possible without wrapping lines across more than one line of code.
+#
+# This function does not return: it sets int_code to ZONE_SUBPROC_USAGE and
+# exits with that status after printing.
+#
+usage()
+{
+ int_code=$ZONE_SUBPROC_USAGE
+
+ # Synopsis lines, built from the pre-translated $cmd_h and $cmd_full.
+ echo $(gettext "Usage:")
+ printf " $cmd_h\n" "zoneadm" "install"
+ printf " $cmd_full\n" "zoneadm" "install"
+
+ echo
+
+ # Each paragraph below is assembled from several gettext fragments and
+ # re-wrapped by fmt(1).
+ echo $(gettext "The installer will attempt to use the default system") \
+ $(gettext "removable disc device if <archive dir> is not") \
+ $(gettext "specified.") | fmt -80
+
+ echo
+
+ echo $(gettext "<cluster> specifies which package cluster you wish") \
+ $(gettext "to install.") | fmt -80
+
+ echo
+ echo $(gettext "The 'desktop' cluster will be installed by default.")
+ echo
+ echo $(gettext "The available clusters are:")
+ echo " + core"
+ echo " + server"
+ echo " + desktop"
+ echo " + development"
+ echo " + all"
+ echo
+
+ echo $(gettext "Each cluster includes all of the clusters preceding") \
+ $(gettext "it, so the 'server' cluster includes the 'core'") \
+ $(gettext "cluster, the 'desktop' cluster includes the 'core'") \
+ $(gettext "and 'server' clusters, and so on.") | fmt -80
+
+ echo
+ echo $(gettext "Examples")
+ echo "========"
+
+ echo $(gettext "Example 1: Install a base Linux system from CDs or a") \
+ $(gettext "DVD using the system default removable disc device:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install"
+ echo
+
+ echo $(gettext "Example 2: Install the 'server' cluster from CDs or") \
+ $(gettext "a DVD via an alternative removable disc device:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /cdrom/cdrom1 server"
+ echo
+
+ echo $(gettext "Example 3: Install the desktop Linux environment") \
+ $(gettext "from an ISO image made available as '/dev/lofi/1' by") \
+ $(gettext "use of lofiadm(1M):") | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /dev/lofi/1 desktop"
+ echo
+
+ echo $(gettext "Example 4: Install the entire Linux environment from") \
+ $(gettext "ISO images located in the directory") \
+ "'/export/centos_3.8/isos':" | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /export/centos_3.8/isos all"
+ echo
+
+ echo $(gettext "Example 5: Install from a compressed tar archive of") \
+ $(gettext "an existing Linux installation (a tar ball) with") \
+ $(gettext "verbose output regarding the progress of the") \
+ $(gettext "installation:") | fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -v -d /tmp/linux_full.tar.gz"
+ echo
+
+ echo $(gettext "Example 6: Install from a compressed tar archive of") \
+ $(gettext "an existing Linux installation (a tar ball) with NO") \
+ $(gettext "output regarding the progress of the installation") \
+ $(gettext "(silent mode.)") | fmt -80
+
+ echo
+
+ echo $(gettext "NOTE: Silent mode is only recommended for use by") \
+ $(gettext "shell scripts and other non-interactive programs:") |
+ fmt -80
+
+ echo
+ echo " # zoneadm -z myzone install -d /tmp/linux_full.tar.gz -s"
+ echo
+
+ exit $int_code
+}
+
+#
+# The main body of the script starts here.
+#
+# This script should never be called directly by a user but rather should
+# only be called by zoneadm to install a BrandZ Linux zone.
+#
+
+#
+# Exit values used by the script, as #defined in <sys/zone.h>
+#
+# ZONE_SUBPROC_OK
+# ===============
+# Installation was successful
+#
+# ZONE_SUBPROC_USAGE
+# ==================
+# Improper arguments were passed, so print a usage message before exiting
+#
+# ZONE_SUBPROC_NOTCOMPLETE
+# ========================
+# Installation did not complete, but another installation attempt can be
+# made without an uninstall
+#
+# ZONE_SUBPROC_FATAL
+# ==================
+# Installation failed and an uninstall will be required before another
+# install can be attempted
+#
+ZONE_SUBPROC_OK=0
+ZONE_SUBPROC_USAGE=253
+ZONE_SUBPROC_NOTCOMPLETE=254
+ZONE_SUBPROC_FATAL=255
+
+#
+# An unspecified exit or interrupt should exit with ZONE_SUBPROC_NOTCOMPLETE,
+# meaning a user will not need to do an uninstall before attempting another
+# install.
+#
+int_code=$ZONE_SUBPROC_NOTCOMPLETE
+
+trap trap_cleanup INT
+
+# At least a zone name and a zone root must have been passed.
+if [[ $# -lt 2 ]]; then
+	usage
+fi
+
+#
+# The script is always started with a full path, so the brand directory
+# can be taken from $0.
+#
+branddir=$(dirname "$0")
+zonename="$1"
+zoneroot="$2"
+
+install_root="$zoneroot/root"
+logdir="$install_root/var/log"
+
+# Drop zonename and zoneroot so only options and operands remain.
+shift 2
+
+unset gtaropts install_opts install_src msg silent_mode verbose_mode
+
+#
+# -d <src>	install source
+# -h		print the usage message
+# -s		silent mode
+# -v		verbose mode
+# -X		passed on to the secondary installer as -x
+#
+while getopts "d:hsvX" opt
+do
+	case "$opt" in
+		d)	install_src="$OPTARG" ;;
+		s)	silent_mode=1 ;;
+		v)	verbose_mode=1 ;;
+		X)	install_opts="$install_opts -x" ;;
+		h|*)	usage ;;
+	esac
+done
+shift $((OPTIND - 1))
+
+# At most one free-form argument (the cluster name) is accepted.
+if [[ $# -gt 1 ]]; then
+	msg=$(gettext "ERROR: Too many arguments provided:")
+
+	screenlog "$msg"
+	screenlog " \"%s\"" "$@"
+	screenlog ""
+	usage
+fi
+
+# If a cluster name was given it must be one of the known clusters.
+if [[ $# -eq 1 ]]; then
+	case "$1" in
+	core|server|desktop|development|all)
+		;;
+	*)
+		msg=$(gettext "ERROR: Unknown cluster name specified: %s")
+
+		screenlog "$msg" "\"$1\""
+		screenlog ""
+		usage
+		;;
+	esac
+fi
+
+# Silent and verbose modes are mutually exclusive.
+if [[ -n $silent_mode ]]; then
+	if [[ -n $verbose_mode ]]; then
+		screenlog "$both_modes" "zoneadm install"
+		screenlog ""
+		usage
+	fi
+fi
+
+#
+# Validate that we're running on a i686-compatible CPU; abort the zone
+# installation now if we're not.
+#
+# Only the "family" lines of the psrinfo output are kept; LC_ALL=C keeps the
+# text being matched below untranslated.
+#
+procinfo=$(LC_ALL=C psrinfo -vp | grep family)
+
+#
+# All x86 processors in CPUID families 6, 15, 16 or 17 should be
+# i686-compatible, assuming third party processor vendors follow AMD and
+# Intel's lead.
+#
+# NOTE(review): this is an allow-list; a processor reporting any other
+# family number is rejected.  Confirm whether further families need adding.
+#
+if [[ "$procinfo" != *" x86 "* ]] ||
+ [[ "$procinfo" != *" family 6 "* && "$procinfo" != *" family 15 "* &&
+ "$procinfo" != *" family 16 "* && "$procinfo" != *" family 17 "* ]] ; then
+ screenlog "$unsupported_cpu" "i686"
+ exit $int_code
+fi
+
+if [[ -n $install_src ]]; then
+	#
+	# Validate $install_src.
+	#
+	# A directory is assumed to hold ISO images to install from;
+	# anything that is not a device or directory is treated as a tar
+	# ball.
+	#
+	if [[ "$(echo $install_src | cut -c 1)" != "/" ]]; then
+		screenlog "$path_abs" "$install_src"
+		exit $int_code
+	fi
+
+	# A missing source takes precedence over an unreadable one.
+	src_problem=""
+	[[ -r "$install_src" ]] || src_problem="$not_readable"
+	[[ -a "$install_src" ]] || src_problem="$not_found"
+
+	if [[ -n $src_problem ]]; then
+		screenlog "$src_problem" "$install_src"
+		screenlog "$install_abort" "$zonename"
+		exit $int_code
+	fi
+
+	#
+	# A block device, a directory, a possible lofiadm(1M) device, or a
+	# path under the directories used by a standard volume management
+	# daemon is passed on to the secondary install script unexamined.
+	#
+	# Anything else must be a regular file holding a tar archive; work
+	# out which gtar options are needed to unpack it.
+	#
+	if [[ ! -b "$install_src" && ! -d "$install_src" &&
+	    "$install_src" != /dev/lofi/* && "$install_src" != /cdrom/* &&
+	    "$install_src" != /media/* ]]; then
+		if [[ ! -f "$install_src" ]]; then
+			screenlog "$wrong_type" "$install_src"
+			screenlog "$install_abort" "$zonename"
+			exit $int_code
+		fi
+
+		# Second word of file(1) output identifies the archive type.
+		filetype=`{ LC_ALL=C file $install_src |
+		    awk '{print $2}' ; } 2>/dev/null`
+
+		case "$filetype" in
+		gzip)
+			verbose "\"$install_src\": \"gzip\" archive"
+			gtaropts="-xz"
+			;;
+		bzip2)
+			verbose "\"$install_src\": \"bzip2\" archive"
+			gtaropts="-xj"
+			;;
+		compressed)
+			verbose "\"$install_src\": Lempel-Ziv" \
+			    "compressed (\".Z\") archive."
+			gtaropts="-xZ"
+			;;
+		USTAR)
+			verbose "\"$install_src\":" \
+			    "uncompressed (\"tar\") archive."
+			gtaropts="-x"
+			;;
+		*)
+			screenlog "$wrong_type" "$install_src"
+			screenlog "$install_abort" "$zonename"
+			exit $int_code
+			;;
+		esac
+	fi
+fi
+
+#
+# Silent mode: discard everything written to stdout from here on, and pass
+# the silent flag on to the secondary installer.
+#
+if [[ -n $silent_mode ]]; then
+	exec 1>/dev/null
+	install_opts="$install_opts -s"
+fi
+
+#
+# Verbose mode: pass the verbose flag to lx_distro_install for ISO or disc
+# installations.
+#
+if [[ -n $verbose_mode ]]; then
+	echo $(gettext "Verbose output mode enabled.")
+	install_opts="$install_opts -v"
+fi
+
+#
+# For tarball-based installs, finish the gtar option string: "v" when
+# verbose, then "f" last.
+#
+if [[ -n $gtaropts ]]; then
+	[[ -n $verbose_mode ]] && gtaropts="${gtaropts}v"
+	gtaropts="${gtaropts}f"
+fi
+
+#
+# Create the install root if it does not exist yet.
+#
+if [[ ! -d "$install_root" ]]
+then
+	if ! mkdir -p "$install_root" 2>/dev/null; then
+		screenlog "$no_install" "$install_root"
+		exit $int_code
+	fi
+fi
+
+#
+# Check for a non-empty root.  The path is quoted so that a zonepath
+# containing whitespace is listed as one directory.
+#
+cnt=`ls "$install_root" | wc -l`
+if [ $cnt -ne 0 ]; then
+	screenlog "$root_full" "$install_root"
+	exit $int_code
+fi
+
+if [[ ! -d "$logdir" ]]
+then
+	if ! mkdir -p "$logdir" 2>/dev/null; then
+		screenlog "$no_log" "$logdir"
+		exit $int_code
+	fi
+fi
+
+logfile="${logdir}/$zonename.install.$$.log"
+
+# Create (or truncate) the logfile; quoted for the same reason as above.
+if ! > "$logfile"; then
+	screenlog "$no_logfile" "$logfile"
+	exit $int_code
+fi
+
+# Redirect stderr to the log file to automatically log any error messages
+exec 2>>"$logfile"
+
+#
+# From here on out, an unspecified exit or interrupt should exit with
+# ZONE_SUBPROC_FATAL, meaning a user will need to do an uninstall before
+# attempting another install, as we've modified the directories we were going
+# to install to in some way.
+#
+int_code=$ZONE_SUBPROC_FATAL
+
+log "Installation started for zone \"$zonename\" `/usr/bin/date`"
+
+if [[ -n $gtaropts ]]; then
+	#
+	# Tarball install: unpack the archive into the install root and
+	# then post-process it for use by BrandZ.
+	#
+	check_cmd /usr/sfw/bin/gtar "$branddir/lx_init_zone"
+
+	screenlog "$install_zone" "$zonename" "$zoneroot"
+	screenlog "$install_from" "$install_src"
+	echo
+	echo $(gettext "This process may take several minutes.")
+	echo
+
+	#
+	# int_code is ZONE_SUBPROC_FATAL on entry and is only changed to
+	# ZONE_SUBPROC_OK once every step has succeeded, so any failure or
+	# an interrupt exits with one of the documented ZONE_SUBPROC_* codes.
+	#
+	# init_tarzone runs lx_init_zone itself, so it is called exactly
+	# once; running lx_init_zone a second time would repeat its edits
+	# (for example appending the lockd entries to /etc/services again).
+	#
+	if ! ( cd "$install_root" && gtar "$gtaropts" "$install_src" ) ; then
+		log "Error: extraction from tar archive failed."
+	elif ! [[ -d "${install_root}/bin" &&
+	    -d "${install_root}/sbin" ]]; then
+		log "Error: improper or incomplete tar archive."
+	elif init_tarzone "$install_root"; then
+		int_code=$ZONE_SUBPROC_OK
+	fi
+
+	if [[ $int_code -eq $ZONE_SUBPROC_OK ]]; then
+		log "Tar install completed for zone '$zonename' `date`."
+	else
+		log "Tar install failed for zone \"$zonename\" `date`."
+
+	fi
+else
+	#
+	# Disc or ISO install: hand everything to the secondary installer.
+	#
+	check_cmd $branddir/lx_distro_install
+
+	$branddir/lx_distro_install -z "$zonename" -r "$zoneroot" \
+	    -d "$install_src" -l "$logfile" $install_opts "$@"
+
+	#
+	# Emit the same code from here whether we're interrupted or exiting
+	# normally.
+	#
+	int_code=$?
+
+	[[ $int_code -eq $ZONE_SUBPROC_USAGE ]] && usage
+fi
+
+if [[ $int_code -ne $ZONE_SUBPROC_OK ]]; then
+	screenlog ""
+	screenlog "$install_fail" "$zonename"
+	screenlog ""
+
+	#
+	# Point the user at the log file only if one will still exist
+	# after zoneadm exits.
+	#
+	if [[ $int_code -ne $ZONE_SUBPROC_NOTCOMPLETE ]]; then
+		screenlog "$see_log" "$logfile"
+	fi
+
+	exit $int_code
+fi
+
+#
+# The install has likely moved a new copy of the logfile into place atop
+# the one we WERE writing to.  Reopen it, or the shell would keep writing
+# to the old logfile's inode and everything logged from this point on
+# would be lost.
+#
+exec 2>>"$logfile"
+
+screenlog ""
+screenlog "$install_good" "$zonename"
+screenlog ""
+
+echo $(gettext "Details saved to log file:")
+echo " \"$logfile\""
+echo
+
+exit $ZONE_SUBPROC_OK
diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml
new file mode 100644
index 0000000000..a53f0ee509
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/platform.xml
@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN"
+ "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1">
+
+<platform name="lx" allow-exclusive-ip="false">
+ <!-- Global filesystems to mount when booting the zone -->
+ <global_mount special="/dev" directory="/native/dev" type="dev"
+ opt="attrdir=%R/dev" />
+ <global_mount special="/lib" directory="/native/lib"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr/lib" directory="/native/usr/lib"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr/lib/brand/lx/etc_default_nfs"
+ directory="/native/etc/default/nfs" type="lofs" opt="ro" />
+ <global_mount special="/usr/lib/brand/lx/etc_netconfig"
+ directory="/native/etc/netconfig" type="lofs" opt="ro" />
+
+ <!-- Local filesystems to mount when booting the zone -->
+ <mount special="/native/dev" directory="/dev" type="lofs" />
+ <mount special="proc" directory="/native/proc" type="proc" />
+ <mount special="swap" directory="/native/etc/svc/volatile"
+ type="tmpfs" />
+ <mount special="swap" directory="/native/tmp" type="tmpfs" />
+
+ <!-- Devices to create under /dev -->
+ <device match="null" />
+ <device match="pts/*" />
+ <device match="random" />
+ <device match="tcp" />
+ <device match="tcp6" />
+ <device match="tty" />
+ <device match="udp" />
+ <device match="udp6" />
+ <device match="urandom" />
+ <device match="zero" />
+
+ <!-- Renamed devices to create under /dev -->
+ <device match="brand/lx/ptmx" name="ptmx" />
+ <device match="zcons/%z/zoneconsole" name="console" />
+
+ <!-- Audio devices to create under /dev -->
+ <device match="brand/lx/dsp" name="dsp" />
+ <device match="brand/lx/mixer" name="mixer" />
+
+ <!-- Symlinks to create under /dev -->
+ <symlink source="fd" target="../proc/self/fd" />
+ <symlink source="log" target="/var/run/syslog" />
+ <symlink source="stderr" target="../proc/self/fd/2" />
+ <symlink source="stdin" target="../proc/self/fd/0" />
+ <symlink source="stdout" target="../proc/self/fd/1" />
+ <symlink source="systty" target="console" />
+
+ <!-- Create a mount point for the /dev/initctl fifo -->
+ <device match="null" name="initctl" />
+
+</platform>
diff --git a/usr/src/pkg/manifests/SUNWlx.mf b/usr/src/pkg/manifests/SUNWlx.mf
index 0d5d285fe5..b7da7af793 100644
--- a/usr/src/pkg/manifests/SUNWlx.mf
+++ b/usr/src/pkg/manifests/SUNWlx.mf
@@ -20,11 +20,12 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-# Was renamed to system/zones/brand/lx, both now obsolete.
-
-set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.133
+set name=pkg.renamed value=true
set name=variant.arch value=i386
+set name=variant.opensolaris.zone value=global value=nonglobal
+depend fmri=pkg:/system/zones/brand/lx@0.5.11,5.11-0.133 type=require
diff --git a/usr/src/pkg/manifests/system-zones-brand-lx.mf b/usr/src/pkg/manifests/system-zones-brand-lx.mf
index ca3a8cc541..6af5d31c4a 100644
--- a/usr/src/pkg/manifests/system-zones-brand-lx.mf
+++ b/usr/src/pkg/manifests/system-zones-brand-lx.mf
@@ -20,9 +20,108 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-set name=pkg.fmri value=pkg:/system/zones/brand/lx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/system/zones/brand/lx@$(PKGVERS)
+set name=pkg.description value="Support for the 'lx' Brand"
+set name=pkg.summary value="lx Brand"
+set name=info.classification \
+ value="org.opensolaris.category.2008:Applications/System Utilities"
set name=variant.arch value=i386
+dir path=etc group=sys
+dir path=etc/zones group=sys
+dir path=usr group=sys
+dir path=usr/kernel group=sys
+dir path=usr/kernel/brand group=sys
+dir path=usr/kernel/brand/$(ARCH64) group=sys
+dir path=usr/kernel/drv group=sys
+dir path=usr/kernel/drv/$(ARCH64) group=sys
+dir path=usr/kernel/dtrace group=sys
+dir path=usr/kernel/dtrace/$(ARCH64) group=sys
+dir path=usr/kernel/fs group=sys
+dir path=usr/kernel/fs/$(ARCH64) group=sys
+dir path=usr/kernel/strmod group=sys
+dir path=usr/kernel/strmod/$(ARCH64) group=sys
+dir path=usr/lib
+dir path=usr/lib/brand
+dir path=usr/lib/brand/lx
+dir path=usr/lib/brand/lx/$(ARCH64)
+dir path=usr/lib/brand/lx/distros
+dir path=usr/lib/devfsadm group=sys
+dir path=usr/lib/devfsadm/linkmod group=sys
+driver name=lx_audio
+driver name=lx_ptm perms="lx_ptmajor 0666 root sys"
+driver name=lx_systrace perms="* 0644 root sys"
+file path=etc/zones/SUNWlx.xml mode=0444
+file path=etc/zones/SUNWlx26.xml mode=0444
+file path=usr/kernel/brand/$(ARCH64)/lx_brand group=sys mode=0755
+file path=usr/kernel/brand/lx_brand group=sys mode=0755
+file path=usr/kernel/drv/$(ARCH64)/lx_audio group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_ptm group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_systrace group=sys
+file path=usr/kernel/drv/lx_audio group=sys
+file path=usr/kernel/drv/lx_audio.conf group=sys
+file path=usr/kernel/drv/lx_ptm group=sys
+file path=usr/kernel/drv/lx_ptm.conf group=sys
+file path=usr/kernel/drv/lx_systrace group=sys
+file path=usr/kernel/drv/lx_systrace.conf group=sys
+file path=usr/kernel/fs/$(ARCH64)/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/$(ARCH64)/lx_proc group=sys mode=0755
+file path=usr/kernel/fs/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/lx_proc group=sys mode=0755
+file path=usr/kernel/strmod/$(ARCH64)/ldlinux group=sys mode=0755
+file path=usr/kernel/strmod/ldlinux group=sys mode=0755
+file path=usr/lib/brand/lx/$(ARCH64)/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/$(ARCH64)/lx_nametoaddr.so.1
+file path=usr/lib/brand/lx/$(ARCH64)/lx_thunk.so.1
+file path=usr/lib/brand/lx/config.xml mode=0444
+file path=usr/lib/brand/lx/distros/centos35.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos36.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos37.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel35.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel36.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel37.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel_centos_common mode=0444
+file path=usr/lib/brand/lx/etc_default_nfs group=sys mode=0444
+file path=usr/lib/brand/lx/etc_netconfig group=sys mode=0444
+file path=usr/lib/brand/lx/lx_distro_install mode=0755
+file path=usr/lib/brand/lx/lx_init_zone mode=0755
+file path=usr/lib/brand/lx/lx_install mode=0755
+file path=usr/lib/brand/lx/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/lx_lockd mode=0755
+file path=usr/lib/brand/lx/lx_nametoaddr.so.1
+file path=usr/lib/brand/lx/lx_native mode=0755
+file path=usr/lib/brand/lx/lx_statd mode=0755
+file path=usr/lib/brand/lx/lx_support mode=0755
+file path=usr/lib/brand/lx/lx_thunk mode=0755
+file path=usr/lib/brand/lx/lx_thunk.so.1
+file path=usr/lib/brand/lx/platform.xml mode=0444
+file path=usr/lib/devfsadm/linkmod/SUNW_lx_link_$(ARCH).so group=sys
+file path=usr/lib/lx_brand.so.1
+hardlink path=usr/kernel/dtrace/$(ARCH64)/lx_systrace \
+ target=../../../kernel/drv/$(ARCH64)/lx_systrace
+hardlink path=usr/kernel/dtrace/lx_systrace \
+ target=../../kernel/drv/lx_systrace
+legacy pkg=SUNWlxr arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Root)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+legacy pkg=SUNWlxu arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Usr)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
+link path=usr/lib/brand/lx/64 target=$(ARCH64)
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 09550a587c..bd0a9d465a 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -427,6 +427,8 @@ PROFILE_OBJS += profile.o
SYSTRACE_OBJS += systrace.o
+LX_SYSTRACE_OBJS += lx_systrace.o
+
LOCKSTAT_OBJS += lockstat.o
FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
@@ -491,6 +493,10 @@ PTSL_OBJS += tty_pts.o
PTM_OBJS += ptm.o
+LX_PTM_OBJS += lx_ptm.o
+
+LX_AUDIO_OBJS += lx_audio.o
+
MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \
mii_marvell.o mii_realtek.o mii_other.o
diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
new file mode 100644
index 0000000000..d2bb03c118
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
@@ -0,0 +1,1569 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <fs/fs_subr.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/dirent.h>
+#include <sys/fs/fifonode.h>
+#include <sys/modctl.h>
+#include <sys/mount.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+
+#include <sys/lx_autofs_impl.h>
+
+/*
+ * External functions
+ */
+extern uintptr_t space_fetch(char *key);
+extern int space_store(char *key, uintptr_t ptr);
+
+/*
+ * Globals
+ */
+static vfsops_t *lx_autofs_vfsops;
+static vnodeops_t *lx_autofs_vn_ops = NULL;
+static int lx_autofs_fstype;
+static major_t lx_autofs_major;
+static minor_t lx_autofs_minor = 0;
+
+/*
+ * Support functions
+ */
+/*
+ * Free a NUL-terminated string whose allocation size is its length plus
+ * one byte for the terminator (the size i_strdup() allocates).
+ */
+static void
+i_strfree(char *str)
+{
+	size_t sz = strlen(str) + 1;
+
+	kmem_free(str, sz);
+}
+
+/*
+ * Return a freshly allocated copy of str, terminator included.  The
+ * allocation is made with KM_SLEEP.
+ */
+static char *
+i_strdup(char *str)
+{
+	int len = strlen(str) + 1;
+	char *copy;
+
+	copy = kmem_alloc(len, KM_SLEEP);
+	bcopy(str, copy, len);
+	return (copy);
+}
+
+/*
+ * Parse a base-10 integer from str into *val.
+ *
+ * Returns 0 on success.  Returns -1, leaving *val untouched, when str is
+ * NULL, when ddi_strtol() reports an error, or when the parsed value does
+ * not fit in an int.
+ */
+static int
+i_str_to_int(char *str, int *val)
+{
+	long parsed;
+
+	if (str == NULL)
+		return (-1);
+
+	if (ddi_strtol(str, NULL, 10, &parsed) != 0)
+		return (-1);
+
+	if (parsed < INT_MIN || parsed > INT_MAX)
+		return (-1);
+
+	*val = parsed;
+	return (0);
+}
+
+/*
+ * Prepare lp for use as a stack of stack_elem_t entries, which are linked
+ * together through their se_list member.
+ */
+static void
+i_stack_init(list_t *lp)
+{
+	size_t elem_size = sizeof (stack_elem_t);
+	size_t link_off = offsetof(stack_elem_t, se_list);
+
+	list_create(lp, elem_size, link_off);
+}
+
+/*
+ * Destroy a stack.  The stack is expected to be empty already; that
+ * expectation is checked with ASSERT.
+ */
+static void
+i_stack_fini(list_t *lp)
+{
+	ASSERT(NULL == list_head(lp));
+
+	list_destroy(lp);
+}
+
+/*
+ * Push a new entry carrying the three given pointers onto the head of
+ * the stack.  The entry is allocated with KM_SLEEP.
+ */
+static void
+i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3)
+{
+	stack_elem_t *elem = kmem_alloc(sizeof (stack_elem_t), KM_SLEEP);
+
+	elem->se_ptr3 = ptr3;
+	elem->se_ptr2 = ptr2;
+	elem->se_ptr1 = ptr1;
+
+	list_insert_head(lp, elem);
+}
+
+/*
+ * Pop the entry at the head of the stack.
+ *
+ * Each of ptr1, ptr2 and ptr3 may be NULL, in which case the matching
+ * stored pointer is discarded; otherwise it is copied out.  Returns 0
+ * when an entry was popped and -1 when the stack was empty.
+ */
+static int
+i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3)
+{
+	stack_elem_t *top = list_head(lp);
+
+	if (top == NULL)
+		return (-1);
+
+	list_remove(lp, top);
+
+	if (ptr1 != NULL)
+		*ptr1 = top->se_ptr1;
+	if (ptr2 != NULL)
+		*ptr2 = top->se_ptr2;
+	if (ptr3 != NULL)
+		*ptr3 = top->se_ptr3;
+
+	kmem_free(top, sizeof (stack_elem_t));
+	return (0);
+}
+
+/*
+ * Given the vnode for one end of a fifo, return the vnode of the fifonode
+ * recorded as its destination (fn_dest).
+ */
+static vnode_t *
+fifo_peer_vp(vnode_t *vp)
+{
+	fifonode_t *peer = VTOF(vp)->fn_dest;
+
+	return (FTOV(peer));
+}
+
+/*
+ * Return the lx_autofs vnode that shadows the underlying vnode uvp,
+ * creating one if the per-mount hash (lav_vn_hash) has no entry for uvp.
+ *
+ * A new vnode is always allocated up front, outside of lav_lock.  If an
+ * entry for uvp already exists, the existing vnode is held and returned
+ * and the speculative vnode is thrown away, along with one hold on uvp
+ * and the vfs hold taken for it.
+ *
+ * NOTE(review): the VN_RELE(uvp) on the duplicate path presumably drops
+ * the hold the caller passed in, since the existing vnode already has its
+ * own reference to uvp -- confirm against the callers.
+ */
+static vnode_t *
+i_vn_alloc(vfs_t *vfsp, vnode_t *uvp)
+{
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *vp, *vp_old;
+
+ /* Allocate a new vnode structure in case we need it. */
+ vp = vn_alloc(KM_SLEEP);
+ vn_setops(vp, lx_autofs_vn_ops);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev);
+ /* The private data of our vnode is the underlying vnode itself. */
+ vp->v_data = uvp;
+ ASSERT(vp->v_count == 1);
+
+ /*
+ * Take a hold on the vfs structure. This is how unmount will
+ * determine if there are any active vnodes in the file system.
+ */
+ VFS_HOLD(vfsp);
+
+ /*
+ * Check if we already have a vnode allocated for this underlying
+ * vnode_t.
+ */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) {
+
+ /*
+ * Didn't find an existing node.
+ * Add this node to the hash and return.
+ */
+ VERIFY(mod_hash_insert(data->lav_vn_hash,
+ (mod_hash_key_t)uvp,
+ (mod_hash_val_t)vp) == 0);
+ mutex_exit(&data->lav_lock);
+ return (vp);
+ }
+
+ /*
+ * Get a hold on the existing vnode while still under lav_lock, then
+ * free up the one we allocated.
+ */
+ VN_HOLD(vp_old);
+ mutex_exit(&data->lav_lock);
+
+ /* Free up the new vnode we allocated. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+
+ return (vp_old);
+}
+
+/*
+ * Tear down an lx_autofs vnode whose reference count has dropped to zero.
+ *
+ * Must be entered with both the per-mount lav_lock and vp->v_lock held
+ * (asserted below); both locks are dropped before this function returns.
+ * The vnode is removed from lav_vn_hash, the holds on the underlying
+ * vnode and on the vfs are released, and the vnode itself is freed.
+ */
+static void
+i_vn_free(vnode_t *vp)
+{
+ vfs_t *vfsp = vp->v_vfsp;
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *uvp = vp->v_data;
+ vnode_t *vp_tmp;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+ ASSERT(MUTEX_HELD((&vp->v_lock)));
+
+ ASSERT(vp->v_count == 0);
+
+ /*
+ * We're about to free this vnode so take it out of the hash.
+ * The removed value (vp_tmp) is not otherwise used.
+ */
+ (void) mod_hash_remove(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp);
+
+ /*
+ * No one else can lookup this vnode any more so there's no need
+ * to hold locks.
+ */
+ mutex_exit(&data->lav_lock);
+ mutex_exit(&vp->v_lock);
+
+ /* Release the underlying vnode. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+}
+
+/*
+ * Obtain an automounter lookup request for the directory entry nm.
+ *
+ * A new request is always built first, outside of lav_lock.  If a request
+ * for nm is already outstanding in lav_path_hash, the new one is
+ * discarded, the existing request's reference count is bumped and
+ * *dup_request is set to 1.  Otherwise the new request (created with a
+ * reference count of 1) is entered into both the id and path hashes and
+ * *dup_request is set to 0.
+ *
+ * Returns NULL, without touching *dup_request, if nm does not fit in the
+ * request packet's name buffer.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm)
+{
+	lx_autofs_lookup_req_t *lalr, *lalr_dup;
+
+	/* Pre-allocate a new automounter request before grabbing locks. */
+	lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP);
+	mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL);
+	lalr->lalr_ref = 1;
+	lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION;
+
+	/* Assign a unique id for this request. */
+	lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids);
+
+	/*
+	 * The token expected by the linux automount is the name of
+	 * the directory entry to look up.  (And not the entire
+	 * path that is being accessed.)
+	 */
+	lalr->lalr_pkt.lap_name_len = strlen(nm);
+	if (lalr->lalr_pkt.lap_name_len >
+	    (sizeof (lalr->lalr_pkt.lap_name) - 1)) {
+		zcmn_err(getzoneid(), CE_NOTE,
+		    "invalid autofs lookup: \"%s\"", nm);
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		/* Tear down the sync objects before freeing their memory. */
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		return (NULL);
+	}
+	(void) strlcpy(lalr->lalr_pkt.lap_name, nm,
+	    sizeof (lalr->lalr_pkt.lap_name));
+
+	/* Check for an outstanding request for this path. */
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_path_hash,
+	    (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) {
+		/*
+		 * There's already an outstanding request for this
+		 * path so we don't need a new one.  The new request was
+		 * never published, so nobody can be using its lock or cv.
+		 */
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		lalr = lalr_dup;
+
+		/* Bump the ref count on the old request. */
+		atomic_add_int(&lalr->lalr_ref, 1);
+
+		*dup_request = 1;
+	} else {
+		/* Add it to the hashes. */
+		VERIFY(mod_hash_insert(data->lav_id_hash,
+		    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+		    (mod_hash_val_t)lalr) == 0);
+		VERIFY(mod_hash_insert(data->lav_path_hash,
+		    (mod_hash_key_t)i_strdup(nm),
+		    (mod_hash_val_t)lalr) == 0);
+
+		*dup_request = 0;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (lalr);
+}
+
+/*
+ * Look up the outstanding request with the given id.  On a hit the
+ * request's reference count is incremented (under lav_lock) and the
+ * request is returned; on a miss NULL is returned.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_find(lx_autofs_vfs_t *data, int id)
+{
+	lx_autofs_lookup_req_t *found;
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)id;
+
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_id_hash, key,
+	    (mod_hash_val_t *)&found) == 0) {
+		/* Take our reference before anyone can drop the last one. */
+		atomic_add_int(&found->lalr_ref, 1);
+	} else {
+		found = NULL;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (found);
+}
+
+/*
+ * Mark a lookup request as finished: unpublish it from the id and path
+ * hashes (under lav_lock), then set lalr_complete and wake every thread
+ * blocked on lalr_cv (under lalr_lock).  The request's reference count is
+ * not changed here.
+ */
+static void
+i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+ lx_autofs_lookup_req_t *lalr_tmp;
+
+ /* Remove this request from the hashes so no one can look it up. */
+ mutex_enter(&data->lav_lock);
+ (void) mod_hash_remove(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+ (mod_hash_val_t)&lalr_tmp);
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+ (mod_hash_val_t)&lalr_tmp);
+ mutex_exit(&data->lav_lock);
+
+ /* Mark this request as complete and wake up anyone waiting on it. */
+ mutex_enter(&lalr->lalr_lock);
+ lalr->lalr_complete = 1;
+ cv_broadcast(&lalr->lalr_cv);
+ mutex_exit(&lalr->lalr_lock);
+}
+
+/*
+ * Drop one reference on a lookup request.  When the last reference goes
+ * away the request's id is returned to the id space, its mutex and
+ * condition variable are destroyed, and its memory is freed.
+ *
+ * The caller must not hold lalr_lock.
+ */
+static void
+i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	ASSERT(!MUTEX_HELD(&lalr->lalr_lock));
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0)
+		return;
+	ASSERT(lalr->lalr_ref == 0);
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+
+	/*
+	 * We held the last reference, so nobody can be waiting on the cv
+	 * or holding the lock.  Destroy both before freeing the memory
+	 * that contains them.
+	 */
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Give up waiting on a lookup request and drop the caller's reference.
+ *
+ * If other references remain, nothing else is done.  If this was the last
+ * reference, the request is removed from the id and path hashes, its id
+ * is freed, its mutex and condition variable are destroyed, and its
+ * memory is released.
+ */
+static void
+i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	lx_autofs_lookup_req_t *lalr_tmp;
+
+	/*
+	 * This is a little tricky.  We're aborting the wait for this
+	 * request.  So if anyone else is waiting for this request we
+	 * can't free it, but if no one else is waiting for the request
+	 * we should free it.  The decrement is done under lav_lock so
+	 * that it cannot race with a lookup taking a new reference.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) {
+		mutex_exit(&data->lav_lock);
+		return;
+	}
+	ASSERT(lalr->lalr_ref == 0);
+
+	/* Remove this request from the hashes so no one can look it up. */
+	(void) mod_hash_remove(data->lav_id_hash,
+	    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+	    (mod_hash_val_t)&lalr_tmp);
+	(void) mod_hash_remove(data->lav_path_hash,
+	    (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+	    (mod_hash_val_t)&lalr_tmp);
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * It's ok to free this now because the ref count was zero.
+	 * Destroy the lock and cv before freeing the memory holding them.
+	 */
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Locate both ends of the automounter's fifo and take an extra reference
+ * on each.
+ *
+ * pgrp is used as the pid of the process to inspect and fd is the file
+ * descriptor, in that process, of the write end of the fifo.  The read
+ * end is found by scanning the same process's descriptor table for the
+ * file whose vnode is the peer of the write end.
+ *
+ * On success returns 0 and stores the referenced file pointers in
+ * *fpp_wr and *fpp_rd.  Returns -1 if the process cannot be locked, if fd
+ * is negative, out of range or not a fifo, or if the read end is not open
+ * in that process.
+ */
+static int
+i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd)
+{
+	proc_t		*prp;
+	uf_info_t	*fip;
+	uf_entry_t	*ufp_wr, *ufp_rd = NULL;
+	file_t		*fp_wr, *fp_rd = NULL;
+	vnode_t		*vp_wr, *vp_rd;
+	int		i;
+
+	/*
+	 * sprlock() is zone aware, so assuming this mount call was
+	 * initiated by a process in a zone, if it tries to specify
+	 * a pgrp outside of its zone this call will fail.
+	 *
+	 * Also, we want to grab hold of the main automounter process
+	 * and it's going to be the group leader for pgrp, so its
+	 * pid will be equal to pgrp.
+	 */
+	prp = sprlock(pgrp);
+	if (prp == NULL)
+		return (-1);
+	mutex_exit(&prp->p_lock);
+
+	/* Now we want to access the process's open file descriptors. */
+	fip = P_FINFO(prp);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * Sanity check fifo write fd.  A negative value must be rejected
+	 * too, otherwise UF_ENTER() would index before the start of the
+	 * descriptor table.
+	 */
+	if (fd < 0 || fd >= fip->fi_nfiles) {
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/* Get a pointer to the write fifo. */
+	UF_ENTER(ufp_wr, fip, fd);
+	if (((fp_wr = ufp_wr->uf_file) == NULL) ||
+	    ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) {
+		/* Invalid fifo fd. */
+		UF_EXIT(ufp_wr);
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/*
+	 * Now we need to find the read end of the fifo (for reasons
+	 * explained below.)  We assume that the read end of the fifo
+	 * is in the same process as the write end.
+	 */
+	vp_rd = fifo_peer_vp(fp_wr->f_vnode);
+	for (i = 0; i < fip->fi_nfiles; i++) {
+		/*
+		 * We are still holding the uf_lock for the write fd, so
+		 * entering it again here would be a recursive mutex
+		 * acquisition.  Skip it; it is the write end anyway.
+		 */
+		if (i == fd)
+			continue;
+		UF_ENTER(ufp_rd, fip, i);
+		if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+		    (fp_rd->f_vnode == vp_rd))
+			break;
+		UF_EXIT(ufp_rd);
+	}
+	if (i == fip->fi_nfiles) {
+		/* Didn't find it. */
+		UF_EXIT(ufp_wr);
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/*
+	 * We need to drop fi_lock before we can try to acquire f_tlock;
+	 * the good news is that the file pointers are protected because
+	 * we're still holding uf_lock.
+	 */
+	mutex_exit(&fip->fi_lock);
+
+	/*
+	 * Here we bump the open counts on the fifos.  The reason
+	 * that we do this is because when we go to write to the
+	 * fifo we want to ensure that they are actually open (and
+	 * not in the process of being closed) without having to
+	 * stop the automounter.  (If the write end of the fifo
+	 * were closed and we tried to write to it we would panic.
+	 * If the read end of the fifo was closed and we tried to
+	 * write to the other end, the process that invoked the
+	 * lookup operation would get an unexpected SIGPIPE.)
+	 */
+	mutex_enter(&fp_wr->f_tlock);
+	fp_wr->f_count++;
+	ASSERT(fp_wr->f_count >= 2);
+	mutex_exit(&fp_wr->f_tlock);
+
+	mutex_enter(&fp_rd->f_tlock);
+	fp_rd->f_count++;
+	ASSERT(fp_rd->f_count >= 2);
+	mutex_exit(&fp_rd->f_tlock);
+
+	/* Release all our locks. */
+	UF_EXIT(ufp_wr);
+	UF_EXIT(ufp_rd);
+	mutex_enter(&prp->p_lock);
+	sprunlock(prp);
+
+	/* Return the file pointers. */
+	*fpp_rd = fp_rd;
+	*fpp_wr = fp_wr;
+	return (0);
+}
+
+/*
+ * mod_hash_walk() callback: store the key of the entry being visited,
+ * converted to an int, in the int that arg points to, and stop the walk.
+ */
+static uint_t
+/*ARGSUSED*/
+i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+	int *idp = arg;
+
+	/* Hand the key back to the caller and end the walk right here. */
+	*idp = (uintptr_t)key;
+	return (MH_WALK_TERMINATE);
+}
+
+/*
+ * Shut down communication with the automounter: close our references to
+ * both ends of the fifo (clearing lav_fifo_wr and lav_fifo_rd), then
+ * complete and release every request still present in the id hash so
+ * that threads waiting on those requests are woken up.
+ */
+static void
+i_fifo_close(lx_autofs_vfs_t *data)
+{
+ /*
+ * Close the fifo to prevent any future requests from
+ * getting sent to the automounter.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr != NULL) {
+ (void) closef(data->lav_fifo_wr);
+ data->lav_fifo_wr = NULL;
+ }
+ if (data->lav_fifo_rd != NULL) {
+ (void) closef(data->lav_fifo_rd);
+ data->lav_fifo_rd = NULL;
+ }
+ mutex_exit(&data->lav_lock);
+
+ /*
+ * Wake up any threads currently waiting for the automounter.
+ * Note that it's possible for multiple threads to have entered
+ * this function and to be doing the work below simultaneously.
+ */
+ for (;;) {
+ lx_autofs_lookup_req_t *lalr;
+ int id;
+
+ /*
+ * Lookup the first entry in the hash. The callback stores
+ * the entry's key in id and terminates the walk; id stays
+ * -1 if the callback is never invoked.
+ */
+ id = -1;
+ mod_hash_walk(data->lav_id_hash,
+ i_fifo_close_cb, &id);
+ if (id == -1) {
+ /* No more id's in the hash. */
+ break;
+ }
+ if ((lalr = i_lalr_find(data, id)) == NULL) {
+ /* Someone else beat us to it. */
+ continue;
+ }
+
+ /*
+ * Mark the request as complete and release the reference
+ * that i_lalr_find() took for us.
+ */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ }
+}
+
+/*
+ * Check whether the automounter process (pid lav_pgrp) still has the read
+ * end of the fifo open.
+ *
+ * The caller must hold lav_lock (asserted). Returns 0 if a descriptor in
+ * that process refers to the peer vnode of our write end. Returns -1 if
+ * the fifo has already been shut down, if the process cannot be locked,
+ * or if no such descriptor is found. No references are taken.
+ */
+static int
+i_fifo_verify_rd(lx_autofs_vfs_t *data)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_rd = NULL;
+ file_t *fp_rd = NULL;
+ vnode_t *vp_rd;
+ int i;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+
+ /* Check if we've already been shut down. */
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ return (-1);
+ }
+ vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode);
+
+ /*
+ * sprlock() is zone aware, so assuming this mount call was
+ * initiated by a process in a zone, if it tries to specify
+ * a pgrp outside of its zone this call will fail.
+ *
+ * Also, we want to grab hold of the main automounter process
+ * and it's going to be the group leader for pgrp, so its
+ * pid will be equal to pgrp.
+ */
+ prp = sprlock(data->lav_pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the process's open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /*
+ * Scan the process's descriptor table for the read end of the
+ * fifo. We assume that the read end of the fifo is in the same
+ * process as the write end.
+ */
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Seems the automounter still has the read end of the fifo
+ * open, we're done here. Release all our locks and exit.
+ * (ufp_rd is still entered from the loop above when we break
+ * out on a match.)
+ */
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ return (0);
+}
+
+/*
+ * Send one request packet to the automounter over the fifo.
+ *
+ * Returns ENOENT if the fifo has already been closed. Otherwise the
+ * packet is written with VOP_WRITE() and the automounter is then checked
+ * to still have the read end open. If that check fails the fifo is shut
+ * down with i_fifo_close() and the write error is returned, or ENOENT if
+ * the write itself had succeeded. If the check passes, the result of the
+ * write is returned (0 on success).
+ */
+static int
+i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap)
+{
+ struct uio uio;
+ struct iovec iov;
+ file_t *fp_wr, *fp_rd;
+ int error;
+
+ /*
+ * The catch here is we need to make sure _we_ don't close
+ * the fifo while writing to it. (Another thread could come
+ * along and realize the automounter process is gone and close
+ * the fifo.) To do this we bump the open count before we
+ * write to the fifo.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ fp_wr = data->lav_fifo_wr;
+ fp_rd = data->lav_fifo_rd;
+
+ /* Bump the open count on the write fifo. */
+ mutex_enter(&fp_wr->f_tlock);
+ fp_wr->f_count++;
+ mutex_exit(&fp_wr->f_tlock);
+
+ /* Bump the open count on the read fifo. */
+ mutex_enter(&fp_rd->f_tlock);
+ fp_rd->f_count++;
+ mutex_exit(&fp_rd->f_tlock);
+
+ mutex_exit(&data->lav_lock);
+
+ /* Describe the packet as a single kernel-space buffer. */
+ iov.iov_base = (caddr_t)lap;
+ iov.iov_len = sizeof (*lap);
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_loffset = 0;
+ uio.uio_segflg = (short)UIO_SYSSPACE;
+ uio.uio_resid = sizeof (*lap);
+ uio.uio_llimit = 0;
+ uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK;
+
+ error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL);
+ /* Drop the extra references we took above. */
+ (void) closef(fp_wr);
+ (void) closef(fp_rd);
+
+ /*
+ * After every write we verify that the automounter still has
+ * these files open.
+ */
+ mutex_enter(&data->lav_lock);
+ if (i_fifo_verify_rd(data) != 0) {
+ /*
+ * Something happened to the automounter.
+ * Close down the communication pipe we setup.
+ */
+ mutex_exit(&data->lav_lock);
+ i_fifo_close(data);
+ if (error != 0)
+ return (error);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ return (error);
+}
+
+/*
+ * Read every entry of the directory dvp (other than "." and "..") and
+ * look each one up.  Directories are pushed onto dir_stack and everything
+ * else onto file_stack, as (dvp, vnode, copy-of-name) triples; the hold
+ * returned by the lookup travels with the pushed entry.  If the relevant
+ * stack pointer is NULL the entry is not recorded and its vnode is
+ * released immediately.
+ *
+ * Returns 0 on success and -1 if reading the directory or looking up an
+ * entry fails.  Entries pushed before a failure are left on the stacks.
+ */
+static int
+i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack)
+{
+	struct uio	uio;
+	struct iovec	iov;
+	dirent64_t	*dbuf, *dp;
+	list_t		*stack;
+	vnode_t		*vp;
+	size_t		dlen, dbuflen;
+	char		*nm;
+	int		eof = 0;
+	int		rc;
+
+	/* Room for 64 dirent64_t sized records per VOP_READDIR() call. */
+	dlen = 64 * sizeof (dirent64_t);
+	dbuf = kmem_alloc(dlen, KM_SLEEP);
+
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = UIO_COPY_CACHED;
+	uio.uio_loffset = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	while (!eof) {
+		/* Reset the buffer description for this pass. */
+		iov.iov_base = (char *)dbuf;
+		iov.iov_len = dlen;
+		uio.uio_resid = dlen;
+
+		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+		rc = VOP_READDIR(dvp, &uio, kcred, &eof, NULL, 0);
+		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+		if (rc != 0) {
+			kmem_free(dbuf, dlen);
+			return (-1);
+		}
+
+		dbuflen = dlen - uio.uio_resid;
+		if (dbuflen == 0) {
+			/* Nothing was returned; we're done. */
+			break;
+		}
+
+		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
+		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
+
+			nm = dp->d_name;
+
+			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
+				continue;
+
+			if (VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, kcred,
+			    NULL, NULL, NULL) != 0) {
+				kmem_free(dbuf, dlen);
+				return (-1);
+			}
+
+			/* Pick the stack that matches the entry's type. */
+			stack = (vp->v_type == VDIR) ? dir_stack : file_stack;
+			if (stack == NULL) {
+				/* Caller isn't interested in this kind. */
+				VN_RELE(vp);
+				continue;
+			}
+			i_stack_push(stack, (caddr_t)dvp, (caddr_t)vp,
+			    i_strdup(nm));
+		}
+	}
+
+	kmem_free(dbuf, dlen);
+	return (0);
+}
+
+/*
+ * Remove the directory entry named path from the directory dvp.  If the
+ * entry is a directory, everything beneath it is removed first.
+ *
+ * This is best effort: the function returns nothing, and it stops at the
+ * first lookup, readdir, remove or rmdir failure after releasing every
+ * vnode hold and name string it still owns.
+ *
+ * The tree is walked iteratively with explicit stacks rather than by
+ * recursion.  Each stack entry is a (parent vnode, vnode, name) triple in
+ * which only the second vnode carries a hold owned by the entry.
+ */
+static void
+i_bs_destroy(vnode_t *dvp, char *path)
+{
+	list_t	search_stack;
+	list_t	dir_stack;
+	list_t	file_stack;
+	vnode_t	*pdvp, *vp;
+	char	*dpath, *fpath;
+	int	ret;
+
+	if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred,
+	    NULL, NULL, NULL) != 0) {
+		/* A directory entry with this name doesn't actually exist. */
+		return;
+	}
+
+	/*
+	 * v_type is an enumeration, not a bit mask, so it has to be
+	 * compared for equality.  (Masking with VDIR would also match
+	 * other vnode types whose numeric value happens to share that bit.)
+	 */
+	if (vp->v_type != VDIR) {
+		/* Easy, the directory entry is a file so delete it. */
+		VN_RELE(vp);
+		(void) VOP_REMOVE(dvp, path, kcred, NULL, 0);
+		return;
+	}
+
+	/*
+	 * The directory entry is a subdirectory, now we have a bit more
+	 * work to do.  (We'll have to recurse into the sub directory.)
+	 * It would have been much easier to do this recursively but kernel
+	 * stacks are notoriously small.
+	 */
+	i_stack_init(&search_stack);
+	i_stack_init(&dir_stack);
+	i_stack_init(&file_stack);
+
+	/* Save our newfound subdirectory into a list. */
+	i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path));
+
+	/* Do a recursive depth first search into the subdirectories. */
+	while (i_stack_pop(&search_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+		/* Get a list of the subdirectories in this directory. */
+		if (i_bs_readdir(dvp, &search_stack, NULL) != 0) {
+			/*
+			 * The entry we just popped is no longer on any
+			 * stack, so the cleanup at exit won't see it;
+			 * release it here.
+			 */
+			VN_RELE(dvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Save the current directory on a separate stack. */
+		i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp, dpath);
+	}
+
+	/*
+	 * Now dir_stack contains a list of directories, the deepest paths
+	 * are at the top of the list.  So let's go through and process them.
+	 */
+	while (i_stack_pop(&dir_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+		/* Get a list of the files in this directory. */
+		if (i_bs_readdir(dvp, NULL, &file_stack) != 0) {
+			VN_RELE(dvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Delete all the files in this directory. */
+		while (i_stack_pop(&file_stack,
+		    NULL, (caddr_t *)&vp, &fpath) == 0) {
+			VN_RELE(vp);
+			ret = VOP_REMOVE(dvp, fpath, kcred, NULL, 0);
+			i_strfree(fpath);
+			if (ret != 0) {
+				/* Drop our hold on the directory as well. */
+				VN_RELE(dvp);
+				i_strfree(dpath);
+				goto exit;
+			}
+		}
+
+		/* Delete this directory. */
+		VN_RELE(dvp);
+		ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred, NULL, 0);
+		i_strfree(dpath);
+		if (ret != 0)
+			goto exit;
+	}
+
+exit:
+	/* Release whatever is still queued on any of the stacks. */
+	while (
+	    (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+	    (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+	    (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &path) == 0)) {
+		VN_RELE(vp);
+		i_strfree(path);
+	}
+	i_stack_fini(&search_stack);
+	i_stack_fini(&dir_stack);
+	i_stack_fini(&file_stack);
+}
+
+/*
+ * Create a directory named bs_name, with mode 0755, inside the directory
+ * dvp.  Returns the vnode handed back by VOP_MKDIR() on success and NULL
+ * if the mkdir fails.
+ */
+static vnode_t *
+i_bs_create(vnode_t *dvp, char *bs_name)
+{
+	vattr_t attrs;
+	vnode_t *newdir;
+
+	/*
+	 * Judging by the mkdir syscall path, only the type and mode need
+	 * to be filled in; everything else is left zeroed.
+	 */
+	bzero(&attrs, sizeof (attrs));
+	attrs.va_mask = AT_TYPE|AT_MODE;
+	attrs.va_type = VDIR;
+	attrs.va_mode = 0755;	/* u+rwx,og=rx */
+
+	if (VOP_MKDIR(dvp, bs_name, &attrs, &newdir, kcred, NULL, 0,
+	    NULL) != 0)
+		newdir = NULL;
+
+	return (newdir);
+}
+
+/*
+ * Refer a lookup of nm in directory dvp to the userland automounter and
+ * wait for the automounter to report that the request has completed.
+ *
+ * Returns 0 once the request has been marked complete, ENOENT when the
+ * lookup cannot be referred to the automounter, EINTR when the wait is
+ * interrupted by a signal, or the error from i_fifo_write() when the
+ * request could not be sent.
+ */
+static int
+i_automounter_call(vnode_t *dvp, char *nm)
+{
+	lx_autofs_vfs_t		*mnt = dvp->v_vfsp->vfs_data;
+	lx_autofs_lookup_req_t	*req;
+	int			is_dup, err;
+	int			in_automounter, fifo_gone;
+
+	/* The automounter only supports queries in the root directory. */
+	if (dvp != mnt->lav_root)
+		return (ENOENT);
+
+	/*
+	 * A caller in the automounter's process group is either the
+	 * automounter itself or one of its forked children.  Sending its
+	 * lookup back to the automounter would hang, so refuse instead.
+	 */
+	mutex_enter(&pidlock);
+	in_automounter = (mnt->lav_pgrp == curproc->p_pgrp);
+	mutex_exit(&pidlock);
+	if (in_automounter)
+		return (ENOENT);
+
+	/* Make sure the pipe to the automount process still exists. */
+	mutex_enter(&mnt->lav_lock);
+	fifo_gone = (mnt->lav_fifo_wr == NULL);
+	ASSERT(!fifo_gone || mnt->lav_fifo_rd == NULL);
+	mutex_exit(&mnt->lav_lock);
+	if (fifo_gone)
+		return (ENOENT);
+
+	/* Get a request structure for this lookup. */
+	req = i_lalr_alloc(mnt, &is_dup, nm);
+	if (req == NULL)
+		return (ENOENT);
+
+	/*
+	 * Only the first thread to allocate a given request sends it to
+	 * the automounter.
+	 */
+	if (!is_dup) {
+		err = i_fifo_write(mnt, &req->lalr_pkt);
+		if (err != 0) {
+			/*
+			 * The request could not be delivered.  Unblock any
+			 * other threads waiting on it and release it.
+			 */
+			i_lalr_complete(mnt, req);
+			i_lalr_release(mnt, req);
+			return (err);
+		}
+	}
+
+	/* Sleep until someone signals that the request has completed. */
+	mutex_enter(&req->lalr_lock);
+	while (!req->lalr_complete) {
+		if (cv_wait_sig(&req->lalr_cv, &req->lalr_lock) != 0)
+			continue;
+
+		/* We got a signal, abort this lookup. */
+		mutex_exit(&req->lalr_lock);
+		i_lalr_abort(mnt, req);
+		return (EINTR);
+	}
+	mutex_exit(&req->lalr_lock);
+	i_lalr_release(mnt, req);
+
+	return (0);
+}
+
+/*
+ * Handle the automounter ioctls intercepted on an lx_autofs vnode.
+ *
+ * LX_AUTOFS_IOC_READY and LX_AUTOFS_IOC_FAIL complete the lookup request
+ * whose id is passed in arg (ENXIO if there is no such request);
+ * LX_AUTOFS_IOC_CATATONIC closes the fifo to the automounter.  Any other
+ * command yields ENOTSUP, and callers outside the automounter's process
+ * group get ENOENT.
+ */
+static int
+i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg)
+{
+	lx_autofs_vfs_t		*mnt = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+	lx_autofs_lookup_req_t	*req;
+	int			outsider;
+
+	/* Be strict: only the automounter process group may call us. */
+	mutex_enter(&pidlock);
+	outsider = (mnt->lav_pgrp != curproc->p_pgrp);
+	mutex_exit(&pidlock);
+	if (outsider)
+		return (ENOENT);
+
+	if ((cmd != LX_AUTOFS_IOC_READY) && (cmd != LX_AUTOFS_IOC_FAIL)) {
+		if (cmd != LX_AUTOFS_IOC_CATATONIC)
+			return (ENOTSUP);
+
+		/* The automounter is shutting down. */
+		i_fifo_close(mnt);
+		return (0);
+	}
+
+	/*
+	 * READY and FAIL are handled identically: whether the mount
+	 * succeeded or not, the waiting lookups simply need to be woken.
+	 */
+	req = i_lalr_find(mnt, (int)arg);
+	if (req == NULL)
+		return (ENXIO);
+
+	/* Mark the request as complete and release it. */
+	i_lalr_complete(mnt, req);
+	i_lalr_release(mnt, req);
+	return (0);
+}
+
+/*
+ * Parse the lx_autofs mount options attached to vfsp and record the
+ * results in data (lav_fd, lav_pgrp, lav_fifo_rd, lav_fifo_wr).
+ *
+ * Returns 0 on success, or EINVAL if an option is missing, is not an
+ * integer, names an unsupported protocol range, or the automounter fifos
+ * cannot be looked up.
+ */
+static int
+i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data)
+{
+	char	*opt_fd, *opt_pgrp, *opt_min, *opt_max;
+	int	fd, pgrp, proto_lo, proto_hi;
+	file_t	*wr_fp, *rd_fp;
+
+	/* Every option is mandatory. */
+	if (vfs_optionisset(vfsp, LX_MNTOPT_FD, &opt_fd) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &opt_pgrp) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &opt_min) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &opt_max) != 1)
+		return (EINVAL);
+
+	/* Convert each option value to an integer. */
+	if (i_str_to_int(opt_fd, &fd) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_pgrp, &pgrp) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_min, &proto_lo) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_max, &proto_hi) != 0)
+		return (EINVAL);
+
+	/*
+	 * Only v2 of the linux kernel automounter protocol is supported,
+	 * so the range offered by the mount request must include 2.
+	 */
+	if (!((proto_lo <= 2) && (proto_hi >= 2)))
+		return (EINVAL);
+
+	/*
+	 * Look up the fifos used to talk to the userland automounter
+	 * process.
+	 */
+	if (i_fifo_lookup(pgrp, fd, &wr_fp, &rd_fp) != 0)
+		return (EINVAL);
+
+	/* Remember the mount options and the fifo pointers. */
+	data->lav_fifo_wr = wr_fp;
+	data->lav_fifo_rd = rd_fp;
+	data->lav_pgrp = pgrp;
+	data->lav_fd = fd;
+	return (0);
+}
+
+/*
+ * VFS entry points
+ */
+/*
+ * VFS mount entry point: mount an lx_autofs filesystem on top of mvp.
+ *
+ * The mount options are parsed by i_parse_mntopt(), a fresh backing store
+ * directory (LX_AUTOFS_BS_DIR) is created under mvp, and the per-mount
+ * bookkeeping (device number, request id space, request and vnode hashes,
+ * root vnode) is set up and hung off vfsp->vfs_data.
+ *
+ * Returns 0 on success.  Fails with EPERM if the caller lacks mount
+ * privilege, is in the global zone, or mvp is already an lx_autofs vnode;
+ * ENOTDIR if mvp is not a directory; EBUSY if mvp is busy (and MS_OVERLAY
+ * was not given) or the backing store cannot be created; or the error
+ * returned by i_parse_mntopt().
+ */
+static int
+lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+	lx_autofs_vfs_t *data;
+	dev_t		dev;
+	char		name[40];
+	int		error;
+
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT)))
+		return (EBUSY);
+
+	/* We don't support mounts in the global zone. */
+	if (getzoneid() == GLOBAL_ZONEID)
+		return (EPERM);
+
+	/* We don't support mounting on top of ourselves. */
+	if (vn_matchops(mvp, lx_autofs_vn_ops))
+		return (EPERM);
+
+	/* Allocate a vfs struct. */
+	data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP);
+
+	/* Parse mount options. */
+	if ((error = i_parse_mntopt(vfsp, data)) != 0) {
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (error);
+	}
+
+	/* Initialize the backing store. */
+	i_bs_destroy(mvp, LX_AUTOFS_BS_DIR);
+	if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) {
+		/*
+		 * i_parse_mntopt() stored the automounter fifo file
+		 * pointers in data.  Unmount releases those with closef(),
+		 * so do the same here rather than leaking them when the
+		 * mount fails.  (lav_lock is not initialized yet, hence no
+		 * locking.)
+		 */
+		if (data->lav_fifo_wr != NULL)
+			(void) closef(data->lav_fifo_wr);
+		if (data->lav_fifo_rd != NULL)
+			(void) closef(data->lav_fifo_rd);
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (EBUSY);
+	}
+	data->lav_bs_name = LX_AUTOFS_BS_DIR;
+
+	/* We have to hold the underlying vnode we're mounted on. */
+	data->lav_mvp = mvp;
+	VN_HOLD(mvp);
+
+	/* Initialize vfs fields */
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lx_autofs_fstype;
+	vfsp->vfs_data = data;
+
+	/* Invent a dev_t (sigh): keep bumping the minor until it is unused. */
+	do {
+		dev = makedevice(lx_autofs_major,
+		    atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32);
+	} while (vfs_devismounted(dev));
+	vfsp->vfs_dev = dev;
+	vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype);
+
+	/*
+	 * Create an id space arena for automounter requests.  All of the
+	 * names below embed the minor number so each mount gets its own.
+	 */
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_ids = id_space_create(name, 1, INT_MAX);
+
+	/* Create hashes to keep track of automounter requests. */
+	mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL);
+	(void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_path_hash = mod_hash_create_strhash(name,
+	    LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor);
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_id_hash = mod_hash_create_idhash(name,
+	    LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor);
+
+	/* Create a hash to keep track of vnodes. */
+	(void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_vn_hash = mod_hash_create_ptrhash(name,
+	    LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor,
+	    sizeof (vnode_t));
+
+	/* Create root vnode, layered on the backing store directory. */
+	data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp);
+	data->lav_root->v_flag |=
+	    VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+	return (0);
+}
+
+/*
+ * VFS unmount entry point: tear down an lx_autofs mount.
+ *
+ * Returns 0 on success, EPERM if the caller lacks unmount privilege,
+ * ENOTSUP for forced unmounts (MS_FORCE), or EBUSY while any vnode other
+ * than the root, or any extra hold on the root vnode, is outstanding.
+ */
+static int
+lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
+{
+	lx_autofs_vfs_t *data;
+
+	if (secpolicy_fs_unmount(cr, vfsp) != 0)
+		return (EPERM);
+
+	/* We do not currently support forced unmounts. */
+	if (flag & MS_FORCE)
+		return (ENOTSUP);
+
+	/*
+	 * We should never have a reference count of less than 2: one for the
+	 * caller, one for the root vnode.
+	 */
+	ASSERT(vfsp->vfs_count >= 2);
+
+	/* If there are any outstanding vnodes, we can't unmount. */
+	if (vfsp->vfs_count > 2)
+		return (EBUSY);
+
+	/* Check for any remaining holds on the root vnode. */
+	data = vfsp->vfs_data;
+	ASSERT(data->lav_root->v_vfsp == vfsp);
+	if (data->lav_root->v_count > 1)
+		return (EBUSY);
+
+	/* Close the fifo to the automount process. */
+	if (data->lav_fifo_wr != NULL)
+		(void) closef(data->lav_fifo_wr);
+	if (data->lav_fifo_rd != NULL)
+		(void) closef(data->lav_fifo_rd);
+
+	/*
+	 * We have to release our hold on our root vnode before we can
+	 * delete the backing store. (Since the root vnode is linked
+	 * to the backing store.)
+	 */
+	VN_RELE(data->lav_root);
+
+	/* Cleanup the backing store. */
+	i_bs_destroy(data->lav_mvp, data->lav_bs_name);
+	VN_RELE(data->lav_mvp);
+
+	/* Cleanup our remaining data structures. */
+	mod_hash_destroy_strhash(data->lav_path_hash);
+	mod_hash_destroy_idhash(data->lav_id_hash);
+	mod_hash_destroy_ptrhash(data->lav_vn_hash);
+	id_space_destroy(data->lav_ids);
+
+	/*
+	 * lav_lock was set up with mutex_init() at mount time; tear it
+	 * down before the memory that contains it is freed.
+	 */
+	mutex_destroy(&data->lav_lock);
+	kmem_free(data, sizeof (lx_autofs_vfs_t));
+
+	return (0);
+}
+
+/*
+ * VFS root entry point: hand back the mount's root vnode in *vpp with an
+ * additional hold placed on it.  Always returns 0.
+ */
+static int
+lx_autofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	vnode_t *rootvp = ((lx_autofs_vfs_t *)vfsp->vfs_data)->lav_root;
+
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * VFS statvfs entry point: report the statistics of the filesystem that
+ * holds the vnode underlying our root, then overwrite the fields that
+ * identify the filesystem (fsid, base type, flags, f_fstr) with our own.
+ *
+ * Returns 0 on success or the error from the underlying VFS_STATVFS().
+ */
+static int
+lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	lx_autofs_vfs_t	*mnt = vfsp->vfs_data;
+	vnode_t		*lower_root = mnt->lav_root->v_data;
+	dev32_t		fsid32;
+	int		err;
+
+	err = VFS_STATVFS(lower_root->v_vfsp, sp);
+	if (err != 0)
+		return (err);
+
+	/* Replace the identifying fields with values for this mount. */
+	(void) cmpldev(&fsid32, vfsp->vfs_dev);
+	sp->f_fsid = fsid32;
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+	(void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
+	    sizeof (sp->f_basetype));
+	return (0);
+}
+
+/*
+ * VFS operations template for lx_autofs.  lx_autofs_init() passes this
+ * table to vfs_setfsops(); the { NULL, NULL } entry terminates it.
+ */
+static const fs_operation_def_t lx_autofs_vfstops[] = {
+ { VFSNAME_MOUNT, { .vfs_mount = lx_autofs_mount } },
+ { VFSNAME_UNMOUNT, { .vfs_unmount = lx_autofs_unmount } },
+ { VFSNAME_ROOT, { .vfs_root = lx_autofs_root } },
+ { VFSNAME_STATVFS, { .vfs_statvfs = lx_autofs_statvfs } },
+ { NULL, NULL }
+};
+
+/*
+ * VOP entry points - simple passthrough
+ *
+ * For most VOP entry points we can simply pass the request on to
+ * the underlying filesystem we're mounted on.
+ */
+/* Pass a close straight through to the underlying vnode (vp->v_data). */
+static int
+lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+    caller_context_t *ctp)
+{
+	vnode_t *lower = vp->v_data;
+
+	return (VOP_CLOSE(lower, flag, count, offset, cr, ctp));
+}
+
+/* Pass a readdir straight through to the underlying vnode (vp->v_data). */
+static int
+lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+    caller_context_t *ctp, int flags)
+{
+	vnode_t *lower = vp->v_data;
+
+	return (VOP_READDIR(lower, uiop, cr, eofp, ctp, flags));
+}
+
+/* Pass an access check straight through to the underlying vnode. */
+static int
+lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+    caller_context_t *ctp)
+{
+	vnode_t *lower = vp->v_data;
+
+	return (VOP_ACCESS(lower, mode, flags, cr, ctp));
+}
+
+/* Take the read/write lock of the underlying vnode (vp->v_data). */
+static int
+lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *lower = vp->v_data;
+
+	return (VOP_RWLOCK(lower, write_lock, ctp));
+}
+
+/* Drop the read/write lock of the underlying vnode (vp->v_data). */
+static void
+lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *lower = vp->v_data;
+
+	VOP_RWUNLOCK(lower, write_lock, ctp);
+}
+
+/*
+ * Remove directory nm from the directory underlying dvp.
+ *
+ * cdir is the calling process's current directory.  It appears to be used
+ * only to keep callers from deleting their own current directory, so when
+ * it is an lx_autofs vnode the underlying vnode is passed down in its
+ * place.
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
+    caller_context_t *ctp, int flags)
+{
+	vnode_t *lower_dvp = dvp->v_data;
+	vnode_t *lower_cdir = cdir;
+
+	if (vn_matchops(cdir, lx_autofs_vn_ops))
+		lower_cdir = cdir->v_data;
+
+	return (VOP_RMDIR(lower_dvp, nm, lower_cdir, cr, ctp, flags));
+}
+
+/*
+ * VOP entry points - special passthrough
+ *
+ * For some VOP entry points we will first pass the request on to
+ * the underlying filesystem we're mounted on. If there's an error
+ * then we immediately return the error, but if the request succeeds
+ * we have to do some extra work before returning.
+ */
+/*
+ * Open the vnode underlying *vpp.
+ *
+ * If the underlying open substitutes a different vnode (a clone open),
+ * *vpp is replaced with a new lx_autofs vnode layered on the substitute
+ * and the original lx_autofs vnode is released.
+ *
+ * Returns 0 on success or the error from the underlying VOP_OPEN().
+ */
+static int
+lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ctp)
+{
+	vnode_t	*upper = *vpp;
+	vnode_t	*lower = upper->v_data;
+	int	err;
+
+	err = VOP_OPEN(&lower, flag, cr, ctp);
+	if (err != 0)
+		return (err);
+
+	if (lower != upper->v_data) {
+		/* Clone open: hand the caller a vnode for the new target. */
+		*vpp = i_vn_alloc(upper->v_vfsp, lower);
+		VN_RELE(upper);
+	}
+	return (0);
+}
+
+/*
+ * Fetch the attributes of the underlying vnode and, on success, report
+ * this mount's device as the filesystem id.
+ *
+ * Returns 0 on success or the error from the underlying VOP_GETATTR().
+ */
+static int
+lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ctp)
+{
+	vnode_t	*lower = vp->v_data;
+	int	err;
+
+	err = VOP_GETATTR(lower, vap, flags, cr, ctp);
+	if (err == 0)
+		vap->va_fsid = vp->v_vfsp->vfs_dev;
+
+	return (err);
+}
+
+/*
+ * Create directory nm in the directory underlying dvp.  On success the
+ * attributes in vap are stamped with this mount's filesystem id and *vpp
+ * is set to a new lx_autofs vnode layered on the created directory.
+ *
+ * Returns 0 on success or the error from the underlying VOP_MKDIR().
+ */
+static int
+lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp,
+    cred_t *cr, caller_context_t *ctp, int flags, vsecattr_t *vsecp)
+{
+	vnode_t	*lower_dvp = dvp->v_data;
+	vnode_t	*lower_new = NULL;
+	int	err;
+
+	err = VOP_MKDIR(lower_dvp, nm, vap, &lower_new, cr, ctp, flags, vsecp);
+	if (err == 0) {
+		vap->va_fsid = dvp->v_vfsp->vfs_dev;
+		*vpp = i_vn_alloc(dvp->v_vfsp, lower_new);
+	}
+
+	return (err);
+}
+
+/*
+ * VOP entry points - custom
+ */
+/*
+ * VOP inactive entry point: drop one hold on vp and free the vnode when
+ * that was the last one.  Panics if the hold count is already below one.
+ */
+/*ARGSUSED*/
+static void
+lx_autofs_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ctp)
+{
+	lx_autofs_vfs_t *mnt = vp->v_vfsp->vfs_data;
+
+	/*
+	 * The vfs lock is taken first: if this vnode ends up being freed,
+	 * nobody may be allowed to find it in the vnode hash meanwhile.
+	 */
+	mutex_enter(&mnt->lav_lock);
+	mutex_enter(&vp->v_lock);
+
+	if (vp->v_count < 1) {
+		panic("lx_autofs_inactive: bad v_count");
+		/*NOTREACHED*/
+	}
+
+	/* Drop the temporary hold by vn_rele now. */
+	vp->v_count--;
+	if (vp->v_count == 0) {
+		/*
+		 * Last hold gone.  No one should have been blocked on
+		 * v_lock because the vnode is about to be freed.
+		 * NOTE(review): neither lock is dropped here; presumably
+		 * i_vn_free() releases them - confirm.
+		 */
+		i_vn_free(vp);
+		return;
+	}
+
+	/* Someone else still holds the vnode; just unlock. */
+	mutex_exit(&vp->v_lock);
+	mutex_exit(&mnt->lav_lock);
+}
+
+/*
+ * Look up nm in the directory underlying dvp.  If the component does not
+ * exist (ENOENT), the lookup is referred to the automounter and then
+ * retried once.
+ *
+ * On success *vpp is set to a new lx_autofs vnode layered on the vnode
+ * that was found and 0 is returned.  Otherwise the error from the lookup
+ * or from i_automounter_call() is returned.
+ */
+static int
+lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ctp,
+    int *direntflags, pathname_t *realpnp)
+{
+	vnode_t	*lower_dvp = dvp->v_data;
+	vnode_t	*found = NULL;
+	int	err;
+
+	/* See whether this path component already exists. */
+	err = VOP_LOOKUP(lower_dvp, nm, &found, pnp, flags, rdir, cr, ctp,
+	    direntflags, realpnp);
+
+	/* The automounter is only consulted for missing entries. */
+	if (err == ENOENT) {
+		err = i_automounter_call(dvp, nm);
+		if (err != 0)
+			return (err);
+
+		/* Retry the lookup operation. */
+		err = VOP_LOOKUP(lower_dvp, nm, &found, pnp, flags, rdir, cr,
+		    ctp, direntflags, realpnp);
+	}
+
+	if (err != 0)
+		return (err);
+
+	*vpp = i_vn_alloc(dvp->v_vfsp, found);
+	return (0);
+}
+
+/*
+ * VOP ioctl entry point.  The automounter control ioctls are handled by
+ * i_automounter_ioctl(); every other command is passed on to the
+ * underlying vnode.
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr,
+    int *rvalp, caller_context_t *ctp)
+{
+	switch ((uint_t)cmd) {
+	case LX_AUTOFS_IOC_READY:
+	case LX_AUTOFS_IOC_FAIL:
+	case LX_AUTOFS_IOC_CATATONIC:
+	case LX_AUTOFS_IOC_EXPIRE:
+	case LX_AUTOFS_IOC_PROTOVER:
+	case LX_AUTOFS_IOC_SETTIMEOUT:
+		/* One of ours: do not let it reach the lower filesystem. */
+		return (i_automounter_ioctl(vp, cmd, arg));
+	default:
+		break;
+	}
+
+	return (VOP_IOCTL((vnode_t *)vp->v_data, cmd, arg, mode, cr, rvalp,
+	    ctp));
+}
+
+/*
+ * VOP entry points definitions
+ */
+/*
+ * Vnode operations template for lx_autofs vnodes.  lx_autofs_init() passes
+ * this table to vn_make_ops() to build lx_autofs_vn_ops; the { NULL }
+ * entry terminates it.
+ */
+static const fs_operation_def_t lx_autofs_tops_root[] = {
+ { VOPNAME_OPEN, { .vop_open = lx_autofs_open } },
+ { VOPNAME_CLOSE, { .vop_close = lx_autofs_close } },
+ { VOPNAME_IOCTL, { .vop_ioctl = lx_autofs_ioctl } },
+ { VOPNAME_RWLOCK, { .vop_rwlock = lx_autofs_rwlock } },
+ { VOPNAME_RWUNLOCK, { .vop_rwunlock = lx_autofs_rwunlock } },
+ { VOPNAME_GETATTR, { .vop_getattr = lx_autofs_getattr } },
+ { VOPNAME_ACCESS, { .vop_access = lx_autofs_access } },
+ { VOPNAME_READDIR, { .vop_readdir = lx_autofs_readdir } },
+ { VOPNAME_LOOKUP, { .vop_lookup = lx_autofs_lookup } },
+ { VOPNAME_INACTIVE, { .vop_inactive = lx_autofs_inactive } },
+ { VOPNAME_MKDIR, { .vop_mkdir = lx_autofs_mkdir } },
+ { VOPNAME_RMDIR, { .vop_rmdir = lx_autofs_rmdir } },
+ { NULL }
+};
+
+/*
+ * lx_autofs_init() gets invoked via the mod_install() call in this
+ * module's _init() routine.  Therefore, the code that cleans up the
+ * structures we allocate below is actually found in our _fini() routine.
+ *
+ * Returns 0 on success, EAGAIN if no unique device number could be
+ * obtained or saved, or the error from vfs_setfsops() / vn_make_ops().
+ */
+/* ARGSUSED */
+static int
+lx_autofs_init(int fstype, char *name)
+{
+	int err;
+
+	/* Reuse a previously saved major number, or get and save a new one. */
+	lx_autofs_major = (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV);
+	if (lx_autofs_major == 0) {
+		lx_autofs_major = getudev();
+		if (lx_autofs_major == (major_t)-1) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't get unique device number");
+			return (EAGAIN);
+		}
+
+		if (space_store(LX_AUTOFS_SPACE_KEY_UDEV,
+		    (uintptr_t)lx_autofs_major) != 0) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't save unique device number");
+			return (EAGAIN);
+		}
+	}
+
+	/* Register the VFS operations for our filesystem type. */
+	lx_autofs_fstype = fstype;
+	err = vfs_setfsops(fstype, lx_autofs_vfstops, &lx_autofs_vfsops);
+	if (err != 0) {
+		cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template");
+		return (err);
+	}
+
+	/* Build the vnode operations; undo the VFS registration on failure. */
+	err = vn_make_ops("lx_autofs vnode ops", lx_autofs_tops_root,
+	    &lx_autofs_vn_ops);
+	if (err != 0) {
+		VERIFY(vfs_freevfsops_by_type(fstype) == 0);
+		lx_autofs_vn_ops = NULL;
+		return (err);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Module linkage
+ */
+/*
+ * Mount options understood by lx_autofs.  Each one carries a value
+ * (MO_HASVALUE); i_parse_mntopt() requires all four to be present.
+ */
+static mntopt_t lx_autofs_mntopt[] = {
+ { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE }
+};
+
+/* Option table descriptor: entry count followed by the table itself. */
+static mntopts_t lx_autofs_mntopts = {
+ sizeof (lx_autofs_mntopt) / sizeof (mntopt_t),
+ lx_autofs_mntopt
+};
+
+/*
+ * Filesystem definition: ties the filesystem name to lx_autofs_init() and
+ * the mount option table above.
+ */
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ LX_AUTOFS_NAME,
+ lx_autofs_init,
+ VSW_HASPROTO | VSW_VOLATILEDEV,
+ &lx_autofs_mntopts
+};
+
+extern struct mod_ops mod_fsops;
+
+/* Filesystem module linkage wrapping the definition above. */
+static struct modlfs modlfs = {
+ &mod_fsops, "linux autofs filesystem", &vfw
+};
+
+/* Linkage list handed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+/* Module load entry point: install the lx_autofs module linkage. */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/* Module info entry point: report on the lx_autofs module linkage. */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point.  Removes the module linkage and, if that
+ * succeeds, frees the vnode operations built by lx_autofs_init().
+ *
+ * Returns 0 on success or the error from mod_remove().
+ */
+int
+_fini(void)
+{
+	int err = mod_remove(&modlinkage);
+
+	if ((err == 0) && (lx_autofs_vn_ops != NULL)) {
+		vn_freevnodeops(lx_autofs_vn_ops);
+		lx_autofs_vn_ops = NULL;
+	}
+
+	/*
+	 * Our init routine calls vfs_freevfsops_by_type() itself when it
+	 * fails after vfs_setfsops().  Nothing of the sort is needed here:
+	 * the fs framework already did it as part of the mod_remove()
+	 * call above.
+	 */
+	return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
new file mode 100644
index 0000000000..bfeb78330a
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
@@ -0,0 +1,396 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <sys/modctl.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/frame.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+
+#include <sys/lx_impl.h>
+
+/*
+ * Each probe's private argument encodes the system call number in the low
+ * LX_SYSTRACE_SHIFT bits; the bits above that are non-zero for an "entry"
+ * probe and zero for a "return" probe.
+ */
+#define LX_SYSTRACE_SHIFT 16
+#define LX_SYSTRACE_ISENTRY(x) ((int)(x) >> LX_SYSTRACE_SHIFT)
+#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << LX_SYSTRACE_SHIFT) - 1))
+#define LX_SYSTRACE_ENTRY(id) ((1 << LX_SYSTRACE_SHIFT) | (id))
+#define LX_SYSTRACE_RETURN(id) (id)
+
+/* Artificial frame counts passed to dtrace_probe_create(). */
+#define LX_SYSTRACE_ENTRY_AFRAMES 2
+#define LX_SYSTRACE_RETURN_AFRAMES 4
+
+/* Per system call record: its name and the ids of its enabled probes. */
+typedef struct lx_systrace_sysent {
+ const char *lss_name;
+ dtrace_id_t lss_entry;
+ dtrace_id_t lss_return;
+} lx_systrace_sysent_t;
+
+static dev_info_t *lx_systrace_devi;
+static dtrace_provider_id_t lx_systrace_id;
+/* lx_systrace_lock protects lx_systrace_nenabled. */
+static kmutex_t lx_systrace_lock;
+static uint_t lx_systrace_nenabled;
+
+/* Table built at attach time, one entry per lx_sysent[] system call. */
+static int lx_systrace_nsysent;
+static lx_systrace_sysent_t *lx_systrace_sysent;
+
+/*
+ * System call entry trigger: fire the "entry" probe for sysnum if one is
+ * enabled.  Only arg0 through arg4 are handed to dtrace_probe().
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	if (sysnum < lx_systrace_nsysent) {
+		dtrace_id_t probe = lx_systrace_sysent[sysnum].lss_entry;
+
+		if (probe != DTRACE_IDNONE)
+			dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+	}
+}
+
+/*
+ * System call return trigger: fire the "return" probe for sysnum if one
+ * is enabled.  Only arg0 through arg4 are handed to dtrace_probe().
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	if (sysnum < lx_systrace_nsysent) {
+		dtrace_id_t probe = lx_systrace_sysent[sysnum].lss_return;
+
+		if (probe != DTRACE_IDNONE)
+			dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+	}
+}
+
+/*
+ * DTrace provide callback: create an "entry" and a "return" probe for
+ * every system call in the table whose "entry" probe does not exist yet.
+ * Requests for a specific probe description (desc != NULL) are ignored.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+	lx_systrace_sysent_t *ent;
+	int sysnum;
+
+	if (desc != NULL)
+		return;
+
+	for (sysnum = 0; sysnum < lx_systrace_nsysent; sysnum++) {
+		ent = &lx_systrace_sysent[sysnum];
+
+		if (dtrace_probe_lookup(lx_systrace_id, NULL,
+		    ent->lss_name, "entry") == 0) {
+			/* The probe argument encodes kind and number. */
+			(void) dtrace_probe_create(lx_systrace_id, NULL,
+			    ent->lss_name, "entry",
+			    LX_SYSTRACE_ENTRY_AFRAMES,
+			    (void *)((uintptr_t)LX_SYSTRACE_ENTRY(sysnum)));
+
+			(void) dtrace_probe_create(lx_systrace_id, NULL,
+			    ent->lss_name, "return",
+			    LX_SYSTRACE_RETURN_AFRAMES,
+			    (void *)((uintptr_t)LX_SYSTRACE_RETURN(sysnum)));
+
+			/* Freshly created probes start out disabled. */
+			ent->lss_entry = DTRACE_IDNONE;
+			ent->lss_return = DTRACE_IDNONE;
+		}
+	}
+}
+
+/*
+ * DTrace enable callback: record id as the enabled entry or return probe
+ * of the system call encoded in parg.  System call tracing in the brand is
+ * switched on when the first probe is enabled.  Always returns 0.
+ */
+/*ARGSUSED*/
+static int
+lx_systrace_enable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t	cookie = (uintptr_t)parg;
+	int		sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t	*slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled++;
+	if (lx_systrace_nenabled == 1)
+		lx_brand_systrace_enable();
+	mutex_exit(&lx_systrace_lock);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = id;
+
+	return (0);
+}
+
+/*
+ * DTrace disable callback: clear the entry or return probe id of the
+ * system call encoded in parg.  System call tracing in the brand is
+ * switched off when the last enabled probe goes away.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_disable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t	cookie = (uintptr_t)parg;
+	int		sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t	*slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = DTRACE_IDNONE;
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled--;
+	if (lx_systrace_nenabled == 0)
+		lx_brand_systrace_disable();
+	mutex_exit(&lx_systrace_lock);
+}
+
+/*
+ * DTrace destroy callback (installed in lx_systrace_pops).  Intentionally
+ * empty: this function performs no per-probe cleanup.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+}
+
+/*
+ * DTrace getarg callback: fetch probe argument number argno by reading it
+ * from the stack frame of the entry/return callback that fired the probe.
+ *
+ * Returns the argument value, or 0 when argno is 6 or greater.  The
+ * aframes parameter is not used; the frame depth is hard-coded below.
+ */
+/*ARGSUSED*/
+static uint64_t
+lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+ int aframes)
+{
+ struct frame *fp = (struct frame *)dtrace_getfp();
+ uintptr_t *stack;
+ uint64_t val = 0;
+ int i;
+
+ if (argno >= 6)
+ return (0);
+
+ /*
+ * Walk the four frames down the stack to the entry or return callback.
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable()
+ * which invokes this function to get the extended arguments. We get
+ * the frame pointer in via call to dtrace_getfp() above which makes for
+ * four frames.
+ */
+ for (i = 0; i < 4; i++) {
+ fp = (struct frame *)fp->fr_savfp;
+ }
+
+ /* The callback's stack arguments start just past its frame record. */
+ stack = (uintptr_t *)&fp[1];
+
+ /*
+ * Skip the first argument to the callback -- the system call number.
+ */
+ argno++;
+
+#ifdef __amd64
+ /*
+ * On amd64, the first 6 arguments are passed in registers while
+ * subsequent arguments are on the stack.
+ *
+ * NOTE(review): after the increment above only argno 5 maps to a
+ * non-negative index (stack[0]); presumably DTrace only calls this
+ * for arguments that were not passed to dtrace_probe() -- confirm.
+ */
+ argno -= 6;
+#endif
+
+ /* Read the stack slot with DTrace fault protection switched on. */
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+ val = stack[argno];
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (val);
+}
+
+
+/* Provider stability attributes passed to dtrace_register(). */
+static const dtrace_pattr_t lx_systrace_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provider operations passed to dtrace_register().  The initializer is
+ * positional; NULL marks an operation this provider does not implement.
+ */
+static dtrace_pops_t lx_systrace_pops = {
+ lx_systrace_provide,
+ NULL,
+ lx_systrace_enable,
+ lx_systrace_disable,
+ NULL,
+ NULL,
+ NULL,
+ lx_systrace_getarg,
+ NULL,
+ lx_systrace_destroy
+};
+
+/*
+ * attach(9E) entry point.  For DDI_ATTACH this creates the minor node,
+ * registers the "lx-syscall" DTrace provider, builds the per system call
+ * table from lx_sysent[] and installs the probe triggers.
+ *
+ * Returns DDI_SUCCESS for a successful attach and for DDI_RESUME, and
+ * DDI_FAILURE for any other command or when the minor node or the
+ * provider cannot be set up.
+ */
+static int
+lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	int nsys, i;
+
+	if (cmd == DDI_RESUME)
+		return (DDI_SUCCESS);
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR,
+	    0, DDI_PSEUDO, NULL) == DDI_FAILURE ||
+	    dtrace_register("lx-syscall", &lx_systrace_attr,
+	    DTRACE_PRIV_KERNEL, 0, &lx_systrace_pops, NULL,
+	    &lx_systrace_id) != 0) {
+		ddi_remove_minor_node(devi, NULL);
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(devi);
+	lx_systrace_devi = devi;
+
+	/*
+	 * Count the lx_brand system calls; the table ends at the first
+	 * entry without a handler.
+	 */
+	nsys = 0;
+	while (lx_sysent[nsys].sy_callc != NULL)
+		nsys++;
+
+	/* Build our matching table with every probe id cleared. */
+	lx_systrace_sysent = kmem_zalloc(nsys * sizeof (lx_systrace_sysent_t),
+	    KM_SLEEP);
+	lx_systrace_nsysent = nsys;
+
+	for (i = 0; i < nsys; i++) {
+		lx_systrace_sysent_t *ent = &lx_systrace_sysent[i];
+
+		ent->lss_name = lx_sysent[i].sy_name;
+		ent->lss_entry = DTRACE_IDNONE;
+		ent->lss_return = DTRACE_IDNONE;
+	}
+
+	/* Install probe triggers. */
+	lx_systrace_entry_ptr = lx_systrace_entry;
+	lx_systrace_return_ptr = lx_systrace_return;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  For DDI_DETACH this unregisters the DTrace
+ * provider and undoes what lx_systrace_attach() set up: the probe
+ * triggers, the per system call table and the minor node.
+ *
+ * Returns DDI_SUCCESS for a successful detach and for DDI_SUSPEND, and
+ * DDI_FAILURE for any other command or when the provider cannot be
+ * unregistered (in which case nothing is torn down).
+ */
+/*ARGSUSED*/
+static int
+lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+	lx_systrace_sysent_t	*table;
+	int			ntable;
+
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	case DDI_SUSPEND:
+		return (DDI_SUCCESS);
+	default:
+		return (DDI_FAILURE);
+	}
+
+	if (dtrace_unregister(lx_systrace_id) != 0)
+		return (DDI_FAILURE);
+
+	/*
+	 * Reset probe triggers before touching the table, so that the
+	 * callbacks stop being reachable while the memory they index is
+	 * still valid.
+	 */
+	lx_systrace_entry_ptr = NULL;
+	lx_systrace_return_ptr = NULL;
+
+	/*
+	 * Unpublish the table (count first, so a late callback fails its
+	 * bounds check) and only then free it.  This narrows the window in
+	 * which a racing callback could touch freed memory.
+	 */
+	table = lx_systrace_sysent;
+	ntable = lx_systrace_nsysent;
+	lx_systrace_nsysent = 0;
+	lx_systrace_sysent = NULL;
+	kmem_free(table, ntable * sizeof (lx_systrace_sysent_t));
+
+	/* Remove the minor node created by lx_systrace_attach(). */
+	ddi_remove_minor_node(devi, NULL);
+
+	return (DDI_SUCCESS);
+}
+
+/* open(9E) entry point: every open succeeds and no state is kept. */
+/*ARGSUSED*/
+static int
+lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+/*
+ * Character/block entry points.  Only open is implemented by this driver;
+ * the remaining slots are filled with the generic nodev/nulldev stubs.
+ */
+static struct cb_ops lx_systrace_cb_ops = {
+ lx_systrace_open, /* open */
+ nodev, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ nodev, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+/*
+ * Device operations: wires up this driver's attach and detach routines
+ * and the cb_ops table above.
+ */
+static struct dev_ops lx_systrace_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ ddi_getinfo_1to1, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lx_systrace_attach, /* attach */
+ lx_systrace_detach, /* detach */
+ nodev, /* reset */
+ &lx_systrace_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "Linux Brand System Call Tracing", /* name of module */
+ &lx_systrace_ops /* driver ops */
+};
+
+/* Linkage list handed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/* Module load entry point: install the lx_systrace module linkage. */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/* Module info entry point: report on the lx_systrace module linkage. */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/* Module unload entry point: remove the lx_systrace module linkage. */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
new file mode 100644
index 0000000000..e4499c8a5b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_systrace" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/ldlinux.c b/usr/src/uts/common/brand/lx/io/ldlinux.c
new file mode 100644
index 0000000000..76c5e1d255
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/ldlinux.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/ddi.h>
+#include <sys/cmn_err.h>
+#include <sys/modctl.h>
+#include <sys/ptms.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/sunddi.h>
+
+#include <sys/ldlinux.h>
+
+
+/*
+ * ldlinuxopen - open routine, called when the module gets pushed onto the
+ * stream.
+ *
+ * Allocates the per-stream ldlinux state, seeds its cached veof/veol/vmin/
+ * vtime values from the "ttymodes" property (zeroing the state when the
+ * property cannot be used) and sends an M_SETOPTS message carrying
+ * SO_ISTTY to the stream head.
+ *
+ * Returns 0 on success (also when the queue already has state attached),
+ * EINVAL when this is not a module open, and ENOSR when the M_SETOPTS
+ * message cannot be allocated.
+ */
+/* ARGSUSED */
+static int
+ldlinuxopen(
+	queue_t *q,	/* pointer to the read side queue */
+	dev_t *devp,	/* pointer to stream tail's dev */
+	int oflag,	/* the user open(2) supplied flags */
+	int sflag,	/* open state flag */
+	cred_t *credp)	/* credentials */
+{
+	struct ldlinux *tp;	/* ldlinux entry for this module */
+	mblk_t *mop;
+	struct stroptions *sop;
+	struct termios *termiosp;
+	int len;
+	int rv;
+
+	if (sflag != MODOPEN)
+		return (EINVAL);
+
+	if (q->q_ptr != NULL) {
+		/* It's already attached. */
+		return (0);
+	}
+
+	mop = allocb(sizeof (struct stroptions), BPRI_MED);
+	if (mop == NULL)
+		return (ENOSR);
+	mop->b_datap->db_type = M_SETOPTS;
+	mop->b_wptr += sizeof (struct stroptions);
+	sop = (struct stroptions *)mop->b_rptr;
+	sop->so_flags = SO_ISTTY;
+
+	/*
+	 * Allocate state structure.
+	 */
+	tp = kmem_alloc(sizeof (*tp), KM_SLEEP);
+
+	/* Stash a pointer to our private data in q_ptr. */
+	q->q_ptr = WR(q)->q_ptr = tp;
+
+	/*
+	 * Get termios defaults.  These are looked up as the "ttymodes"
+	 * property, starting at the root node.
+	 */
+	rv = ddi_getlongprop(DDI_DEV_T_ANY, ddi_root_node(), 0, "ttymodes",
+	    (caddr_t)&termiosp, &len);
+	if (rv == DDI_PROP_SUCCESS && len == sizeof (struct termios)) {
+		if (termiosp->c_lflag & ICANON) {
+			tp->veof = termiosp->c_cc[VEOF];
+			tp->veol = termiosp->c_cc[VEOL];
+			tp->vmin = 1;
+			tp->vtime = 0;
+		} else {
+			tp->veof = 0;
+			tp->veol = 0;
+			tp->vmin = termiosp->c_cc[VMIN];
+			tp->vtime = termiosp->c_cc[VTIME];
+		}
+		kmem_free(termiosp, len);
+	} else {
+		/*
+		 * A successful lookup hands us an allocated buffer even
+		 * when its size is not the one we expect; release it so
+		 * that it is not leaked.
+		 */
+		if (rv == DDI_PROP_SUCCESS)
+			kmem_free(termiosp, len);
+
+		/*
+		 * winge winge winge...
+		 */
+		cmn_err(CE_WARN,
+		    "ldlinuxopen: Couldn't get ttymodes property!");
+		bzero(tp, sizeof (*tp));
+	}
+
+	tp->state = 0;
+
+	/*
+	 * Commit to the open and send the M_SETOPTS off to the stream head.
+	 */
+	qprocson(q);
+	putnext(q, mop);
+
+	return (0);
+}
+
+
+/*
+ * ldlinuxclose - close routine, called when the module gets popped off of
+ * the stream.  Turns queue processing off, releases the per-stream state
+ * and clears the private pointer of both queues.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+ldlinuxclose(queue_t *q, int flag, cred_t *credp)
+{
+	struct ldlinux *state;
+
+	qprocsoff(q);
+
+	state = q->q_ptr;
+	kmem_free(state, sizeof (struct ldlinux));
+
+	WR(q)->q_ptr = NULL;
+	q->q_ptr = NULL;
+
+	return (0);
+}
+
+
+/*
+ * Handle an M_IOCTL message travelling down the write side.
+ *
+ * TIOCSETLD stores the caller supplied lx_cc values in the per-stream
+ * cache and TIOCGETLD returns them; both are answered here.  PTSSTTY
+ * marks the stream as ISPTSTTY and, like every other command, is then
+ * passed along to the next module.
+ */
+static void
+do_ioctl(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux *state = q->q_ptr;
+	struct iocblk *ioc = (struct iocblk *)mp->b_rptr;
+	struct lx_cc *cc;
+	mblk_t *reply;
+	int err;
+
+	if (ioc->ioc_cmd == TIOCSETLD) {
+		/* make the caller supplied data accessible */
+		err = miocpullup(mp, sizeof (struct lx_cc));
+		if (err != 0) {
+			miocnak(q, mp, 0, err);
+			return;
+		}
+
+		/* remember the caller's settings for this stream */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		state->veof = cc->veof;
+		state->veol = cc->veol;
+		state->vmin = cc->vmin;
+		state->vtime = cc->vtime;
+
+		/* acknowledge the request */
+		miocack(q, mp, 0, 0);
+		return;
+	}
+
+	if (ioc->ioc_cmd == TIOCGETLD) {
+		/* we need a data block to carry the reply */
+		reply = allocb(sizeof (struct lx_cc), BPRI_MED);
+		if (reply == NULL) {
+			miocnak(q, mp, 0, ENOSR);
+			return;
+		}
+
+		/* turn the request into an ack that carries the block */
+		mioc2ack(mp, reply, sizeof (struct lx_cc), 0);
+
+		/* fill the reply in from the per-stream cache */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		cc->veof = state->veof;
+		cc->veol = state->veol;
+		cc->vmin = state->vmin;
+		cc->vtime = state->vtime;
+
+		qreply(q, mp);
+		return;
+	}
+
+	/* note pts ttys; the message itself still continues downstream */
+	if (ioc->ioc_cmd == PTSSTTY)
+		state->state |= ISPTSTTY;
+
+	putnext(q, mp);
+}
+
+
+/*
+ * ldlinuxput - Module read and write queue put procedure.
+ *
+ * M_IOCTL messages on the write side are handed to do_ioctl(), which
+ * takes over the message.  M_FLUSH messages flush this queue when the
+ * flush direction matches the side we are on.  Everything that is not
+ * consumed by do_ioctl() is passed to the next queue.
+ */
+static void
+ldlinuxput(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux *state = q->q_ptr;
+	int read_side = ((q->q_flag & QREADR) != 0);
+
+	if (DB_TYPE(mp) == M_IOCTL) {
+		if (!read_side) {
+			do_ioctl(q, mp);
+			return;
+		}
+	} else if (DB_TYPE(mp) == M_FLUSH) {
+		/* the flush bit that applies to this side of the module */
+		int wanted = read_side ? FLUSHR : FLUSHW;
+
+		if (*mp->b_rptr & wanted) {
+			if ((state->state & ISPTSTTY) &&
+			    (*mp->b_rptr & FLUSHBAND)) {
+				flushband(q, *(mp->b_rptr + 1), FLUSHDATA);
+			} else {
+				flushq(q, FLUSHDATA);
+			}
+		}
+	}
+
+	putnext(q, mp);
+}
+
+
+/*
+ * STREAMS module description. Field meanings below follow the
+ * module_info(9S), qinit(9S) and streamtab(9S) layouts.
+ */
+static struct module_info ldlinux_info = {
+ LDLINUX_MODID, /* module id number */
+ LDLINUX_MOD, /* module name */
+ 0, /* minimum packet size */
+ INFPSZ, /* maximum packet size */
+ 0, /* high water mark */
+ 0 /* low water mark */
+};
+
+/* queue processing routines, shared by the read and the write side */
+static struct qinit ldlinuxinit = {
+ (int (*)()) ldlinuxput, /* put procedure */
+ NULL, /* no service procedure */
+ ldlinuxopen, /* open routine */
+ ldlinuxclose, /* close routine */
+ NULL,
+ &ldlinux_info
+};
+
+static struct streamtab ldlinuxinfo = {
+ &ldlinuxinit, /* read side */
+ &ldlinuxinit /* write side */
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * fsw names the STREAMS module and points at its streamtab, modlstrmod
+ * wraps fsw as a STREAMS module linkage record, and modlinkage is what
+ * _init(), _fini() and _info() pass to the module framework.
+ */
+static struct fmodsw fsw = {
+ LDLINUX_MOD,
+ &ldlinuxinfo,
+ D_MTQPAIR | D_MP
+};
+
+static struct modlstrmod modlstrmod = {
+ &mod_strmodops, "termios extensions for lx brand", &fsw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlstrmod, NULL
+};
+
+/*
+ * Module load entry point: installs the STREAMS module described by
+ * modlinkage and returns mod_install()'s result.
+ */
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+/*
+ * Module unload entry point: removes the STREAMS module described by
+ * modlinkage and returns mod_remove()'s result.
+ */
+int
+_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
+
+/*
+ * Module information entry point: fills in *modinfop from modlinkage and
+ * hands back whatever mod_info() returned.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c
new file mode 100644
index 0000000000..e8c6234d92
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.c
@@ -0,0 +1,1996 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <sys/audio.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/disp.h>
+#include <sys/ddi.h>
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/kmem.h>
+#include <sys/lx_audio.h>
+#include <sys/mixer.h>
+#include <sys/modhash.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/stropts.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/*
+ * Properties used by the lx_audio driver. The zone name and an
+ * underscore are prepended to these names before they are looked up
+ * (see lxa_devprop_name()).
+ */
+#define LXA_PROP_INPUTDEV "inputdev"
+#define LXA_PROP_OUTPUTDEV "outputdev"
+
+/* default device paths used by this driver */
+#define LXA_DEV_DEFAULT "/dev/audio"
+#define LXA_DEV_CUSTOM_DIR "/dev/sound/"
+
+/* maximum possible number of concurrent opens of this driver */
+#define LX_AUDIO_MAX_OPENS 1024
+
+/*
+ * these are default fragment size and fragment count values.
+ * these values were chosen to make quake work well on my
+ * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400.
+ *
+ * for reference:
+ * - 1 sec of stereo output at 44Khz is about 171 Kb of data
+ * - 1 sec of mono output at 8Khz is about 8Kb of data
+ */
+#define LXA_OSS_FRAG_SIZE (1024) /* 1/8 sec at 8Khz mono */
+#define LXA_OSS_FRAG_CNT (1024 * 2)
+
+/* maximum amount of fragment memory we'll allow a process to mmap */
+#define LXA_OSS_FRAG_MEM (1024 * 1024 * 2) /* 2Mb */
+
+/* forward declarations */
+typedef struct lxa_state lxa_state_t;
+typedef struct lxa_zstate lxa_zstate_t;
+
+/*
+ * Structure and enum declarations
+ */
+
+/* the kind of device an lxa_state_t was opened as (see lxas_type) */
+typedef enum {
+ LXA_TYPE_INVALID = 0,
+ LXA_TYPE_AUDIO = 1, /* audio device */
+ LXA_TYPE_AUDIOCTL = 2 /* audio control/mixer device */
+} lxa_dev_type_t;
+
+/*
+ * Zone state: what is shared between all opens that point at the same
+ * lxa_zstate_t through lxas_zs.
+ */
+struct lxa_zstate {
+ char *lxa_zs_zonename;
+
+ /*
+ * we could store the input/output audio device setting here,
+ * but instead we're keeping them as device node properties
+ * so that a user can easily see the audio configuration for
+ * a zone via prtconf.
+ */
+
+ /*
+ * OSS doesn't support multiple opens of the audio device.
+ * (multiple opens of the mixer device are supported.)
+ * so here we'll keep a pointer to any open input/output
+ * streams. (OSS does support two opens if one is for input
+ * and the other is for output.)
+ */
+ lxa_state_t *lxa_zs_istate;
+ lxa_state_t *lxa_zs_ostate;
+
+ /*
+ * we need to cache channel gain and balance. channel gain and
+ * balance map to PCM volume in OSS, which are supposedly a property
+ * of the underlying hardware. but in solaris, channels are
+ * implemented in software and only exist when an audio device
+ * is actually open. (each open returns a unique channel.) OSS
+ * apps will expect consistent PCM volume set/get operations to
+ * work even if no audio device is open. hence, if no underlying
+ * device is open we need to cache the gain and balance setting.
+ */
+ lxa_mixer_levels_t lxa_zs_pcm_levels;
+};
+
+/*
+ * State kept for one open of the lx_audio device.
+ */
+struct lxa_state {
+ lxa_zstate_t *lxas_zs; /* zone state pointer */
+
+ dev_t lxas_dev_old; /* dev_t used to open the device */
+ dev_t lxas_dev_new; /* new dev_t assigned to an open */
+ int lxas_flags; /* original flags passed to open */
+ lxa_dev_type_t lxas_type; /* type of device that was opened */
+
+ int lxas_devs_same; /* input and output device the same? */
+
+ /* input device variables */
+ ldi_handle_t lxas_idev_lh; /* ldi handle for access */
+ int lxas_idev_flags; /* flags used for open */
+
+ /* output device variables */
+ ldi_handle_t lxas_odev_lh; /* ldi handle for access */
+ int lxas_odev_flags; /* flags used for open */
+
+ /*
+ * since we support multiplexing of devices we need to remember
+ * certain parameters about the devices
+ */
+ uint_t lxas_hw_features;
+ uint_t lxas_sw_features;
+
+ uint_t lxas_frag_size;
+ uint_t lxas_frag_cnt;
+
+ /*
+ * members needed to support mmap device access. note that to
+ * simplify things we only support one mmap access per open.
+ * the lxas_mmap_thread* members are updated under lxa_lock.
+ */
+ ddi_umem_cookie_t lxas_umem_cookie;
+ char *lxas_umem_ptr;
+ size_t lxas_umem_len;
+ kthread_t *lxas_mmap_thread;
+ int lxas_mmap_thread_running;
+ int lxas_mmap_thread_exit;
+ int lxas_mmap_thread_frag;
+};
+
+/*
+ * Global variables
+ */
+dev_info_t *lxa_dip = NULL; /* node the zone audio properties live on */
+kmutex_t lxa_lock; /* guards zone istate/ostate and mmap thread state */
+id_space_t *lxa_minor_id = NULL; /* minor numbers handed out to opens */
+mod_hash_t *lxa_state_hash = NULL; /* lxa_state_t, keyed by minor number */
+mod_hash_t *lxa_zstate_hash = NULL;
+size_t lxa_state_hash_size = 15;
+size_t lxa_zstate_hash_size = 15;
+size_t lxa_registered_zones = 0;
+
+/*
+ * function declarations
+ */
+/* declared early because lxa_state_close() calls it before its definition */
+static void lxa_mmap_output_disable(lxa_state_t *);
+
+/*
+ * functions
+ */
+/*
+ * Tear down the state of one open: stop mmap output, unlink the state
+ * from its zone and from the state hash, close the underlying devices,
+ * release mmap memory and the minor number, and free the structure.
+ */
+static void
+lxa_state_close(lxa_state_t *lxa_state)
+{
+	lxa_zstate_t *zs = lxa_state->lxas_zs;
+	minor_t id = getminor(lxa_state->lxas_dev_new);
+
+	/* make sure no mmap output thread is still using this state */
+	lxa_mmap_output_disable(lxa_state);
+
+	/*
+	 * stop being the zone's active input and/or output stream so
+	 * that other opens of the audio device can succeed again.
+	 */
+	mutex_enter(&lxa_lock);
+	if (zs->lxa_zs_istate == lxa_state) {
+		zs->lxa_zs_istate = NULL;
+	}
+	if (zs->lxa_zs_ostate == lxa_state) {
+		zs->lxa_zs_ostate = NULL;
+	}
+	mutex_exit(&lxa_lock);
+
+	/* drop the hash entry for this minor; it may not be there */
+	(void) mod_hash_remove(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t *)&lxa_state);
+
+	/* close whichever underlying audio devices are open */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		(void) ldi_close(lxa_state->lxas_idev_lh,
+		    lxa_state->lxas_idev_flags, kcred);
+	}
+	if (lxa_state->lxas_odev_lh != NULL) {
+		(void) ldi_close(lxa_state->lxas_odev_lh,
+		    lxa_state->lxas_odev_flags, kcred);
+	}
+
+	/* give back memory that was allocated for an mmap */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		ddi_umem_free(lxa_state->lxas_umem_cookie);
+	}
+
+	/* the minor number can be reused from here on */
+	id_free(lxa_minor_id, id);
+
+	kmem_free(lxa_state, sizeof (lxa_state_t));
+}
+
+/*
+ * Return the name of the zone that the current process belongs to.
+ */
+static char *
+getzonename(void)
+{
+	zone_t *zone = curproc->p_zone;
+
+	return (zone->zone_name);
+}
+
+/*
+ * Build the per-zone property name "<zname>_<pname>".
+ *
+ * The result is allocated with kmem_alloc() and is exactly as long as
+ * the string plus its terminator; the caller has to free it.
+ */
+static char *
+lxa_devprop_name(char *zname, char *pname)
+{
+	char *name;
+	int size;
+
+	ASSERT(zname != NULL);
+	ASSERT(pname != NULL);
+
+	/* first pass only measures, second pass formats */
+	size = snprintf(NULL, 0, "%s_%s", zname, pname) + 1;
+	name = kmem_alloc(size, KM_SLEEP);
+	(void) snprintf(name, size, "%s_%s", zname, pname);
+
+	return (name);
+}
+
+/*
+ * Check that a device property value is one this driver understands:
+ * the keywords "default" and "none", or a string made up of decimal
+ * digits only.
+ *
+ * Returns 0 when the value is acceptable and -1 otherwise.
+ */
+static int
+lxa_devprop_verify(char *pval)
+{
+	int n;
+
+	ASSERT(pval != NULL);
+
+	/* both keywords are handled by lxa_devprop_lookup() */
+	if ((strcmp(pval, "default") == 0) || (strcmp(pval, "none") == 0))
+		return (0);
+
+	/*
+	 * make sure the value is an integer.  a character is not a digit
+	 * when it lies below '0' OR above '9'; testing both at once (as
+	 * the code used to) can never be true and rejected nothing.
+	 */
+	for (n = 0; pval[n] != '\0'; n++) {
+		if ((pval[n] < '0') || (pval[n] > '9')) {
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Look up the audio device configured for a zone and turn it into a
+ * device path.
+ *
+ * zname/pname select the "<zname>_<pname>" property on lxa_dip.  The
+ * value "default" maps to LXA_DEV_DEFAULT and any other accepted value
+ * is appended to LXA_DEV_CUSTOM_DIR.  For LXA_TYPE_AUDIOCTL, "ctl" is
+ * appended to the resulting path.
+ *
+ * Returns an allocated path that the caller releases with strfree(), or
+ * NULL when the property does not exist, fails verification or is
+ * "none".
+ */
+static char *
+lxa_devprop_lookup(char *zname, char *pname, lxa_dev_type_t lxa_type)
+{
+	char *zprop_name, *pval;
+	char *dev_path;
+	int n, rv;
+
+	ASSERT((pname != NULL) && (zname != NULL));
+	ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL));
+
+	zprop_name = lxa_devprop_name(zname, pname);
+
+	/* attempt to lookup the property */
+	rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval);
+	strfree(zprop_name);
+
+	if (rv != DDI_PROP_SUCCESS)
+		return (NULL);
+
+	if (lxa_devprop_verify(pval) != 0) {
+		ddi_prop_free(pval);
+		return (NULL);
+	}
+
+	if (strcmp(pval, "none") == 0) {
+		/*
+		 * there is no audio device specified.  the property value
+		 * still belongs to us, so release it before returning.
+		 */
+		ddi_prop_free(pval);
+		return (NULL);
+	} else if (strcmp(pval, "default") == 0) {
+		/* use the default audio device on the system */
+		dev_path = strdup(LXA_DEV_DEFAULT);
+	} else {
+		/* a custom audio device was specified, generate a path */
+		n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1;
+		dev_path = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval);
+	}
+	ddi_prop_free(pval);
+
+	/*
+	 * if this is an audio control device we need to append
+	 * "ctl" to the path
+	 */
+	if (lxa_type == LXA_TYPE_AUDIOCTL) {
+		char *tmp;
+		n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1;
+		tmp = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(tmp, n, "%s%s", dev_path, "ctl");
+		strfree(dev_path);
+		dev_path = tmp;
+	}
+
+	return (dev_path);
+}
+
+/*
+ * Work out the hardware/software feature sets to report for this open
+ * and reset the fragment size and count to their defaults.
+ *
+ * With a single device open, or with input and output on the same
+ * device, the device's own feature sets are used.  With two different
+ * devices, the features common to both are combined with the features
+ * that the pair provides together (play, record, full duplex).
+ *
+ * Returns 0 on success, the error of a failed AUDIO_GETINFO ioctl, or
+ * EIO when one device reports different features for input and output.
+ */
+static int
+lxa_dev_getfeatures(lxa_state_t *lxa_state)
+{
+	audio_info_t ai_idev, ai_odev;
+	int n, rv;
+
+	/* set a default fragment size */
+	lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE;
+	lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT;
+
+	/* get info for the currently open audio devices */
+	if ((lxa_state->lxas_idev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_idev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+	if ((lxa_state->lxas_odev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+
+	/* if we're only open for reading or writing then it's easy */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	} else if (lxa_state->lxas_odev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_idev.sw_features;
+		lxa_state->lxas_hw_features = ai_idev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * well if we're open for reading and writing but the underlying
+	 * device is the same then it's also pretty easy
+	 */
+	if (lxa_state->lxas_devs_same) {
+		if ((ai_odev.sw_features != ai_idev.sw_features) ||
+		    (ai_odev.hw_features != ai_idev.hw_features)) {
+			zcmn_err(getzoneid(), CE_WARN, "lx_audio error: "
+			    "audio device reported inconsistent features");
+			return (EIO);
+		}
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * figure out which software features we're going to support.
+	 * we will report a feature as supported if both the input
+	 * and output device support it.
+	 */
+	lxa_state->lxas_sw_features = 0;
+	n = ai_idev.sw_features & ai_odev.sw_features;
+	if (n & AUDIO_SWFEATURE_MIXER)
+		lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER;
+
+	/*
+	 * figure out which hardware features we're going to support.
+	 * for a first pass we will report a feature as supported if
+	 * both the input and output device support it.
+	 */
+	lxa_state->lxas_hw_features = 0;
+	n = ai_idev.hw_features & ai_odev.hw_features;
+	if (n & AUDIO_HWFEATURE_MSCODEC)
+		lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC;
+
+	/*
+	 * if we made it here then we have different audio input and output
+	 * devices.  this will allow us to report support for additional
+	 * hardware features that may not be supported by just the input or
+	 * output device alone.  these are added to the set built above;
+	 * plain assignment would throw away what was collected so far.
+	 */
+
+	/* always report that we support both playback and recording */
+	lxa_state->lxas_hw_features |=
+	    AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD;
+
+	/* always report full duplex support */
+	lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_DUPLEX;
+
+	/* never report that we have input to output loopback support */
+	ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0);
+	return (0);
+}
+
+/*
+ * Open the underlying input and/or output device(s) that the current
+ * zone is configured to use for this open.
+ *
+ * On success the ldi handles are stored in lxas_idev_lh/lxas_odev_lh
+ * (NULL for a side that was not opened) and 0 is returned.  ENODEV is
+ * returned when no usable device is configured or no ldi ident can be
+ * obtained; otherwise the error from ldi_open_by_name() is returned.
+ * When the output open fails after the input open succeeded, the input
+ * handle is left in place for the caller to close.
+ */
+static int
+lxa_dev_open(lxa_state_t *lxa_state)
+{
+	char *idev, *odev;
+	int flags;
+	int rv = 0;	/* success unless one of the steps below fails */
+	ldi_handle_t lh;
+	ldi_ident_t li = NULL;
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/*
+	 * check if we have configuration properties for this zone.
+	 * if we don't then audio isn't supported in this zone.
+	 */
+	idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV,
+	    lxa_state->lxas_type);
+	odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV,
+	    lxa_state->lxas_type);
+
+	/* make sure there is at least one device to read from or write to */
+	if ((idev == NULL) && (odev == NULL))
+		return (ENODEV);
+
+	/* see if the input and output devices are actually the same device */
+	if (((idev != NULL) && (odev != NULL)) &&
+	    (strcmp(idev, odev) == 0))
+		lxa_state->lxas_devs_same = 1;
+
+	/* we don't respect FEXCL */
+	flags = lxa_state->lxas_flags & ~FEXCL;
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/*
+		 * if we're opening audio devices then we need to muck
+		 * with the FREAD/FWRITE flags.
+		 *
+		 * certain audio device may only support input or output
+		 * (but not both.)  so if we're multiplexing input/output
+		 * to different devices we need to make sure we don't try
+		 * and open the output device for reading and the input
+		 * device for writing.
+		 *
+		 * if we're using the same device for input/output we still
+		 * need to do this because some audio devices won't let
+		 * themselves be opened multiple times for read access.
+		 */
+		lxa_state->lxas_idev_flags = flags & ~FWRITE;
+		lxa_state->lxas_odev_flags = flags & ~FREAD;
+
+		/* make sure we have devices to read from and write to */
+		if (((flags & FREAD) && (idev == NULL)) ||
+		    ((flags & FWRITE) && (odev == NULL))) {
+			rv = ENODEV;
+			goto out;
+		}
+	} else {
+		lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags;
+	}
+
+	/* get an ident to open the devices */
+	if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) {
+		rv = ENODEV;
+		goto out;
+	}
+
+	/* open the input device */
+	lxa_state->lxas_idev_lh = NULL;
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_idev_flags & FREAD)) &&
+	    (idev != NULL)) {
+		rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", idev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_idev_lh = lh;
+	}
+
+	/* open the output device */
+	lxa_state->lxas_odev_lh = NULL;
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_odev_flags & FWRITE)) &&
+	    (odev != NULL)) {
+		rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			/*
+			 * If this open failed and we previously opened an
+			 * input device, it is the responsibility of the
+			 * caller to close that device after we return
+			 * failure here.
+			 */
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", odev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_odev_lh = lh;
+	}
+
+	/* free up stuff */
+out:
+	if (li != NULL)
+		ldi_ident_release(li);
+	if (idev != NULL)
+		strfree(idev);
+	if (odev != NULL)
+		strfree(odev);
+
+	return (rv);
+}
+
+/*
+ * Called by the mmap output thread to terminate itself.  Resets all of
+ * the thread bookkeeping in lxa_state under lxa_lock and then exits the
+ * calling thread; this function does not return.
+ */
+void
+lxa_mmap_thread_exit(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+	lxa_state->lxas_mmap_thread_running = 0;
+	lxa_state->lxas_mmap_thread_exit = 0;
+	lxa_state->lxas_mmap_thread_frag = 0;
+	lxa_state->lxas_mmap_thread = NULL;
+	mutex_exit(&lxa_lock);
+
+	thread_exit();
+	/*NOTREACHED*/
+}
+
+/*
+ * Body of the thread that plays an mmap()ed output buffer.
+ *
+ * The buffer (lxas_umem_ptr/lxas_umem_len) is treated as a ring of
+ * lxas_frag_size byte fragments.  Each fragment is written to the output
+ * device followed by a zero length write, which makes the device bump
+ * ai.play.eof once the fragment has been played.  After priming the
+ * device with three fragments the thread polls ai.play.eof and queues as
+ * many new fragments as the device reports finished.
+ *
+ * The thread runs until lxas_mmap_thread_exit is set or until the initial
+ * device setup fails; errors during output restart the whole sequence
+ * from the top.  It always leaves through lxa_mmap_thread_exit() and
+ * never returns.
+ */
+void
+lxa_mmap_thread(lxa_state_t *lxa_state)
+{
+	struct uio uio, uio_null;
+	iovec_t iovec, iovec_null;
+	uint_t bytes_per_sec, ticks_per_frag;
+	uint64_t usec_per_frag;
+	int rv, junk, eof, retry;
+	audio_info_t ai;
+
+	/* we better be setup for writing to the output device */
+	ASSERT((lxa_state->lxas_flags & FWRITE) != 0);
+	ASSERT(lxa_state->lxas_odev_lh != NULL);
+
+	/* setup a uio to output one fragment */
+	uio.uio_iov = &iovec;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = 0;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	/* setup a uio to output a eof (a fragment with a length of 0) */
+	uio_null.uio_iov = &iovec_null;
+	uio_null.uio_iov->iov_len = 0;
+	uio_null.uio_iov->iov_base = NULL;
+	uio_null.uio_iovcnt = 1;
+	uio_null.uio_offset = 0;
+	uio_null.uio_segflg = UIO_SYSSPACE;
+	uio_null.uio_fmode = 0;
+	uio_null.uio_extflg = 0;
+	uio_null.uio_llimit = MAXOFFSET_T;
+	uio_null.uio_resid = 0;
+
+lxa_mmap_thread_top:
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+
+	/* first drain any pending audio output */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_DRAIN failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we depend on the ai.play.eof value to keep track of
+	 * audio output progress so reset it here.
+	 */
+	AUDIO_INITINFO(&ai);
+	ai.play.eof = 0;
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_SETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we're going to need to know the sampling rate and number
+	 * of output channels to estimate how long we can sleep between
+	 * requests.
+	 */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+	    (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_GETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/* estimate how many ticks it takes to output a fragment of data */
+	bytes_per_sec = (ai.play.sample_rate * ai.play.channels *
+	    ai.play.precision) / 8;
+	if (bytes_per_sec == 0) {
+		/*
+		 * a device that reports no sample rate, channels or
+		 * precision would make us divide by zero below.
+		 */
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "invalid audio format, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * do the multiplication in 64 bits; MICROSEC times a fragment
+	 * size of a few kilobytes no longer fits into 32 bits.  the
+	 * result is capped so that it survives the conversion to ticks.
+	 */
+	usec_per_frag = ((uint64_t)MICROSEC * lxa_state->lxas_frag_size) /
+	    bytes_per_sec;
+	if (usec_per_frag > INT32_MAX)
+		usec_per_frag = INT32_MAX;
+	ticks_per_frag = drv_usectohz((clock_t)usec_per_frag);
+
+	/* queue up three fragments of data into the output stream */
+	eof = 3;
+
+	/* sanity check the eof value */
+	ASSERT(ai.play.eof == 0);
+	ai.play.eof = 0;
+
+	/* we always start audio output at fragment 0 */
+	mutex_enter(&lxa_lock);
+	lxa_state->lxas_mmap_thread_frag = 0;
+
+	/*
+	 * we shouldn't have allowed the mapping if it isn't a multiple
+	 * of the fragment size
+	 */
+	ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0);
+
+	while (!lxa_state->lxas_mmap_thread_exit) {
+		size_t start, end;
+
+		/*
+		 * calculate the start and ending offsets of the next
+		 * fragment to output
+		 */
+		start = lxa_state->lxas_mmap_thread_frag *
+		    lxa_state->lxas_frag_size;
+		end = start + lxa_state->lxas_frag_size;
+
+		ASSERT(start < lxa_state->lxas_umem_len);
+		ASSERT(end <= lxa_state->lxas_umem_len);
+
+		/* setup the uio to output one fragment of audio */
+		uio.uio_resid = end - start;
+		uio.uio_iov->iov_len = end - start;
+		uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start];
+
+		/* increment the current fragment index */
+		lxa_state->lxas_mmap_thread_frag =
+		    (lxa_state->lxas_mmap_thread_frag + 1) %
+		    (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size);
+
+		/* drop the audio lock before actually outputting data */
+		mutex_exit(&lxa_lock);
+
+		/*
+		 * write the fragment of audio data to the device stream
+		 * then write a eof to the stream to tell the device to
+		 * increment ai.play.eof when it's done processing the
+		 * fragment we just wrote
+		 */
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write() failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio_null, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write(eof) failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+
+		/*
+		 * we want to avoid buffer underrun so ensure that
+		 * there is always at least one fragment of data in the
+		 * output stream.
+		 */
+		mutex_enter(&lxa_lock);
+		if (--eof > 0) {
+			continue;
+		}
+
+		/*
+		 * now we wait until the audio device has finished outputting
+		 * at least one fragment of data.
+		 */
+		retry = 0;
+		while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) {
+			uint_t ai_eof_old = ai.play.eof;
+
+			mutex_exit(&lxa_lock);
+
+			/*
+			 * delay for the number of ticks it takes
+			 * to output one fragment of data
+			 */
+			if (ticks_per_frag > 0)
+				delay(ticks_per_frag);
+
+			/* check if we've managed to output any fragments */
+			if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+			    AUDIO_GETINFO, (intptr_t)&ai,
+			    FKIOCTL, kcred, &junk)) != 0) {
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "AUDIO_GETINFO failed (%d), "
+				    "resetting audio output", rv);
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			if (ai_eof_old == ai.play.eof) {
+				/* institute a random retry limit */
+				if (retry++ < 100) {
+					mutex_enter(&lxa_lock);
+					continue;
+				}
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "output stalled, "
+				    "resetting audio output");
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			/*
+			 * the number of fragments that finished is the
+			 * distance between the two counter readings.
+			 * unsigned subtraction yields that distance even
+			 * when the counter wrapped around in between, so
+			 * no separate wrap-around case is needed.
+			 */
+			eof = (int)((uint_t)ai.play.eof - ai_eof_old);
+
+			/* we're done with this loop so re-acquire the lock */
+			ASSERT(eof != 0);
+			mutex_enter(&lxa_lock);
+		}
+	}
+	mutex_exit(&lxa_lock);
+	lxa_mmap_thread_exit(lxa_state);
+	/*NOTREACHED*/
+}
+
+/*
+ * Stop mmap output for this open.  When the output thread is running it
+ * is asked to exit and this function waits until it has done so; when
+ * it is not running nothing happens.
+ */
+static void
+lxa_mmap_output_disable(lxa_state_t *lxa_state)
+{
+	kt_did_t did;
+
+	mutex_enter(&lxa_lock);
+
+	if (lxa_state->lxas_mmap_thread_running != 0) {
+		/* ask the pcm mmap output thread to leave its loop */
+		lxa_state->lxas_mmap_thread_exit = 1;
+
+		/* pick up its id while the lock keeps the pointer valid */
+		did = lxa_state->lxas_mmap_thread->t_did;
+		mutex_exit(&lxa_lock);
+
+		/* and wait for it to be gone */
+		thread_join(did);
+		return;
+	}
+
+	/* no output thread, nothing to do */
+	mutex_exit(&lxa_lock);
+}
+
+/*
+ * Start mmap output for this open by creating the output thread.  When
+ * the thread is already running nothing happens.
+ */
+static void
+lxa_mmap_output_enable(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+
+	if (lxa_state->lxas_mmap_thread_running == 0) {
+		/* output starts at fragment 0 with no exit request pending */
+		lxa_state->lxas_mmap_thread_running = 1;
+		lxa_state->lxas_mmap_thread_exit = 0;
+		lxa_state->lxas_mmap_thread_frag = 0;
+
+		/* the new thread does the actual mmap pcm output */
+		lxa_state->lxas_mmap_thread = thread_create(NULL, 0,
+		    (void (*)())lxa_mmap_thread, lxa_state,
+		    0, &p0, TS_RUN, minclsyspri);
+		ASSERT(lxa_state->lxas_mmap_thread != NULL);
+	}
+
+	mutex_exit(&lxa_lock);
+}
+
+/*
+ * ioctl helper that switches mmap output on or off.  arg points at a
+ * uint_t: zero disables output, anything else enables it.
+ *
+ * Returns 0 on success, EINVAL when the device is not open for writing
+ * or has not been mmap()ed, and EFAULT when the argument cannot be
+ * copied in.
+ */
+static int
+lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	uint_t enable;
+
+	/* mmap access is only supported for output */
+	if ((lxa_state->lxas_flags & FWRITE) == 0) {
+		return (EINVAL);
+	}
+
+	/* without a mapping there is nothing to play */
+	if (lxa_state->lxas_umem_cookie == NULL) {
+		return (EINVAL);
+	}
+
+	/* fetch the requested setting */
+	if (ddi_copyin((void *)arg, &enable, sizeof (enable), mode) != 0) {
+		return (EFAULT);
+	}
+
+	if (enable != 0) {
+		lxa_mmap_output_enable(lxa_state);
+	} else {
+		lxa_mmap_output_disable(lxa_state);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl helper that reports the current mmap output position: the byte
+ * offset of the fragment index kept by the output thread.  The offset
+ * is copied out to arg as an int.
+ *
+ * Returns 0 on success, EINVAL when the device is not open for writing,
+ * has not been mmap()ed or has no output thread running, and EFAULT
+ * when the result cannot be copied out.
+ */
+static int
+lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	int offset;
+
+	/*
+	 * mmap access is only supported for output, and a position only
+	 * exists while a mapping is being played by the output thread.
+	 */
+	if (((lxa_state->lxas_flags & FWRITE) == 0) ||
+	    (lxa_state->lxas_umem_cookie == NULL) ||
+	    (lxa_state->lxas_mmap_thread_running == 0)) {
+		return (EINVAL);
+	}
+
+	mutex_enter(&lxa_lock);
+	offset = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&offset, (void *)arg, sizeof (offset), mode) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl helper that copies the current fragment size and count out to
+ * arg as an lxa_frag_info_t.  Returns 0, or EFAULT when the copy fails.
+ */
+static int
+lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t info;
+
+	info.lxa_fi_size = lxa_state->lxas_frag_size;
+	info.lxa_fi_cnt = lxa_state->lxas_frag_cnt;
+
+	if (ddi_copyout(&info, (void *)arg, sizeof (info), mode) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl helper that sets the fragment size and count from the
+ * lxa_frag_info_t that arg points at.
+ *
+ * Returns 0 on success, EINVAL when the device is currently mmap()ed or
+ * the requested values are out of bounds, and EFAULT when the argument
+ * cannot be copied in.
+ */
+static int
+lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t info;
+
+	/* the fragment settings are fixed while the device is mmaped */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		return (EINVAL);
+	}
+
+	/* fetch the requested settings */
+	if (ddi_copyin((void *)arg, &info, sizeof (info), mode) != 0) {
+		return (EFAULT);
+	}
+
+	/* there has to be at least one fragment */
+	if (info.lxa_fi_cnt == 0) {
+		return (EINVAL);
+	}
+
+	/* don't accept size values less than 16 */
+	if (info.lxa_fi_size < 16) {
+		return (EINVAL);
+	}
+
+	lxa_state->lxas_frag_size = info.lxa_fi_size;
+	lxa_state->lxas_frag_cnt = info.lxa_fi_cnt;
+
+	return (0);
+}
+
+/*
+ * Issue AUDIO_DRAIN on the output device.  Returns EINVAL when no
+ * output device is open and 0 otherwise; the result of the ioctl itself
+ * is ignored.
+ */
+static int
+lxa_audio_drain(lxa_state_t *lxa_state)
+{
+	int rval;
+
+	/* draining only makes sense for output buffers */
+	if (lxa_state->lxas_odev_lh != NULL) {
+		/* can't fail so ignore the return value */
+		(void) ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_DRAIN, NULL,
+		    FKIOCTL, kcred, &rval);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * lxa_audio_info_merge() - fold the audio_info structures obtained from
+ * the input and output devices into the single structure (ai_merged)
+ * that is reported back to the consumer.
+ *
+ * usage notes:
+ *
+ * - ai_idev must describe the input device and ai_odev the output
+ *   device; take care NOT to swap them.
+ *
+ * - the caller must have issued AUDIO_GETINFO against the input device
+ *   BEFORE the output device. (the monitor_gain handling below relies
+ *   on the output device having been queried most recently.)
+ */
+static void
+lxa_audio_info_merge(lxa_state_t *lxa_state,
+    audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged)
+{
+	/* no output device: the input device info is the whole answer */
+	if (lxa_state->lxas_odev_lh == NULL) {
+		*ai_merged = *ai_idev;
+		return;
+	}
+
+	/* no input device: the output device info is the whole answer */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		*ai_merged = *ai_odev;
+		return;
+	}
+
+	/* record settings come from the input device */
+	ai_merged->record = ai_idev->record;
+
+	/* play settings and mute state come from the output device */
+	ai_merged->play = ai_odev->play;
+	ai_merged->output_muted = ai_odev->output_muted;
+
+	/* device reference counts aren't supported, always report 1 */
+	ai_merged->ref_cnt = 1;
+
+	/* report the combined hw/sw feature sets cached in our state */
+	ai_merged->hw_features = lxa_state->lxas_hw_features;
+	ai_merged->sw_features = lxa_state->lxas_sw_features;
+
+	/*
+	 * input to output loopback (monitor) gain:
+	 *
+	 * - if the input and output devices are different physical
+	 *   devices we don't support loopback, so report a gain of zero.
+	 *
+	 * - if they are the same physical device, pass back the gain
+	 *   reported by the output device since that was the most
+	 *   recently queried. (this assumes the gain is a hardware
+	 *   setting, so a change made through one open instance of the
+	 *   device is visible through all other instances.)
+	 */
+	ai_merged->monitor_gain =
+	    lxa_state->lxas_devs_same ? ai_odev->monitor_gain : 0;
+
+	/*
+	 * the enabled software features of the two devices should
+	 * always agree. complain if they don't, and report only the
+	 * features that are enabled on both.
+	 */
+	if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled) {
+		zcmn_err(getzoneid(), CE_WARN, "lx_audio: "
+		    "unexpected sofware feature state");
+	}
+	ai_merged->sw_features_enabled =
+	    ai_idev->sw_features_enabled & ai_odev->sw_features_enabled;
+}
+
+/*
+ * Apply an audio_info request to the underlying input and output
+ * devices, then copy the merged resulting state back to the caller.
+ *
+ * cmd is the ioctl command forwarded to the underlying devices. arg
+ * and mode describe the caller's audio_info buffer as for
+ * ddi_copyin()/ddi_copyout().
+ *
+ * Returns 0 on success, EFAULT on a copy failure, EINVAL if an
+ * unsupported software feature is requested, EIO if play.eof is being
+ * changed on an mmaped device, or the error returned by an underlying
+ * device.
+ */
+static int
+lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg,
+    int mode)
+{
+	audio_info_t ai, ai_null, ai_idev, ai_odev;
+	int rv, junk;
+
+	/* fetch the caller's request */
+	if (ddi_copyin((void *)arg, &ai, sizeof (ai), mode) != 0)
+		return (EFAULT);
+
+	/*
+	 * refuse attempts to enable a software feature that we never
+	 * reported as supported. (-1 means "leave unchanged".)
+	 */
+	if (ai.sw_features_enabled != -1) {
+		if ((ai.sw_features_enabled &
+		    ~lxa_state->lxas_sw_features) != 0)
+			return (EINVAL);
+	}
+
+	/*
+	 * mmap output depends on the play.eof field, so once a process
+	 * has mmaped this device changes to it are not allowed.
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		if (ai.play.eof != -1)
+			return (EIO);
+	}
+
+	/* start both per-device requests out as copies of the original */
+	AUDIO_INITINFO(&ai_null);
+	ai_odev = ai;
+	ai_idev = ai;
+
+	/* the output device request carries no record settings */
+	ai_odev.record = ai_null.record;
+
+	/* the input device request carries no play or mute settings */
+	ai_idev.play = ai_null.play;
+	ai_idev.output_muted = ai_null.output_muted;
+
+	/* apply the settings to the input device first... */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd,
+		    (intptr_t)&ai_idev, FKIOCTL, kcred, &junk);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/* ...and then to the output device */
+	if (lxa_state->lxas_odev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd,
+		    (intptr_t)&ai_odev, FKIOCTL, kcred, &junk);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/*
+	 * an AUDIO_SETINFO call performs an implicit AUDIO_GETINFO to
+	 * return values (see the comments in audioio.h), so combine the
+	 * values returned by the two devices into the user's buffer.
+	 */
+	lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai);
+
+	/* copy out the merged results */
+	if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * AUDIO_GETINFO handler: query the underlying input and output devices
+ * and copy the merged audio_info out to the caller.
+ *
+ * Returns 0 on success, EFAULT if the copyout fails, or the error
+ * returned by an underlying device.
+ */
+static int
+lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	audio_info_t ai, ai_idev, ai_odev;
+	int rv, junk;
+
+	/*
+	 * query the input device before the output device; this is the
+	 * order lxa_audio_info_merge() expects.
+	 */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO,
+		    (intptr_t)&ai_idev, FKIOCTL, kcred, &junk);
+		if (rv != 0)
+			return (rv);
+	}
+
+	if (lxa_state->lxas_odev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+		    (intptr_t)&ai_odev, FKIOCTL, kcred, &junk);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/* collapse the two device views into a single user buffer */
+	lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai);
+
+	/* copy out the merged results */
+	rv = ddi_copyout(&ai, (void *)arg, sizeof (ai), mode);
+	return ((rv != 0) ? EFAULT : 0);
+}
+
+/*
+ * Issue AUDIO_GETINFO against the layered device handle lh and store
+ * the result in the kernel buffer ai. Returns whatever ldi_ioctl()
+ * returns.
+ */
+static int
+lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai)
+{
+	int rval;
+
+	ASSERT((lh != NULL) && (ai != NULL));
+
+	/* fetch the device state and channel state in one request */
+	return (ldi_ioctl(lh, AUDIO_GETINFO, (intptr_t)ai, FKIOCTL, kcred,
+	    &rval));
+}
+
+/*
+ * Fill in ai with the audio_info for this instance. When only one of
+ * the input/output devices is present it is queried directly; when
+ * both are present both are queried and the results merged.
+ *
+ * Returns 0 on success or the error from the underlying query.
+ */
+static int
+lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai)
+{
+	ldi_handle_t in_lh = lxa_state->lxas_idev_lh;
+	ldi_handle_t out_lh = lxa_state->lxas_odev_lh;
+	audio_info_t ai_idev, ai_odev;
+	int rv;
+
+	/* without an input device the output device is the only source */
+	if (in_lh == NULL)
+		return (lxa_mixer_ai_from_lh(out_lh, ai));
+
+	/* without an output device the input device is the only source */
+	if (out_lh == NULL)
+		return (lxa_mixer_ai_from_lh(in_lh, ai));
+
+	/*
+	 * both devices exist. query the input device first and the
+	 * output device second, stopping at the first failure.
+	 */
+	rv = lxa_mixer_ai_from_lh(in_lh, &ai_idev);
+	if (rv == 0)
+		rv = lxa_mixer_ai_from_lh(out_lh, &ai_odev);
+	if (rv != 0)
+		return (rv);
+
+	/* merge the two audio_info structures into the caller's buffer */
+	lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, ai);
+	return (0);
+}
+
+/*
+ * Common handler for LXA_IOC_MIXER_GET_VOL and LXA_IOC_MIXER_GET_MIC:
+ * report the play (VOL) or record (MIC) gain and balance.
+ *
+ * Returns 0 on success, EFAULT if the copyout fails, EINVAL for any
+ * other cmd, or the error returned while querying the devices.
+ */
+static int
+lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t lxa_ml;
+	audio_info_t ai;
+	int rv;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0)
+		return (rv);
+
+	switch (cmd) {
+	case LXA_IOC_MIXER_GET_VOL:
+		lxa_ml.lxa_ml_gain = ai.play.gain;
+		lxa_ml.lxa_ml_balance = ai.play.balance;
+		break;
+	case LXA_IOC_MIXER_GET_MIC:
+		lxa_ml.lxa_ml_gain = ai.record.gain;
+		lxa_ml.lxa_ml_balance = ai.record.balance;
+		break;
+	default:
+		/*
+		 * never copy out lxa_ml unless it was filled in above;
+		 * otherwise uninitialized kernel stack contents would
+		 * be exposed to the caller.
+		 */
+		return (EINVAL);
+	}
+
+	if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Common handler for LXA_IOC_MIXER_SET_VOL and LXA_IOC_MIXER_SET_MIC:
+ * validate the caller's mixer levels and apply them as the play (VOL)
+ * or record (MIC) gain and balance.
+ *
+ * Returns EFAULT if the request can't be copied in, EINVAL if the
+ * levels fail LXA_MIXER_LEVELS_OK(), otherwise the result of
+ * lxa_audio_setinfo().
+ */
+static int
+lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t levels;
+	audio_info_t ai;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch the requested mixer settings */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+
+	/* reject settings that fail the sanity check */
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	/* build an audio_info request that changes only gain/balance */
+	AUDIO_INITINFO(&ai);
+	if (cmd == LXA_IOC_MIXER_SET_VOL) {
+		ai.play.gain = levels.lxa_ml_gain;
+		ai.play.balance = levels.lxa_ml_balance;
+	} else if (cmd == LXA_IOC_MIXER_SET_MIC) {
+		ai.record.gain = levels.lxa_ml_gain;
+		ai.record.balance = levels.lxa_ml_balance;
+	}
+
+	/* ai lives in the kernel, so it is passed along with FKIOCTL */
+	return (lxa_audio_setinfo(lxa_state, AUDIO_SETINFO, (intptr_t)&ai,
+	    FKIOCTL));
+}
+
+/*
+ * LXA_IOC_MIXER_GET_PCM handler: return the zone's cached pcm mixer
+ * levels.
+ *
+ * Returns 0 on success or EFAULT if the copyout fails.
+ */
+static int
+lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t lxa_ml;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/*
+	 * snapshot the cached pcm mixer settings under the lock, then
+	 * drop the lock before touching user memory. ddi_copyout() can
+	 * fault and block, so it shouldn't be called with lxa_lock held.
+	 */
+	mutex_enter(&lxa_lock);
+	lxa_ml = lxa_state->lxas_zs->lxa_zs_pcm_levels;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * LXA_IOC_MIXER_SET_PCM handler: validate the caller's pcm mixer
+ * levels, push them to the zone's active output stream (if there is
+ * one) and, if that succeeds, cache them in the zone state.
+ *
+ * Returns 0 on success, EFAULT if the request can't be copied in,
+ * EINVAL if the levels fail LXA_MIXER_LEVELS_OK(), or the error from
+ * lxa_audio_setinfo().
+ */
+static int
+lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t levels;
+	lxa_state_t *ostate;
+	int rv = 0;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch the requested mixer settings */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+
+	/* reject settings that fail the sanity check */
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	mutex_enter(&lxa_lock);
+
+	/* if the zone has an active output channel, update it */
+	ostate = lxa_state->lxas_zs->lxa_zs_ostate;
+	if (ostate != NULL) {
+		audio_info_t ai;
+
+		/* request a change to the play gain/balance only */
+		AUDIO_INITINFO(&ai);
+		ai.play.gain = levels.lxa_ml_gain;
+		ai.play.balance = levels.lxa_ml_balance;
+
+		rv = lxa_audio_setinfo(ostate, AUDIO_SETINFO,
+		    (intptr_t)&ai, FKIOCTL);
+	}
+
+	/* only cache the new settings if nothing went wrong above */
+	if (rv == 0)
+		lxa_state->lxas_zs->lxa_zs_pcm_levels = levels;
+
+	mutex_exit(&lxa_lock);
+	return (rv);
+}
+
+/*
+ * LXA_IOC_ZONE_REG handler: register a zone with the driver.
+ *
+ * The request (an lxa_zone_reg_t) names the zone and the input and
+ * output device property values to record for it. Registration
+ * creates the two per-zone device properties on lxa_dip and inserts a
+ * zone state structure into lxa_zstate_hash.
+ *
+ * Returns 0 on success, EFAULT if the request can't be copied in,
+ * EINVAL if a string in the request is unterminated or a device value
+ * fails lxa_devprop_verify(), and EIO if the zone is already
+ * registered or the properties/hash entry can't be created.
+ */
+static int
+lxa_zone_reg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t lxa_zr;
+	lxa_zstate_t *lxa_zs = NULL;
+	mod_hash_val_t hval;
+	char *idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* make sure that inputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++)
+		if (lxa_zr.lxa_zr_inputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_inputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid inputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0)
+		return (EINVAL);
+
+	/* make sure that outputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++)
+		if (lxa_zr.lxa_zr_outputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_outputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid outputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0)
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	/*
+	 * allocate and initialize a zone state structure.
+	 * since the audio device can't possibly be opened yet
+	 * (we're setting it up now and the zone isn't booted yet)
+	 * assign some reasonable default pcm channel settings.
+	 */
+	lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP);
+	lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name);
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN;
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE;
+
+	mutex_enter(&lxa_lock);
+
+	/*
+	 * make sure this zone isn't already registered.
+	 * a zone is registered if properties for that zone exist
+	 * or there is a zone state structure for that zone.
+	 */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+
+	/*
+	 * mod_hash_find() stores a pointer sized mod_hash_val_t through
+	 * its last argument, so the result must land in a variable of
+	 * that type. (pointing it at an int would overrun the stack on
+	 * a 64-bit kernel whenever the zone is found.)
+	 */
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename, &hval) == 0)
+		goto err_unlock;
+
+	/*
+	 * create the new properties and insert the zone state structure
+	 * into the global hash
+	 */
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (mod_hash_insert(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+	    (mod_hash_val_t)lxa_zs) != 0)
+		goto err_prop_remove;
+
+	/* success! */
+	lxa_registered_zones++;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup */
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_prop_remove:
+	/* undo whichever of the two properties were created */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	/* the zone state never made it into the hash, so free it here */
+	if (lxa_zs != NULL) {
+		strfree(lxa_zs->lxa_zs_zonename);
+		kmem_free(lxa_zs, sizeof (*lxa_zs));
+	}
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (EIO);
+}
+
+/*
+ * LXA_IOC_ZONE_UNREG handler: unregister a zone from the driver.
+ *
+ * Removes the zone's input/output device properties from lxa_dip and
+ * removes and frees its zone state structure.
+ *
+ * Returns 0 on success, EFAULT if the request can't be copied in,
+ * EINVAL if the zone name is unterminated, ENOENT if the zone isn't
+ * registered, and EBUSY if the zone still has an active input or
+ * output stream.
+ */
+static int
+lxa_zone_unreg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t lxa_zr;
+	lxa_zstate_t *lxa_zs = NULL;
+	char *idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int rv, i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	mutex_enter(&lxa_lock);
+
+	if (lxa_registered_zones <= 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+
+	/* make sure this zone is actually registered */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs) != 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0);
+
+	/*
+	 * if the audio device is currently in use, for either input or
+	 * output, then refuse to unregister the zone. (both streams
+	 * must be checked; freeing the zone state while an input
+	 * stream still points at it would leave a dangling reference.)
+	 */
+	if ((lxa_zs->lxa_zs_istate != NULL) ||
+	    (lxa_zs->lxa_zs_ostate != NULL)) {
+		rv = EBUSY;
+		goto err_unlock;
+	}
+
+	/* success! cleanup zone config state */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+	/*
+	 * note, the action of removing the zone state structure from the
+	 * hash will automatically free lxa_zs->lxa_zs_zonename.
+	 *
+	 * the reason for this is that we used lxa_zs->lxa_zs_zonename
+	 * as the hash key and by default mod_hash_create_strhash() uses
+	 * mod_hash_strkey_dtor() as the hash key destructor (which
+	 * frees the key for us).
+	 */
+	(void) mod_hash_remove(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs);
+	lxa_registered_zones--;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup */
+	kmem_free(lxa_zs, sizeof (*lxa_zs));
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (rv);
+}
+
+/*
+ * ioctl dispatcher for the devctl minor node. Only the zone
+ * register/unregister commands are understood; anything else, or any
+ * request from outside the global zone, gets EINVAL.
+ */
+static int
+lxa_ioctl_devctl(int cmd, intptr_t arg, int mode)
+{
+	/* the devctl node may only be driven from the global zone */
+	ASSERT(getzoneid() == 0);
+	if (getzoneid() != 0)
+		return (EINVAL);
+
+	if (cmd == LXA_IOC_ZONE_REG)
+		return (lxa_zone_reg(arg, mode));
+
+	if (cmd == LXA_IOC_ZONE_UNREG)
+		return (lxa_zone_unreg(arg, mode));
+
+	/* not a command we know about */
+	return (EINVAL);
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * Opens of the devctl minor node succeed immediately (global zone
+ * only). Opens of the dsp and mixer minor nodes are clone opens: a new
+ * minor number and lxa_state structure are allocated, the underlying
+ * devices are opened, the state is entered into lxa_state_hash and the
+ * new dev_t is returned through devp.
+ *
+ * Returns 0 on success, EINVAL for an unknown minor node or a devctl
+ * open from a non-global zone, EIO if the calling zone has no zone
+ * state or the hash insert fails, EBUSY if the zone already has an
+ * active input or output stream, or the error from a helper.
+ */
+static int
+/*ARGSUSED*/
+lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+ lxa_dev_type_t open_type = LXA_TYPE_INVALID;
+ lxa_zstate_t *lxa_zs;
+ lxa_state_t *lxa_state;
+ minor_t minor;
+ int rv;
+
+ if (getminor(*devp) == LXA_MINORNUM_DEVCTL) {
+ /*
+ * this is a devctl node, it exists to administer this
+ * pseudo driver so it doesn't actually need access to
+ * any underlying audio devices. hence there is nothing
+ * really to do here. of course, this driver should
+ * only be administered from the global zone.
+ */
+ ASSERT(getzoneid() == 0);
+ if (getzoneid() != 0)
+ return (EINVAL);
+ return (0);
+ }
+
+ /* lookup the zone state structure (keyed by the caller's zone name) */
+ if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(),
+ (mod_hash_val_t *)&lxa_zs) != 0) {
+ return (EIO);
+ }
+
+ /* determine what type of device was opened */
+ switch (getminor(*devp)) {
+ case LXA_MINORNUM_DSP:
+ open_type = LXA_TYPE_AUDIO;
+ break;
+ case LXA_MINORNUM_MIXER:
+ open_type = LXA_TYPE_AUDIOCTL;
+ break;
+ default:
+ return (EINVAL);
+ }
+ ASSERT(open_type != LXA_TYPE_INVALID);
+
+ /* all other opens are clone opens so get a new minor node */
+ minor = id_alloc(lxa_minor_id);
+
+ /* allocate and initialize the new lxa_state structure */
+ lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP);
+ lxa_state->lxas_zs = lxa_zs;
+ lxa_state->lxas_dev_old = *devp;
+ lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor);
+ lxa_state->lxas_flags = flags;
+ lxa_state->lxas_type = open_type;
+
+ /*
+ * initialize the input and output device.
+ *
+ * NOTE(review): every failure path below relies on lxa_state_close()
+ * to undo the work done so far -- presumably including releasing the
+ * minor number and the state structure. it is defined outside this
+ * section; confirm it copes with a partially initialized state.
+ */
+ if (((rv = lxa_dev_open(lxa_state)) != 0) ||
+ ((rv = lxa_dev_getfeatures(lxa_state)) != 0)) {
+ lxa_state_close(lxa_state);
+ return (rv);
+ }
+
+ /*
+ * save this audio state structure into a hash indexed
+ * by its minor device number. (this will provide a convenient
+ * way to lookup the state structure on future operations.)
+ */
+ if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t)lxa_state) != 0) {
+ lxa_state_close(lxa_state);
+ return (EIO);
+ }
+
+ mutex_enter(&lxa_lock);
+
+ /* apply the currently cached zone PCM mixer levels */
+ if ((lxa_state->lxas_type == LXA_TYPE_AUDIO) &&
+ (lxa_state->lxas_odev_lh != NULL)) {
+ audio_info_t ai;
+
+ AUDIO_INITINFO(&ai);
+ ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain;
+ ai.play.balance = lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance;
+
+ /* ai is a kernel buffer, hence FKIOCTL */
+ if ((rv = lxa_audio_setinfo(lxa_state,
+ AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (rv);
+ }
+ }
+
+ /*
+ * we only allow one active open of the input or output device.
+ * check here for duplicate opens
+ */
+ if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+ if ((lxa_state->lxas_idev_lh != NULL) &&
+ (lxa_zs->lxa_zs_istate != NULL)) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (EBUSY);
+ }
+ if ((lxa_state->lxas_odev_lh != NULL) &&
+ (lxa_zs->lxa_zs_ostate != NULL)) {
+ mutex_exit(&lxa_lock);
+ lxa_state_close(lxa_state);
+ return (EBUSY);
+ }
+
+ /* not a duplicate open, update the global zone state */
+ if (lxa_state->lxas_idev_lh != NULL)
+ lxa_zs->lxa_zs_istate = lxa_state;
+ if (lxa_state->lxas_odev_lh != NULL)
+ lxa_zs->lxa_zs_ostate = lxa_state;
+ }
+ mutex_exit(&lxa_lock);
+
+ /* make sure to return our newly allocated dev_t */
+ *devp = lxa_state->lxas_dev_new;
+ return (0);
+}
+
+/*
+ * close(9E) entry point. The devctl node has no per-open state, so
+ * closing it is a no-op; for any other node the state structure is
+ * looked up by minor number and handed to lxa_state_close().
+ *
+ * Returns 0 on success or EINVAL if no state exists for the minor.
+ */
+static int
+/*ARGSUSED*/
+lxa_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+	minor_t minor = getminor(dev);
+	lxa_state_t *lxa_state;
+	int missing;
+
+	/* devctl minor nodes don't have a handle to tear down */
+	if (minor == LXA_MINORNUM_DEVCTL)
+		return (0);
+
+	/* find the handle that was created when this node was opened */
+	missing = mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)minor, (mod_hash_val_t *)&lxa_state);
+	if (missing != 0)
+		return (EINVAL);
+
+	lxa_state_close(lxa_state);
+	return (0);
+}
+
+/*
+ * read(9E) entry point: pass the read through to the underlying input
+ * device.
+ *
+ * Returns 0 on success (including a zero length request), EINVAL if
+ * there is no state for this minor, EIO if the device has been mmaped,
+ * EBADF if there is no input device, or the error from ldi_read().
+ */
+static int
+/*ARGSUSED*/
+lxa_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t *lxa_state;
+	minor_t minor = getminor(dev);
+	int rv = 0;	/* a zero length request never enters the loop */
+
+	/* get the handle for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/*
+	 * if a process has mmaped this device then we don't allow
+	 * any more reads or writes to the device
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* we can't do a read if there is no input device */
+	if (lxa_state->lxas_idev_lh == NULL)
+		return (EBADF);
+
+	/*
+	 * pass the request on. keep reading until the request is
+	 * satisfied, unless an error occurs or the caller asked for
+	 * non-blocking i/o (in which case one attempt is all we make).
+	 */
+	while (uiop->uio_resid != 0) {
+		rv = ldi_read(lxa_state->lxas_idev_lh, uiop, kcred);
+		if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) {
+			break;
+		}
+	}
+	return (rv);
+}
+
+/*
+ * write(9E) entry point: pass the write through to the underlying
+ * output device.
+ *
+ * Returns 0 on success (including a zero length request), EINVAL if
+ * there is no state for this minor, EIO if the device has been mmaped,
+ * EBADF if there is no output device, or the error from ldi_write().
+ */
+static int
+/*ARGSUSED*/
+lxa_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t *lxa_state;
+	minor_t minor = getminor(dev);
+	int rv = 0;	/* a zero length request never enters the loop */
+
+	/* get the handle for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/*
+	 * if a process has mmaped this device then we don't allow
+	 * any more reads or writes to the device
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* we can't do a write if there is no output device */
+	if (lxa_state->lxas_odev_lh == NULL)
+		return (EBADF);
+
+	/*
+	 * pass the request on. keep writing until everything has been
+	 * consumed, unless an error occurs or the caller asked for
+	 * non-blocking i/o (in which case one attempt is all we make).
+	 */
+	while (uiop->uio_resid != 0) {
+		rv = ldi_write(lxa_state->lxas_odev_lh, uiop, kcred);
+		if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) {
+			break;
+		}
+	}
+	return (rv);
+}
+
+/*
+ * ioctl(9E) entry point. devctl requests are handed to
+ * lxa_ioctl_devctl(); everything else is dispatched on the type of the
+ * open instance (audio or audioctl) and then on the command.
+ *
+ * Returns the handler's result, EFAULT if a result can't be copied
+ * out, or EINVAL for an unknown minor or unsupported command.
+ */
+static int
+/*ARGSUSED*/
+lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	minor_t minor = getminor(dev);
+	lxa_state_t *lxa_state;
+
+	/* devctl minor nodes don't have a handle, deal with them first */
+	if (minor == LXA_MINORNUM_DEVCTL)
+		return (lxa_ioctl_devctl(cmd, arg, mode));
+
+	/* find the handle for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/* this command is valid regardless of the device type */
+	if (cmd == LXA_IOC_GETMINORNUM) {
+		int minornum = getminor(lxa_state->lxas_dev_old);
+
+		if (ddi_copyout(&minornum, (void *)arg,
+		    sizeof (minornum), mode) != 0)
+			return (EFAULT);
+		return (0);
+	}
+
+	switch (lxa_state->lxas_type) {
+	case LXA_TYPE_AUDIO:
+		/* native ioctls */
+		switch (cmd) {
+		case LXA_IOC_MMAP_OUTPUT:
+			return (lxa_ioc_mmap_output(lxa_state, arg, mode));
+		case LXA_IOC_MMAP_PTR:
+			return (lxa_ioc_mmap_ptr(lxa_state, arg, mode));
+		case LXA_IOC_GET_FRAG_INFO:
+			return (lxa_ioc_get_frag_info(lxa_state, arg, mode));
+		case LXA_IOC_SET_FRAG_INFO:
+			return (lxa_ioc_set_frag_info(lxa_state, arg, mode));
+		}
+
+		/* layered ioctls */
+		switch (cmd) {
+		case AUDIO_DRAIN:
+			return (lxa_audio_drain(lxa_state));
+		case AUDIO_SETINFO:
+			return (lxa_audio_setinfo(lxa_state,
+			    AUDIO_SETINFO, arg, mode));
+		case AUDIO_GETINFO:
+			return (lxa_audio_getinfo(lxa_state, arg, mode));
+		}
+		break;
+
+	case LXA_TYPE_AUDIOCTL:
+		/* native ioctls */
+		switch (cmd) {
+		case LXA_IOC_MIXER_GET_VOL:
+		case LXA_IOC_MIXER_GET_MIC:
+			return (lxa_mixer_get_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_SET_VOL:
+		case LXA_IOC_MIXER_SET_MIC:
+			return (lxa_mixer_set_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_GET_PCM:
+			return (lxa_mixer_get_pcm(lxa_state, arg, mode));
+		case LXA_IOC_MIXER_SET_PCM:
+			return (lxa_mixer_set_pcm(lxa_state, arg, mode));
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	/* nothing above recognized the command */
+	return (EINVAL);
+}
+
+/*
+ * devmap(9E) entry point: allocate a kernel buffer of len bytes and
+ * export it to the caller as the mmap output buffer for this open
+ * instance.
+ *
+ * Returns 0 on success (with *maplen set to len), EINVAL for an
+ * unknown minor, a non-audio device, a device not opened for writing
+ * or an unacceptable length, ENOMEM if the buffer can't be allocated,
+ * EIO if the mapping can't be set up, and EBUSY if this open instance
+ * already has a mapping.
+ */
+static int
+/*ARGSUSED*/
+lxa_devmap(dev_t dev, devmap_cookie_t dhp,
+ offset_t off, size_t len, size_t *maplen, uint_t model)
+{
+ lxa_state_t *lxa_state;
+ minor_t minor = getminor(dev);
+ ddi_umem_cookie_t umem_cookie;
+ void *umem_ptr;
+ int rv;
+
+ /* get the handle for this device */
+ if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+ (mod_hash_val_t *)&lxa_state) != 0)
+ return (EINVAL);
+
+ /* we only support mmaping of audio devices */
+ if (lxa_state->lxas_type != LXA_TYPE_AUDIO)
+ return (EINVAL);
+
+ /* we only support output via mmap */
+ if ((lxa_state->lxas_flags & FWRITE) == 0)
+ return (EINVAL);
+
+ /*
+ * sanity check the amount of memory the user is allocating: it must
+ * be non-zero, no larger than LXA_OSS_FRAG_MEM, and a whole number
+ * of fragments.
+ *
+ * NOTE(review): this divides by lxas_frag_size. presumably it is
+ * given a non-zero default when the device is opened -- confirm.
+ */
+ if ((len == 0) ||
+ (len > LXA_OSS_FRAG_MEM) ||
+ ((len % lxa_state->lxas_frag_size) != 0))
+ return (EINVAL);
+
+ /* allocate and clear memory to mmap (without sleeping) */
+ umem_ptr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &umem_cookie);
+ if (umem_ptr == NULL)
+ return (ENOMEM);
+ bzero(umem_ptr, len);
+
+ /* setup the memory mappings */
+ rv = devmap_umem_setup(dhp, lxa_dip, NULL, umem_cookie, 0, len,
+ PROT_USER | PROT_READ | PROT_WRITE, 0, NULL);
+ if (rv != 0) {
+ ddi_umem_free(umem_cookie);
+ return (EIO);
+ }
+
+ mutex_enter(&lxa_lock);
+
+ /*
+ * we only support one mmap per open
+ *
+ * NOTE(review): on this path the cookie is freed after it was
+ * already handed to devmap_umem_setup() above -- confirm that is
+ * safe when the devmap request then fails with EBUSY.
+ */
+ if (lxa_state->lxas_umem_cookie != NULL) {
+ ASSERT(lxa_state->lxas_umem_ptr != NULL);
+ mutex_exit(&lxa_lock);
+ ddi_umem_free(umem_cookie);
+ return (EBUSY);
+ }
+ ASSERT(lxa_state->lxas_umem_ptr == NULL);
+
+ /* record the new mapping in the state for this open instance */
+ *maplen = len;
+ lxa_state->lxas_umem_len = len;
+ lxa_state->lxas_umem_ptr = umem_ptr;
+ lxa_state->lxas_umem_cookie = umem_cookie;
+ mutex_exit(&lxa_lock);
+ return (0);
+}
+
+/*
+ * attach(9E) entry point. Only DDI_ATTACH of instance 0 is supported.
+ *
+ * Creates the devctl, dsp and mixer minor nodes and allocates the
+ * driver's global data structures (the minor number id space and the
+ * two state hashes).
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE with everything undone.
+ */
+static int
+/*ARGSUSED*/
+lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int instance = ddi_get_instance(dip);
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	ASSERT(instance == 0);
+	if (instance != 0)
+		return (DDI_FAILURE);
+
+	lxa_dip = dip;
+	mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/* create our minor nodes */
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR,
+	    LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS)
+		goto fail;
+
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR,
+	    LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS)
+		goto fail;
+
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR,
+	    LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) != DDI_SUCCESS)
+		goto fail;
+
+	/* allocate our data structures */
+	lxa_minor_id = id_space_create("lxa_minor_id",
+	    LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS);
+	lxa_state_hash = mod_hash_create_idhash("lxa_state_hash",
+	    lxa_state_hash_size, mod_hash_null_valdtor);
+	lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash",
+	    lxa_zstate_hash_size, mod_hash_null_valdtor);
+
+	return (DDI_SUCCESS);
+
+fail:
+	/*
+	 * a failed attach must not leave anything behind: remove any
+	 * minor nodes that were created (a NULL name removes them all),
+	 * tear down the lock and forget the dip.
+	 */
+	ddi_remove_minor_node(dip, NULL);
+	mutex_destroy(&lxa_lock);
+	lxa_dip = NULL;
+	return (DDI_FAILURE);
+}
+
+/*
+ * detach(9E) entry point. Only DDI_DETACH is supported, and it is
+ * refused while any zone is still registered.
+ *
+ * Undoes everything lxa_attach() set up: the minor nodes, the two
+ * state hashes, the minor number id space and the global lock.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+/*ARGSUSED*/
+lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+	if (lxa_registered_zones > 0)
+		return (DDI_FAILURE);
+
+	/* remove all of the minor nodes created at attach time */
+	ddi_remove_minor_node(dip, NULL);
+
+	/*
+	 * each hash is destroyed with the destructor that matches the
+	 * way it was created: lxa_state_hash is an id hash while
+	 * lxa_zstate_hash is a string hash.
+	 */
+	mod_hash_destroy_idhash(lxa_state_hash);
+	mod_hash_destroy_strhash(lxa_zstate_hash);
+	id_space_destroy(lxa_minor_id);
+	mutex_destroy(&lxa_lock);
+
+	/* don't leave stale pointers behind in the globals */
+	lxa_state_hash = NULL;
+	lxa_zstate_hash = NULL;
+	lxa_minor_id = NULL;
+	lxa_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E) entry point. There is only ever one instance of this
+ * driver, so every dev_t maps to lxa_dip and to instance 0.
+ */
+static int
+/*ARGSUSED*/
+lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
+{
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		/* report our one and only dev_info node */
+		*resultp = lxa_dip;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		/* report instance number 0 */
+		*resultp = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	/* any other request is unsupported */
+	return (DDI_FAILURE);
+}
+
+/*
+ * Character/block entry points (cb_ops) for the driver. Entry points
+ * this driver doesn't implement are filled in with nodev/nochpoll.
+ */
+static struct cb_ops lxa_cb_ops = {
+ lxa_open, /* open */
+ lxa_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lxa_read, /* read */
+ lxa_write, /* write */
+ lxa_ioctl, /* ioctl */
+ lxa_devmap, /* devmap */
+ nodev, /* mmap */
+ ddi_devmap_segmap, /* segmap */
+ nochpoll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP | D_DEVMAP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ NULL, /* aread */
+ NULL /* awrite */
+};
+
+/*
+ * Device operations (dev_ops) for the driver.
+ */
+static struct dev_ops lxa_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ lxa_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lxa_attach, /* attach */
+ lxa_detach, /* detach */
+ nodev, /* reset */
+ &lxa_cb_ops, /* cb_ops */
+ NULL, /* bus_ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * modldrv describes this module as a device driver and points at its
+ * dev_ops.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "linux audio driver", /* description of module */
+ &lxa_ops /* driver ops */
+};
+
+/*
+ * The linkage list handed to mod_install(), mod_remove() and
+ * mod_info(); it holds the single driver linkage structure above.
+ */
+static struct modlinkage modlinkage = {
+ MODREV_1, /* ml_rev */
+ &modldrv, /* the one linkage structure */
+ NULL /* end of the linkage list */
+};
+
+/*
+ * standard module entry points
+ */
+int
+_init(void)
+{
+	int rv;
+
+	/* register the module described by modlinkage with the kernel */
+	rv = mod_install(&modlinkage);
+	return (rv);
+}
+
+int
+_fini(void)
+{
+	int rv;
+
+	/* ask the kernel to unregister the module */
+	rv = mod_remove(&modlinkage);
+	return (rv);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	/* let the kernel fill in modinfop from our linkage information */
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf
new file mode 100644
index 0000000000..2eeb5eb7ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_audio" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..6520ca5597
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * This driver attempts to emulate some of the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris.
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ * - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ * - the dev_t minor node value for each open ptm instance corresponds
+ * to its associated slave terminal device number. ie. the path to
+ * the slave terminal device associated with an open ptm instance
+ * whose dev_t minor node value is 5, is /dev/pts/5.
+ *
+ * - the ptm driver always allocates the lowest numbered slave terminal
+ * device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/ldlinux.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define LP_PTM_PATH "/dev/ptmx" /* opened through LDI by lx_ptm_open() */
+#define LP_PTS_PATH "/dev/pts/"
+#define LP_PTS_DRV_NAME "pts" /* resolved to a major number at attach */
+/*
+ * NOTE(review): LP_PTS_PATH and the two delay constants below are not
+ * referenced anywhere else in this file, and both delays evaluate to the
+ * same value -- confirm whether they are still needed.
+ */
+#define LP_PTS_USEC_DELAY (5 * 1000) /* 5 ms */
+#define LP_PTS_USEC_DELAY_MAX (5 * MILLISEC) /* 5 ms */
+
+/*
+ * this driver is layered on top of the ptm driver. we'd like to
+ * make this driver's minor name space a mirror of the ptm driver's
+ * namespace, but we can't actually do this. the reason is that the
+ * ptm driver is opened via the clone driver. therefore no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver. therefore we need to have at least one minor node accessible
+ * via the filesystem so that consumers can open it. we use the device
+ * node with a minor number of 0 for this purpose. what this means is
+ * that minor node 0 can't be used to map ptm minor node 0. since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one. ie. a ptm minor node with a value of 0 will
+ * correspond to our minor node with a value of 1. these mappings are
+ * managed with the following macros.
+ */
+#define DEVT_TO_INDEX(x) LX_PTM_DEV_TO_PTS(x) /* our dev_t -> handle array index */
+#define INDEX_TO_MINOR(x) ((x) + 1) /* ptm minor -> our minor (0 is reserved) */
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA
+ */
+#define LP_PTY_INC 128 /* slots added per growth in lx_ptm_lh_grow() */
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device. Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutexes or condition variables into the
+ * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded. Mutexes and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexes or cvs in the array
+ * then the system would tip over. In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures. (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+ int lpo_rops; /* read-side ops in progress (0 or 1) */
+ kcondvar_t lpo_rops_cv; /* signalled when lpo_rops returns to 0 */
+ kmutex_t lpo_rops_lock; /* protects lpo_rops */
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation. These handles are stored in an array
+ * hanging off the lx_ptm_state structure.
+ */
+typedef struct lx_ptm_handle {
+ /* Device handle to the underlying real /dev/ptmx master terminal. */
+ ldi_handle_t lph_handle;
+
+ /* Flag to indicate if TIOCPKT mode has been enabled (0 or 1). */
+ int lph_pktio;
+
+ /* Number of EOFs seen on the master, i.e. slave open/close cycles. */
+ int lph_eofed;
+
+ /* Callback handler in the ptm driver to check if slave is open. */
+ ptmptsopencb_t lph_ppocb;
+
+ /* Separately allocated read-serialization state (see lx_ptm_ops_t). */
+ lx_ptm_ops_t *lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver.
+ */
+typedef struct lx_ptm_state {
+ /* lx_ptm device devinfo pointer */
+ dev_info_t *lps_dip;
+
+ /* LDI ident used to open underlying real /dev/ptmx master terminals. */
+ ldi_ident_t lps_li;
+
+ /* pts driver's major number, looked up by name at attach */
+ major_t lps_pts_major;
+
+ /* rw lock used to manage access and growth of lps_lh_array */
+ krwlock_t lps_lh_rwlock;
+
+ /* number of elements in lps_lh_array */
+ uint_t lps_lh_count;
+
+ /* Array of handles to underlying real /dev/ptmx master terminals. */
+ lx_ptm_handle_t *lps_lh_array;
+} lx_ptm_state_t;
+
+/* The lx_ptm global state structure (a single static instance). */
+static lx_ptm_state_t lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone.
+ */
+static char *lx_pts_mods[] = {
+ "ptem",
+ "ldterm",
+ "ttcompat",
+ LDLINUX_MOD,
+ NULL /* end of list */
+};
+
+/*
+ * Enlarge the global layered-handle array so that slot 'index' exists.
+ *
+ * This takes lps_lh_rwlock as writer itself, so it must be entered with
+ * the lock dropped. The replacement array is allocated before the lock is
+ * taken; if another thread has grown the array far enough in the meantime
+ * the new allocation is simply discarded.
+ */
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+	lx_ptm_handle_t	*fresh, *stale;
+	uint_t		fresh_cnt, stale_cnt;
+	size_t		fresh_sz;
+
+	/*
+	 * Size and allocate the new array without holding the lock so that
+	 * readers can keep using the devices if the allocation blocks.
+	 */
+	fresh_cnt = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+	fresh_sz = sizeof (lx_ptm_handle_t) * fresh_cnt;
+	fresh = kmem_zalloc(fresh_sz, KM_SLEEP);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	if (index >= lps.lps_lh_count) {
+		/* Still too small: carry the existing entries over. */
+		stale = lps.lps_lh_array;
+		stale_cnt = lps.lps_lh_count;
+
+		ASSERT((stale_cnt != 0) || (stale == NULL));
+		ASSERT((stale_cnt == 0) || (stale != NULL));
+		if (stale_cnt != 0) {
+			bcopy(stale, fresh,
+			    sizeof (lx_ptm_handle_t) * stale_cnt);
+		}
+
+		/* Publish the new array, then let everyone back in. */
+		lps.lps_lh_array = fresh;
+		lps.lps_lh_count = fresh_cnt;
+		rw_exit(&lps.lps_lh_rwlock);
+
+		/* Release the previous array, if there was one. */
+		if (stale != NULL) {
+			kmem_free(stale,
+			    sizeof (lx_ptm_handle_t) * stale_cnt);
+		}
+		return;
+	}
+
+	/* Someone beat us to it, so throw our allocation away. */
+	rw_exit(&lps.lps_lh_rwlock);
+	kmem_free(fresh, fresh_sz);
+}
+
+/*
+ * Record a newly opened ptm layered handle in slot 'index' of the global
+ * handle array, growing the array first if the slot does not exist yet.
+ * A freshly initialized lx_ptm_ops_t is attached to the slot as well.
+ */
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+	lx_ptm_handle_t	*slot;
+	lx_ptm_ops_t	*ops;
+
+	ASSERT(lh != NULL);
+
+	/* Build the per-handle ops state before touching the array. */
+	ops = kmem_zalloc(sizeof (*ops), KM_SLEEP);
+	mutex_init(&ops->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ops->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	if (index >= lps.lps_lh_count) {
+		/* lx_ptm_lh_grow() takes the lock itself; let go first. */
+		rw_exit(&lps.lps_lh_rwlock);
+		lx_ptm_lh_grow(index);
+		rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	}
+	ASSERT(index < lps.lps_lh_count);
+
+	/* The slot must be unused. */
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle == NULL);
+	ASSERT(slot->lph_pktio == 0);
+	ASSERT(slot->lph_eofed == 0);
+	ASSERT(slot->lph_lpo == NULL);
+
+	/* Fill it in. */
+	slot->lph_handle = lh;
+	slot->lph_pktio = 0;
+	slot->lph_eofed = 0;
+	slot->lph_lpo = ops;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Clear slot 'index' of the global handle array and return the layered
+ * handle that was stored there so that the caller can close it. The
+ * lx_ptm_ops_t attached to the slot is torn down and freed.
+ *
+ * The slot must be populated and must have no read operation in progress.
+ */
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+	lx_ptm_ops_t	*lpo;
+	ldi_handle_t	lh;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+	ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+	ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+	/*
+	 * Free the ops structure. The mutex and cv were initialized by
+	 * lx_ptm_lh_insert(), so they are destroyed here before the memory
+	 * that holds them is released.
+	 */
+	lpo = lps.lps_lh_array[index].lph_lpo;
+	cv_destroy(&lpo->lpo_rops_cv);
+	mutex_destroy(&lpo->lpo_rops_lock);
+	kmem_free(lpo, sizeof (lx_ptm_ops_t));
+	lps.lps_lh_array[index].lph_lpo = NULL;
+
+	/* remove the handle and return it */
+	lh = lps.lps_lh_array[index].lph_handle;
+	lps.lps_lh_array[index].lph_handle = NULL;
+	lps.lps_lh_array[index].lph_pktio = 0;
+	lps.lps_lh_array[index].lph_eofed = 0;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lh);
+}
+
+/*
+ * Copy the saved "is the slave open" callback for slot 'index' into
+ * *ppocb. The slot must be populated.
+ */
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	/*
+	 * This only reads the slot, so a reader hold is sufficient (as in
+	 * the other lookup routines); writers are still excluded.
+	 */
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	*ppocb = lps.lps_lh_array[index].lph_ppocb;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Store a copy of *ppocb (the ptm driver's "is the slave open" callback)
+ * in slot 'index'. The slot must be populated.
+ */
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	lx_ptm_handle_t *slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_ppocb = *ppocb;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the layered handle stored in slot 'index'. The slot must be
+ * populated.
+ */
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+	const lx_ptm_handle_t	*slot;
+	ldi_handle_t		found;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	found = slot->lph_handle;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (found);
+}
+
+/*
+ * Return the lx_ptm_ops_t attached to slot 'index'. The slot must have
+ * one.
+ */
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+	const lx_ptm_handle_t	*slot;
+	lx_ptm_ops_t		*found;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_lpo != NULL);
+	found = slot->lph_lpo;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (found);
+}
+
+/*
+ * Return the TIOCPKT (packet mode) flag saved in slot 'index'. The slot
+ * must be populated.
+ */
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+	const lx_ptm_handle_t	*slot;
+	int			enabled;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	enabled = slot->lph_pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (enabled);
+}
+
+/*
+ * Save 'pktio' as the TIOCPKT (packet mode) flag of slot 'index'. The
+ * slot must be populated.
+ */
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+	lx_ptm_handle_t *slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_pktio = pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the EOF counter of slot 'index'; non-zero means at least one EOF
+ * has been recorded for this master. The slot must be populated.
+ */
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+	const lx_ptm_handle_t	*slot;
+	int			count;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	count = slot->lph_eofed;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (count);
+}
+
+/*
+ * Record one more EOF for slot 'index' by bumping its EOF counter. The
+ * slot must be populated.
+ */
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+	lx_ptm_handle_t *slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_eofed++;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Begin a serialized read-side operation on 'dev', waiting until no other
+ * one is in progress.
+ *
+ * Returns 0 once the operation has been started (pair it with
+ * lx_ptm_read_end()), or -1 if cv_wait_sig() returned 0 while waiting, in
+ * which case nothing was started.
+ */
+static int
+lx_ptm_read_start(dev_t dev)
+{
+	lx_ptm_ops_t	*ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+	int		result = 0;
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	for (;;) {
+		if (ops->lpo_rops == 0) {
+			/* Nobody else is reading: claim the slot. */
+			ops->lpo_rops++;
+			VERIFY(ops->lpo_rops == 1);
+			break;
+		}
+
+		/* Wait for the current read operation to finish. */
+		if (cv_wait_sig(&ops->lpo_rops_cv,
+		    &ops->lpo_rops_lock) == 0) {
+			result = -1;
+			break;
+		}
+	}
+
+	mutex_exit(&ops->lpo_rops_lock);
+	return (result);
+}
+
+/*
+ * Finish the read-side operation started by lx_ptm_read_start() on 'dev'
+ * and wake one thread waiting to start its own.
+ */
+static void
+lx_ptm_read_end(dev_t dev)
+{
+	lx_ptm_ops_t *ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	/* Exactly one operation may have been outstanding. */
+	ops->lpo_rops--;
+	VERIFY(ops->lpo_rops == 0);
+	cv_signal(&ops->lpo_rops_cv);
+
+	mutex_exit(&ops->lpo_rops_lock);
+}
+
+/*
+ * Ask the ptm driver, through the callback saved at open time, whether
+ * the slave side of 'dev' is open. Returns the callback's result.
+ */
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+	ptmptsopencb_t	cb;
+	int		is_open;
+
+	lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &cb);
+	is_open = cb.ppocb_func(cb.ppocb_arg);
+	return (is_open);
+}
+
+/*
+ * Consume an EOF message sitting at the head of the stream behind 'lh'.
+ * A zero byte read from the stream is enough to remove it. The result of
+ * the read is deliberately ignored.
+ */
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+	char		scratch[1];
+	iovec_t		vec;
+	struct uio	req;
+
+	/* A single, zero length kernel buffer. */
+	vec.iov_base = scratch;
+	vec.iov_len = 0;
+
+	req.uio_iov = &vec;
+	req.uio_iovcnt = 1;
+	req.uio_offset = 0;
+	req.uio_segflg = UIO_SYSSPACE;
+	req.uio_fmode = 0;
+	req.uio_extflg = 0;
+	req.uio_llimit = MAXOFFSET_T;
+	req.uio_resid = vec.iov_len;
+
+	(void) ldi_read(lh, &req, kcred);
+}
+
+/*
+ * If an EOF message (a zero length data message) is at the head of the
+ * master stream of 'dev', record it and remove it.
+ *
+ * *rvalp is set to 1 if an EOF was dropped and to 0 otherwise. Returns 0,
+ * or the error from the I_NREAD ioctl.
+ */
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+	uint_t		idx = DEVT_TO_INDEX(dev);
+	ldi_handle_t	lh = lx_ptm_lh_lookup(idx);
+	int		first_len, queued, err;
+
+	*rvalp = 0;
+
+	/*
+	 * I_NREAD reports the size of the first message and the number of
+	 * messages queued. It is a streams framework ioctl, so it succeeds
+	 * even if there have been previous write errors on this stream.
+	 */
+	err = ldi_ioctl(lh, I_NREAD, (intptr_t)&first_len,
+	    FKIOCTL, kcred, &queued);
+	if (err != 0)
+		return (err);
+
+	if ((queued != 0) && (first_len == 0)) {
+		/* Note that the slave has been closed, then eat the EOF. */
+		lx_ptm_lh_eofed_set(idx);
+		lx_ptm_eof_read(lh);
+		*rvalp = 1;
+	}
+	return (0);
+}
+
+/*
+ * Drop every EOF message queued at the head of the master stream of
+ * 'dev'.
+ *
+ * If rvalp is not NULL, *rvalp is set to 1 when at least one EOF was
+ * dropped and to 0 otherwise. Returns 0, or the first error reported by
+ * lx_ptm_eof_drop_1().
+ */
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+	int dropped, err;
+
+	if (rvalp != NULL)
+		*rvalp = 0;
+
+	/* Keep going for as long as each pass removes another EOF. */
+	while (((err = lx_ptm_eof_drop_1(dev, &dropped)) == 0) &&
+	    (dropped != 0)) {
+		if (rvalp != NULL)
+			*rvalp = 1;
+	}
+	return (err);
+}
+
+/*
+ * Check whether anything is waiting to be read on the master stream of
+ * 'dev'.
+ *
+ * With ignore_eof set, FIONREAD is used and *rvalp becomes 1 only if the
+ * reported byte count is non-zero. Otherwise I_NREAD is used and *rvalp
+ * becomes 1 if any message at all is queued. In every other case *rvalp
+ * is 0. Returns 0, or the error from the ioctl.
+ */
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		nbytes, extra, err;
+
+	*rvalp = 0;
+
+	if (!ignore_eof) {
+		/* 'extra' receives the number of queued messages. */
+		err = ldi_ioctl(lh, I_NREAD, (intptr_t)&nbytes,
+		    FKIOCTL, kcred, &extra);
+		if (err != 0)
+			return (err);
+		if (extra != 0)
+			*rvalp = 1;
+		return (0);
+	}
+
+	/* Only the byte count matters here; 'extra' is not examined. */
+	err = ldi_ioctl(lh, FIONREAD, (intptr_t)&nbytes,
+	    FKIOCTL, kcred, &extra);
+	if (err != 0)
+		return (err);
+	if (nbytes != 0)
+		*rvalp = 1;
+	return (0);
+}
+
+/*
+ * attach(9E) entry point. Only DDI_ATTACH is supported.
+ *
+ * Creates the single minor node through which this driver is opened,
+ * obtains the LDI identity later used to open the real /dev/ptmx, and
+ * initializes the global state (pts major number, handle array and its
+ * rwlock). Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int err;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	err = ldi_ident_from_dip(dip, &lps.lps_li);
+	if (err != 0) {
+		/*
+		 * Undo the node created above. It was created under the
+		 * name LX_PTM_MINOR_NODE, so removing by the driver name
+		 * could miss it; a NULL name removes every minor node of
+		 * this dip.
+		 */
+		ddi_remove_minor_node(dip, NULL);
+		return (DDI_FAILURE);
+	}
+
+	lps.lps_dip = dip;
+	lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+	/* The handle array starts out empty and is grown on demand. */
+	rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+	lps.lps_lh_count = 0;
+	lps.lps_lh_array = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point. Only DDI_DETACH is supported.
+ *
+ * Undoes lx_ptm_attach(): removes the minor node, releases the LDI
+ * identity, frees the handle array and destroys its rwlock. Returns
+ * DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	/* Remove the node created at attach time. */
+	ddi_remove_minor_node(dip, NULL);
+
+	ldi_ident_release(lps.lps_li);
+	lps.lps_dip = NULL;
+
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_array != NULL) {
+		kmem_free(lps.lps_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+		lps.lps_lh_array = NULL;
+		lps.lps_lh_count = 0;
+	}
+
+	/* Pairs with the rw_init() in lx_ptm_attach(). */
+	rw_destroy(&lps.lps_lh_rwlock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * Opens the real /dev/ptmx through LDI, rewrites *devp so that our minor
+ * number mirrors the ptm minor (shifted by INDEX_TO_MINOR()), fetches the
+ * ptm driver's "is the slave open" callback, configures autopush for the
+ * matching slave device and finally records the layered handle.
+ *
+ * Returns 0 on success, ENOTSUP for FNDELAY/FNONBLOCK opens, the error
+ * from ldi_open_by_name() or ldi_get_dev(), or EIO if the callback ioctl
+ * or the autopush setup fails.
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+	ptmptsopencb_t	ppocb = { NULL, NULL };
+	struct strioctl	iocb;
+	ldi_handle_t	lh;
+	dev_t		ptm_dev;
+	major_t		our_major = getmajor(*devp);
+	major_t		pts_major;
+	minor_t		pts_minor, pts_lastminor;
+	uint_t		index, anchor = 1;
+	int		err, rval = 0;
+
+	/*
+	 * FNDELAY and FNONBLOCK are not supported until we either find a
+	 * Linux app that opens /dev/ptmx with O_NDELAY or O_NONBLOCK
+	 * explicitly, or until we create test cases to determine how reads
+	 * of master terminal devices opened with these flags behave on
+	 * Linux. Supporting them will involve enhancing our read
+	 * implementation and the way it deals with EOF notifications.
+	 */
+	if (flag & (FNDELAY | FNONBLOCK))
+		return (ENOTSUP);
+
+	/*
+	 * We're layered on top of the ptm driver, so open that first.
+	 * (Note that this opens /dev/ptmx in the global zone, not
+	 * ourselves in the Linux zone.)
+	 */
+	err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
+	if (err != 0)
+		return (err);
+
+	/* From here on every failure has to close the layered handle. */
+	if ((err = ldi_get_dev(lh, &ptm_dev)) != 0)
+		goto fail;
+
+	/*
+	 * We're a cloning driver, so we change the devt that we return.
+	 * ptmx is also a cloning driver and already manages its minor name
+	 * space, so its minor number determines ours.
+	 */
+	index = getminor(ptm_dev);
+	*devp = makedevice(our_major, INDEX_TO_MINOR(index));
+
+	/* Get a callback function to query if the pts device is open. */
+	iocb.ic_cmd = PTMPTSOPENCB;
+	iocb.ic_timout = 0;
+	iocb.ic_len = sizeof (ppocb);
+	iocb.ic_dp = (char *)&ppocb;
+	err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
+	if ((err != 0) || (rval != 0)) {
+		err = EIO;	/* XXX return something else here? */
+		goto fail;
+	}
+	ASSERT(ppocb.ppocb_func != NULL);
+
+	/*
+	 * Set up autopush for the terminal slave device, so that when a
+	 * Linux program opens it the required strmod modules get pushed
+	 * onto the stream. In Solaris this is normally done by the
+	 * application that actually allocates the terminal.
+	 */
+	pts_major = lps.lps_pts_major;
+	pts_minor = index;
+	pts_lastminor = 0;
+	if (kstr_autopush(SET_AUTOPUSH, &pts_major, &pts_minor,
+	    &pts_lastminor, &anchor, lx_pts_mods) != 0) {
+		err = EIO;	/* XXX return something else here? */
+		goto fail;
+	}
+
+	/* Save off this layered handle for future accesses. */
+	lx_ptm_lh_insert(index, lh);
+	lx_ptm_lh_set_ppocb(index, &ppocb);
+	return (0);
+
+fail:
+	(void) ldi_close(lh, flag, credp);
+	return (err);
+}
+
+/*
+ * close(9E) entry point.
+ *
+ * Forgets the layered handle for 'dev', clears the autopush configuration
+ * of the matching slave device and then closes the underlying ptm handle.
+ * Returns the result of ldi_close().
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	uint_t		index = DEVT_TO_INDEX(dev);
+	ldi_handle_t	lh;
+	major_t		pts_major;
+	minor_t		pts_minor, pts_lastminor;
+
+	/*
+	 * All state associated with this major/minor terminal pair has to
+	 * be cleaned up before the ptm master device is actually closed,
+	 * because once that close completes the pair is immediately
+	 * available for re-use in any zone.
+	 */
+
+	/* Free up our saved reference for this layered handle. */
+	lh = lx_ptm_lh_remove(index);
+
+	/*
+	 * Unconfigure autopush for the associated terminal slave device.
+	 * This is retried until it succeeds: we don't want to release the
+	 * ptm node while autopush is still configured on the slave, since
+	 * bad things could happen if another brand were to get this
+	 * terminal allocated to them.
+	 *
+	 * XXX should we ever give up?
+	 */
+	pts_major = lps.lps_pts_major;
+	pts_minor = index;
+	pts_lastminor = 0;
+	while (kstr_autopush(CLR_AUTOPUSH, &pts_major, &pts_minor,
+	    &pts_lastminor, 0, NULL) != 0)
+		continue;
+
+	/*
+	 * There is no need to change the permissions on the associated
+	 * slave device here. No one can open that device until its master
+	 * device is re-opened, which results in the permissions on it
+	 * being reset.
+	 */
+	return (ldi_close(lh, flag, credp));
+}
+
+/*
+ * Perform one read attempt on the master terminal behind 'dev'.
+ *
+ * Returns an error, or 0 with *loop telling the caller what happened:
+ * *loop == 0 means data was read into uiop, *loop == 1 means the read
+ * consumed an EOF instead and should be retried.
+ */
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	struct uio	snap = *uiop;	/* state before the read */
+	int		err, have_data;
+
+	*loop = 0;
+
+	/*
+	 * Here's another way that Linux master terminals behave
+	 * differently from Solaris master terminals. A read on a Linux
+	 * master terminal (opened without NDELAY and NONBLOCK) whose
+	 * corresponding slave terminal is currently closed, and has been
+	 * opened and closed at least once, returns -1 with errno set to
+	 * EIO, whereas Solaris blocks. So in that state only read if there
+	 * is something queued.
+	 */
+	if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) &&
+	    (lx_ptm_pts_isopen(dev) == 0)) {
+		if ((err = lx_ptm_data_check(dev, 0, &have_data)) != 0)
+			return (err);
+		if (have_data == 0)
+			return (EIO);
+	}
+
+	/* Actually do the read operation. */
+	err = ldi_read(lh, uiop, credp);
+	if (err != 0)
+		return (err);
+
+	if (snap.uio_resid == uiop->uio_resid) {
+		/*
+		 * Nothing was transferred, ie. this was an EOF, which
+		 * indicates that the slave terminal device has been
+		 * closed. Record that and have the caller retry.
+		 */
+		lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+		*loop = 1;
+	}
+	return (0);
+}
+
+/*
+ * read(9E) entry point.
+ *
+ * Reads from the underlying master terminal through lx_ptm_read_loop(),
+ * retrying for as long as that routine reports that it consumed an EOF.
+ * When packet mode is enabled the first byte of the caller's buffer is
+ * reserved up front and filled in with TIOCPKT_DATA once the read has
+ * succeeded.
+ *
+ * Returns 0 on success, EFAULT if the packet mode byte cannot be reserved
+ * or written, EINTR if lx_ptm_read_start() fails, or the error from
+ * lx_ptm_read_loop().
+ */
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+ int err, loop;
+ struct uio uio;
+ struct iovec iovp;
+
+ ASSERT(uiop->uio_iovcnt > 0);
+
+ /*
+ * If packet mode has been enabled (via TIOCPKT) we need to pad
+ * all read requests with a leading byte that indicates any
+ * relevant control status information.
+ */
+ if (pktio != 0) {
+ /*
+ * We'd like to write the control information into
+ * the current buffer but we can't yet. We don't
+ * want to modify userspace memory here only to have
+ * the read operation fail later. So instead
+ * what we'll do here is read one character from the
+ * beginning of the memory pointed to by the uio
+ * structure. This will advance the output pointer
+ * by one. Then when the read completes successfully
+ * we can update the byte that we passed over. Before
+ * we do the read make a copy of the current uiop and
+ * iovec structs so we can write to them later.
+ *
+ * NOTE(review): only the first iovec is copied while the
+ * copied uio keeps the original uio_iovcnt. This looks like
+ * it assumes the first iovec is non-empty -- confirm how a
+ * request whose first iovec has zero length behaves.
+ */
+ uio = *uiop;
+ iovp = *uiop->uio_iov;
+ uio.uio_iov = &iovp;
+
+ if (uwritec(uiop) == -1)
+ return (EFAULT);
+ }
+
+ do {
+ /*
+ * Before we actually attempt a read operation we need
+ * to make sure there's some buffer space to actually
+ * read in some data. We do this because if we're in
+ * pktio mode and the caller only requested one byte,
+ * then we've already used up that one byte and we
+ * don't want to pass this read request. Doing a 0
+ * byte read (unless there is a problem with the stream
+ * head) always returns success. Normally when a streams
+ * read returns 0 bytes we interpret that as an EOF on
+ * the stream (ie, the slave side has been opened and
+ * closed) and we ignore it and re-try the read operation.
+ * So if we pass on a 0 byte read here lx_ptm_read_loop()
+ * will tell us to loop around and we'll end up in an
+ * infinite loop.
+ */
+ if (uiop->uio_resid == 0)
+ break;
+
+ /*
+ * Serialize all reads. We need to do this so that we can
+ * properly emulate the behavior of master terminals on Linux.
+ * In reality this serialization should not pose any kind of
+ * performance problem since it would be very strange to have
+ * multiple threads trying to read from the same master
+ * terminal device concurrently.
+ */
+ if (lx_ptm_read_start(dev) != 0)
+ return (EINTR);
+
+ err = lx_ptm_read_loop(dev, uiop, credp, &loop);
+ lx_ptm_read_end(dev);
+ if (err != 0)
+ return (err);
+ } while (loop != 0);
+
+ if (pktio != 0) {
+ uint8_t pktio_data = TIOCPKT_DATA;
+
+ /*
+ * Note that the control status information we
+ * pass back is faked up in the sense that we
+ * don't actually report any events, we always
+ * report a status of 0.
+ */
+ /* 'uio' is the pre-read snapshot, so this fills the reserved byte. */
+ if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+/*
+ * write(9E) entry point. Writes are passed straight through to the
+ * underlying master terminal; the result of ldi_write() is returned.
+ */
+static int
+lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+
+	return (ldi_write(lh, uiop, credp));
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * I_SETSIG and I_ESETSIG are rejected with EINVAL, TIOCPKT is emulated
+ * here (EFAULT if its argument cannot be copied in), and every other
+ * command is passed through to the underlying master terminal.
+ */
+static int
+lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		pktio;
+
+	switch (cmd) {
+	case I_SETSIG:
+	case I_ESETSIG:
+		/*
+		 * These must never be allowed to pass through because we
+		 * can't support them.
+		 *
+		 * The native Solaris ptm device supports them because they
+		 * are streams framework ioctls, which all streams devices
+		 * support by default. They register the current process
+		 * with a stream so that it receives signals when certain
+		 * stream events occur.
+		 *
+		 * The problem for a layered driver is cleaning up those
+		 * registrations. Normally the streams framework is
+		 * notified whenever a process closes any reference to a
+		 * stream and it cleans the registrations up. But actual
+		 * device drivers are not notified of a close unless the
+		 * process is closing the last open reference to the device
+		 * on the entire system. So if we passed these ioctls on,
+		 * we would never be told when the registered processes
+		 * exit (or close a stream) and could not unregister them.
+		 *
+		 * Luckily these operations are streams specific and Linux
+		 * doesn't support streams devices, so there doesn't seem
+		 * to be a need for them. If that turns out to be wrong,
+		 * the current driver model will have to be enhanced to
+		 * better support streams device layering.
+		 */
+		return (EINVAL);
+
+	case TIOCPKT:
+		/*
+		 * Linux applications expect /dev/ptmx to support TIOCPKT,
+		 * but on Solaris it doesn't (it is supported on older bsd
+		 * style ptys), so support for it is faked up here.
+		 *
+		 * This is done in the kernel rather than in userland
+		 * because the ioctl affects the results returned from
+		 * read() operations. A userland emulation would have to
+		 * intercept all reads and check whether pktio was enabled
+		 * for the fd being read from. Since only the ptmx device
+		 * needs this, it is simpler to do it here where we can
+		 * adjust the results of reads performed on ourselves.
+		 */
+		if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
+		    mode) != DDI_SUCCESS)
+			return (EFAULT);
+
+		/* Any non-zero argument turns packet mode on. */
+		lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev),
+		    (pktio != 0) ? 1 : 0);
+		return (0);
+
+	default:
+		break;
+	}
+
+	/* Everything else goes to the real master terminal. */
+	return (ldi_ioctl(lh, cmd, arg, mode, credp, rvalp));
+}
+
+/*
+ * Perform one poll attempt on the master terminal behind 'dev'.
+ *
+ * Returns an error, or 0 with *loop telling the caller what happened:
+ * *loop == 0 means *reventsp is final, *loop == 1 means an EOF was
+ * removed from the stream and the whole poll has to be retried.
+ */
+static int
+lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	short		wr_revents;
+	int		err, flag, lockstate;
+
+	*loop = 0;
+
+	if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) == 0) ||
+	    (lx_ptm_pts_isopen(dev) != 0)) {
+		/*
+		 * Either the slave has never been closed or it is open
+		 * right now: do a regular poll.
+		 */
+		err = ldi_poll(lh, events, anyyet, reventsp, phpp);
+		if (err != 0)
+			return (err);
+
+		/*
+		 * Drop any leading EOFs on the stream.
+		 *
+		 * pollunlock() has to be used here to avoid recursive
+		 * mutex enters in the poll framework. If there is an EOF
+		 * message on the stream, reading from the queue to remove
+		 * it can cause the ptm driver's event service routine to
+		 * be invoked, and if there is no open slave device the ptm
+		 * driver may generate error messages and put them on the
+		 * stream. That generates a poll event, and the poll
+		 * framework will then try to invoke any poll callbacks
+		 * associated with the stream, acquiring locks that we are
+		 * already holding. So those locks are dropped around the
+		 * read.
+		 */
+		lockstate = pollunlock();
+		err = lx_ptm_eof_drop(dev, &flag);
+		pollrelock(lockstate);
+		if (err)
+			return (err);
+
+		/*
+		 * If an EOF was removed, retry the entire poll operation
+		 * from the top because polls on the ptm device should
+		 * behave differently now.
+		 */
+		if (flag != 0)
+			*loop = 1;
+		return (0);
+	}
+
+	/*
+	 * The slave device has been opened and closed at least once and is
+	 * currently closed, so poll always needs to return immediately,
+	 * and always with POLLHUP.
+	 */
+	*reventsp = POLLHUP;
+
+	/* Report readability only if there really is data on the stream. */
+	if ((err = lx_ptm_data_check(dev, 1, &flag)) != 0)
+		return (err);
+	if (flag != 0)
+		*reventsp |= (events & (POLLIN | POLLRDNORM));
+
+	/*
+	 * Is the user checking for writability? Note that for ptm devices
+	 * Linux seems to ignore the POLLWRBAND write flag.
+	 */
+	if ((events & POLLWRNORM) != 0) {
+		/*
+		 * To find out we have to actually call poll, with anyyet
+		 * set to 1 to prevent the streams framework from setting
+		 * up callbacks.
+		 */
+		err = ldi_poll(lh, POLLWRNORM, 1, &wr_revents, NULL);
+		if (err != 0)
+			return (err);
+		*reventsp |= (wr_revents & POLLWRNORM);
+	}
+	return (0);
+}
+
+/*
+ * chpoll(9E) entry point.
+ *
+ * Runs lx_ptm_poll_loop() under the read serialization, repeating for as
+ * long as it asks for a retry. Returns 0, EINTR if lx_ptm_read_start()
+ * fails, or the error from lx_ptm_poll_loop().
+ */
+static int
+lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	int again, err;
+
+	for (;;) {
+		/* Serialize ourself wrt read operations. */
+		if (lx_ptm_read_start(dev) != 0)
+			return (EINTR);
+
+		err = lx_ptm_poll_loop(dev, events, anyyet, reventsp, phpp,
+		    &again);
+		lx_ptm_read_end(dev);
+
+		if (err != 0)
+			return (err);
+		if (again == 0)
+			return (0);
+	}
+}
+
+static struct cb_ops lx_ptm_cb_ops = {
+ lx_ptm_open, /* open */
+ lx_ptm_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lx_ptm_read, /* read */
+ lx_ptm_write, /* write */
+ lx_ptm_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ lx_ptm_poll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ NULL, /* cb_aread */
+ NULL /* cb_awrite */
+};
+
+static struct dev_ops lx_ptm_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ ddi_getinfo_1to1, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ lx_ptm_attach, /* devo_attach */
+ lx_ptm_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &lx_ptm_cb_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL, /* devo_power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "Linux master terminal driver", /* description of module */
+ &lx_ptm_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* modlinkage revision */
+ &modldrv, /* the only linkage element: the driver above */
+ NULL /* end of the linkage list */
+};
+
+int
+_init(void)
+{
+	int status;
+
+	/* Hand this module's linkage to the kernel module framework. */
+	status = mod_install(&modlinkage);
+	return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	int status;
+
+	/* Report this module's linkage information into *modinfop. */
+	status = mod_info(&modlinkage, modinfop);
+	return (status);
+}
+
+int
+_fini(void)
+{
+	int status;
+
+	/* Ask the module framework to unlink this module. */
+	status = mod_remove(&modlinkage);
+	return (status);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
new file mode 100644
index 0000000000..481b4e3c74
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_ptm" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
new file mode 100644
index 0000000000..15286718b6
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -0,0 +1,943 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/exec.h>
+#include <sys/lx_impl.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_brand.h>
+#include <sys/termios.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/auxv.h>
+#include <sys/priv.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/archsystm.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+
+int lx_debug = 0;
+
+void lx_init_brand_data(zone_t *);
+void lx_free_brand_data(zone_t *);
+void lx_setbrand(proc_t *);
+int lx_getattr(zone_t *, int, void *, size_t *);
+int lx_setattr(zone_t *, int, void *, size_t);
+int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+int lx_get_kern_version(void);
+void lx_set_kern_version(zone_t *, int);
+void lx_copy_procdata(proc_t *, proc_t *);
+
+extern void lx_setrval(klwp_t *, int, int);
+extern void lx_proc_exit(proc_t *, klwp_t *);
+extern void lx_exec();
+extern int lx_initlwp(klwp_t *);
+extern void lx_forklwp(klwp_t *, klwp_t *);
+extern void lx_exitlwp(klwp_t *);
+extern void lx_freelwp(klwp_t *);
+extern greg_t lx_fixsegreg(greg_t, model_t);
+extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
+
+int lx_systrace_brand_enabled;
+
+lx_systrace_f *lx_systrace_entry_ptr;
+lx_systrace_f *lx_systrace_return_ptr;
+
+static int lx_systrace_enabled;
+
+static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action);
+
+/* lx brand */
+/*
+ * Brand operations vector for the lx brand. The initializer is positional,
+ * so the order of the entries must follow struct brand_ops; each slot is
+ * filled with the lx routine of the matching name.
+ */
+struct brand_ops lx_brops = {
+ lx_init_brand_data,
+ lx_free_brand_data,
+ lx_brandsys,
+ lx_setbrand,
+ lx_getattr,
+ lx_setattr,
+ lx_copy_procdata,
+ lx_proc_exit,
+ lx_exec,
+ lx_setrval,
+ lx_initlwp,
+ lx_forklwp,
+ lx_freelwp,
+ lx_exitlwp,
+ lx_elfexec,
+ NULL, /* NOTE(review): hook left unset - confirm slot in sys/brand.h */
+ NULL, /* NOTE(review): hook left unset - confirm slot in sys/brand.h */
+ NSIG, /* NOTE(review): presumably the brand's signal count - confirm */
+};
+
+/*
+ * Machine-dependent brand operations for the lx brand. The initializer is
+ * positional; only the int80 callback and the segment register fixup are
+ * provided, every other slot is left NULL.
+ * NOTE(review): confirm slot order against struct brand_mach_ops.
+ */
+struct brand_mach_ops lx_mops = {
+ NULL,
+ lx_brand_int80_callback,
+ NULL,
+ NULL,
+ NULL,
+ lx_fixsegreg,
+};
+
+/*
+ * The lx brand itself: version, brand name and the two operation vectors
+ * defined above. Code in this file identifies lx zones and processes by
+ * comparing their brand pointer against &lx_brand.
+ */
+struct brand lx_brand = {
+ BRAND_VER_1,
+ "lx",
+ &lx_brops,
+ &lx_mops
+};
+
+/*
+ * Module linkage: describes the lx brand to the module framework.
+ * modlinkage is what _init(), _info() and _fini() hand to mod_install(),
+ * mod_info() and mod_remove().
+ */
+static struct modlbrand modlbrand = {
+ &mod_brandops, "lx brand", &lx_brand
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+/*
+ * Brand hook run when an lx process exits: optionally reboot the zone if
+ * the dying process is the zone's init, then tear down the brand data of
+ * the exiting lwp and of the process.
+ */
+void
+lx_proc_exit(proc_t *p, klwp_t *lwp)
+{
+	zone_t *zone = p->p_zone;
+
+	ASSERT(p->p_brand != NULL);
+	ASSERT(p->p_brand_data != NULL);
+
+	/*
+	 * When init dies and neither the zone nor the system is being
+	 * shut down explicitly, Solaris is about to restart init.  The
+	 * Linux init is not designed to handle a restart, which it
+	 * interprets as a reboot, so give it a sane environment to run
+	 * in by rebooting the zone instead.
+	 */
+	if (p->p_pid == zone->zone_proc_initpid &&
+	    zone->zone_boot_err == 0 &&
+	    zone->zone_restart_init &&
+	    zone_status_get(zone) < ZONE_IS_SHUTTING_DOWN &&
+	    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
+		(void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
+	}
+
+	/* Release the per-lwp brand data first, then the per-process data. */
+	lx_exitlwp(lwp);
+	kmem_free(p->p_brand_data, sizeof (struct lx_proc_data));
+	p->p_brand_data = NULL;
+}
+
+/*
+ * Brand hook that turns process p into an lx process: allocate zeroed
+ * per-process brand data and initialize the brand data of the lwp behind
+ * the first thread on the process's thread list.
+ */
+void
+lx_setbrand(proc_t *p)
+{
+	kthread_t *first = p->p_tlist;
+	int rv;
+
+	ASSERT(p->p_brand_data == NULL);
+	ASSERT(ttolxlwp(curthread) == NULL);
+
+	p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP);
+
+	/*
+	 * We are only ever called for single-threaded processes, and
+	 * lx_initlwp() can only fail when it runs out of PIDs for a
+	 * multithreaded process, so this call cannot fail.
+	 */
+	rv = lx_initlwp(first->t_lwp);
+	ASSERT(rv == 0);
+}
+
+/*
+ * Set a brand-specific attribute of an lx zone from a user buffer.
+ *
+ * LX_ATTR_RESTART_INIT takes a boolean_t and stores it in the zone's
+ * zone_restart_init; LX_KERN_VERSION_NUM takes an int and passes it to
+ * lx_set_kern_version().
+ *
+ * Returns 0 on success, ERANGE if bufsize is larger than the attribute,
+ * EFAULT if the copyin fails and EINVAL for a non-boolean value or an
+ * unknown attribute.
+ *
+ * NOTE(review): a bufsize smaller than the attribute is accepted, yet the
+ * full attribute size is still copied in - confirm this is intended.
+ */
+/* ARGSUSED */
+int
+lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+	boolean_t restart;
+	int vers;
+
+	if (attr == LX_ATTR_RESTART_INIT) {
+		if (bufsize > sizeof (boolean_t))
+			return (ERANGE);
+		if (copyin(buf, &restart, sizeof (restart)) != 0)
+			return (EFAULT);
+		if (restart != B_TRUE && restart != B_FALSE)
+			return (EINVAL);
+
+		zone->zone_restart_init = restart;
+		return (0);
+	}
+
+	if (attr == LX_KERN_VERSION_NUM) {
+		if (bufsize > sizeof (int))
+			return (ERANGE);
+		if (copyin(buf, &vers, sizeof (vers)) != 0)
+			return (EFAULT);
+
+		lx_set_kern_version(zone, vers);
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Copy a brand-specific attribute of an lx zone out to a user buffer.
+ *
+ * LX_ATTR_RESTART_INIT yields the zone's zone_restart_init as a boolean_t;
+ * LX_KERN_VERSION_NUM yields the value of lx_get_kern_version() as an int.
+ * On success *bufsize is updated to the number of bytes written.
+ *
+ * Returns 0 on success, ERANGE if *bufsize is too small for the attribute,
+ * EFAULT if the copyout fails and EINVAL for an unknown attribute.  All
+ * errors are positive errno values, matching lx_setattr().
+ */
+/* ARGSUSED */
+int
+lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+	int num;
+
+	if (attr == LX_ATTR_RESTART_INIT) {
+		if (*bufsize < sizeof (boolean_t))
+			return (ERANGE);
+		if (copyout(&zone->zone_restart_init, buf,
+		    sizeof (boolean_t)) != 0)
+			return (EFAULT);
+		*bufsize = sizeof (boolean_t);
+		return (0);
+	} else if (attr == LX_KERN_VERSION_NUM) {
+		if (*bufsize < sizeof (int))
+			return (ERANGE);
+		num = lx_get_kern_version();
+		if (copyout(&num, buf, sizeof (int)) != 0)
+			return (EFAULT);
+		*bufsize = sizeof (int);
+		return (0);
+	}
+
+	/* Unknown attribute: a positive errno, not its negation. */
+	return (EINVAL);
+}
+
+/*
+ * Enable ptrace system call tracing for the given LWP. This is done by
+ * both setting the flag in that LWP's brand data (in the kernel) and setting
+ * the process-wide trace flag (in the brand library of the traced process).
+ *
+ * When 'set' is zero only the LWP's flag is cleared.
+ *
+ * Returns 0 on success, ESRCH if the process, the LWP or their brand data
+ * cannot be found, EPERM if the caller may not write to the target, and
+ * EIO if the trace flag could not be written into the target process.
+ */
+static int
+lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
+{
+ proc_t *p;
+ kthread_t *t;
+ klwp_t *lwp;
+ lx_proc_data_t *lpdp;
+ lx_lwp_data_t *lldp;
+ uintptr_t addr;
+ int ret, flag = 1;
+
+ /* Every exit path after a successful sprlock() must sprunlock(). */
+ if ((p = sprlock(pid)) == NULL)
+ return (ESRCH);
+
+ if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
+ sprunlock(p);
+ return (EPERM);
+ }
+
+ if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if ((lpdp = p->p_brand_data) == NULL ||
+ (lldp = lwp->lwp_brand) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if (set) {
+ /*
+ * Enable the ptrace flag for this LWP and this process. Note
+ * that we will turn off the LWP's ptrace flag, but we don't
+ * turn off the process's ptrace flag.
+ */
+ lldp->br_ptrace = 1;
+ lpdp->l_ptrace = 1;
+
+ /* User address of the flag registered via B_REGISTER. */
+ addr = lpdp->l_traceflag;
+
+ /* p_lock is dropped around the write into the target. */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * This can fail only in some rare corner cases where the
+ * process is exiting or we're completely out of memory. In
+ * these cases, it's sufficient to return an error to the ptrace
+ * consumer and leave the process-wide flag set.
+ */
+ ret = uwrite(p, &flag, sizeof (flag), addr);
+
+ mutex_enter(&p->p_lock);
+
+ /*
+ * If we couldn't set the trace flag, unset the LWP's ptrace
+ * flag as the ptrace consumer won't expect this LWP to stop.
+ */
+ if (ret != 0)
+ lldp->br_ptrace = 0;
+ } else {
+ lldp->br_ptrace = 0;
+ ret = 0;
+ }
+
+ sprunlock(p);
+
+ /* Any uwrite() failure is reported to the caller as EIO. */
+ if (ret != 0)
+ ret = EIO;
+
+ return (ret);
+}
+
+/*
+ * Deliver a pending ptrace system call stop for the current LWP.
+ *
+ * The ptrace flag only applies until the next event is encountered for
+ * the given LWP: if it is set, clear it and poke the controlling process
+ * by raising SIGTRAP on the current thread.
+ */
+static void
+lx_ptrace_fire(void)
+{
+	lx_lwp_data_t *lwpd = ttolwp(curthread)->lwp_brand;
+
+	if (!lwpd->br_ptrace)
+		return;
+
+	lwpd->br_ptrace = 0;
+	tsignal(curthread, SIGTRAP);
+}
+
+/*
+ * Turn on system call tracing for the lx brand: enable the int80 hook and
+ * record that tracing is active. Must not be called while tracing is
+ * already enabled.
+ */
+void
+lx_brand_systrace_enable(void)
+{
+ extern void lx_brand_int80_enable(void);
+
+ ASSERT(!lx_systrace_enabled);
+
+ lx_brand_int80_enable();
+
+ /* Consulted by the B_SYSENTRY and B_SYSRETURN paths of lx_brandsys(). */
+ lx_systrace_enabled = 1;
+}
+
+/*
+ * Turn off system call tracing for the lx brand: disable the int80 hook
+ * and record that tracing is no longer active. Must only be called while
+ * tracing is enabled.
+ */
+void
+lx_brand_systrace_disable(void)
+{
+ extern void lx_brand_int80_disable(void);
+
+ ASSERT(lx_systrace_enabled);
+
+ lx_brand_int80_disable();
+
+ lx_systrace_enabled = 0;
+}
+
+/*
+ * Brand hook that allocates and attaches the per-zone lx data for a zone
+ * that carries the lx brand and has no brand data yet.
+ */
+void
+lx_init_brand_data(zone_t *zone)
+{
+	lx_zone_data_t *lxzd;
+
+	ASSERT(zone->zone_brand == &lx_brand);
+	ASSERT(zone->zone_brand_data == NULL);
+
+	lxzd = kmem_zalloc(sizeof (*lxzd), KM_SLEEP);
+
+	/*
+	 * Default to a Linux 2.4 personality; a setattr() call during zone
+	 * boot can change it.
+	 */
+	lxzd->lxzd_max_syscall = LX_NSYSCALLS_2_4;
+	lxzd->lxzd_kernel_version = LX_KERN_2_4;
+
+	zone->zone_brand_data = lxzd;
+}
+
+/*
+ * Brand hook that releases the per-zone lx data allocated by
+ * lx_init_brand_data().  The zone's pointer itself is left untouched.
+ */
+void
+lx_free_brand_data(zone_t *zone)
+{
+	lx_zone_data_t *lxzd = zone->zone_brand_data;
+
+	kmem_free(lxzd, sizeof (*lxzd));
+}
+
+/*
+ * Brand system call handler for the lx brand; 'cmd' selects the operation
+ * and arg1..arg6 are its operation-specific arguments.
+ *
+ * B_REGISTER gets the addresses of the user-space system call handler and
+ * attaches them to the proc structure; the other operations are described
+ * at their case labels.
+ *
+ * Returning 0 indicates success; the value returned by the system call is
+ * the value stored in rval.  Returning a non-zero value indicates a
+ * failure; the value returned is used to set errno, -1 is returned from
+ * the syscall and the contents of rval are ignored.  To set errno and have
+ * the syscall return a value other than -1 we can manually set errno and
+ * rval and return 0.
+ *
+ * Because a non-zero return value is itself taken as the errno, error
+ * paths must return the positive errno directly rather than calling
+ * set_errno() and returning -1.
+ */
+int
+lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	kthread_t *t = curthread;
+	proc_t *p = ttoproc(t);
+	lx_proc_data_t *pd;
+	int linux_call;
+	struct termios *termios;
+	uint_t termios_len;
+	int error;
+	lx_brand_registration_t reg;
+
+	/*
+	 * There is one operation that is supported for a non-branded
+	 * process: B_EXEC_BRAND.  This is the equivalent of an exec call,
+	 * but the new process that is created will be a branded process.
+	 */
+	if (cmd == B_EXEC_BRAND) {
+		ASSERT(p->p_zone != NULL);
+		ASSERT(p->p_zone->zone_brand == &lx_brand);
+		return (exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_BRAND));
+	}
+
+	/*
+	 * For all other operations this must be a branded process.  Return
+	 * the errno itself; set_errno() would hand back -1, which our
+	 * caller would then treat as the errno value.
+	 */
+	if (p->p_brand == NULL)
+		return (ENOSYS);
+
+	ASSERT(p->p_brand == &lx_brand);
+	ASSERT(p->p_brand_data != NULL);
+
+	switch (cmd) {
+	case B_REGISTER:
+		if (p->p_model == DATAMODEL_NATIVE) {
+			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+#ifdef _LP64
+		} else {
+			/* 32-bit process on a 64-bit kernel: widen fields. */
+			lx_brand_registration32_t reg32;
+
+			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+
+			reg.lxbr_version = (uint_t)reg32.lxbr_version;
+			reg.lxbr_handler =
+			    (void *)(uintptr_t)reg32.lxbr_handler;
+			reg.lxbr_tracehandler =
+			    (void *)(uintptr_t)reg32.lxbr_tracehandler;
+			reg.lxbr_traceflag =
+			    (void *)(uintptr_t)reg32.lxbr_traceflag;
+#endif
+		}
+
+		if (reg.lxbr_version != LX_VERSION_1) {
+			lx_print("Invalid brand library version (%u)\n",
+			    reg.lxbr_version);
+			return (EINVAL);
+		}
+
+		lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
+		    (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p);
+		pd = p->p_brand_data;
+		pd->l_handler = (uintptr_t)reg.lxbr_handler;
+		pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler;
+		pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag;
+		*rval = 0;
+		return (0);
+
+	case B_TTYMODES:
+		/* This is necessary for emulating TCGETS ioctls. */
+		if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+		    DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios,
+		    &termios_len) != DDI_SUCCESS)
+			return (EIO);
+
+		ASSERT(termios_len == sizeof (*termios));
+
+		/*
+		 * Copy out the termios structure the property lookup
+		 * returned, not the local pointer variable that refers
+		 * to it.
+		 */
+		if (copyout(termios, (void *)arg1, sizeof (*termios)) != 0) {
+			ddi_prop_free(termios);
+			return (EFAULT);
+		}
+
+		ddi_prop_free(termios);
+		*rval = 0;
+		return (0);
+
+	case B_ELFDATA:
+		/* Hand the ELF data saved by lx_elfexec() to the library. */
+		pd = curproc->p_brand_data;
+		if (copyout(&pd->l_elf_data, (void *)arg1,
+		    sizeof (lx_elf_data_t)) != 0)
+			return (EFAULT);
+		*rval = 0;
+		return (0);
+
+	case B_EXEC_NATIVE:
+		error = exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_NATIVE);
+		if (error != 0)
+			return (error);
+		*rval = 0;
+		return (0);
+
+	case B_LPID_TO_SPAIR:
+		/*
+		 * Given a Linux pid as arg1, return the Solaris pid in arg2 and
+		 * the Solaris LWP in arg3.  We also translate pid 1 (which is
+		 * hardcoded in many applications) to the zone's init process.
+		 */
+		{
+			pid_t s_pid;
+			id_t s_tid;
+
+			if ((pid_t)arg1 == 1) {
+				s_pid = p->p_zone->zone_proc_initpid;
+				/* handle the dead/missing init(1M) case */
+				if (s_pid == -1)
+					s_pid = 1;
+				s_tid = 1;
+			} else if (lx_lpid_to_spair((pid_t)arg1, &s_pid,
+			    &s_tid) < 0)
+				return (ESRCH);
+
+			if (copyout(&s_pid, (void *)arg2,
+			    sizeof (s_pid)) != 0 ||
+			    copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0)
+				return (EFAULT);
+
+			*rval = 0;
+			return (0);
+		}
+
+	case B_PTRACE_SYSCALL:
+		/*
+		 * The errno-style result of lx_ptrace_syscall_set() is
+		 * handed back as the system call's return value.
+		 */
+		*rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
+		    (int)arg3);
+		return (0);
+
+	case B_SYSENTRY:
+		if (lx_systrace_enabled) {
+			uint32_t args[6];
+
+			ASSERT(lx_systrace_entry_ptr != NULL);
+
+			if (copyin((void *)arg2, args, sizeof (args)) != 0)
+				return (EFAULT);
+
+			(*lx_systrace_entry_ptr)(arg1, args[0], args[1],
+			    args[2], args[3], args[4], args[5]);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SYSRETURN:
+		if (lx_systrace_enabled) {
+			ASSERT(lx_systrace_return_ptr != NULL);
+
+			(*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SET_AFFINITY_MASK:
+	case B_GET_AFFINITY_MASK:
+		/*
+		 * Retrieve or store the CPU affinity mask for the
+		 * requested linux pid.
+		 *
+		 * arg1 is a linux PID (0 means curthread).
+		 * arg2 is the size of the given mask.
+		 * arg3 is the address of the affinity mask.
+		 */
+		return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
+
+	default:
+		linux_call = cmd - B_EMULATE_SYSCALL;
+		/*
+		 * Only checking against highest syscall number for all kernel
+		 * versions, since check for specific kernel version is done
+		 * in userland prior to this call, and duplicating logic would
+		 * be redundant.
+		 */
+		if (linux_call >= 0 && linux_call < LX_NSYSCALLS) {
+			*rval = lx_emulate_syscall(linux_call, arg1, arg2,
+			    arg3, arg4, arg5, arg6);
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Return the emulated Linux kernel version recorded in the given zone's
+ * lx brand data.
+ */
+int
+lx_get_zone_kern_version(zone_t *zone)
+{
+	lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+
+	return (lxzd->lxzd_kernel_version);
+}
+
+/*
+ * Return the emulated Linux kernel version of the current zone.
+ */
+int
+lx_get_kern_version()
+{
+	int vers;
+
+	vers = lx_get_zone_kern_version(curzone);
+	return (vers);
+}
+
+/*
+ * Record the Linux kernel version a zone emulates and keep the zone's
+ * highest system call number in step with it.
+ *
+ * The limit is updated for both known versions, so that moving a zone
+ * from LX_KERN_2_6 back to LX_KERN_2_4 does not leave the 2.6 limit
+ * behind.  For any other version value the current limit is kept.
+ */
+void
+lx_set_kern_version(zone_t *zone, int vers)
+{
+	lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+
+	lxzd->lxzd_kernel_version = vers;
+	if (vers == LX_KERN_2_6)
+		lxzd->lxzd_max_syscall = LX_NSYSCALLS_2_6;
+	else if (vers == LX_KERN_2_4)
+		lxzd->lxzd_max_syscall = LX_NSYSCALLS_2_4;
+}
+
+/*
+ * Brand hook that gives a child process its own copy of the parent's
+ * per-process lx brand data.  The parent must already have brand data.
+ */
+void
+lx_copy_procdata(proc_t *child, proc_t *parent)
+{
+	lx_proc_data_t *src = parent->p_brand_data;
+	lx_proc_data_t *dst;
+
+	ASSERT(src != NULL);
+
+	dst = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP);
+	bcopy(src, dst, sizeof (lx_proc_data_t));
+
+	child->p_brand_data = dst;
+}
+
+/*
+ * Currently, only 32-bit branded ELF executables are supported.
+ */
+#if defined(_LP64)
+#define elfexec elf32exec
+#define mapexec_brand mapexec32_brand
+#endif /* _LP64 */
+
+/*
+ * Exec routine called by elfexec() to load 32-bit Linux binaries.
+ *
+ * The brand library (LX_LIB_PATH) is exec'd via elfexec(), then the Linux
+ * executable 'vp' and, if it asks for an interpreter, the Linux linker
+ * (LX_LINKER) are mapped in with mapexec_brand(). What the brand library
+ * needs to know about the Linux executable is saved in the process's
+ * l_elf_data and in the brand aux vector entries.
+ *
+ * Returns 0 on success or the error from the failing lookup, exec, map or
+ * copyout step.
+ */
+static int
+lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action)
+{
+ int error;
+ vnode_t *nvp;
+ auxv32_t phdr_auxv32[3] = {
+ { AT_SUN_BRAND_LX_PHDR, 0 },
+ { AT_SUN_BRAND_AUX2, 0 },
+ { AT_SUN_BRAND_AUX3, 0 }
+ };
+ Elf32_Ehdr ehdr;
+ Elf32_Addr uphdr_vaddr;
+ intptr_t voffset;
+ int interp;
+ int i;
+ struct execenv env;
+ struct user *up = PTOU(ttoproc(curthread));
+ lx_elf_data_t *edp =
+ &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;
+
+ ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
+ ASSERT(ttoproc(curthread)->p_brand_data != NULL);
+
+ /*
+ * Set the brandname and library name for the new process so that
+ * elfexec() puts them onto the stack.
+ */
+ args->brandname = LX_BRANDNAME;
+ args->emulator = LX_LIB_PATH;
+
+ /*
+ * We will exec the brand library, and map in the linux linker and the
+ * linux executable.
+ */
+ if ((error = lookupname(LX_LIB_PATH, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp))) {
+ uprintf("%s: not found.", LX_LIB);
+ return (error);
+ }
+
+ /* The hold on nvp is dropped whether or not the exec succeeds. */
+ if ((error = elfexec(nvp, uap, args, idata, level + 1, execsz, setid,
+ exec_file, cred, brand_action))) {
+ VN_RELE(nvp);
+ return (error);
+ }
+ VN_RELE(nvp);
+
+ bzero(&env, sizeof (env));
+
+ /* Map in the Linux executable itself. */
+ if ((error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
+ exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
+ &env.ex_brksize, NULL)))
+ return (error);
+
+ /*
+ * Save off the important properties of the lx executable. The brand
+ * library will ask us for this data later, when it is ready to set
+ * things up for the lx executable.
+ */
+ edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
+ voffset + uphdr_vaddr;
+ edp->ed_entry = voffset + ehdr.e_entry;
+ edp->ed_phent = ehdr.e_phentsize;
+ edp->ed_phnum = ehdr.e_phnum;
+
+ if (interp) {
+ if (ehdr.e_type == ET_DYN) {
+ /*
+ * This is a shared object executable, so we need to
+ * pick a reasonable place to put the heap. Just don't
+ * use the first page.
+ */
+ env.ex_brkbase = (caddr_t)PAGESIZE;
+ env.ex_bssbase = (caddr_t)PAGESIZE;
+ }
+
+ /*
+ * If the program needs an interpreter (most do), map it in and
+ * store relevant information about it in the aux vector, where
+ * the brand library can find it.
+ */
+ if ((error = lookupname(LX_LINKER, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp))) {
+ uprintf("%s: not found.", LX_LINKER);
+ return (error);
+ }
+ /* From here on ehdr and voffset describe the linker. */
+ if ((error = mapexec_brand(nvp, args, &ehdr, &uphdr_vaddr,
+ &voffset, exec_file, &interp, NULL, NULL, NULL, NULL))) {
+ VN_RELE(nvp);
+ return (error);
+ }
+ VN_RELE(nvp);
+
+ /*
+ * Now that we know the base address of the brand's linker,
+ * place it in the aux vector.
+ */
+ edp->ed_base = voffset;
+ edp->ed_ldentry = voffset + ehdr.e_entry;
+ } else {
+ /*
+ * This program has no interpreter. The lx brand library will
+ * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
+ * so in this case, put the entry point of the main executable
+ * there.
+ */
+ if (ehdr.e_type == ET_EXEC) {
+ /*
+ * An executable with no interpreter, this must be a
+ * statically linked executable, which means we loaded
+ * it at the address specified in the elf header, in
+ * which case the e_entry field of the elf header is an
+ * absolute address.
+ */
+ edp->ed_ldentry = ehdr.e_entry;
+ edp->ed_entry = ehdr.e_entry;
+ } else {
+ /*
+ * A shared object with no interpreter, we use the
+ * calculated address from above.
+ */
+ edp->ed_ldentry = edp->ed_entry;
+
+ /*
+ * In all situations except an ET_DYN elf object with no
+ * interpreter, we want to leave the brk and base
+ * values set by mapexec_brand alone. Normally when
+ * running ET_DYN objects on Solaris (most likely
+ * /lib/ld.so.1) the kernel sets brk and base to 0 since
+ * it doesn't know where to put the heap, and later the
+ * linker will call brk() to initialize the heap in:
+ * usr/src/cmd/sgs/rtld/common/setup.c:setup()
+ * after it has determined where to put it. (This
+ * decision is made after the linker loads and inspects
+ * elf properties of the target executable being run.)
+ *
+ * So for ET_DYN Linux executables, we also don't know
+ * where the heap should go, so we'll set the brk and
+ * base to 0. But in this case the Solaris linker will
+ * not initialize the heap, so when the Linux linker
+ * starts running there is no heap allocated. This
+ * seems to be ok on Linux 2.4 based systems because the
+ * Linux linker/libc fall back to using mmap() to
+ * allocate memory. But on 2.6 systems, running
+ * applications by specifying them as command line
+ * arguments to the linker results in segfaults for an
+ * as yet undetermined reason (which seems to indicate
+ * that a more permanent fix for heap initialization in
+ * these cases may be necessary).
+ */
+ if (ehdr.e_type == ET_DYN) {
+ env.ex_bssbase = (caddr_t)0;
+ env.ex_brkbase = (caddr_t)0;
+ env.ex_brksize = 0;
+ }
+ }
+
+ }
+
+ env.ex_vp = vp;
+ setexecenv(&env);
+
+ /*
+ * We don't need to copy this stuff out. It is only used by our
+ * tools to locate the lx linker's debug section. But we should at
+ * least try to keep /proc's view of the aux vector consistent with
+ * what's on the process stack.
+ */
+ phdr_auxv32[0].a_un.a_val = edp->ed_phdr;
+
+ /*
+ * Linux 2.6 programs such as ps will print an error message if the
+ * following aux entry is missing
+ */
+ if (lx_get_kern_version() >= LX_KERN_2_6) {
+ phdr_auxv32[1].a_type = AT_CLKTCK;
+ phdr_auxv32[1].a_un.a_val = hz;
+ }
+
+ if (copyout(&phdr_auxv32, args->auxp_brand,
+ sizeof (phdr_auxv32)) == -1)
+ return (EFAULT);
+
+ /*
+ * /proc uses the AT_ENTRY aux vector entry to deduce
+ * the location of the executable in the address space. The user
+ * structure contains a copy of the aux vector that needs to have those
+ * entries patched with the values of the real lx executable (they
+ * currently contain the values from the lx brand library that was
+ * elfexec'd, above).
+ *
+ * For live processes, AT_BASE is used to locate the linker segment,
+ * which /proc and friends will later use to find Solaris symbols
+ * (such as rtld_db_preinit). However, for core files, /proc uses
+ * AT_ENTRY to find the right segment to label as the executable.
+ * So we set AT_ENTRY to be the entry point of the linux executable,
+ * but leave AT_BASE to be the address of the Solaris linker.
+ */
+ for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+ if (up->u_auxv[i].a_type == AT_ENTRY)
+ up->u_auxv[i].a_un.a_val = edp->ed_entry;
+ if (up->u_auxv[i].a_type == AT_SUN_BRAND_LX_PHDR)
+ up->u_auxv[i].a_un.a_val = edp->ed_phdr;
+ }
+
+ return (0);
+}
+
+/*
+ * Module load entry point for the lx brand: set up the pid/tid conversion
+ * hash tables and the futex state, then install the brand module.  If the
+ * install fails both are torn down again.
+ */
+int
+_init(void)
+{
+	int err;
+
+	lx_pid_init();		/* pid/tid conversion hash tables */
+	lx_futex_init();	/* for lx_futex() */
+
+	if ((err = mod_install(&modlinkage)) == 0)
+		return (0);
+
+	cmn_err(CE_WARN, "Couldn't install lx brand module");
+
+	/*
+	 * The panic below looks drastic, but it should never happen.
+	 * Both data structures should be completely free-able until they
+	 * are used by Linux processes.  Since the brand wasn't loaded
+	 * there should be no Linux processes, and thus no way for these
+	 * data structures to have been modified.
+	 */
+	lx_pid_fini();
+	if (lx_futex_fini())
+		panic("lx brand module cannot be loaded or unloaded.");
+
+	return (err);
+}
+
+/*
+ * Module information entry point: let mod_info() fill in modinfop from
+ * this module's linkage and return its result.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * Module unload entry point for the lx brand.
+ *
+ * Refuses with EBUSY while any zone uses the brand.  Otherwise the pid
+ * hash tables and the futex state are torn down and the module removed;
+ * if a later step fails, whatever was already torn down is set up again
+ * and the error is returned.
+ */
+int
+_fini(void)
+{
+	int err;
+	int futex_torn_down = 0;
+
+	/* A brand that is still in use by some zone cannot be unloaded. */
+	if (brand_zone_count(&lx_brand))
+		return (EBUSY);
+
+	lx_pid_fini();
+
+	err = lx_futex_fini();
+	if (err == 0) {
+		futex_torn_down = 1;
+		err = mod_remove(&modlinkage);
+	}
+
+	if (err) {
+		/*
+		 * The module stays loaded, so it has to be put back into
+		 * a sane state.
+		 */
+		lx_pid_init();
+
+		if (futex_torn_down)
+			lx_futex_init();
+	}
+
+	return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
new file mode 100644
index 0000000000..dd279eb6e2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -0,0 +1,362 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/privregs.h>
+#include <sys/exec.h>
+#include <sys/lwp.h>
+#include <sys/sem.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+
+/* Linux specific functions and definitions */
+void lx_setrval(klwp_t *, int, int);
+void lx_exec();
+int lx_initlwp(klwp_t *);
+void lx_forklwp(klwp_t *, klwp_t *);
+void lx_exitlwp(klwp_t *);
+void lx_freelwp(klwp_t *);
+static void lx_save(klwp_t *);
+static void lx_restore(klwp_t *);
+extern void lx_ptrace_free(proc_t *);
+
+/*
+ * Set the return code for the forked child, always zero
+ *
+ * v1 and v2 are ignored; zero is stored in the r_r0 register of the lwp's
+ * saved register set.
+ */
+/*ARGSUSED*/
+void
+lx_setrval(klwp_t *lwp, int v1, int v2)
+{
+ lwptoregs(lwp)->r_r0 = 0;
+}
+
+/*
+ * Reset process state on exec(2)
+ *
+ * Makes sure the current lwp has lx brand data and a matching Linux PID,
+ * installs the lx context operations, and clears the lwp's TLS
+ * descriptors both in its brand data and in the GDT.
+ */
+void
+lx_exec()
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ int err;
+
+ /*
+ * There are two mutually exclusive special cases we need to
+ * address. First, if this was a native process prior to this
+ * exec(), then this lwp won't have its brand-specific data
+ * initialized and it won't be assigned a Linux PID yet. Second,
+ * if this was a multi-threaded Linux process and this lwp wasn't
+ * the main lwp, then we need to make its Solaris and Linux PIDS
+ * match.
+ */
+ if (lwpd == NULL) {
+ err = lx_initlwp(lwp);
+ /*
+ * Only possible failure from this routine should be an
+ * inability to allocate a new PID. Since single-threaded
+ * processes don't need a new PID, we should never hit this
+ * error.
+ */
+ ASSERT(err == 0);
+ lwpd = lwptolxlwp(lwp);
+ } else if (curthread->t_tid != 1) {
+ lx_pid_reassign(curthread);
+ }
+
+ /*
+ * NOTE(review): lx_initlwp() installs the same lx_save/lx_restore
+ * context ops itself, so they appear to be installed a second time
+ * here - confirm this is intended.
+ */
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+ NULL);
+
+ /*
+ * clear out the tls array
+ */
+ bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
+
+ /*
+ * reset the tls entries in the gdt
+ */
+ kpreempt_disable();
+ lx_restore(lwp);
+ kpreempt_enable();
+}
+
+/*
+ * Brand hook run when an lx lwp exits.
+ *
+ * Clears and wakes the br_clear_ctidp word if one is registered, queues
+ * the lwp's exit signal (br_signal) if one is set, and finally releases
+ * the lwp's brand data via lx_freelwp(). Calling it again for an lwp whose
+ * brand data is already gone is a no-op.
+ */
+void
+lx_exitlwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p;
+ kthread_t *t;
+ sigqueue_t *sqp = NULL;
+ pid_t ppid;
+ id_t ptid;
+
+ if (lwpd == NULL)
+ return; /* second time thru' */
+
+ /* Zero the user-space tid word and wake one futex waiter on it. */
+ if (lwpd->br_clear_ctidp != NULL) {
+ (void) suword32(lwpd->br_clear_ctidp, 0);
+ (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
+ NULL, NULL, 0);
+ }
+
+ if (lwpd->br_signal != 0) {
+ /*
+ * The first thread in a process doesn't cause a signal to
+ * be sent when it exits. It was created by a fork(), not
+ * a clone(), so the parent should get signalled when the
+ * process exits.
+ */
+ if (lwpd->br_ptid == -1)
+ goto free;
+
+ /* Freed at 'free:' unless it is handed to sigaddqa() below. */
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ /*
+ * If br_ppid is 0, it means this is a CLONE_PARENT thread,
+ * so the signal goes to the parent process - not to a
+ * specific thread in this process.
+ */
+ p = lwptoproc(lwp);
+ if (lwpd->br_ppid == 0) {
+ /*
+ * NOTE(review): p still refers to the exiting lwp's own
+ * process here and ppid is never used afterwards, so the
+ * signal is queued on this process rather than on its
+ * parent as the comment above describes - confirm.
+ */
+ mutex_enter(&p->p_lock);
+ ppid = p->p_ppid;
+ t = NULL;
+ } else {
+ /*
+ * If we have been reparented to init or if our
+ * parent thread is gone, then nobody gets
+ * signaled.
+ */
+ if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) ||
+ (ptid == -1))
+ goto free;
+
+ /* Find the parent under pidlock, then switch to its p_lock. */
+ mutex_enter(&pidlock);
+ if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ goto free;
+ }
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if ((t = idtot(p, ptid)) == NULL) {
+ mutex_exit(&p->p_lock);
+ goto free;
+ }
+ }
+
+ /* p->p_lock is held on both paths that reach this point. */
+ sqp->sq_info.si_signo = lwpd->br_signal;
+ sqp->sq_info.si_code = lwpd->br_exitwhy;
+ sqp->sq_info.si_status = lwpd->br_exitwhat;
+ sqp->sq_info.si_pid = lwpd->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(p, t, sqp);
+ mutex_exit(&p->p_lock);
+ /* The entry was queued; make sure it is not freed below. */
+ sqp = NULL;
+ }
+
+free:
+ if (sqp)
+ kmem_free(sqp, sizeof (sigqueue_t));
+
+ lx_freelwp(lwp);
+}
+
+/*
+ * Release the lx brand data of an lwp: remove the lx context operations,
+ * drop the Linux PID if one was assigned, and free the brand data.  Does
+ * nothing if the lwp has no brand data.
+ */
+void
+lx_freelwp(klwp_t *lwp)
+{
+	struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+
+	if (lwpd == NULL)
+		return;
+
+	(void) removectx(lwptot(lwp), lwp, lx_save, lx_restore,
+	    NULL, NULL, lx_save, NULL);
+
+	if (lwpd->br_pid != 0) {
+		lx_pid_rele(lwptoproc(lwp)->p_pid,
+		    lwptot(lwp)->t_tid);
+	}
+
+	lwp->lwp_brand = NULL;
+	kmem_free(lwpd, sizeof (struct lx_lwp_data));
+}
+
+/*
+ * Allocate and attach the lx brand data for an lwp, assign it a Linux
+ * PID and install the lx context operations.
+ *
+ * Returns 0 on success, or -1 if lx_pid_assign() fails, in which case the
+ * lwp is left without brand data.
+ */
+int
+lx_initlwp(klwp_t *lwp)
+{
+	kthread_t *thr = lwptot(lwp);
+	struct lx_lwp_data *creator;
+	struct lx_lwp_data *lwpd;
+
+	/*
+	 * kmem_zalloc() hands back zeroed memory, which also covers
+	 * br_affinitymask; the fields below are set explicitly.
+	 */
+	lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
+	lwpd->br_lwp = lwp;
+	lwpd->br_exitwhy = CLD_EXITED;
+	lwpd->br_signal = 0;
+	lwpd->br_set_ctidp = NULL;
+	lwpd->br_clear_ctidp = NULL;
+
+	/*
+	 * The first thread in a process gets the parent process's pid as
+	 * its ppid and a ptid of -1.  Later threads get the pid and tid
+	 * of the thread that created them.
+	 */
+	if (thr->t_next == thr) {
+		lwpd->br_ptid = -1;
+		lwpd->br_ppid = thr->t_procp->p_ppid;
+	} else if ((creator = ttolxlwp(curthread)) != NULL) {
+		lwpd->br_ptid = curthread->t_tid;
+		lwpd->br_ppid = creator->br_pid;
+		bcopy(creator->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls));
+	} else {
+		/* Oddball case: the parent thread isn't a Linux process. */
+		lwpd->br_ptid = -1;
+		lwpd->br_ppid = 0;
+	}
+
+	/* The brand data is attached before a Linux PID is requested. */
+	lwp->lwp_brand = lwpd;
+	if (lx_pid_assign(thr)) {
+		lwp->lwp_brand = NULL;
+		kmem_free(lwpd, sizeof (struct lx_lwp_data));
+		return (-1);
+	}
+	lwpd->br_tgid = lwpd->br_pid;
+
+	installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
+	    lx_save, NULL);
+
+	return (0);
+}
+
+/*
+ * Brand hook that seeds a new lwp's lx brand data from the lwp that
+ * forked it: parent pid/tid, TLS descriptors and the CPU-bound flag.
+ *
+ * No locking is needed on either lx_lwp_data: this always runs in the
+ * context of the source thread, and the destination thread is newly
+ * created and not yet referred to from anywhere else.
+ */
+void
+lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
+{
+	struct lx_lwp_data *parent = srclwp->lwp_brand;
+	struct lx_lwp_data *child = dstlwp->lwp_brand;
+
+	bcopy(parent->br_tls, child->br_tls, sizeof (child->br_tls));
+	child->br_ptid = lwptot(srclwp)->t_tid;
+	child->br_ppid = parent->br_pid;
+
+	/* BR_CPU_BOUND is the only flag that is inherited. */
+	child->br_lwp_flags = parent->br_lwp_flags & BR_CPU_BOUND;
+	child->br_clone_args = NULL;
+}
+
+/*
+ * Context operation run when a Linux process is switched off the CPU:
+ * every TLS slot in the GDT is replaced with the null descriptor.  On
+ * amd64 the segment registers are reset first.
+ */
+/* ARGSUSED */
+static void
+lx_save(klwp_t *t)
+{
+	int slot;
+
+#if defined(__amd64)
+	reset_sregs();
+#endif
+	for (slot = GDT_TLSMIN; slot < GDT_TLSMIN + LX_TLSNUM; slot++)
+		gdt_update_usegd(slot, &null_udesc);
+}
+
+/*
+ * Context operation run when a Linux process is switched onto the CPU:
+ * the lwp's saved TLS descriptors are loaded into the GDT's TLS slots.
+ * The lwp must have lx brand data.
+ */
+static void
+lx_restore(klwp_t *t)
+{
+	struct lx_lwp_data *lwpd = lwptolxlwp(t);
+	int i = 0;
+
+	ASSERT(lwpd);
+
+	while (i < LX_TLSNUM) {
+		gdt_update_usegd(GDT_TLSMIN + i, &lwpd->br_tls[i]);
+		i++;
+	}
+}
+
+/*
+ * Install the descriptor 'descrp' in GDT slot 'entry'. Thin wrapper
+ * around gdt_update_usegd().
+ */
+void
+lx_set_gdt(int entry, user_desc_t *descrp)
+{
+
+ gdt_update_usegd(entry, descrp);
+}
+
+/*
+ * Reset GDT slot 'entry' by installing the null descriptor in it.
+ */
+void
+lx_clear_gdt(int entry)
+{
+ gdt_update_usegd(entry, &null_udesc);
+}
+
+/*
+ * Fail with ENOSYS: sets errno through set_errno() and returns its result.
+ */
+longlong_t
+lx_nosys()
+{
+ return (set_errno(ENOSYS));
+}
+
+/*
+ * Fail with EOPNOTSUPP: sets errno through set_errno() and returns its
+ * result.
+ */
+longlong_t
+lx_opnotsupp()
+{
+ return (set_errno(EOPNOTSUPP));
+}
+
+/*
+ * Brand-specific routine to check if given non-Solaris standard segment
+ * register values should be modified to other values.
+ *
+ * 'sr' must fit in 16 bits. Returns the selector value to use instead.
+ */
+/*ARGSUSED*/
+greg_t
+lx_fixsegreg(greg_t sr, model_t datamodel)
+{
+ ASSERT(sr == (sr & 0xffff));
+
+ /*
+ * Force the SR into the LDT in ring 3 for 32-bit processes.
+ *
+ * 64-bit processes get the null GDT selector since they are not
+ * allowed to have a private LDT.
+ */
+ /*
+ * NOTE(review): only __amd64 and __i386 are handled; on any other
+ * target the function would end without a return statement.
+ */
+#if defined(__amd64)
+ return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0);
+#elif defined(__i386)
+ datamodel = datamodel; /* datamodel currently unused for 32-bit */
+ return (sr | SEL_TI_LDT | SEL_UPL);
+#endif /* __amd64 */
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
new file mode 100644
index 0000000000..0fdde8c20e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -0,0 +1,369 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/var.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * Sizing knobs for the pid translation tables; lx_pid_init() derives
+ * hash_size from v.v_proc, hash_len and LINUX_PROC_FACTOR.
+ */
+#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */
+static int hash_len = 4; /* desired average hash chain length */
+static int hash_size; /* no of buckets in the hash table */
+
+/*
+ * Two chained hash tables over the same lx_pid entries:
+ * stol_pid_hash is keyed by Solaris (pid, tid), chained via stol_next;
+ * ltos_pid_hash is keyed by Linux pid, chained via ltos_next.
+ */
+static struct lx_pid **stol_pid_hash;
+static struct lx_pid **ltos_pid_hash;
+
+/*
+ * Bucket index computation.  The mask with (hash_size - 1) relies on
+ * hash_size being a power of two, which lx_pid_init() guarantees by
+ * computing it as 1 << highbit(...).
+ */
+#define LTOS_HASH(pid) ((pid) & (hash_size - 1))
+#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1))
+
+/* Held around every lookup, insertion and removal on the two tables. */
+static kmutex_t hash_lock;
+
+/*
+ * Link lpidp at the head of its chain in both translation tables: the one
+ * keyed by Solaris (pid, tid) and the one keyed by Linux pid.
+ * The caller must hold hash_lock.
+ */
+static void
+lx_pid_insert_hash(struct lx_pid *lpidp)
+{
+	struct lx_pid **sbucket, **lbucket;
+
+	sbucket = &stol_pid_hash[STOL_HASH(lpidp->s_pid, lpidp->s_tid)];
+	lbucket = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	lpidp->stol_next = *sbucket;
+	*sbucket = lpidp;
+
+	lpidp->ltos_next = *lbucket;
+	*lbucket = lpidp;
+}
+
+/*
+ * Unlink the entry for the Solaris (pid, tid) pair from both translation
+ * tables and return it; the entry itself is not freed.  Returns NULL if no
+ * such entry exists.  The caller must hold hash_lock.
+ */
+static struct lx_pid *
+lx_pid_remove_hash(pid_t pid, id_t tid)
+{
+	struct lx_pid **linkp;
+	struct lx_pid *found = NULL;
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	/* First locate and unlink the entry via the Solaris-keyed table. */
+	for (linkp = &stol_pid_hash[STOL_HASH(pid, tid)]; *linkp != NULL;
+	    linkp = &(*linkp)->stol_next) {
+		if ((*linkp)->s_pid != pid || (*linkp)->s_tid != tid)
+			continue;
+
+		found = *linkp;
+		*linkp = found->stol_next;
+		break;
+	}
+
+	/*
+	 * When we are called during error recovery the pid may already
+	 * have been released, so a miss is not an error.
+	 */
+	if (found == NULL)
+		return (NULL);
+
+	/* Then unlink the very same entry from the Linux-keyed table. */
+	for (linkp = &ltos_pid_hash[LTOS_HASH(found->l_pid)]; *linkp != NULL;
+	    linkp = &(*linkp)->ltos_next) {
+		if (*linkp == found) {
+			*linkp = found->ltos_next;
+			break;
+		}
+	}
+
+	return (found);
+}
+
+/*
+ * Local prototype for pid_find(), which lx_pid_assign() calls below.
+ * NOTE(review): no header included by this file appears to declare it;
+ * presumably it is exported by the core pid code -- confirm.
+ */
+struct pid * pid_find(pid_t pid);
+
+/*
+ * Create the Linux pid for the Solaris (pid, tid) pair identified by
+ * thread t, enter it into both translation tables and record it in the
+ * thread's brand lwp data (br_pid).
+ *
+ * When the process has no lwps yet (p_lwpcnt == 0) the process's own pid
+ * is used as the Linux pid; otherwise a new pid is allocated.
+ *
+ * Returns 0 on success, or -1 if a new pid could not be allocated.
+ */
+int
+lx_pid_assign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	lx_lwp_data_t *lwpd = ttolxlwp(t);
+	struct lx_pid *lpidp;
+	struct pid *pidp = NULL;
+	pid_t newpid = p->p_pid;
+
+	/* Any thread other than the first gets a pid of its own. */
+	if (p->p_lwpcnt > 0) {
+		newpid = pid_allocate(p, 0, 0);
+		if (newpid < 0)
+			return (-1);
+
+		pidp = pid_find(newpid);
+	}
+
+	lpidp = kmem_alloc(sizeof (*lpidp), KM_SLEEP);
+	lpidp->l_pid = newpid;
+	lpidp->s_pid = p->p_pid;
+	lpidp->s_tid = t->t_tid;
+	lpidp->l_pidp = pidp;
+	lpidp->l_start = t->t_start;
+
+	/*
+	 * Publish the mapping in the linux-solaris and solaris-linux
+	 * conversion hash tables.
+	 */
+	mutex_enter(&hash_lock);
+	lx_pid_insert_hash(lpidp);
+	mutex_exit(&hash_lock);
+
+	lwpd->br_pid = newpid;
+
+	return (0);
+}
+
+/*
+ * Called when the process is exec()ing: this thread's tid is about to be
+ * reset to 1, so move its translation entry from (pid, old tid) to
+ * (pid, 1) and make its Linux pid equal to the process pid.  Any separately
+ * allocated pid the entry was holding is released.
+ */
+void
+lx_pid_reassign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	struct lx_pid *entry;
+	struct pid *stale;
+
+	ASSERT(p->p_lwpcnt == 1);
+
+	mutex_enter(&hash_lock);
+
+	/*
+	 * Remove every trace of this thread's 'fake' Linux PID ...
+	 */
+	entry = lx_pid_remove_hash(p->p_pid, t->t_tid);
+	ASSERT(entry != NULL);
+	stale = entry->l_pidp;
+	entry->l_pidp = NULL;
+
+	/*
+	 * ... and register the thread again as (pid, 1).
+	 */
+	entry->l_pid = p->p_pid;
+	entry->s_pid = p->p_pid;
+	entry->s_tid = 1;
+	lx_pid_insert_hash(entry);
+
+	mutex_exit(&hash_lock);
+
+	/* The old pid is released only after hash_lock has been dropped. */
+	if (stale != NULL)
+		(void) pid_rele(stale);
+}
+
+/*
+ * Release the translation entry for a Solaris (pid, tid) pair: unlink it
+ * from the hash tables, release the pid it holds (if any) and free it.
+ * Does nothing when no entry exists for the pair.
+ */
+void
+lx_pid_rele(pid_t pid, id_t tid)
+{
+	struct lx_pid *lpidp;
+
+	mutex_enter(&hash_lock);
+	lpidp = lx_pid_remove_hash(pid, tid);
+	mutex_exit(&hash_lock);
+
+	if (lpidp == NULL)
+		return;
+
+	if (lpidp->l_pidp != NULL)
+		(void) pid_rele(lpidp->l_pidp);
+
+	kmem_free(lpidp, sizeof (struct lx_pid));
+}
+
+/*
+ * Translate a Linux pid into the Solaris (pid, tid) pair.
+ *
+ * s_pid and s_tid may each be NULL if the caller does not want that value.
+ * Returns 0 on success and -1 if the pid is unknown.  For a pid that is not
+ * in the translation table but belongs to a process found by prfind(), the
+ * pair returned is (l_pid, 0).
+ */
+int
+lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
+{
+	struct lx_pid *ent;
+	int native;
+
+	mutex_enter(&hash_lock);
+	ent = ltos_pid_hash[LTOS_HASH(l_pid)];
+	while (ent != NULL && ent->l_pid != l_pid)
+		ent = ent->ltos_next;
+	if (ent != NULL) {
+		/* Copy the results out while the entry cannot go away. */
+		if (s_pid != NULL)
+			*s_pid = ent->s_pid;
+		if (s_tid != NULL)
+			*s_tid = ent->s_tid;
+	}
+	mutex_exit(&hash_lock);
+
+	if (ent != NULL)
+		return (0);
+
+	/*
+	 * The pid is not in our translation table, but it could still be
+	 * the pid of a native process running in the current zone, so check
+	 * for that as well.
+	 *
+	 * Note that prfind() only searches for processes in the current zone.
+	 */
+	mutex_enter(&pidlock);
+	native = (prfind(l_pid) != NULL);
+	mutex_exit(&pidlock);
+
+	if (!native)
+		return (-1);
+
+	if (s_pid != NULL)
+		*s_pid = l_pid;
+	if (s_tid != NULL)
+		*s_tid = 0;
+
+	return (0);
+}
+
+/*
+ * Given an lwp, return the Linux pid of its parent. If the caller
+ * wants them, we return the Solaris (pid, tid) as well.
+ *
+ * lwp - the lwp whose parent is wanted
+ * ppidp - if non-NULL, receives the parent's pid: the Solaris pid of the
+ * matching translation entry when one is found, otherwise the same
+ * value that is returned
+ * ptidp - if non-NULL, receives the parent's Solaris tid when a matching
+ * translation entry is found, otherwise -1
+ *
+ * A parent that is the zone's init process is always reported as pid 1,
+ * and so is a parent whose translation entry is missing or was created
+ * after this lwp was started.
+ */
+pid_t
+lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+ struct lx_pid *hp;
+ pid_t zoneinit = curproc->p_zone->zone_proc_initpid;
+ pid_t lppid, ppid;
+
+ /*
+ * Be sure not to return a parent pid that should be invisible
+ * within this zone.
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * If the parent process's pid is the zone's init process, force it
+ * to the Linux init pid value of 1.
+ */
+ if (ppid == zoneinit)
+ ppid = 1;
+
+ /*
+ * There are two cases in which the Linux definition of a 'parent'
+ * matches that of Solaris:
+ *
+ * - if our tgid is the same as our PID, then we are either the
+ * first thread in the process or a CLONE_THREAD thread.
+ *
+ * - if the brand lwp value for ppid is 0, then we are either the
+ * child of a differently-branded process or a CLONE_PARENT thread.
+ */
+ if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) {
+ if (ppidp != NULL)
+ *ppidp = ppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ return (ppid);
+ }
+
+ /*
+ * Set the default Linux parent pid to be the pid of the zone's init
+ * process; this will get converted back to the Linux default of 1
+ * later.
+ */
+ lppid = zoneinit;
+
+ /*
+ * If the process's parent isn't init, try and look up the Linux "pid"
+ * corresponding to the process's parent.
+ */
+ if (ppid != 1) {
+ /*
+ * In all other cases, we are looking for the parent of this
+ * specific thread, which in Linux refers to the thread that
+ * clone()d it. We stashed that thread's PID away when this
+ * thread was created.
+ */
+ mutex_enter(&hash_lock);
+ for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp;
+ hp = hp->ltos_next) {
+ if (lwpd->br_ppid == hp->l_pid) {
+ /*
+ * We found the PID we were looking for, but
+ * since we cached its value in this LWP's brand
+ * structure, it has exited and been reused by
+ * another process.
+ */
+ /*
+ * The check below detects that reuse: the entry
+ * was started after this lwp, so it cannot be
+ * the thread that created it. lppid is left at
+ * its default.
+ */
+ if (hp->l_start > lwptot(lwp)->t_start)
+ break;
+
+ lppid = lwpd->br_ppid;
+ if (ppidp != NULL)
+ *ppidp = hp->s_pid;
+ if (ptidp != NULL)
+ *ptidp = hp->s_tid;
+
+ break;
+ }
+ }
+ mutex_exit(&hash_lock);
+ }
+
+ /*
+ * Either no usable entry was found or the parent really is init;
+ * in both cases report the Linux init pid.
+ */
+ if (lppid == zoneinit) {
+ lppid = 1;
+
+ if (ppidp != NULL)
+ *ppidp = lppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ }
+
+ return (lppid);
+}
+
+/*
+ * Size and allocate the two pid translation hash tables and initialise the
+ * lock that protects them.  The bucket count is a power of two derived from
+ * v.v_proc, scaled down by hash_len * LINUX_PROC_FACTOR.
+ */
+void
+lx_pid_init(void)
+{
+	size_t tblsz;
+
+	hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR));
+	tblsz = sizeof (struct lx_pid *) * hash_size;
+
+	stol_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+	ltos_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+
+	mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Undo lx_pid_init(): destroy hash_lock and free both translation tables.
+ * Entries that are still linked into the tables are not freed here.
+ */
+void
+lx_pid_fini(void)
+{
+	/*
+	 * hash_lock was set up with mutex_init() in lx_pid_init(); it has to
+	 * be torn down with mutex_destroy() before the module state goes
+	 * away, otherwise a later lx_pid_init() would re-initialise a mutex
+	 * that was never destroyed.
+	 */
+	mutex_destroy(&hash_lock);
+
+	kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size);
+	kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
new file mode 100644
index 0000000000..33c340d572
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -0,0 +1,454 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+/*
+ * Some system calls return either a 32-bit or a 64-bit value, depending
+ * on the datamodel.
+ */
+#ifdef _LP64
+#define V_RVAL SE_64RVAL
+#else
+#define V_RVAL SE_32RVAL1
+#endif
+
+/*
+ * Define system calls that return a native 'long' quantity i.e. a 32-bit
+ * or 64-bit integer - depending on how the kernel is itself compiled
+ * e.g. read(2) returns 'ssize_t' in the kernel and in userland.
+ *
+ * Expands to one table initializer: return-value flag, name string,
+ * handler (cast to llfcn_t) and argument count.
+ */
+#define LX_CL(name, call, narg) \
+ { V_RVAL, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Returns a 32 bit quantity regardless of datamodel
+ */
+#define LX_CI(name, call, narg) \
+ { SE_32RVAL1, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Table entry for a system call that has no handler here: it takes no
+ * arguments and dispatches to lx_nosys(), using the SE_64RVAL flag.
+ */
+extern longlong_t lx_nosys(void);
+#define LX_NOSYS(name) \
+ {SE_64RVAL, (name), (llfcn_t)lx_nosys, 0}
+
+/*
+ * Linux system call table.  lx_emulate_syscall() indexes this array
+ * directly with the system call number, so the position of every entry
+ * matters; the numbers in the trailing comments are the array indices.
+ * Entries built with LX_NOSYS() dispatch to lx_nosys(); entries built with
+ * LX_CL() name a handler and its argument count.
+ */
+lx_sysent_t lx_sysent[] =
+{
+	LX_NOSYS("lx_nosys"),				/* 0 */
+	LX_NOSYS("exit"),				/* 1 */
+	LX_NOSYS("lx_fork"),
+	LX_NOSYS("read"),
+	LX_NOSYS("write"),
+	LX_NOSYS("open"),
+	LX_NOSYS("close"),
+	LX_NOSYS("waitpid"),
+	LX_NOSYS("creat"),
+	LX_NOSYS("link"),
+	LX_NOSYS("unlink"),				/* 10 */
+	LX_NOSYS("exec"),
+	LX_NOSYS("chdir"),
+	LX_NOSYS("gtime"),
+	LX_NOSYS("mknod"),
+	LX_NOSYS("chmod"),
+	LX_NOSYS("lchown16"),
+	LX_NOSYS("break"),
+	LX_NOSYS("stat"),
+	LX_NOSYS("lseek"),
+	LX_CL("getpid", lx_getpid, 0),			/* 20 */
+	LX_NOSYS("mount"),
+	LX_NOSYS("umount"),
+	LX_NOSYS("setuid16"),
+	LX_NOSYS("getuid16"),
+	LX_NOSYS("stime"),
+	LX_NOSYS("ptrace"),
+	LX_NOSYS("alarm"),
+	LX_NOSYS("fstat"),
+	LX_NOSYS("pause"),
+	LX_NOSYS("utime"),				/* 30 */
+	LX_NOSYS("stty"),
+	LX_NOSYS("gtty"),
+	LX_NOSYS("access"),
+	LX_NOSYS("nice"),
+	LX_NOSYS("ftime"),
+	LX_NOSYS("sync"),
+	LX_CL("kill", lx_kill, 2),
+	LX_NOSYS("rename"),
+	LX_NOSYS("mkdir"),
+	LX_NOSYS("rmdir"),				/* 40 */
+	LX_NOSYS("dup"),
+	LX_NOSYS("pipe"),
+	LX_NOSYS("times"),
+	LX_NOSYS("prof"),
+	LX_CL("brk", lx_brk, 1),
+	LX_NOSYS("setgid16"),
+	LX_NOSYS("getgid16"),
+	LX_NOSYS("signal"),
+	LX_NOSYS("geteuid16"),
+	LX_NOSYS("getegid16"),				/* 50 */
+	LX_NOSYS("sysacct"),
+	LX_NOSYS("umount2"),
+	LX_NOSYS("lock"),
+	LX_NOSYS("ioctl"),
+	LX_NOSYS("fcntl"),
+	LX_NOSYS("mpx"),
+	LX_NOSYS("setpgid"),
+	LX_NOSYS("ulimit"),
+	LX_NOSYS("olduname"),
+	LX_NOSYS("umask"),				/* 60 */
+	LX_NOSYS("chroot"),
+	LX_NOSYS("ustat"),
+	LX_NOSYS("dup2"),
+	LX_CL("getppid", lx_getppid, 0),
+	LX_NOSYS("pgrp"),
+	LX_NOSYS("setsid"),
+	LX_NOSYS("sigaction"),
+	LX_NOSYS("sgetmask"),
+	LX_NOSYS("ssetmask"),
+	LX_NOSYS("setreuid16"),				/* 70 */
+	LX_NOSYS("setregid16"),
+	LX_NOSYS("sigsuspend"),
+	LX_NOSYS("sigpending"),
+	LX_NOSYS("sethostname"),
+	LX_NOSYS("setrlimit"),
+	LX_NOSYS("old_getrlimit"),
+	LX_NOSYS("getrusage"),
+	LX_NOSYS("gettimeofday"),
+	LX_NOSYS("settimeofday"),
+	LX_NOSYS("getgroups16"),			/* 80 */
+	LX_NOSYS("setgroups16"),
+	LX_NOSYS("old_select"),
+	LX_NOSYS("symlink"),
+	LX_NOSYS("oldlstat"),
+	LX_NOSYS("readlink"),
+	LX_NOSYS("uselib"),
+	LX_NOSYS("swapon"),
+	LX_NOSYS("reboot"),
+	LX_NOSYS("old_readdir"),
+	LX_NOSYS("old_mmap"),				/* 90 */
+	LX_NOSYS("munmap"),
+	LX_NOSYS("truncate"),
+	LX_NOSYS("ftruncate"),
+	LX_NOSYS("fchmod"),
+	LX_NOSYS("fchown16"),
+	LX_NOSYS("getpriority"),
+	LX_NOSYS("setpriority"),
+	LX_NOSYS("profil"),
+	LX_NOSYS("statfs"),
+	LX_NOSYS("fstatfs"),				/* 100 */
+	LX_NOSYS("ioperm"),
+	LX_NOSYS("socketcall"),
+	LX_NOSYS("syslog"),
+	LX_NOSYS("setitimer"),
+	LX_NOSYS("getitimer"),
+	LX_NOSYS("newstat"),
+	LX_NOSYS("newlstat"),
+	LX_NOSYS("newfstat"),
+	LX_NOSYS("uname"),
+	LX_NOSYS("oldiopl"),				/* 110 */
+	LX_NOSYS("oldvhangup"),
+	LX_NOSYS("idle"),
+	LX_NOSYS("vm86old"),
+	LX_NOSYS("wait4"),
+	LX_NOSYS("swapoff"),
+	LX_CL("sysinfo", lx_sysinfo, 1),
+	LX_NOSYS("ipc"),
+	LX_NOSYS("fsync"),
+	LX_NOSYS("sigreturn"),
+	LX_CL("clone", lx_clone, 5),			/* 120 */
+	LX_NOSYS("setdomainname"),
+	LX_NOSYS("newuname"),
+	LX_CL("modify_ldt", lx_modify_ldt, 3),
+	LX_NOSYS("adjtimex"),
+	LX_NOSYS("mprotect"),
+	LX_NOSYS("sigprocmask"),
+	LX_NOSYS("create_module"),
+	LX_NOSYS("init_module"),
+	LX_NOSYS("delete_module"),
+	LX_NOSYS("get_kernel_syms"),			/* 130 */
+	LX_NOSYS("quotactl"),
+	LX_NOSYS("getpgid"),
+	LX_NOSYS("fchdir"),
+	LX_NOSYS("bdflush"),
+	LX_NOSYS("sysfs"),
+	LX_NOSYS("personality"),
+	LX_NOSYS("afs_syscall"),
+	LX_NOSYS("setfsuid16"),
+	LX_NOSYS("setfsgid16"),
+	LX_NOSYS("llseek"),				/* 140 */
+	LX_NOSYS("getdents"),
+	LX_NOSYS("select"),
+	LX_NOSYS("flock"),
+	LX_NOSYS("msync"),
+	LX_NOSYS("readv"),
+	LX_NOSYS("writev"),
+	LX_NOSYS("getsid"),
+	LX_NOSYS("fdatasync"),
+	LX_NOSYS("sysctl"),
+	LX_NOSYS("mlock"),				/* 150 */
+	LX_NOSYS("munlock"),
+	LX_NOSYS("mlockall"),
+	LX_NOSYS("munlockall"),
+	LX_CL("sched_setparam", lx_sched_setparam, 2),
+	LX_CL("sched_getparam", lx_sched_getparam, 2),
+	LX_NOSYS("sched_setscheduler"),
+	LX_NOSYS("sched_getscheduler"),
+	LX_NOSYS("yield"),
+	LX_NOSYS("sched_get_priority_max"),
+	LX_NOSYS("sched_get_priority_min"),		/* 160 */
+	LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2),
+	LX_NOSYS("nanosleep"),
+	LX_NOSYS("mremap"),
+	LX_CL("setresuid16", lx_setresuid16, 3),
+	LX_NOSYS("getresuid16"),
+	LX_NOSYS("vm86"),
+	LX_NOSYS("query_module"),
+	LX_NOSYS("poll"),
+	LX_NOSYS("nfsserctl"),
+	LX_CL("setresgid16", lx_setresgid16, 3),	/* 170 */
+	LX_NOSYS("getresgid16"),
+	LX_NOSYS("prctl"),
+	LX_NOSYS("rt_sigreturn"),
+	LX_NOSYS("rt_sigaction"),
+	LX_NOSYS("rt_sigprocmask"),
+	LX_NOSYS("rt_sigpending"),
+	LX_NOSYS("rt_sigtimedwait"),
+	LX_NOSYS("rt_sigqueueinfo"),
+	LX_NOSYS("rt_sigsuspend"),
+	LX_NOSYS("pread64"),				/* 180 */
+	LX_NOSYS("pwrite64"),
+	LX_NOSYS("chown16"),
+	LX_NOSYS("getcwd"),
+	LX_NOSYS("capget"),
+	LX_NOSYS("capset"),
+	LX_NOSYS("sigaltstack"),
+	LX_NOSYS("sendfile"),
+	LX_NOSYS("getpmsg"),
+	LX_NOSYS("putpmsg"),
+	LX_NOSYS("vfork"),				/* 190 */
+	LX_NOSYS("getrlimit"),
+	LX_NOSYS("mmap2"),
+	LX_NOSYS("truncate64"),
+	LX_NOSYS("ftruncate64"),
+	LX_NOSYS("stat64"),
+	LX_NOSYS("lstat64"),
+	LX_NOSYS("fstat64"),
+	LX_NOSYS("lchown"),
+	LX_NOSYS("getuid"),
+	LX_NOSYS("getgid"),				/* 200 */
+	LX_NOSYS("geteuid"),
+	LX_NOSYS("getegid"),
+	LX_NOSYS("setreuid"),
+	LX_NOSYS("setregid"),
+	LX_NOSYS("getgroups"),
+	LX_CL("setgroups", lx_setgroups, 2),
+	LX_NOSYS("fchown"),
+	LX_CL("setresuid", lx_setresuid, 3),
+	LX_NOSYS("getresuid"),
+	LX_CL("setresgid", lx_setresgid, 3),		/* 210 */
+	LX_NOSYS("getresgid"),
+	LX_NOSYS("chown"),
+	LX_NOSYS("setuid"),
+	LX_NOSYS("setgid"),
+	LX_NOSYS("setfsuid"),
+	LX_NOSYS("setfsgid"),
+	LX_NOSYS("pivot_root"),
+	LX_NOSYS("mincore"),
+	LX_NOSYS("madvise"),
+	LX_NOSYS("getdents64"),				/* 220 */
+	LX_NOSYS("fcntl64"),
+	LX_NOSYS("lx_nosys"),
+	LX_NOSYS("security"),
+	LX_CL("gettid", lx_gettid, 0),
+	LX_NOSYS("readahead"),
+	LX_NOSYS("setxattr"),
+	LX_NOSYS("lsetxattr"),
+	LX_NOSYS("fsetxattr"),
+	LX_NOSYS("getxattr"),
+	LX_NOSYS("lgetxattr"),				/* 230 */
+	LX_NOSYS("fgetxattr"),
+	LX_NOSYS("listxattr"),
+	LX_NOSYS("llistxattr"),
+	LX_NOSYS("flistxattr"),
+	LX_NOSYS("removexattr"),
+	LX_NOSYS("lremovexattr"),
+	LX_NOSYS("fremovexattr"),
+	LX_CL("tkill", lx_tkill, 2),
+	LX_NOSYS("sendfile64"),
+	LX_CL("futex", lx_futex, 6),			/* 240 */
+	LX_NOSYS("sched_setaffinity"),
+	LX_NOSYS("sched_getaffinity"),
+	LX_CL("set_thread_area", lx_set_thread_area, 1),
+	LX_CL("get_thread_area", lx_get_thread_area, 1),
+	LX_NOSYS("io_setup"),
+	LX_NOSYS("io_destroy"),
+	LX_NOSYS("io_getevents"),
+	LX_NOSYS("io_submit"),
+	LX_NOSYS("io_cancel"),
+	LX_NOSYS("fadvise64"),				/* 250 */
+	LX_NOSYS("lx_nosys"),
+	LX_NOSYS("exit_group"),
+	LX_NOSYS("lookup_dcookie"),
+	LX_NOSYS("epoll_create"),
+	LX_NOSYS("epoll_ctl"),
+	LX_NOSYS("epoll_wait"),
+	LX_NOSYS("remap_file_pages"),
+	LX_CL("set_tid_address", lx_set_tid_address, 1),
+	LX_NOSYS("timer_create"),
+	LX_NOSYS("timer_settime"),			/* 260 */
+	LX_NOSYS("timer_gettime"),
+	LX_NOSYS("timer_getoverrun"),
+	LX_NOSYS("timer_delete"),
+	LX_NOSYS("clock_settime"),
+	LX_NOSYS("clock_gettime"),
+	LX_NOSYS("clock_getres"),
+	LX_NOSYS("clock_nanosleep"),
+	LX_NOSYS("statfs64"),
+	LX_NOSYS("fstatfs64"),
+	LX_NOSYS("tgkill"),				/* 270 */
+	/* The following are Linux 2.6 system calls */
+	LX_NOSYS("utimes"),
+	LX_NOSYS("fadvise64_64"),
+	LX_NOSYS("vserver"),
+	LX_NOSYS("mbind"),
+	LX_NOSYS("get_mempolicy"),
+	LX_NOSYS("set_mempolicy"),
+	LX_NOSYS("mq_open"),
+	LX_NOSYS("mq_unlink"),
+	LX_NOSYS("mq_timedsend"),
+	LX_NOSYS("mq_timedreceive"),			/* 280 */
+	LX_NOSYS("mq_notify"),
+	LX_NOSYS("mq_getsetattr"),
+	LX_NOSYS("kexec_load"),
+	LX_NOSYS("waitid"),
+	LX_NOSYS("sys_setaltroot"),
+	LX_NOSYS("add_key"),
+	LX_NOSYS("request_key"),
+	LX_NOSYS("keyctl"),
+	LX_NOSYS("ioprio_set"),
+	LX_NOSYS("ioprio_get"),				/* 290 */
+	LX_NOSYS("inotify_init"),
+	LX_NOSYS("inotify_add_watch"),
+	LX_NOSYS("inotify_rm_watch"),
+	LX_NOSYS("migrate_pages"),
+	LX_NOSYS("openat"),
+	LX_NOSYS("mkdirat"),
+	LX_NOSYS("mknodat"),
+	LX_NOSYS("fchownat"),
+	LX_NOSYS("futimesat"),
+	LX_NOSYS("fstatat64"),				/* 300 */
+	LX_NOSYS("unlinkat"),
+	LX_NOSYS("renameat"),
+	LX_NOSYS("linkat"),
+	LX_NOSYS("symlinkat"),
+	LX_NOSYS("readlinkat"),
+	LX_NOSYS("fchmodat"),
+	LX_NOSYS("faccessat"),
+	LX_NOSYS("pselect6"),
+	LX_NOSYS("ppoll"),
+	LX_NOSYS("unshare"),				/* 310 */
+	LX_NOSYS("set_robust_list"),
+	LX_NOSYS("get_robust_list"),
+	LX_NOSYS("splice"),
+	LX_NOSYS("sync_file_range"),
+	LX_NOSYS("tee"),
+	LX_NOSYS("vmsplice"),
+	LX_NOSYS("move_pages"),
+	NULL	/* NULL-termination is required for lx_systrace */
+};
+
+/*
+ * Dispatch Linux system call number 'num' through lx_sysent[].
+ *
+ * The table entry's sy_narg selects how many of arg1..arg6 are handed to
+ * the handler (sy_callc); the call and its result are traced with
+ * lx_print().  An entry whose argument count is not in the range 0..6
+ * causes a panic.  'num' is used as an array index without being checked
+ * here.
+ *
+ * Returns the handler's return value widened to int64_t.
+ */
+int64_t
+lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	struct lx_sysent *jsp = &lx_sysent[num];
+	int64_t rval = 0;
+
+	switch (jsp->sy_narg) {
+	case 0:
+		lx_print("--> %s()\n", jsp->sy_name);
+		rval = (int64_t)jsp->sy_callc();
+		break;
+	case 1:
+		lx_print("--> %s(0x%lx)\n", jsp->sy_name, arg1);
+		rval = (int64_t)jsp->sy_callc(arg1);
+		break;
+	case 2:
+		lx_print("--> %s(0x%lx, 0x%lx)\n", jsp->sy_name, arg1, arg2);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2);
+		break;
+	case 3:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3);
+		break;
+	case 4:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4);
+		break;
+	case 5:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4, arg5);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5);
+		break;
+	case 6:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx,"
+		    " 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4, arg5, arg6);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5,
+		    arg6);
+		break;
+	default:
+		panic("Invalid syscall entry: #%d at 0x%p\n", num, (void *)jsp);
+	}
+
+	lx_print("----------> return (0x%llx)\n", (long long)rval);
+	return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
new file mode 100644
index 0000000000..a2bd74a817
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -0,0 +1,232 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LXPROC_H
+#define _LXPROC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lx_proc.h: declarations, data structures and macros for lxprocfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+/*
+ * Accessors between vnodes and the lxprocfs private structures.
+ */
+
+/* vnode -> per-mount data (lxpr_mnt_t) stored in its vfs's vfs_data */
+#define	VTOLXPM(vp)	((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/* vnode -> per-node data (lxpr_node_t) stored in its v_data */
+#define	VTOLXP(vp)	((lxpr_node_t *)(vp)->v_data)
+
+/* lxpr_node_t -> the vnode it belongs to */
+#define	LXPTOV(lxpnp)	((lxpnp)->lxpr_vnode)
+
+/* lxpr_node_t -> zone recorded for the mount its vnode lives on */
+#define	LXPTOZ(lxpnp)	(VTOLXPM(LXPTOV(lxpnp))->lxprm_zone)
+
+#define	LXPNSIZ		256	/* max size of lx /proc file name entries */
+
+/* Pretend that a directory entry takes 16 bytes */
+#define	LXPR_SDSIZE	16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ *
+ * Each enumerator's comment gives the path it stands for. LXPR_NFILES is
+ * not a node type: it must remain the last enumerator so that its value
+ * equals the number of types defined before it.
+ */
+typedef enum lxpr_nodetype {
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds than this, inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ *
+ * An lxpr_dirent_t pairs a node type with the name it appears under.
+ * d_name is a fixed buffer of LXPRMAXNAMELEN bytes.
+ */
+#define LXPRMAXNAMELEN 14
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char d_name[LXPRMAXNAMELEN];
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ * (the VTOLXP() macro casts a vnode's v_data back to this type).
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ ino_t lxpr_ino; /* node id */
+ ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ * (the VTOLXPM() macro reaches it from a vnode through v_vfsp).
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+/*
+ * Globals and functions shared between the lxprocfs source files.
+ */
+extern vnodeops_t	*lxpr_vnodeops;
+extern int	nproc_highbit;	/* highbit(v.v_nproc) */
+
+typedef struct mounta	mounta_t;
+
+/*
+ * Node cache and node management.  The functions that take no arguments
+ * are declared with (void) so that the compiler has a real prototype to
+ * check calls against.
+ */
+extern void lxpr_initnodecache(void);
+extern void lxpr_fininodecache(void);
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+
+/*
+ * Output buffer attached to a uio.  struct lxpr_uiobuf is opaque to users
+ * of this header; its definition is not part of this file.
+ */
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+/* Look up / release a process by pid; lxpr_unlock() takes what lxpr_lock() returned. */
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
new file mode 100644
index 0000000000..c3ba5024a4
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -0,0 +1,516 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_prsubr.c: Various support functions for the lxproc vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lx_proc.h"
+
+#define LXPRCACHE_NAME "lxpr_cache"
+
+static int lxpr_node_constructor(void*, void*, int);
+static void lxpr_node_destructor(void*, void*);
+
+static kmem_cache_t *lxpr_node_cache;
+
+/*
+ * Staging buffer used to assemble file contents before they are copied
+ * out to the caller's uio. The character buffer lives in the same
+ * allocation, immediately after this structure (see lxpr_uiobuf_new()).
+ */
+struct lxpr_uiobuf {
+ uio_t *uiop; /* destination uio supplied by the caller */
+ char *buffer; /* start of the staging buffer */
+ uint32_t buffsize; /* size of the staging buffer in bytes */
+ char *pos; /* next free byte in the staging buffer */
+ size_t beg; /* bytes already flushed; advanced by each flush */
+ int error; /* first error recorded; stops further output */
+};
+
+#define BUFSIZE 4000
+
+/*
+ * Create an output buffer bound to the given uio. The returned object
+ * must eventually be handed to lxpr_uiobuf_free().
+ */
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+	struct lxpr_uiobuf *ub;
+
+	/*
+	 * One allocation holds the header immediately followed by the
+	 * BUFSIZE-byte staging area.
+	 */
+	ub = kmem_alloc(sizeof (struct lxpr_uiobuf) + BUFSIZE, KM_SLEEP);
+
+	ub->uiop = uiop;
+	ub->buffer = (char *)(ub + 1);
+	ub->pos = ub->buffer;
+	ub->buffsize = BUFSIZE;
+	ub->beg = 0;
+	ub->error = 0;
+
+	return (ub);
+}
+
+/*
+ * Release a buffer obtained from lxpr_uiobuf_new(). The buffer is
+ * expected to have been flushed already (nothing pending in it).
+ */
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+	size_t total;
+
+	ASSERT(uiobuf != NULL);
+	ASSERT(uiobuf->pos == uiobuf->buffer);
+
+	/* header plus the trailing staging area, as allocated */
+	total = sizeof (struct lxpr_uiobuf) + uiobuf->buffsize;
+	kmem_free(uiobuf, total);
+}
+
+/*
+ * Set the offset of the underlying uio to 'offset'. Only the uio is
+ * touched; the staging buffer and its bookkeeping are left alone.
+ */
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+ uiobuf->uiop->uio_offset = offset;
+}
+
+/*
+ * Record an error on the buffer. Once set, the write/printf/flush
+ * routines stop producing output and lxpr_uiobuf_flush() returns it.
+ * Callers must not overwrite an error that is already recorded.
+ */
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+/*
+ * Push whatever is pending in the staging buffer out to the uio and
+ * reset the buffer to empty.
+ *
+ * 'beg' is the number of bytes produced by earlier flushes. Only the part
+ * of the pending data that lies at or beyond the uio's current offset is
+ * copied out; anything before it is discarded.
+ *
+ * Returns the buffer's recorded error (0 if none).
+ */
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+
+ /* number of bytes currently pending in the staging buffer */
+ size_t size = uiobuf->pos - uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ /* copy out only if the pending data reaches past the offset */
+ if (beg+size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr+(off-beg), size-(off-beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ /* the buffer is emptied even when nothing was copied */
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+/*
+ * Append 'size' bytes from 'buf' to the output. Data is staged in the
+ * internal buffer, which is flushed to the uio each time it fills up.
+ * Output silently stops once an error is recorded or the uio has no
+ * room left.
+ */
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+	for (;;) {
+		uint_t avail;
+
+		/* Nothing more can be delivered; drop the rest */
+		if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+			return;
+
+		avail = uiobuf->buffsize-(uiobuf->pos-uiobuf->buffer);
+
+		/* Everything that is left fits: stage it and finish */
+		if (size <= avail) {
+			bcopy(buf, uiobuf->pos, size);
+			uiobuf->pos += size;
+			return;
+		}
+
+		/* Fill the buffer to the brim, flush it, go around again */
+		bcopy(buf, uiobuf->pos, avail);
+		uiobuf->pos += avail;
+		(void) lxpr_uiobuf_flush(uiobuf);
+		buf += avail;
+		size -= avail;
+	}
+}
+
+#define TYPBUFFSIZE 256
+/*
+ * lxpr_uiobuf_printf():
+ *
+ * Format a string and append it to the output buffer. The text is first
+ * formatted into a small on-stack buffer; if it does not fit, a buffer of
+ * exactly the required size is allocated and the text is formatted again.
+ *
+ * Nothing is emitted if an error is already recorded on 'uiobuf' or the
+ * destination uio has no space left.
+ */
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+	va_list args;
+	char buff[TYPBUFFSIZE];
+	int len;
+	char *buffer;
+
+	/* Can we still do any output */
+	if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+		return;
+
+	/* Try using stack allocated buffer */
+	va_start(args, fmt);
+	len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+	va_end(args);
+
+	if (len < TYPBUFFSIZE) {
+		lxpr_uiobuf_write(uiobuf, buff, len);
+		return;
+	}
+
+	/* Not enough space in pre-allocated buffer */
+	buffer = kmem_alloc(len+1, KM_SLEEP);
+
+	/*
+	 * The first vsnprintf() consumed 'args', and a va_list may not be
+	 * reused after being passed to a v*printf function, so restart the
+	 * argument traversal before formatting a second time.
+	 *
+	 * We know we allocated the correct amount of space
+	 * so no check on the return value
+	 */
+	va_start(args, fmt);
+	(void) vsnprintf(buffer, len+1, fmt, args);
+	va_end(args);
+
+	lxpr_uiobuf_write(uiobuf, buffer, len);
+	kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_lock and P_PR_LOCK held.
+ *
+ * Returns NULL (with no locks held) if the process does not exist or is
+ * still in the SIDL state. If another thread already has P_PR_LOCK set on
+ * the process, wait on its pr_pid_cv slot and then redo the lookup from
+ * scratch. Release with lxpr_unlock().
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+ proc_t *p;
+ kmutex_t *mp;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process
+ */
+ p = prfind((pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ /* p_lock is taken before pidlock is dropped */
+ mutex_exit(&pidlock);
+
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ /* someone else holds P_PR_LOCK: wait, then look the pid up again */
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+ /* loop exited via break: p_lock is still held here */
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock locked process
+ *
+ * Counterpart of lxpr_lock(): the caller must hold p_lock with P_PR_LOCK
+ * set and must not hold pidlock. Signals a waiter on the process's
+ * pr_pid_cv slot, clears P_PR_LOCK and drops p_lock.
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ /* the flag is cleared before p_lock is dropped, so a waiter sees it clear */
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+/*
+ * Create the kmem cache from which lxpr_node_t structures are allocated.
+ * Cached objects are set up by lxpr_node_constructor() and torn down by
+ * lxpr_node_destructor().
+ */
+void
+lxpr_initnodecache()
+{
+ lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME,
+ sizeof (lxpr_node_t), 0,
+ lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0);
+}
+
+/*
+ * Destroy the lxpr_node_t kmem cache created by lxpr_initnodecache().
+ */
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/*
+ * kmem cache constructor for lxpr_node_t: allocate the vnode that goes
+ * with the node and link the two together. Returns 0 on success and -1
+ * if no vnode could be allocated.
+ */
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+	lxpr_node_t *node = buf;
+	vnode_t *nvp = vn_alloc(kmflags);
+
+	/* recorded even on failure, exactly as allocated */
+	node->lxpr_vnode = nvp;
+	if (nvp == NULL)
+		return (-1);
+
+	(void) vn_setops(nvp, lxpr_vnodeops);
+	nvp->v_data = node;
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for lxpr_node_t: free the vnode associated with
+ * the node.
+ */
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+ lxpr_node_t *lxpnp = buf;
+
+ vn_free(LXPTOV(lxpnp));
+}
+
+/*
+ * Compute the inode number of an lxproc node.
+ *
+ * The number is derived from the node type, the owning pid and, for
+ * entries under /proc/<pid>/fd, the file descriptor. A pid of 1 is
+ * first translated to the pid of the zone's init process.
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+	if (pid == 1)
+		pid = curproc->p_zone->zone_proc_initpid;
+
+	/* /proc/<pid> itself */
+	if (type == LXPR_PIDDIR)
+		return (pid + 1);
+
+	/* the /proc root */
+	if (type == LXPR_PROCDIR)
+		return (maxpid + 2);
+
+	/* /proc/<pid>/fd/<fd>: placed after the per-type slots */
+	if (type == LXPR_PID_FD_FD) {
+		return (maxpid + 2 +
+		    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+		    LXPR_NFILES + fd);
+	}
+
+	/* everything else: one slot per node type within the pid's range */
+	return (maxpid + 2 +
+	    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+	    type);
+}
+
+/*
+ * Return the inode number of a node's parent directory.
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+	/*
+	 * The root of the mount has the mounted-on directory as its
+	 * parent, so it simply reports its own inode number.
+	 */
+	if (lxpnp->lxpr_type == LXPR_PROCDIR)
+		return (lxpnp->lxpr_ino);
+
+	return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ *
+ * dp:   parent directory vnode; a hold is taken on it here.
+ * type: kind of node to create; selects vnode type and mode bits.
+ * p:    process the node refers to, or NULL for nodes not tied to a
+ *       process.
+ * fd:   file descriptor number, used only to compute the inode number.
+ *
+ * Returns the initialized node. Holds taken here on the parent and on
+ * lxpr_realvp (when set) are dropped by lxpr_freenode().
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_inactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ /* the zone's init process is always presented as pid 1 */
+ lxpnp->lxpr_pid = ((p->p_pid ==
+ curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff
+ */
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+
+ /*
+ * Zombie check. p_stat is officially protected by pidlock,
+ * but we can't grab pidlock here because we already hold
+ * p_lock. Luckily if we look at the process exit code
+ * we see that p_stat only transitions from SRUN to SZOMB
+ * while p_lock is held. Aside from this, the only other
+ * p_stat transition that we need to be aware about is
+ * SIDL to SRUN, but that's not a problem since lxpr_lock()
+ * ignores nodes in the SIDL state so we'll never get a node
+ * that isn't already in the SRUN state.
+ */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ /* Zombie check. see locking comment above */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ /* no per-process root set: fall back to the system rootdir */
+ lxpnp->lxpr_realvp =
+ up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ /* p_exec may be NULL, in which case the link has no target */
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ break;
+
+ case LXPR_PID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511; /* read-search owner, search others */
+ break;
+
+ case LXPR_NETDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Dispose of a node obtained from lxpr_getnode(): drop the holds it has
+ * on the underlying "real" vnode and on its parent, then hand the node
+ * back to the cache.
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+	vnode_t *realvp;
+	vnode_t *parentvp;
+
+	ASSERT(lxpnp != NULL);
+	ASSERT(LXPTOV(lxpnp) != NULL);
+
+	/* sever the link to the real vnode, if there is one */
+	realvp = lxpnp->lxpr_realvp;
+	if (realvp != NULL)
+		VN_RELE(realvp);
+
+	/* sever the link to the parent directory, if there is one */
+	parentvp = lxpnp->lxpr_parent;
+	if (parentvp != NULL)
+		VN_RELE(parentvp);
+
+	/* give the node itself back to the cache */
+	kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
new file mode 100644
index 0000000000..a50695ff32
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
@@ -0,0 +1,374 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_prvfsops.c: vfs operations for lxprocfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+
+#include "lx_proc.h"
+
+/* Module level parameters */
+static int lxprocfstype;
+static dev_t lxprocdev;
+static kmutex_t lxpr_mount_lock;
+
+int nproc_highbit; /* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_proc",
+ lxpr_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "generic linux procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+/*
+ * Loadable module entry point: install the module described by
+ * modlinkage. Returns the result of mod_install().
+ */
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+/*
+ * Loadable module entry point: report module information through
+ * mod_info().
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Loadable module entry point: try to remove the module and, only if
+ * that succeeds, tear down everything lxpr_init() set up. Returns the
+ * result of mod_remove().
+ */
+int
+_fini(void)
+{
+	int rc = mod_remove(&modlinkage);
+
+	/* removal refused: leave all state in place */
+	if (rc != 0)
+		return (rc);
+
+	/* the node cache goes first */
+	lxpr_fininodecache();
+
+	/* then the vfsops and vnodeops vectors */
+	(void) vfs_freevfsops_by_type(lxprocfstype);
+	vn_freevnodeops(lxpr_vnodeops);
+
+	mutex_destroy(&lxpr_mount_lock);
+
+	return (rc);
+}
+
+/*
+ * File system initialization routine, referenced from the vfsdef_t above.
+ *
+ * Registers the vfs and vnode operation vectors for 'fstype', obtains a
+ * device number for the file system and creates the node cache.
+ *
+ * fstype: file system type index assigned to lx_proc.
+ * name:   file system name, used to label the vnode ops vector.
+ *
+ * Returns 0 on success or the error from vfs_setfsops()/vn_make_ops().
+ * On failure everything set up so far, including lxpr_mount_lock, is
+ * undone.
+ */
+static int
+lxpr_init(int fstype, char *name)
+{
+	static const fs_operation_def_t lxpr_vfsops_template[] = {
+		VFSNAME_MOUNT,		{ .vfs_mount = lxpr_mount },
+		VFSNAME_UNMOUNT,	{ .vfs_unmount = lxpr_unmount },
+		VFSNAME_ROOT,		{ .vfs_root = lxpr_root },
+		VFSNAME_STATVFS,	{ .vfs_statvfs = lxpr_statvfs },
+		NULL,			NULL
+	};
+	extern const fs_operation_def_t lxpr_vnodeops_template[];
+	int error;
+	major_t dev;
+
+	nproc_highbit = highbit(v.v_proc);
+	lxprocfstype = fstype;
+	ASSERT(lxprocfstype != 0);
+
+	mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/*
+	 * Associate VFS ops vector with this fstype.
+	 */
+	error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+	if (error != 0) {
+		cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+		/* this function is the only place left to destroy the lock */
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Set up vnode ops vector too.
+	 */
+	error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+	if (error != 0) {
+		(void) vfs_freevfsops_by_type(fstype);
+		cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Assign a unique "device" number (used by stat(2)).
+	 * Failure is not fatal; fall back to major number 0.
+	 */
+	if ((dev = getudev()) == (major_t)-1) {
+		cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+		dev = 0;
+	}
+
+	/*
+	 * Make the pseudo device
+	 */
+	lxprocdev = makedevice(dev, 0);
+
+	/*
+	 * Initialise cache for lxpr_nodes
+	 */
+	lxpr_initnodecache();
+
+	return (0);
+}
+
+/*
+ * VFS_MOUNT entry point for lxproc.
+ *
+ * vfsp: vfs being mounted.
+ * mvp:  vnode of the directory being mounted over.
+ * uap:  mount arguments; only the MS_OVERLAY flag is examined.
+ * cr:   credentials of the caller.
+ *
+ * Returns 0 on success, EPERM if the caller may not mount, ENOTDIR if the
+ * mount point is not a directory, EBUSY if a global-zone caller targets a
+ * path belonging to another zone or the mount point is in use (and
+ * MS_OVERLAY was not given), or the error from ldi_ident_from_mod().
+ */
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt;
+	zone_t *zone = curproc->p_zone;
+	ldi_ident_t li;
+	int err;
+
+	/*
+	 * must be root to mount
+	 */
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	/*
+	 * mount point must be a directory
+	 */
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/*
+	 * From the global zone, only allow mounts on paths that belong
+	 * to the global zone itself.
+	 */
+	if (zone == global_zone) {
+		zone_t *mntzone;
+
+		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+		/* only the pointer value is compared after the release */
+		zone_rele(mntzone);
+		if (zone != mntzone)
+			return (EBUSY);
+	}
+
+	/*
+	 * Having the resource be anything but "lxproc" doesn't make sense
+	 */
+	vfs_setresource(vfsp, "lxproc", 0);
+
+	lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+	if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+		kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+		return (err);
+	}
+
+	lxpr_mnt->lxprm_li = li;
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * Ensure we don't allow overlaying mounts
+	 */
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		mutex_exit(&lxpr_mount_lock);
+		/* the ident was allocated above; don't leak it on failure */
+		ldi_ident_release(li);
+		kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+		return (EBUSY);
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/*
+	 * allocate the first vnode
+	 */
+	zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+	/* Arbitrarily set the parent vnode to the mounted over directory */
+	lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+	/* Correctly set the fs for the root node */
+	lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+	vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lxprocfstype;
+	vfsp->vfs_data = (caddr_t)lxpr_mnt;
+	vfsp->vfs_dev = lxprocdev;
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * VFS_UNMOUNT entry point for lxproc.
+ *
+ * vfsp: vfs being unmounted.
+ * flag: unmount flags; MS_FORCE is rejected.
+ * cr:   credentials of the caller.
+ *
+ * Returns 0 on success, EPERM if the caller may not unmount, ENOTSUP for
+ * a forced unmount, or EBUSY if the root vnode is still referenced by
+ * anything other than the mount itself.
+ */
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	vnode_t *vp;
+	int count;
+
+	ASSERT(lxpr_mnt != NULL);
+	vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * must be root to unmount
+	 */
+	if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EPERM);
+	}
+
+	/*
+	 * forced unmount is not supported by this file system
+	 */
+	if (flag & MS_FORCE) {
+		mutex_exit(&lxpr_mount_lock);
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Ensure that no vnodes are in use on this mount point.
+	 * Every node holds its parent, so a root count above one means
+	 * something below the root is still alive.
+	 */
+	mutex_enter(&vp->v_lock);
+	count = vp->v_count;
+	mutex_exit(&vp->v_lock);
+	if (count > 1) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EBUSY);
+	}
+
+	/*
+	 * purge the dnlc cache for vnode entries
+	 * associated with this file system
+	 */
+	(void) dnlc_purge_vfsp(vfsp, 0);
+
+	/*
+	 * free up the lxprnode, then the per-mount state: the zone hold
+	 * and the LDI ident taken in lxpr_mount()
+	 */
+	lxpr_freenode(lxpr_mnt->lxprm_node);
+	zone_rele(lxpr_mnt->lxprm_zone);
+	ldi_ident_release(lxpr_mnt->lxprm_li);
+	kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * VFS_ROOT entry point: hand back a held reference to the root vnode of
+ * this lxproc mount. Always succeeds.
+ */
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	lxpr_mnt_t *mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	vnode_t *rootvp = LXPTOV(mnt->lxprm_node);
+
+	/* the caller gets its own hold on the root */
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * VFS_STATVFS entry point: fill in file system statistics for lxproc.
+ * Block counts are reported as zero; file counts are derived from the
+ * process table limits. Always succeeds.
+ */
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	extern uint_t nproc;
+	dev32_t fsdev32;
+	int nfree;
+
+	/* process slots not currently in use */
+	nfree = v.v_proc - nproc;
+
+	bzero((caddr_t)sp, sizeof (*sp));
+
+	/* sizes: there are no real blocks behind this file system */
+	sp->f_bsize = DEV_BSIZE;
+	sp->f_frsize = DEV_BSIZE;
+	sp->f_blocks = (fsblkcnt64_t)0;
+	sp->f_bfree = (fsblkcnt64_t)0;
+	sp->f_bavail = (fsblkcnt64_t)0;
+
+	/* "files" are processes */
+	sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+	sp->f_ffree = (fsfilcnt64_t)nfree;
+	sp->f_favail = (fsfilcnt64_t)nfree;
+
+	/* identity */
+	(void) cmpldev(&fsdev32, vfsp->vfs_dev);
+	sp->f_fsid = fsdev32;
+	/* It is guaranteed that vsw_name will fit in f_basetype */
+	(void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	sp->f_namemax = 64;	/* quite arbitrary */
+
+	/*
+	 * f_fstr carries two NUL-terminated strings back to back.
+	 * We know f_fstr is 32 chars
+	 */
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+	(void) strcpy(sp->f_fstr, "/proc");
+	(void) strcpy(&sp->f_fstr[6], "/proc");
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
new file mode 100644
index 0000000000..4e7b0844a4
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -0,0 +1,3030 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * lx_prvnops.c: Vnode operations for the lx /proc file system
+ *
+ * Assumptions and Gotchas:
+ *
+ * In order to preserve Solaris' security policy, this file system's
+ * functionality does not override Solaris' security policies even if
+ * that means breaking Linux compatibility.
+ *
+ * Linux has no concept of lwps so we only implement procs here as in the
+ * old /proc interface.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+
+/* Dependent on the Solaris procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lx_proc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxpr_init() in lx_prvfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversion
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+/*
+ * The lx /proc vnode operations vector
+ *
+ * This template is passed to vn_make_ops() by lxpr_init() to build
+ * lxpr_vnodeops. The table ends with a NULL, NULL pair.
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxpr_open },
+ VOPNAME_CLOSE, { .vop_close = lxpr_close },
+ VOPNAME_READ, { .vop_read = lxpr_read },
+ VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxpr_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
+ VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
+ /*
+ * fsync and seek share the argument-less lxpr_sync() handler.
+ * NOTE(review): presumably a no-op returning success -- confirm
+ * against its definition later in this file.
+ */
+ VOPNAME_FSYNC, { .error = lxpr_sync },
+ VOPNAME_SEEK, { .error = lxpr_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
+ VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
+ NULL, NULL
+};
+
+
+/*
+ * file contents of an lx /proc directory.
+ */
+static lxpr_dirent_t lx_procdir[] = {
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" }
+};
+
+#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
+
+/*
+ * Contents of an lx /proc/<pid> directory.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * contents of lx /proc/net directory
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ *
+ * vpp:  in/out vnode being opened. If the node is backed by a real vnode
+ *       and that vnode opens successfully, *vpp is replaced by the real
+ *       vnode and the reference on the lxproc vnode is dropped.
+ * flag: open flags; any request for write access is refused.
+ * cr:   credentials of the caller.
+ * ct:   caller context, passed through to the underlying VOP_OPEN().
+ *
+ * Returns 0 on success, EROFS for write opens, EACCES for an fd link
+ * whose target is not a regular file, or the error from the underlying
+ * open or from the LDI calls made for /proc/kmsg.
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+	vnode_t *vp = *vpp;
+	lxpr_node_t *lxpnp = VTOLXP(vp);
+	lxpr_nodetype_t type = lxpnp->lxpr_type;
+	vnode_t *rvp;
+	int error = 0;
+
+	/*
+	 * We only allow reading in this file system
+	 */
+	if (flag & FWRITE)
+		return (EROFS);
+
+	/*
+	 * If we are opening an underlying file through an fd link, only
+	 * allow regular files; reject the open for anything else.
+	 * Just do it if we are opening the current or root directory.
+	 */
+	if (lxpnp->lxpr_realvp != NULL) {
+		rvp = lxpnp->lxpr_realvp;
+
+		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
+			error = EACCES;
+		else {
+			/*
+			 * Need to hold rvp since VOP_OPEN() may release it.
+			 */
+			VN_HOLD(rvp);
+			error = VOP_OPEN(&rvp, flag, cr, ct);
+			if (error) {
+				VN_RELE(rvp);
+			} else {
+				*vpp = rvp;
+				VN_RELE(vp);
+			}
+		}
+	}
+
+	if (type == LXPR_KMSG) {
+		ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+		struct strioctl str;
+		int rv;
+
+		/*
+		 * Open the log device using the layered driver
+		 * interface.
+		 */
+		if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
+		    &lxpnp->lxpr_cons_ldih, li)) != 0)
+			return (error);
+
+		/*
+		 * Send an ioctl to the underlying log device, letting it
+		 * know we're interested in getting console messages.
+		 */
+		str.ic_cmd = I_CONSLOG;
+		str.ic_timout = 0;
+		str.ic_len = 0;
+		str.ic_dp = NULL;
+		if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
+		    (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) {
+			/*
+			 * The open is failing, so lxpr_close() will never
+			 * run for this node; close the handle here or it
+			 * is leaked.
+			 */
+			(void) ldi_close(lxpnp->lxpr_cons_ldih, FREAD, cr);
+			return (error);
+		}
+	}
+
+	return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ *
+ * Only /proc/kmsg has anything to undo: the LDI handle opened by
+ * lxpr_open(). Returns 0, or the error from ldi_close() for kmsg.
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxpr_node_t *lxpnp = VTOLXP(vp);
+	lxpr_nodetype_t type = lxpnp->lxpr_type;
+
+	/*
+	 * Nodes backed by a real vnode are closed through that vnode, so
+	 * they should never show up here.
+	 */
+	ASSERT(type != LXPR_PID_FD_FD &&
+	    type != LXPR_PID_CURDIR &&
+	    type != LXPR_PID_ROOTDIR &&
+	    type != LXPR_PID_EXE);
+
+	if (type != LXPR_KMSG)
+		return (0);
+
+	return (ldi_close(lxpnp->lxpr_cons_ldih, 0, cr));
+}
+
+/*
+ * Array of read functions, one entry per lx /proc file type (LXPR_NFILES
+ * entries). The trailing comment on each entry names the file it serves.
+ * NOTE(review): presumably indexed by lxpr_nodetype_t, like the lookup
+ * table below, so the order must match that enum -- confirm.
+ */
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_empty, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_empty, /* /proc/devices */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_empty, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_kmsg, /* /proc/kmsg */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+};
+
+/*
+ * Array of lookup functions, indexed by lx /proc file type.
+ *
+ * Only the four directory nodes (/proc, /proc/<pid>, /proc/<pid>/fd and
+ * /proc/net) have a real lookup routine; every other slot holds
+ * lxpr_lookup_not_a_dir.  The per-entry comments name the path each slot
+ * serves and follow the same order as the read and readdir tables.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+};
+
+/*
+ * Array of readdir functions, indexed by lx /proc file type.
+ *
+ * Only the four directory nodes (/proc, /proc/<pid>, /proc/<pid>/fd and
+ * /proc/net) have a real readdir routine; every other slot holds
+ * lxpr_readdir_not_a_dir.  The per-entry comments name the path each slot
+ * serves and follow the same order as the read and lookup tables.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * Every readable file in the lx procfs is human-readable text rather than a
+ * binary structure, so a single set of read routines serves both 32-bit and
+ * 64-bit readers; any remaining difference is unlikely to justify having
+ * separate routines per data model.
+ *
+ * The node's type selects a generator from lxpr_read_function[]; the
+ * generator fills a uiobuf wrapped around the caller's uio, and the result
+ * of flushing that buffer is returned to the caller.
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxpr_node_t *node = VTOLXP(vp);
+	lxpr_nodetype_t type = node->lxpr_type;
+	lxpr_uiobuf_t *ub;
+	void (*reader)();
+	int rc;
+
+	ub = lxpr_uiobuf_new(uiop);
+
+	ASSERT(type < LXPR_NFILES);
+
+	/* Let the per-type generator produce the file contents. */
+	reader = lxpr_read_function[type];
+	reader(node, ub);
+
+	/* Flushing reports any error the generator recorded. */
+	rc = lxpr_uiobuf_flush(ub);
+	lxpr_uiobuf_free(ub);
+
+	return (rc);
+}
+
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory (lxpr_read_isdir: sets EISDIR)
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet) (lxpr_read_invalid: sets EINVAL)
+ * - empty file (lxpr_read_empty: writes nothing)
+ * - wait to be able to read a file that will never have anything to read
+ * (NOTE(review): none of the three routines named above implements
+ * this case)
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/*
+ * lxpr_read_invalid(): reader for nodes that cannot be read through this
+ * path; records EINVAL in the uiobuf and produces no data.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/*
+ * lxpr_read_empty(): reader for files that exist but have no contents;
+ * writes nothing to the uiobuf and sets no error.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with linux:
+ *
+ * The linux cmdline returns argv with the correct separation
+ * using \0 between the arguments, we cannot do that without
+ * copying the real argv from the correct process context.
+ * This is too difficult to attempt so we pretend that the
+ * entire cmdline is just argv[0]. This is good enough for
+ * ps to display correctly, but might cause some other things
+ * not to work correctly.
+ *
+ * The saved argument string lives inside the target process's user
+ * structure, so it is copied into a local buffer while the process is still
+ * locked.  Only the private copy is handed to the uiobuf after the process
+ * has been released; the process may exit at any point once it is unlocked.
+ *
+ * Sets EINVAL in the uiobuf if the process cannot be locked.  Writes nothing
+ * if the process has no argv recorded.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	char psargs[sizeof (((user_t *)NULL)->u_psargs)];
+	int len;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	if (PTOU(p)->u_argv == 0) {
+		lxpr_unlock(p);
+		return;
+	}
+
+	/* Snapshot the string before giving up our hold on the process. */
+	(void) strncpy(psargs, PTOU(p)->u_psargs, sizeof (psargs));
+	psargs[sizeof (psargs) - 1] = '\0';
+	lxpr_unlock(p);
+
+	/* Include the terminating NUL, as Linux does after the last arg. */
+	len = strlen(psargs);
+	lxpr_uiobuf_write(uiobuf, psargs, len + 1);
+}
+
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ *
+ * Works in two phases so that nothing is printed while the address-space
+ * lock is held:
+ * 1. With the AS lock held as reader, walk every segment and record its
+ * range, protections, offset and (for segvn segments backed by a regular
+ * file) a held vnode in a private linked list.
+ * 2. After all locks are dropped, walk the list, resolve each vnode to
+ * device/inode/path, print one line per segment and free the record.
+ *
+ * Sets EINVAL in the uiobuf if the process cannot be locked; produces no
+ * output for a process whose address space is kas.
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ /* One record per segment, captured in phase 1 and consumed in phase 2 */
+ struct print_data {
+ caddr_t saddr;
+ caddr_t eaddr;
+ int type;
+ char prot[5];
+ uint32_t offset;
+ vnode_t *vp;
+ struct print_data *next;
+ } *print_head = NULL;
+ /* Points at the link to fill next, so appends keep segment order */
+ struct print_data **print_tail = &print_head;
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ /*
+ * p_lock is dropped before the address-space lock is taken and
+ * re-entered before lxpr_unlock() below.
+ */
+ mutex_exit(&p->p_lock);
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = seg->s_base;
+ pbuf->eaddr = seg->s_base+seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ /*
+ * Only segvn segments backed by a regular file get a vnode;
+ * the hold taken here is released in the print loop.
+ */
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp);
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ /* The segment offset is narrowed to 32 bits here */
+ pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ /* Scratch buffer for the path of each file-backed segment */
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ int inode = 0;
+
+ *buf = '\0';
+ if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
+ NULL) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ /* On failure buf stays empty and no path is printed */
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ /*
+ * NOTE(review): saddr/eaddr are caddr_t but are formatted with
+ * %08x; presumably fine for 32-bit address ranges only --
+ * confirm if 64-bit processes are ever reported here.
+ */
+ if (*buf != '\0') {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %d %s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %d\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode);
+ }
+
+ pbuf_next = pbuf->next;
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ *
+ * Emits one line of seven page counts.  Only the first (address-space size),
+ * second and fourth (both the resident set size) carry real data; the rest
+ * are always zero.
+ *
+ * Zombies, system processes and anything whose address space is kas have no
+ * user address space worth measuring, so all-zero values are reported for
+ * them.  This is the same test lxpr_read_pid_status() applies before it
+ * touches the address space.
+ *
+ * Sets EINVAL in the uiobuf if the process cannot be locked.
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	struct as *as;
+	size_t vsize = 0;
+	size_t rss = 0;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	as = p->p_as;
+
+	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+		/*
+		 * p_lock is dropped while the address-space lock is held
+		 * and re-entered before the process is unlocked.
+		 */
+		mutex_exit(&p->p_lock);
+
+		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+		vsize = btopr(as->a_resvsize);
+		rss = rm_asrss(as);
+		AS_LOCK_EXIT(as, &as->a_lock);
+
+		mutex_enter(&p->p_lock);
+	}
+
+	lxpr_unlock(p);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%lu %lu %lu %lu %lu %lu %lu\n",
+	    vsize, rss, 0l, rss, 0l, 0l, 0l);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ *
+ * Produces the multi-line "Key:\tvalue" report in this order:
+ * - name, state, pids (with the zone's init process shown as pid 1),
+ * uids/gids, FDSize and supplementary groups;
+ * - the Vm* memory lines, only for live user processes;
+ * - pending/ignored/caught signal masks translated to Linux signal numbers,
+ * followed by all-zero capability sets.
+ *
+ * Sets EINVAL in the uiobuf if the process cannot be locked.
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ user_t *up;
+ cred_t *cr;
+ const gid_t *groups;
+ int ngroups;
+ struct as *as;
+ char *status;
+ pid_t pid, ppid;
+ size_t vsize;
+ size_t rss;
+ k_sigset_t current, ignore, handle;
+ int i, lx_sig;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's init
+ * process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ ppid = 0; /* parent pid for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+ }
+
+ /*
+ * Derive the reported state from one representative thread; a
+ * non-NULL thread from prchoose() is released with thread_unlock().
+ */
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ status = "S (sleeping)";
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ status = "R (running)";
+ break;
+ case TS_ZOMB:
+ status = "Z (zombie)";
+ break;
+ case TS_STOPPED:
+ status = "T (stopped)";
+ break;
+ default:
+ status = "! (unknown)";
+ break;
+ }
+ thread_unlock(t);
+ } else {
+ /*
+ * there is a hole in the exit code, where a proc can have
+ * no threads but it is yet to be flagged SZOMB. We will
+ * assume we are about to become a zombie
+ */
+ status = "Z (zombie)";
+ }
+
+ up = PTOU(p);
+ /* Take our own hold on the credentials; dropped by crfree() below */
+ mutex_enter(&p->p_crlock);
+ crhold(cr = p->p_cred);
+ mutex_exit(&p->p_crlock);
+
+ /*
+ * The fourth Uid/Gid column repeats the effective id, and TracerPid
+ * is always reported as 0.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
+ "Uid:\t%u\t%u\t%u\t%u\n"
+ "Gid:\t%u\t%u\t%u\t%u\n"
+ "FDSize:\t%d\n"
+ "Groups:\t",
+ up->u_comm,
+ status,
+ pid, /* thread group id - same as pid until we map lwps to procs */
+ pid,
+ ppid,
+ 0,
+ crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+ crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+ p->p_fno_ctl);
+
+ /* Supplementary groups continue the "Groups:" line, space separated */
+ ngroups = crgetngroups(cr);
+ groups = crgetgroups(cr);
+ for (i = 0; i < ngroups; i++) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%u ",
+ groups[i]);
+ }
+ crfree(cr);
+
+ /*
+ * Memory figures are only gathered for live user processes. p_lock
+ * is dropped while the address-space lock is held and re-entered
+ * afterwards.
+ */
+ as = p->p_as;
+ if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * NOTE(review): VmExe is fed ptok(rss), the same value as
+ * VmRSS; VmLck, VmData and VmLib are always 0.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB",
+ btok(vsize),
+ 0l,
+ ptok(rss),
+ 0l,
+ btok(p->p_stksize),
+ ptok(rss),
+ 0l);
+ }
+
+ sigemptyset(&current);
+ sigemptyset(&ignore);
+ sigemptyset(&handle);
+
+ /*
+ * Build the Linux-numbered signal sets: each native signal i is
+ * translated through stol_signo[], and signals without a valid
+ * translation are skipped.
+ */
+ for (i = 1; i < NSIG; i++) {
+ lx_sig = stol_signo[i];
+
+ if ((lx_sig > 0) && (lx_sig < LX_NSIG)) {
+ if (sigismember(&p->p_sig, i))
+ sigaddset(&current, lx_sig);
+
+ /* u_signal[] is indexed from 0, signals from 1 */
+ if (up->u_signal[i - 1] == SIG_IGN)
+ sigaddset(&ignore, lx_sig);
+ else if (up->u_signal[i - 1] != SIG_DFL)
+ sigaddset(&handle, lx_sig);
+ }
+ }
+
+ /* Each mask is printed as two 32-bit words, high word first */
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "SigPnd:\t%08x%08x\n"
+ "SigBlk:\t%08x%08x\n"
+ "SigIgn:\t%08x%08x\n"
+ "SigCgt:\t%08x%08x\n"
+ "CapInh:\t%016x\n"
+ "CapPrm:\t%016x\n"
+ "CapEff:\t%016x\n",
+ current.__sigbits[1], current.__sigbits[0],
+ 0, 0, /* signals blocked on per thread basis */
+ ignore.__sigbits[1], ignore.__sigbits[0],
+ handle.__sigbits[1], handle.__sigbits[0],
+ /* Can't do anything with linux capabilities */
+ 0,
+ 0,
+ 0);
+
+ lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ *
+ * Emits the single-line, space-separated process record. Fields with no
+ * value available here are printed as constant zeros; the inline comments
+ * on the argument list name them.
+ *
+ * The zone's init process is reported with Linux's conventional values
+ * (pid 1, ppid 0, and so on), and a parent that is the zone's init is shown
+ * as pid 1.
+ *
+ * Sets EINVAL in the uiobuf if the process cannot be locked.
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri;
+ caddr_t wchan;
+ processorid_t cpu;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = (gid_t)-1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ /*
+ * Session data is read under p_splock and then the session's
+ * own s_lock, released in the reverse order.
+ */
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ /* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
+ psdev = p->p_sessp->s_dev;
+ /* Fall back to the process credential if the session has none */
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred);
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ /*
+ * State, priority, nice value, wait channel and CPU all come from
+ * one representative thread, released with thread_unlock().
+ */
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S'; break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R'; break;
+ case TS_ZOMB:
+ stat = 'Z'; break;
+ case TS_STOPPED:
+ stat = 'T'; break;
+ default:
+ stat = '!'; break;
+ }
+
+ /* A failing CL_DONICE() leaves nice reported as 0 */
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = v.v_maxsyspri - t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_seqid;
+ thread_unlock(t);
+ } else {
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ /*
+ * p_lock is dropped while the address-space lock is held and
+ * re-entered afterwards.
+ * NOTE(review): unlike lxpr_read_pid_status(), there is no
+ * SZOMB/SSYS/kas check before the address space is examined --
+ * confirm that is intended.
+ */
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * NOTE(review): several arguments (psdev, psgid, wchan, the time
+ * fields) are passed to conversions whose width or signedness may
+ * not match their declared types -- verify against the definitions.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "%d (%s) %c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld "
+ "%d %d "
+ "0 "
+ "%ld %lu "
+ "%lu %ld %llu "
+ "%lu %lu %u "
+ "%lu %lu "
+ "%lu %lu %lu %lu "
+ "%lu "
+ "%lu %lu "
+ "%d "
+ "%d"
+ "\n",
+ pid,
+ PTOU(p)->u_comm,
+ stat,
+ ppid, pgpid,
+ spid, psdev, psgid,
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+ pri, nice,
+ 0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */
+ vsize, rss, p->p_vmem_ctl,
+ 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+ 0l, 0l, /* kstkesp, kstkeip */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+ wchan,
+ 0l, 0l, /* nswap, cnswap */
+ 0, /* exit_signal */
+ cpu);
+
+ lxpr_unlock(p);
+}
+
+/*
+ * lxpr_read_net_arp(): placeholder reader for /proc/net/arp; writes nothing
+ * to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_dev(): reader for /proc/net/dev. Emits only the two header
+ * lines (the Receive/Transmit group titles and the per-column names); no
+ * per-interface rows follow.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ /* First header line: column group titles */
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ /* Second header line: individual column names */
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ /*
+ * XXX: data about each interface should go here, but we'll wait to
+ * see if anybody wants to use it.
+ */
+}
+
+/*
+ * lxpr_read_net_dev_mcast(): placeholder reader for /proc/net/dev_mcast;
+ * writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_igmp(): placeholder reader for /proc/net/igmp; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_ip_mr_cache(): placeholder reader for
+ * /proc/net/ip_mr_cache; writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_ip_mr_vif(): placeholder reader for /proc/net/ip_mr_vif;
+ * writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_mcfilter(): placeholder reader for /proc/net/mcfilter;
+ * writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_netstat(): placeholder reader for /proc/net/netstat; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_raw(): placeholder reader for /proc/net/raw; writes nothing
+ * to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_route(): placeholder reader for /proc/net/route; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_rpc(): placeholder reader for /proc/net/rpc; writes nothing
+ * to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_rt_cache(): placeholder reader for /proc/net/rt_cache;
+ * writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_sockstat(): placeholder reader for /proc/net/sockstat;
+ * writes nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_snmp(): placeholder reader for /proc/net/snmp; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_stat(): placeholder reader for /proc/net/stat; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_tcp(): placeholder reader for /proc/net/tcp; writes nothing
+ * to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_udp(): placeholder reader for /proc/net/udp; writes nothing
+ * to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_unix(): placeholder reader for /proc/net/unix; writes
+ * nothing to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue.  We
+ * translate this into the reception of console messages for this lx zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced.
+ *
+ * The message text is taken from the second block of the received message
+ * (b_cont) and is prefixed with a fixed priority tag.  A message that
+ * arrives without a continuation block carries no text, so it is discarded
+ * instead of being dereferenced.  If ldi_getmsg() fails, nothing is written.
+ */
+
+#define	LX_KMSG_PRI	"<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
+{
+	ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
+	mblk_t *mp;
+
+	if (ldi_getmsg(lh, &mp, NULL) != 0 || mp == NULL)
+		return;
+
+	if (mp->b_cont != NULL) {
+		/*
+		 * lx procfs doesn't like successive reads to the same file
+		 * descriptor unless we do an explicit rewind each time.
+		 */
+		lxpr_uiobuf_seek(uiobuf, 0);
+
+		lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+		    mp->b_cont->b_rptr);
+	}
+
+	/* The message is ours once ldi_getmsg() succeeds; always free it. */
+	freemsg(mp);
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file.
+ *
+ * Just enough for uptime to work
+ *
+ * Output is one line: the 1, 5 and 15 minute load averages with two decimal
+ * places, then "<runnable>/<total>" thread counts, then a constant 0 in the
+ * final field.
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ulong_t avenrun1;
+ ulong_t avenrun5;
+ ulong_t avenrun15;
+ ulong_t avenrun1_cs;
+ ulong_t avenrun5_cs;
+ ulong_t avenrun15_cs;
+ int loadavg[3];
+ int *loadbuf;
+ cpupart_t *cp;
+
+ uint_t nrunnable = 0;
+ rctl_qty_t nlwps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+ /* cpu_lock is held while the CPU partition data is examined */
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Need to add up values over all CPU partitions. If pools are active,
+ * only report the values of the zone's partition, which by definition
+ * includes the current CPU.
+ */
+ if (pool_pset_enabled()) {
+ psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+ ASSERT(curproc->p_zone != &zone0);
+ cp = CPU->cpu_part;
+
+ nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+ (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+ loadbuf = &loadavg[0];
+
+ /*
+ * We'll report the total number of lwps in the zone for the
+ * "nproc" parameter of /proc/loadavg; good enough for lx.
+ */
+ nlwps = curproc->p_zone->zone_nlwps;
+ } else {
+ /* The partition list is circular; visit each entry once */
+ cp = cp_list_head;
+ do {
+ nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+ } while ((cp = cp->cp_next) != cp_list_head);
+
+ loadbuf = &avenrun[0];
+
+ /*
+ * This will report kernel threads as well as user lwps, but it
+ * should be good enough for lx consumers.
+ */
+ nlwps = nthread;
+ }
+
+ mutex_exit(&cpu_lock);
+
+ /*
+ * Split each fixed-point average into its integer part (value >>
+ * FSHIFT) and hundredths (fractional bits scaled by 100).
+ */
+ avenrun1 = loadbuf[0] >> FSHIFT;
+ avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun5 = loadbuf[1] >> FSHIFT;
+ avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun15 = loadbuf[2] >> FSHIFT;
+ avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+ /*
+ * NOTE(review): the *_cs values are ulong_t and nlwps is rctl_qty_t,
+ * yet both are formatted with %d -- verify the widths match.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+ avenrun1, avenrun1_cs,
+ avenrun5, avenrun5_cs,
+ avenrun15, avenrun15_cs,
+ nrunnable, nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ *
+ * Memory totals are derived from physmem and freemem, swap totals from
+ * k_anoninfo, each scaled by PAGESIZE.  The summary table at the top is in
+ * bytes and the labelled lines below it are in kB.  Shared, buffer, cache,
+ * active/inactive and high-memory figures are always reported as zero, and
+ * the "Low" lines repeat the overall memory totals.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	long mem_total, mem_free, mem_used;
+	long swap_total, swap_used, swap_free;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+	mem_total = physmem * PAGESIZE;
+	mem_free = freemem * PAGESIZE;
+	mem_used = mem_total - mem_free;
+
+	swap_total = k_anoninfo.ani_max * PAGESIZE;
+	swap_used = k_anoninfo.ani_phys_resv * PAGESIZE;
+	swap_free = swap_total - swap_used;
+
+	lxpr_uiobuf_printf(uiobuf,
+	    " total: used: free: shared: buffers: cached:\n"
+	    "Mem: %8lu %8lu %8lu %8u %8u %8u\n"
+	    "Swap: %8lu %8lu %8lu\n"
+	    "MemTotal: %8lu kB\n"
+	    "MemFree: %8lu kB\n"
+	    "MemShared: %8u kB\n"
+	    "Buffers: %8u kB\n"
+	    "Cached: %8u kB\n"
+	    "SwapCached:%8u kB\n"
+	    "Active: %8u kB\n"
+	    "Inactive: %8u kB\n"
+	    "HighTotal: %8u kB\n"
+	    "HighFree: %8u kB\n"
+	    "LowTotal: %8u kB\n"
+	    "LowFree: %8u kB\n"
+	    "SwapTotal: %8lu kB\n"
+	    "SwapFree: %8lu kB\n",
+	    /* Mem: total, used, free, shared, buffers, cached */
+	    mem_total, mem_used, mem_free, 0, 0, 0,
+	    /* Swap: total, used, free */
+	    swap_total, swap_used, swap_free,
+	    btok(mem_total),		/* MemTotal */
+	    btok(mem_free),		/* MemFree */
+	    0,				/* MemShared */
+	    0,				/* Buffers */
+	    0,				/* Cached */
+	    0,				/* SwapCached */
+	    0,				/* Active */
+	    0,				/* Inactive */
+	    0,				/* HighTotal */
+	    0,				/* HighFree */
+	    btok(mem_total),		/* LowTotal */
+	    btok(mem_free),		/* LowFree */
+	    btok(swap_total),		/* SwapTotal */
+	    btok(swap_free));		/* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts(): read the contents of the "mounts" file.
+ *
+ * Prints one "resource mountpoint fstype ro|rw 0 0" line per visible mount
+ * of the node's zone. The work is split in two so that lookupname() is
+ * never called with the vfs list lock held:
+ * 1. Under vfs_list_read_lock(), copy what is needed from each vfs (with
+ * holds on the mount point and resource strings) into a private list,
+ * skipping VFS_NOMNTTAB mounts.
+ * 2. After the lock is dropped, walk the private list, print each entry
+ * whose mount point still resolves to a filesystem root, then release
+ * the strings and free the record.
+ *
+ * For a non-global zone whose root is not itself a mount point, a synthetic
+ * "/ /" line describing the root vnode's filesystem is printed first.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ /* Private copy of one vfs entry, consumed after the lock is dropped */
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ struct print_data *next;
+ } *print_head = NULL;
+ /* Points at the link to fill next, so appends keep list order */
+ struct print_data **print_tail = &print_head;
+ struct print_data *printp;
+
+ vfs_list_read_lock();
+
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL ||
+ strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "/ / %s %s 0 0\n",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ /* A zone with an empty vfs list has nothing more to report */
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ /* Hold both strings; they are released in the print loop */
+ refstr_hold(vfsp->vfs_mntpt);
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ /* The global zone and other zones use different list links */
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ /* Entries whose mount point cannot be looked up are skipped */
+ if (error != 0)
+ goto nextp;
+
+ /* So are entries that do not resolve to a filesystem root */
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ /*
+ * Path-like resources are translated into the zone's view, or
+ * replaced by the mount point when not visible from the zone;
+ * a missing resource is shown as "-".
+ */
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) :
+ mntpt;
+ }
+ } else {
+ resource = "-";
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s %s %s %s 0 0\n",
+ resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ /* Release the string holds and free the record in every case */
+ printp_next = printp->next;
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ }
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices. But we need to fake up enough of the file to show that we
+ * have no partitions.
+ *
+ * The output is just the column header line followed by one blank line.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "major minor #blocks name rio rmerge rsect ruse "
+ "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file.
+ *
+ * Emits a single line naming the emulated system, the release string chosen
+ * from the zone's configured kernel version, the compiler this module was
+ * built with, the emulated version tag and a fixed placeholder build date.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	/* Compiler identity is fixed at build time. */
+#if defined(__GNUC__)
+	char *cc_name = "gcc";
+	int cc_major = __GNUC__;
+	int cc_minor = __GNUC_MINOR__;
+	int cc_patch = __GNUC_PATCHLEVEL__;
+#else
+	char *cc_name = "Sun C";
+	int cc_major = __SUNPRO_C / 0x100;
+	int cc_minor = (__SUNPRO_C & 0xff) / 0x10;
+	int cc_patch = __SUNPRO_C & 0xf;
+#endif
+	char *release;
+
+	/* Anything up to and including 2.4 reports the 2.4 release string. */
+	release = (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4) ?
+	    LX_UNAME_RELEASE_2_4 : LX_UNAME_RELEASE_2_6;
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%s version %s (%s version %d.%d.%d) "
+	    "#%s SMP %s\n",
+	    LX_UNAME_SYSNAME, release,
+	    cc_name, cc_major, cc_minor, cc_patch,
+	    LX_UNAME_VERSION,
+	    "00:00:00 00/00/00");
+}
+
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ * Writes an aggregate "cpu" line, one "cpuN" line per CPU, and then the
+ * page, swap, intr, ctxt, btime and processes counters.  When the zone
+ * emulates a 2.6 (or later) kernel each cpu line carries three further
+ * columns (only the interrupt time is populated, the others are 0) and the
+ * procs_running/procs_blocked lines are appended.
+ *
+ * cpu_lock is held while the CPU list is walked.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t sys_cum = 0;
+	ulong_t user_cum = 0;
+	ulong_t irq_cum = 0;
+	uint_t cpu_nrunnable_cum = 0;
+	uint_t w_io_cum = 0;
+
+	ulong_t pgpgin_cum = 0;
+	ulong_t pgpgout_cum = 0;
+	ulong_t pgswapout_cum = 0;
+	ulong_t pgswapin_cum = 0;
+	ulong_t intr_cum = 0;
+	ulong_t pswitch_cum = 0;
+	ulong_t forks_cum = 0;
+	hrtime_t msnsecs[NCMSTATES];
+	int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp));
+	/* temporary variable since scalehrtime modifies data in place */
+	hrtime_t tmptime;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	/* Calculate cumulative stats */
+	cp = cpstart = CPU;
+	do {
+		int i;
+
+		/*
+		 * Don't count CPUs that aren't even in the system or aren't
+		 * up yet.  The body is guarded rather than skipped with
+		 * "continue": in a do/while, "continue" jumps straight to
+		 * the loop test, which would leave cp un-advanced.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+			pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+			pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+			pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+			if (lx_kern_version >= LX_KERN_2_6) {
+				cpu_nrunnable_cum +=
+				    cp->cpu_disp->disp_nrunnable;
+				w_io_cum += CPU_STATS(cp, sys.iowait);
+				for (i = 0; i < NCMSTATES; i++) {
+					tmptime = cp->cpu_intracct[i];
+					scalehrtime(&tmptime);
+					irq_cum += NSEC_TO_TICK(tmptime);
+				}
+			}
+
+			for (i = 0; i < PIL_MAX; i++)
+				intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+			pswitch_cum += CPU_STATS(cp, sys.pswitch);
+			forks_cum += CPU_STATS(cp, sys.sysfork);
+			forks_cum += CPU_STATS(cp, sys.sysvfork);
+		}
+
+		cp = pools_enabled ? cp->cpu_next_part : cp->cpu_next;
+	} while (cp != cpstart);
+
+	/*
+	 * The literal zero columns are passed as unsigned long so that
+	 * every argument matches its %lu conversion.
+	 */
+	if (lx_kern_version >= LX_KERN_2_6) {
+		lxpr_uiobuf_printf(uiobuf,
+		    "cpu %lu %lu %lu %lu %lu %lu %lu\n",
+		    user_cum, 0UL, sys_cum, idle_cum, 0UL, irq_cum, 0UL);
+	} else {
+		lxpr_uiobuf_printf(uiobuf,
+		    "cpu %lu %lu %lu %lu\n",
+		    user_cum, 0UL, sys_cum, idle_cum);
+	}
+
+	/*
+	 * Do per processor stats.  cp is back at cpstart here, so this walks
+	 * the same list a second time.
+	 */
+	do {
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			ulong_t idle_ticks;
+			ulong_t sys_ticks;
+			ulong_t user_ticks;
+			ulong_t irq_ticks = 0;
+			int i;
+
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			if (lx_kern_version >= LX_KERN_2_6) {
+				for (i = 0; i < NCMSTATES; i++) {
+					tmptime = cp->cpu_intracct[i];
+					scalehrtime(&tmptime);
+					irq_ticks += NSEC_TO_TICK(tmptime);
+				}
+
+				lxpr_uiobuf_printf(uiobuf,
+				    "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
+				    cp->cpu_id, user_ticks, 0UL, sys_ticks,
+				    idle_ticks, 0UL, irq_ticks, 0UL);
+			} else {
+				lxpr_uiobuf_printf(uiobuf,
+				    "cpu%d %lu %lu %lu %lu\n",
+				    cp->cpu_id,
+				    user_ticks, 0UL, sys_ticks, idle_ticks);
+			}
+		}
+
+		cp = pools_enabled ? cp->cpu_next_part : cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+
+	/* Narrower counters are widened explicitly to match %lu. */
+	if (lx_kern_version >= LX_KERN_2_6) {
+		lxpr_uiobuf_printf(uiobuf,
+		    "page %lu %lu\n"
+		    "swap %lu %lu\n"
+		    "intr %lu\n"
+		    "ctxt %lu\n"
+		    "btime %lu\n"
+		    "processes %lu\n"
+		    "procs_running %lu\n"
+		    "procs_blocked %lu\n",
+		    pgpgin_cum, pgpgout_cum,
+		    pgswapin_cum, pgswapout_cum,
+		    intr_cum,
+		    pswitch_cum,
+		    (ulong_t)boot_time,
+		    forks_cum,
+		    (ulong_t)cpu_nrunnable_cum,
+		    (ulong_t)w_io_cum);
+	} else {
+		lxpr_uiobuf_printf(uiobuf,
+		    "page %lu %lu\n"
+		    "swap %lu %lu\n"
+		    "intr %lu\n"
+		    "ctxt %lu\n"
+		    "btime %lu\n"
+		    "processes %lu\n",
+		    pgpgin_cum, pgpgout_cum,
+		    pgswapin_cum, pgswapout_cum,
+		    intr_cum,
+		    pswitch_cum,
+		    (ulong_t)boot_time,
+		    forks_cum);
+	}
+}
+
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
+ * Use fixed point arithmetic to get 2 decimal places
+ *
+ * Uptime is measured from the start of the zone's zsched process; idle
+ * time is the idle+wait tick count averaged over the CPUs that exist.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t cpu_count = 0;
+	ulong_t idle_s;
+	ulong_t idle_cs;
+	ulong_t up_s;
+	ulong_t up_cs;
+	hrtime_t birthtime;
+	hrtime_t centi_sec = 10000000; /* 10^7 */
+
+	ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+	/* Calculate cumulative stats */
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system or aren't
+		 * up yet.  A "continue" here would jump to the loop test
+		 * without advancing cp, so the body is guarded instead.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+			cpu_count += 1;
+		}
+
+		cp = pools_enabled ? cp->cpu_next_part : cp->cpu_next;
+	} while (cp != cpstart);
+	mutex_exit(&cpu_lock);
+
+	/* Getting the Zone zsched process startup time */
+	birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
+	up_cs = (gethrtime() - birthtime) / centi_sec;
+	up_s = up_cs / 100;
+	up_cs %= 100;
+
+	/*
+	 * ASSERT compiles away on non-DEBUG kernels, so the division is
+	 * guarded as well rather than risking a divide-by-zero.
+	 */
+	ASSERT(cpu_count > 0);
+	if (cpu_count > 0)
+		idle_cum /= cpu_count;
+	idle_s = idle_cum / hz;
+	idle_cs = idle_cum % hz;
+	idle_cs *= 100;
+	idle_cs /= hz;
+
+	/* every argument is a ulong_t, so every conversion is %lu */
+	lxpr_uiobuf_printf(uiobuf,
+	    "%lu.%02lu %lu.%02lu\n", up_s, up_cs, idle_s, idle_cs);
+}
+
+static const char *amd_x_edx[] = { /* AMD: names for bits of %edx from cpuid function 0x80000001 */
+ NULL, NULL, NULL, NULL, /* bits 0-3; NULL means no flag is printed */
+ NULL, NULL, NULL, NULL, /* bits 4-7 */
+ NULL, NULL, NULL, "syscall", /* bits 8-11 */
+ NULL, NULL, NULL, NULL, /* bits 12-15 */
+ NULL, NULL, NULL, "mp", /* bits 16-19 */
+ "nx", NULL, "mmxext", NULL, /* bits 20-23 */
+ NULL, NULL, NULL, NULL, /* bits 24-27 */
+ NULL, "lm", "3dnowext", "3dnow" /* bits 28-31 */
+};
+
+static const char *amd_x_ecx[] = { /* AMD: names for bits of %ecx from cpuid function 0x80000001 */
+ "lahf_lm", NULL, "svm", NULL, /* bits 0-3 */
+ "altmovcr8" /* bit 4; higher bits are never printed */
+};
+
+static const char *tm_x_edx[] = { /* X86_VENDOR_TM: names for bits 0-3 of the extended %edx value */
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ *
+ * Names for bits of %edx from cpuid function 0x80000001, used when
+ * x86_vendor is X86_VENDOR_Intel.  Entry i names bit i.
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL, /* bits 0-3 */
+ NULL, NULL, NULL, NULL, /* bits 4-7 */
+ NULL, NULL, NULL, "syscall", /* bits 8-11 */
+ NULL, NULL, NULL, NULL, /* bits 12-15 */
+ NULL, NULL, NULL, NULL, /* bits 16-19 */
+ "nx", NULL, NULL, NULL, /* bits 20-23 */
+ NULL, NULL, NULL, NULL, /* bits 24-27 */
+ NULL, "lm", NULL, NULL /* bits 28-31 */
+};
+
+static const char *intc_edx[] = { /* all vendors: names for bits of %edx from cpuid function 1 */
+ "fpu", "vme", "de", "pse", /* bits 0-3 */
+ "tsc", "msr", "pae", "mce", /* bits 4-7 */
+ "cx8", "apic", NULL, "sep", /* bits 8-11 */
+ "mtrr", "pge", "mca", "cmov", /* bits 12-15 */
+ "pat", "pse36", "pn", "clflush", /* bits 16-19 */
+ NULL, "dts", "acpi", "mmx", /* bits 20-23 */
+ "fxsr", "sse", "sse2", "ss", /* bits 24-27 */
+ "ht", "tm", "ia64", "pbe" /* bits 28-31 */
+};
+
+/*
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ *
+ * Names for bits of %ecx from cpuid function 1, printed for all vendors.
+ * Only bits 0-14 have entries; higher bits are never printed.
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor", /* bits 0-3 */
+ "ds_cpl", NULL, NULL, "est", /* bits 4-7 */
+ "tm2", NULL, "cid", NULL, /* bits 8-11 */
+ NULL, "cx16", "xtpr" /* bits 12-14 */
+};
+
+/*
+ * Print " <name>" for every bit set in "bits" that has a non-NULL entry in
+ * "names".  names[i] describes bit i; nnames is the number of entries.
+ */
+static void
+lxpr_cpuinfo_print_flags(lxpr_uiobuf_t *uiobuf, uint32_t bits,
+    const char **names, uint_t nnames)
+{
+	uint_t i;
+
+	for (i = 0; i < nnames; i++) {
+		/*
+		 * The mask is built from an unsigned 1 so that bit 31 does
+		 * not shift into the sign bit of an int.
+		 */
+		if ((bits & (1U << i)) != 0 && names[i] != NULL)
+			lxpr_uiobuf_printf(uiobuf, " %s", names[i]);
+	}
+}
+
+/*
+ * lxpr_read_cpuinfo(): read the contents of the "cpuinfo" file.
+ *
+ * Emits one Linux-style stanza per CPU on the list walked under cpu_lock:
+ * identification (vendor, family, model, stepping, clock), cache size,
+ * optional hyperthreading details, the fixed "bug" answers, and the
+ * feature flag names taken from the standard and extended cpuid words.
+ */
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	char brandstr[CPU_IDSTRLEN];
+	struct cpuid_regs cpr;
+	int maxeax;
+	int std_ecx, std_edx, ext_ecx, ext_edx;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * This returns the maximum eax value for standard cpuid
+		 * functions in eax.
+		 */
+		cpr.cp_eax = 0;
+		(void) cpuid_insn(cp, &cpr);
+		maxeax = cpr.cp_eax;
+
+		/*
+		 * Get standard x86 feature flags.
+		 */
+		cpr.cp_eax = 1;
+		(void) cpuid_insn(cp, &cpr);
+		std_ecx = cpr.cp_ecx;
+		std_edx = cpr.cp_edx;
+
+		/*
+		 * Now get extended feature flags.
+		 */
+		cpr.cp_eax = 0x80000001;
+		(void) cpuid_insn(cp, &cpr);
+		ext_ecx = cpr.cp_ecx;
+		ext_edx = cpr.cp_edx;
+
+		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "processor\t: %d\n"
+		    "vendor_id\t: %s\n"
+		    "cpu family\t: %d\n"
+		    "model\t\t: %d\n"
+		    "model name\t: %s\n"
+		    "stepping\t: %d\n"
+		    "cpu MHz\t\t: %u.%03u\n",
+		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+		    (uint32_t)(cpu_freq_hz / 1000000),
+		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+		if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
+			/*
+			 * 'siblings' is used for HT-style threads
+			 */
+			lxpr_uiobuf_printf(uiobuf,
+			    "physical id\t: %lu\n"
+			    "siblings\t: %u\n",
+			    pg_plat_hw_instance_id(cp, PGHW_CHIP),
+			    cpuid_get_ncpu_per_chip(cp));
+		}
+
+		/*
+		 * Since we're relatively picky about running on older hardware,
+		 * we can be somewhat cavalier about the answers to these ones.
+		 *
+		 * In fact, given the hardware we support, we just say:
+		 *
+		 *	fdiv_bug	: no	(if we're on a 64-bit kernel)
+		 *	hlt_bug		: no
+		 *	f00f_bug	: no
+		 *	coma_bug	: no
+		 *	wp		: yes	(write protect in supervsr mode)
+		 */
+		lxpr_uiobuf_printf(uiobuf,
+		    "fdiv_bug\t: %s\n"
+		    "hlt_bug \t: no\n"
+		    "f00f_bug\t: no\n"
+		    "coma_bug\t: no\n"
+		    "fpu\t\t: %s\n"
+		    "fpu_exception\t: %s\n"
+		    "cpuid level\t: %d\n"
+		    "flags\t\t:",
+#if defined(__i386)
+		    fpu_pentium_fdivbug ? "yes" : "no",
+#else
+		    "no",
+#endif /* __i386 */
+		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+		    maxeax);
+
+		/* standard %edx flags come first, for every vendor */
+		lxpr_cpuinfo_print_flags(uiobuf, std_edx, intc_edx,
+		    sizeof (intc_edx) / sizeof (intc_edx[0]));
+
+		/*
+		 * name additional features where appropriate
+		 */
+		switch (x86_vendor) {
+		case X86_VENDOR_Intel:
+			lxpr_cpuinfo_print_flags(uiobuf, ext_edx, intc_x_edx,
+			    sizeof (intc_x_edx) / sizeof (intc_x_edx[0]));
+			break;
+
+		case X86_VENDOR_AMD:
+			lxpr_cpuinfo_print_flags(uiobuf, ext_edx, amd_x_edx,
+			    sizeof (amd_x_edx) / sizeof (amd_x_edx[0]));
+			lxpr_cpuinfo_print_flags(uiobuf, ext_ecx, amd_x_ecx,
+			    sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]));
+			break;
+
+		case X86_VENDOR_TM:
+			lxpr_cpuinfo_print_flags(uiobuf, ext_edx, tm_x_edx,
+			    sizeof (tm_x_edx) / sizeof (tm_x_edx[0]));
+			break;
+		default:
+			break;
+		}
+
+		/* standard %ecx flags are printed last */
+		lxpr_cpuinfo_print_flags(uiobuf, std_ecx, intc_ecx,
+		    sizeof (intc_ecx) / sizeof (intc_ecx[0]));
+
+		lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * lxpr_read_fd(): read handler for a "fd/<n>" node.
+ *
+ * Reading through the node itself is not supported; the request is failed
+ * by recording EFAULT in the output buffer.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	const int read_err = EFAULT;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+
+	lxpr_uiobuf_seterr(uiobuf, read_err);
+}
+
+
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ *
+ * With ATTR_REAL set and an underlying vnode present, the attributes come
+ * from that vnode (after an access check against it); fd nodes are then
+ * patched so they keep looking like symlinks.  Otherwise the attributes
+ * are synthesized from the lx /proc node itself.
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+    caller_context_t *ct)
+{
+	extern uint_t nproc;
+	lxpr_node_t *node = VTOLXP(vp);
+	lxpr_nodetype_t type = node->lxpr_type;
+	vnode_t *rvp = node->lxpr_realvp;
+	int error;
+
+	if (rvp != NULL && (flags & ATTR_REAL)) {
+		/*
+		 * Withhold attribute information unless the caller may
+		 * access the underlying vnode, then fetch its attributes.
+		 */
+		error = VOP_ACCESS(rvp, 0, 0, cr, ct);
+		if (error == 0)
+			error = VOP_GETATTR(rvp, vap, flags, cr, ct);
+		if (error != 0)
+			return (error);
+
+		/*
+		 * A file in lx /proc/pid/fd/xx keeps this node's mode and
+		 * type so that it continues to look like a symlink.
+		 */
+		if (type == LXPR_PID_FD_FD) {
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+			vap->va_type = vp->v_type;
+			vap->va_mode = node->lxpr_mode;
+		}
+		return (0);
+	}
+
+	/* Start from defaults; some node types override them below */
+	bzero(vap, sizeof (*vap));
+	vap->va_type = vp->v_type;
+	vap->va_mode = node->lxpr_mode;
+	vap->va_uid = node->lxpr_uid;
+	vap->va_gid = node->lxpr_gid;
+	vap->va_nodeid = node->lxpr_ino;
+	vap->va_fsid = vp->v_vfsp->vfs_dev;
+	vap->va_nlink = 1;
+	vap->va_blksize = DEV_BSIZE;
+	vap->va_atime = vap->va_mtime = vap->va_ctime = node->lxpr_time;
+
+	if (type == LXPR_PROCDIR) {
+		/* one entry per process plus "." and ".." and fixed files */
+		vap->va_nlink = nproc + 2 + PROCDIRFILES;
+		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+	} else if (type == LXPR_PIDDIR) {
+		vap->va_nlink = PIDDIRFILES;
+		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+	} else if (type == LXPR_SELF) {
+		/* "self" is owned by whoever is looking at it */
+		vap->va_uid = crgetruid(curproc->p_cred);
+		vap->va_gid = crgetrgid(curproc->p_cred);
+	}
+
+	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+	return (0);
+}
+
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ *
+ * Write access is always refused.  Per-process nodes additionally require
+ * that the caller be the process itself or pass the privilege checks.
+ * Nodes backed by a real vnode defer to that vnode; everything else is
+ * checked against the node's own owner/group/other mode bits.
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+	lxpr_node_t *node = VTOLXP(vp);
+	proc_t *target;
+	int denied;
+	int shift;
+
+	/* lx /proc is a read only file system */
+	if (mode & VWRITE)
+		return (EROFS);
+
+	/*
+	 * If this is a restricted file, check access permissions.
+	 */
+	switch (node->lxpr_type) {
+	case LXPR_PIDDIR:
+		return (0);
+	case LXPR_PID_CURDIR:
+	case LXPR_PID_ENV:
+	case LXPR_PID_EXE:
+	case LXPR_PID_MAPS:
+	case LXPR_PID_MEM:
+	case LXPR_PID_ROOTDIR:
+	case LXPR_PID_FDDIR:
+	case LXPR_PID_FD_FD:
+		target = lxpr_lock(node->lxpr_pid);
+		if (target == NULL)
+			return (ENOENT);
+
+		/* decide while the process is locked, then release it */
+		denied = (target != curproc &&
+		    secpolicy_proc_access(cr) != 0 &&
+		    priv_proc_cred_perm(cr, target, NULL, mode) != 0);
+		lxpr_unlock(target);
+
+		if (denied)
+			return (EACCES);
+		break;
+	default:
+		break;
+	}
+
+	/* For nodes with a real vnode, use that vnode's accessibility. */
+	if (node->lxpr_realvp != NULL)
+		return (VOP_ACCESS(node->lxpr_realvp, mode, flags, cr, ct));
+
+	/* If user is root allow access regardless of permission bits */
+	if (secpolicy_proc_access(cr) == 0)
+		return (0);
+
+	/*
+	 * Only one of owner, group or public is consulted: the owner bits
+	 * if the caller owns the node, else the group bits if the caller is
+	 * a member of the group, else the public bits.
+	 */
+	if (crgetuid(cr) == node->lxpr_uid)
+		shift = 0;
+	else if (groupmember((uid_t)node->lxpr_gid, cr))
+		shift = 3;
+	else
+		shift = 6;
+
+	/* anything still set in mode was requested but not granted */
+	mode &= ~(node->lxpr_mode << shift);
+
+	return ((mode == 0) ? 0 : EACCES);
+}
+
+
+
+
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) /* presumably the lxpr_lookup_function[] entry for non-directory nodes */
+{
+ return (NULL); /* never finds anything; lxpr_lookup() reports a NULL result as ENOENT */
+}
+
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ *
+ * Resolves "..", "." and the empty name directly; every other name is
+ * handed to the lookup routine registered for the directory's node type.
+ * On success *vpp is returned held.
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	lxpr_node_t *dnode = VTOLXP(dp);
+	lxpr_nodetype_t type = dnode->lxpr_type;
+	vnode_t *found;
+	int error;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(type < LXPR_NFILES);
+
+	/*
+	 * we should never get here because the lookup
+	 * is done on the realvp for these nodes
+	 */
+	ASSERT(!(type == LXPR_PID_FD_FD ||
+	    type == LXPR_PID_CURDIR ||
+	    type == LXPR_PID_ROOTDIR));
+
+	/*
+	 * restrict lookup permission to owner or root
+	 */
+	error = lxpr_access(dp, VEXEC, 0, cr, ct);
+	if (error != 0)
+		return (error);
+
+	if (strcmp(comp, "..") == 0) {
+		/* the parent is what was asked for */
+		found = dnode->lxpr_parent;
+		VN_HOLD(found);
+	} else if ((dp->v_type == VDIR) &&
+	    (comp[0] == '\0' || strcmp(comp, ".") == 0)) {
+		/* a null component name is a synonym for this directory */
+		found = dp;
+		VN_HOLD(found);
+	} else {
+		/* per-type lookup; NULL means there is no such entry */
+		found = lxpr_lookup_function[type](dp, comp);
+	}
+
+	*vpp = found;
+	return ((found == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Sequentially search the directory table "dirtab" (dirtablen entries) for
+ * "comp".  On a match a node of the entry's type is obtained for process p
+ * under directory dp and its vnode returned; NULL means no entry matched.
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+    lxpr_dirent_t *dirtab, int dirtablen)
+{
+	int idx;
+
+	for (idx = 0; idx < dirtablen; idx++) {
+		lxpr_dirent_t *ent = &dirtab[idx];
+		lxpr_node_t *node;
+		vnode_t *vp;
+
+		if (strcmp(ent->d_name, comp) != 0)
+			continue;
+
+		node = lxpr_getnode(dp, ent->d_type, p, 0);
+		vp = LXPTOV(node);
+		ASSERT(vp != NULL);
+		return (vp);
+	}
+
+	return (NULL);
+}
+
+
+/*
+ * Look up "comp" in a /proc/<pid> directory.  The process is locked for the
+ * duration of the table search; NULL is returned if the process cannot be
+ * locked or the name is not in the piddir table.
+ */
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+	lxpr_node_t *dnode = VTOLXP(dp);
+	vnode_t *vp;
+	proc_t *p;
+
+	ASSERT(dnode->lxpr_type == LXPR_PIDDIR);
+
+	if ((p = lxpr_lock(dnode->lxpr_pid)) == NULL)
+		return (NULL);
+
+	vp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+	lxpr_unlock(p);
+
+	return (vp);
+}
+
+
+/*
+ * Lookup one of the process's open files.
+ *
+ * "comp" must be the decimal rendition of a file descriptor.  On success a
+ * new LXPR_PID_FD_FD node is returned whose lxpr_realvp refers to (and
+ * holds) the vnode behind that descriptor.  NULL is returned if the name is
+ * not a number, the process cannot be locked or has no files, or the
+ * descriptor is not open.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+	lxpr_node_t *dlxpnp = VTOLXP(dp);
+	lxpr_node_t *lxpnp;
+	vnode_t *vp = NULL;
+	proc_t *p;
+	file_t *fp;
+	uint_t fd;
+	int c;
+	uf_entry_t *ufp;
+	uf_info_t *fip;
+
+	ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	/*
+	 * convert the string rendition of the filename
+	 * to a file descriptor
+	 */
+	fd = 0;
+	while ((c = *comp++) != '\0') {
+		uint_t ofd;
+
+		if (c < '0' || c > '9')
+			return (NULL);
+
+		ofd = fd;
+		fd = 10 * fd + c - '0';
+		/* integer overflow */
+		if (fd / 10 != ofd)
+			return (NULL);
+	}
+
+	/*
+	 * get the proc to work with and lock it
+	 */
+	p = lxpr_lock(dlxpnp->lxpr_pid);
+	if (p == NULL)
+		return (NULL);
+
+	/*
+	 * If the process is a zombie or system process
+	 * it can't have any open files.
+	 */
+	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+		lxpr_unlock(p);
+		return (NULL);
+	}
+
+	/*
+	 * get us a fresh node/vnode
+	 */
+	lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+	/*
+	 * get open file info
+	 */
+	fip = (&(p)->p_user.u_finfo);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * got the fd data so now done with this proc
+	 */
+	lxpr_unlock(p);
+
+	if (fd < fip->fi_nfiles) {
+		UF_ENTER(ufp, fip, fd);
+		/*
+		 * The fd may have been closed between the readdir and the
+		 * lookup, in which case the slot is empty.  When it is
+		 * still open, take the hold on its vnode while the slot is
+		 * locked so that a concurrent close cannot release the
+		 * vnode before we have our own reference.
+		 */
+		if ((fp = ufp->uf_file) != NULL) {
+			vp = fp->f_vnode;
+			VN_HOLD(vp);
+		}
+		UF_EXIT(ufp);
+	}
+	mutex_exit(&fip->fi_lock);
+
+	if (vp == NULL) {
+		lxpr_freenode(lxpnp);
+		return (NULL);
+	}
+
+	/*
+	 * Fill in the lxpr_node so future references will be able to find
+	 * the underlying vnode.  The hold taken above is owned by realvp.
+	 */
+	lxpnp->lxpr_realvp = vp;
+
+	dp = LXPTOV(lxpnp);
+	ASSERT(dp != NULL);
+
+	return (dp);
+}
+
+
+/*
+ * Look up "comp" in the "net" directory; the entries are fixed and not tied
+ * to any process, so the netdir table is searched with a NULL proc.
+ */
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+	return (lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES));
+}
+
+
+/*
+ * Look up "comp" in the top-level lx /proc directory.
+ *
+ * A name that starts with a digit is treated as a pid and must consist of
+ * digits only; it resolves to a new LXPR_PIDDIR node if the process can be
+ * locked and the caller is allowed to see it.  Any other name is searched
+ * for in the table of fixed entries.  Returns NULL when nothing matches.
+ */
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+	/*
+	 * We know all the names of files & dirs in our
+	 * file system structure except those that are pid names.
+	 * These change as pids are created/deleted etc.
+	 * So just look for a number as the first char to see if we
+	 * are we doing pid lookups?
+	 *
+	 * Don't need to check for "self" as it is implemented as a symlink
+	 */
+	if (*comp >= '0' && *comp <= '9') {
+		uint_t upid = 0;
+		lxpr_node_t *lxpnp = NULL;
+		proc_t *p;
+		int c;
+
+		/*
+		 * Convert the name to a pid.  No fixed entry starts with a
+		 * digit, so a name with trailing non-digits ("12abc") cannot
+		 * exist and must not be mistaken for some unrelated pid.
+		 * The value is accumulated unsigned so that overflow is
+		 * well defined and can be detected.
+		 */
+		while ((c = *comp++) != '\0') {
+			uint_t prev = upid;
+
+			if (c < '0' || c > '9')
+				return (NULL);
+
+			upid = 10 * upid + (c - '0');
+			/* integer overflow */
+			if (upid / 10 != prev)
+				return (NULL);
+		}
+
+		/* reject values too large for a (signed) pid_t */
+		if (upid > ((uint_t)-1 >> 1))
+			return (NULL);
+
+		/*
+		 * Can't continue if the process is still loading
+		 * or it doesn't really exist yet (or maybe it just died!)
+		 */
+		p = lxpr_lock((pid_t)upid);
+		if (p == NULL)
+			return (NULL);
+
+		if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			lxpr_unlock(p);
+			return (NULL);
+		}
+
+		/*
+		 * allocate and fill in a new lx /proc node
+		 */
+		lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+		lxpr_unlock(p);
+
+		dp = LXPTOV(lxpnp);
+		ASSERT(dp != NULL);
+
+		return (dp);
+	}
+
+	/* Lookup fixed names */
+	return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
+}
+
+
+
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ *
+ * Validates the caller's access and the requested offset/length, then
+ * dispatches to the readdir routine registered for the node's type.
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	lxpr_node_t *node = VTOLXP(dp);
+	lxpr_nodetype_t type = node->lxpr_type;
+	off_t start;
+	ssize_t wanted;
+	int error;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(type < LXPR_NFILES);
+
+	/*
+	 * we should never get here because the readdir
+	 * is done on the realvp for these nodes
+	 */
+	ASSERT(!(type == LXPR_PID_FD_FD ||
+	    type == LXPR_PID_CURDIR ||
+	    type == LXPR_PID_ROOTDIR));
+
+	/*
+	 * restrict readdir permission to owner or root
+	 */
+	error = lxpr_access(dp, VREAD, 0, cr, ct);
+	if (error != 0)
+		return (error);
+
+	start = uiop->uio_offset;
+	wanted = uiop->uio_resid;
+
+	/* can't do negative reads */
+	if (start < 0 || wanted <= 0)
+		return (EINVAL);
+
+	/* offsets that are not on an entry boundary name no entry */
+	if ((start % LXPR_SDSIZE) != 0)
+		return (ENOENT);
+
+	return (lxpr_readdir_function[type](node, uiop, eofp));
+}
+
+
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) /* presumably the lxpr_readdir_function[] entry for non-directory nodes */
+{
+ return (ENOTDIR); /* nothing to enumerate; uiop and eofp are left untouched */
+}
+
+/*
+ * This has the common logic for returning directory entries
+ *
+ * Directory offsets advance in units of LXPR_SDSIZE: offset 0 is ".",
+ * offset LXPR_SDSIZE is "..", and every later slot maps to
+ * dirtab[slot - 2].  Entries are copied out one dirent64 at a time until
+ * the caller's buffer cannot hold the next record or the table runs out.
+ *
+ * Returns 0 on success, setting *eofp (when eofp is non-NULL) to say
+ * whether the whole table has now been returned; EINVAL if the buffer is
+ * too small for even the first record; or the error from uiomove().
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Satisfy user request
+ */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXPR_SDSIZE) - 2; /* table index; the first two slots are "." and ".." */
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxpnp->lxpr_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXPR_SDSIZE) {
+
+ dirent->d_ino = lxpr_parentinode(lxpnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex < dirtablen) {
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+ lxpnp->lxpr_pid, 0);
+
+ ASSERT(slen < LXPNSIZ);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); /* offset of the entry after this one */
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * than that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and
+ * uiop->uio_offset by the same amount. But we want
+ * uiop->uio_offset to change in increments
+ * of LXPR_SDSIZE, which is different from the number of bytes
+ * being returned to the user.
+ * So we set uiop->uio_offset separately, ignoring what
+ * uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))) {
+ return (error);
+ }
+
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ if (eofp) {
+ *eofp =
+ (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+ return (0);
+}
+
+
+/*
+ * Return entries of the top-level lx /proc directory: ".", "..", the fixed
+ * lx procfs files, and then one directory per visible process.
+ *
+ * Returns 0 on success, EINVAL if the buffer cannot hold even one record,
+ * or the error from copying out.  *eofp (when eofp is non-NULL) is set on
+ * every successful return.
+ */
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	zoneid_t zoneid;
+	pid_t pid;
+	int error;
+	int ceof;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+	oresid = uiop->uio_resid;
+	zoneid = LXPTOZ(lxpnp)->zone_id;
+
+	/*
+	 * We return directory entries in the order:
+	 * "." and ".." then the unique lx procfs files, then the
+	 * directories corresponding to the running processes.
+	 *
+	 * This is a good order because it allows us to more easily
+	 * keep track of where we are between calls to getdents().
+	 * If the number of processes changes between calls then we
+	 * can't lose track of where we are in the lx procfs files.
+	 */
+
+	/* Do the fixed entries */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
+	    PROCDIRFILES);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Finished if we couldn't do all the table; the process entries
+	 * are still to come, so this is certainly not the end.
+	 */
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		return (0);
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/* Do the process entries */
+	while ((uresid = uiop->uio_resid) > 0) {
+		proc_t *p;
+		pid_t raw_pid;
+		int visible;
+		int len;
+		int reclen;
+		int i;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop when entire proc table has been examined.
+		 */
+		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+		if (i >= v.v_proc) {
+			/* Run out of table entries */
+			if (eofp) {
+				*eofp = 1;
+			}
+			return (0);
+		}
+		mutex_enter(&pidlock);
+
+		/*
+		 * Skip indices for which there is no pid_entry, PIDs for
+		 * which there is no corresponding process, a PID of 0,
+		 * and anything the security policy doesn't allow
+		 * us to look at.
+		 */
+		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+		    p->p_pid == 0 ||
+		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			mutex_exit(&pidlock);
+			goto next;
+		}
+
+		ASSERT(p->p_stat != 0);
+
+		/*
+		 * Capture everything needed from the proc while pidlock is
+		 * still held; p must not be dereferenced once it is dropped.
+		 *
+		 * If this /proc was mounted in the global zone, view
+		 * all procs; otherwise, only view zone member procs.
+		 */
+		raw_pid = p->p_pid;
+		visible = (zoneid == GLOBAL_ZONEID ||
+		    p->p_zone->zone_id == zoneid);
+		mutex_exit(&pidlock);
+
+		if (!visible)
+			goto next;
+
+		/*
+		 * Convert pid to the Linux default of 1 if we're the zone's
+		 * init process, otherwise use the value from the proc
+		 * structure
+		 */
+		pid = ((raw_pid != curproc->p_zone->zone_proc_initpid) ?
+		    raw_pid : 1);
+
+		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		/*
+		 * if the size of the data to transfer is greater
+		 * than that requested then we can't do it this transfer.
+		 */
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid)
+				return (EINVAL);
+			break;
+		}
+
+		/*
+		 * uiomove() updates both uiop->uio_resid and
+		 * uiop->uio_offset by the same amount.  But we want
+		 * uiop->uio_offset to change in increments
+		 * of LXPR_SDSIZE, which is different from the number of bytes
+		 * being returned to the user.
+		 * So we set uiop->uio_offset separately, at the "next"
+		 * label below, ignoring what uiomove() does.
+		 */
+		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)))
+			return (error);
+
+next:
+		uiop->uio_offset = uoffset + LXPR_SDSIZE;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >=
+		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+
+	return (0);
+}
+
+
+/*
+ * Return entries of a /proc/<pid> directory.  Fails with ENOENT if the
+ * process no longer exists (or is still being created); otherwise the
+ * fixed piddir table is listed.
+ */
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	proc_t *p;
+	int gone;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+	/* can't read its contents if it died */
+	mutex_enter(&pidlock);
+	if (lxpnp->lxpr_pid == 1) {
+		/* Linux pid 1 stands for the zone's init process */
+		p = prfind(curproc->p_zone->zone_proc_initpid);
+	} else {
+		p = prfind(lxpnp->lxpr_pid);
+	}
+	gone = (p == NULL || p->p_stat == SIDL);
+	mutex_exit(&pidlock);
+
+	if (gone)
+		return (ENOENT);
+
+	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) /* lists the "net" directory */
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); /* ".", ".." and the fixed netdir table */
+}
+
+
+/*
+ * Return entries of a /proc/<pid>/fd directory: ".", ".." and one entry,
+ * named by its number, for every open file descriptor of the process.
+ *
+ * The process's fi_lock is held from the point the process is unlocked
+ * until return, and is released on every exit path.
+ *
+ * Returns 0 on success, ENOENT if the process is gone, EINVAL if the
+ * buffer cannot hold even one record, or the error from copying out.
+ */
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	int error;
+	int ceof;
+	proc_t *p;
+	int nofiles;
+	int fddirsize;
+	uf_info_t *fip;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	oresid = uiop->uio_resid;
+
+	/* can't read its contents if it died */
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL)
+		return (ENOENT);
+
+	/* Get open file info */
+	fip = (&(p)->p_user.u_finfo);
+
+	/* a zombie or system process can't have any open files */
+	nofiles = ((p->p_stat == SZOMB) || (p->p_flag & SSYS) ||
+	    (p->p_as == &kas));
+
+	mutex_enter(&fip->fi_lock);
+	lxpr_unlock(p);
+
+	/* sample the table size only once fi_lock is held */
+	fddirsize = nofiles ? 0 : fip->fi_nfiles;
+
+	/* Do the fixed entries (in this case just "." & "..") */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, NULL, 0);
+
+	/*
+	 * Finished if we got an error or if we couldn't do all the table.
+	 * Leave through "out" so that fi_lock is dropped.
+	 */
+	if (error != 0)
+		goto out;
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		goto out;
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/*
+	 * Loop until user's request is satisfied or until
+	 * all file descriptors have been examined.
+	 */
+	for (; (uresid = uiop->uio_resid) > 0;
+	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+		int reclen;
+		int fd;
+		int len;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop at the end of the fd list
+		 */
+		fd = (uoffset / LXPR_SDSIZE) - 2;
+		if (fd >= fddirsize) {
+			if (eofp) {
+				*eofp = 1;
+			}
+			goto out;
+		}
+
+		/* unused slot: no entry, but the offset still advances */
+		if (fip->fi_list[fd].uf_file == NULL)
+			continue;
+
+		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid)
+				error = EINVAL;
+			goto out;
+		}
+
+		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)))
+			goto out;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+
+out:
+	mutex_exit(&fip->fi_lock);
+	return (error);
+}
+
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ *
+ * Nodes with an underlying vnode yield that vnode's path (subject to an
+ * access check); "self" yields the caller's pid as seen from Linux; all
+ * other nodes fail.
+ */
+/* ARGSUSED */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+	char bp[MAXPATHLEN + 1];
+	size_t buflen = sizeof (bp);
+	lxpr_node_t *node = VTOLXP(vp);
+	vnode_t *rvp = node->lxpr_realvp;
+	lxpr_nodetype_t type;
+	pid_t pid;
+	int error = 0;
+
+	/* must be a symbolic link file */
+	if (vp->v_type != VLNK)
+		return (EINVAL);
+
+	if (rvp != NULL) {
+		/* Try to produce a symlink name for anything with a realvp */
+		error = lxpr_access(vp, VREAD, 0, CRED(), ct);
+		if (error == 0)
+			error = vnodetopath(NULL, rvp, bp, buflen, CRED());
+		if (error != 0)
+			return (error);
+
+		/* copy the link data to user space */
+		return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+	}
+
+	type = node->lxpr_type;
+
+	if (type == LXPR_PID_CURDIR || type == LXPR_PID_ROOTDIR ||
+	    type == LXPR_PID_EXE)
+		return (EACCES);
+
+	/*
+	 * Anything else but "self" must fail, so that nothing thinks the
+	 * symlink is empty and hence ".".
+	 */
+	if (type != LXPR_SELF)
+		return (EINVAL);
+
+	/*
+	 * Convert pid to the Linux default of 1 if we're the zone's init
+	 * process.
+	 */
+	if (curproc->p_pid == curproc->p_zone->zone_proc_initpid)
+		pid = 1;
+	else
+		pid = curproc->p_pid;
+
+	/*
+	 * Don't need to check the result as every possible int will fit
+	 * within MAXPATHLEN bytes.
+	 */
+	(void) snprintf(bp, buflen, "%d", pid);
+
+	/* copy the link data to user space */
+	return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+}
+
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ *
+ * The credential and caller context arguments are not used.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_freenode(VTOLXP(vp)); /* hand the lx /proc node behind this vnode to lxpr_freenode() */
+}
+
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ *
+ * Takes no arguments and unconditionally reports success.
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * nothing to sync but this
+ * function must never fail
+ */
+ return (0);
+}
+
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ *
+ * Each vnode is first replaced by the vnode it ultimately stands for
+ * (following lxpr_realvp links).  If either side is still an lx /proc
+ * vnode the two are compared by identity; otherwise the comparison is
+ * delegated to VOP_CMP() on the underlying vnodes.
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+	vnode_t *rvp;
+
+	/* peel lx /proc wrappers off the first vnode */
+	for (;;) {
+		if (!vn_matchops(vp1, lxpr_vnodeops))
+			break;
+		if ((rvp = VTOLXP(vp1)->lxpr_realvp) == NULL)
+			break;
+		vp1 = rvp;
+	}
+
+	/* ... and off the second */
+	for (;;) {
+		if (!vn_matchops(vp2, lxpr_vnodeops))
+			break;
+		if ((rvp = VTOLXP(vp2)->lxpr_realvp) == NULL)
+			break;
+		vp2 = rvp;
+	}
+
+	if (!vn_matchops(vp1, lxpr_vnodeops) &&
+	    !vn_matchops(vp2, lxpr_vnodeops))
+		return (VOP_CMP(vp1, vp2, ct));
+
+	return (vp1 == vp2);
+}
+
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ *
+ * Stores in *vpp the vnode underlying vp: the node's lxpr_realvp, resolved
+ * one step further through VOP_REALVP() when that succeeds, or vp itself
+ * when the node has no underlying vnode.  Always returns 0.
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+	vnode_t *result = vp;
+	vnode_t *under = VTOLXP(vp)->lxpr_realvp;
+
+	if (under != NULL) {
+		vnode_t *deeper = under;
+
+		result = under;
+		if (VOP_REALVP(under, &deeper, ct) == 0)
+			result = deeper;
+	}
+
+	*vpp = result;
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/ldlinux.h b/usr/src/uts/common/brand/lx/sys/ldlinux.h
new file mode 100644
index 0000000000..b259c05d97
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/ldlinux.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LDLINUX_H
+#define _SYS_LDLINUX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The ldlinux streams module is only intended for use in lx branded zones.
+ * This streams module implements the following ioctls:
+ * TIOCSETLD and TIOCGETLD
+ *
+ * These ioctls are special ioctls supported only by the ldlinux streams
+ * module and invoked only by the lx brand emulation library. These ioctls
+ * do not exist on native Linux systems.
+ *
+ * The TIOCSETLD ioctl is used when emulating the following Linux ioctls:
+ * TCSETS/TCSETSW/TCSETSF
+ * TCSETA/TCSETAW/TCSETAF
+ *
+ * The TIOCGETLD ioctl is used when emulating the following Linux ioctls:
+ * TCGETS/TCGETA
+ *
+ * This module is needed to emulate these ioctls because the following arrays:
+ * termio.c_cc
+ * termios.c_cc
+ * which are parameters for the following ioctls:
+ * TCSETS/TCSETSW/TCSETSF
+ * TCSETA/TCSETAW/TCSETAF
+ * TCGETS/TCGETA
+ *
+ * are defined differently on Solaris and Linux.
+ *
+ * According to the termio(7I) man page on Solaris the following is true of
+ * the members of the c_cc array:
+ * The VMIN element is the same element as the VEOF element.
+ * The VTIME element is the same element as the VEOL element.
+ *
+ * But on Linux the termios(3) man page states:
+ * These symbolic subscript values are all different, except that
+ * VTIME, VMIN may have the same value as VEOL, VEOF, respectively.
+ *
+ * While the man page indicates that these values may be the same, empirical
+ * tests show them to be different. Since these values are different on
+ * Linux systems it's possible that applications could set the members of
+ * the c_cc array to different values and then later expect to be able to
+ * read back those same separate values. The ldlinux module exists to provide
+ * a per-stream storage area where the lx_brand emulation library can save
+ * these values. The values are set and retrieved via the TIOCSETLD and
+ * TIOCGETLD ioctls respectively.
+ */
+
+#include <sys/termios.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LDLINUX_MOD "ldlinux"
+
+#ifdef _KERNEL
+
+/*
+ * LDLINUX_MODID - This should be a unique number associated with
+ * this particular module. Unfortunately there is no authority responsible
+ * for administering this name space, hence there's no real guarantee that
+ * whatever number we choose will be unique. Luckily, this constant
+ * is not really used anywhere by the system. It is used by some
+ * kernel subsystems to check for the presence of certain streams
+ * modules with known id values. Since no other kernel subsystem
+ * checks for the presence of this module we'll just set the id to 0.
+ */
+#define LDLINUX_MODID 0
+
+struct ldlinux {
+ int state; /* state information (presumably flags like ISPTSTTY) */
+ /* Linux expects the next four c_cc values */
+ /* to be distinct, whereas Solaris (legally) */
+ /* overlaps their storage */
+ unsigned char veof; /* saved VEOF value */
+ unsigned char veol; /* saved VEOL value */
+ unsigned char vmin; /* saved VMIN value */
+ unsigned char vtime; /* saved VTIME value */
+};
+
+#define ISPTSTTY 0x01
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LDLINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h
new file mode 100644
index 0000000000..cbb3431c4b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUDIO_H
+#define _LX_AUDIO_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * name for this driver
+ */
+#define LX_AUDIO_DRV "lx_audio"
+
+/*
+ * names for the minor nodes this driver exports
+ */
+#define LXA_MINORNAME_DEVCTL "lx_devctl"
+#define LXA_MINORNAME_DSP "lx_dsp"
+#define LXA_MINORNAME_MIXER "lx_mixer"
+
+/*
+ * minor numbers for the minor nodes this driver exports
+ */
+#define LXA_MINORNUM_DEVCTL 0
+#define LXA_MINORNUM_DSP 1
+#define LXA_MINORNUM_MIXER 2
+#define LXA_MINORNUM_COUNT 3
+
+/*
+ * driver ioctls
+ *
+ * note that we're layering on top of solaris audio devices so we want
+ * to make sure that our ioctls namespace doesn't conflict with theirs.
+ * looking in sys/audioio.h and sys/mixer.h we see that they seem to
+ * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a.'
+ */
+
+/*
+ * administrative ioctls.
+ * these ioctls are only supported on the DEVCTL minor node
+ */
+#define LXA_IOC_ZONE_REG (_IOR('a', 0, lxa_zone_reg_t))
+#define LXA_IOC_ZONE_UNREG (_IOR('a', 1, lxa_zone_reg_t))
+
+
+/*
+ * audio and mixer device ioctls
+ * these ioctls are supported on DSP and MIXER minor nodes.
+ */
+#define LXA_IOC_GETMINORNUM (_IOR('a', 20, int))
+
+/*
+ * audio device ioctls.
+ * these ioctls are supported on DSP minor nodes.
+ */
+#define LXA_IOC_MMAP_OUTPUT (_IOR('a', 41, int))
+#define LXA_IOC_MMAP_PTR (_IOR('a', 42, int))
+#define LXA_IOC_GET_FRAG_INFO (_IOR('a', 43, lxa_frag_info_t))
+#define LXA_IOC_SET_FRAG_INFO (_IOR('a', 44, lxa_frag_info_t))
+
+/*
+ * mixer device ioctls.
+ * these ioctls are supported on MIXER minor nodes.
+ */
+#define LXA_IOC_MIXER_GET_VOL (_IOR('a', 60, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_VOL (_IOR('a', 61, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_MIC (_IOR('a', 62, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_MIC (_IOR('a', 63, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_PCM (_IOR('a', 64, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_PCM (_IOR('a', 65, lxa_mixer_levels_t))
+
+/* command structure for LXA_IOC_ZONE_REG */
+#define LXA_INTSTRLEN 32
+typedef struct lxa_zone_reg {
+ char lxa_zr_zone_name[ZONENAME_MAX];
+ char lxa_zr_inputdev[LXA_INTSTRLEN];
+ char lxa_zr_outputdev[LXA_INTSTRLEN];
+} lxa_zone_reg_t;
+
+/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */
+typedef struct lxa_frag_info {
+ int lxa_fi_size;
+ int lxa_fi_cnt;
+} lxa_frag_info_t;
+
+/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */
+typedef struct lxa_mixer_levels {
+ int lxa_ml_gain;
+ int lxa_ml_balance;
+} lxa_mixer_levels_t;
+
+/* verify that a solaris mixer level structure has valid values */
+#define LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \
+ ((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \
+ ((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \
+ ((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUDIO_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
new file mode 100644
index 0000000000..4436226deb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
@@ -0,0 +1,334 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_H
+#define _LX_AUTOFS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
+ * and provide support for the Linux "automount" automounter.
+ *
+ *
+ *
+ * +++ Linux automounter background.
+ *
+ * Linux has two automounters: "amd" and "automount"
+ *
+ * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem
+ * at an automount point, and it acts as the NFS server for the mount. When
+ * an access is done to that NFS filesystem, the access is redirected by the
+ * kernel to the "amd" process via rpc. "amd" then looks up any information
+ * required to resolve the requests, mounts real NFS filesystems if
+ * necessary, and returns. "amd" has its own strange configuration
+ * mechanism that doesn't seem to be very compatible with Solaris's network
+ * based automounter map support.
+ *
+ * 2) "automount" is the other Linux automounter. It utilizes a kernel
+ * filesystem (autofs) to provide its functionality. Basically, it mounts
+ * the autofs filesystem at any automounter controlled mount point. This
+ * filesystem then intercepts and redirects lookup operations (and only
+ * lookup ops) to the userland automounter process via a pipe. (The
+ * pipe to the automounter is established via mount options when the autofs
+ * filesystem is mounted.) When the automounter receives a request via this
+ * pipe, it does lookups to whatever backing store it's configured to use,
+ * does mkdir operations on the autofs filesystem, mounts remote NFS
+ * filesystems on any leaf directories it just created, and signals the
+ * autofs filesystem via an ioctl to let it know that the lookup can
+ * continue.
+ *
+ *
+ *
+ * +++ Linux autofs (and automount daemon) notes
+ *
+ * Since we're mimicking the behavior of the Linux autofs filesystem it's
+ * important to document some of its observed behavior here since there's
+ * no doubt that in the future this behavior will change. These comments
+ * apply to the behavior of the automounter as observed on a system
+ * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
+ *
+ * A) Autofs allows root owned, non-automounter processes to create
+ * directories in the autofs filesystem. The autofs filesystem treats the
+ * automounter's process group as special, but it doesn't prevent root
+ * processes outside of the automounter's process group from creating new
+ * directories in the autofs filesystem.
+ *
+ * B) Autofs doesn't allow creation of any non-directory entries in the
+ * autofs filesystem. No entity can create files (e.g. /bin/touch or
+ * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within
+ * the autofs filesystem are directories.
+ *
+ * C) Autofs only intercepts vop lookup operations. Notably, it does _not_
+ * intercept and re-direct vop readdir operations. This means that the
+ * observed behavior of the Linux automounter can be considerably different
+ * from that of the Solaris automounter. Specifically, on Solaris if autofs
+ * mount point is mounted _without_ the -nobrowse option then if a user does
+ * an ls operation (which translates into a vop readdir operation) then the
+ * automounter will intercept that operation and list all the possible
+ * directories and mount points without actually mounting any filesystems.
+ * Essentially, all automounter managed mount points on Linux will behave
+ * like "-nobrowse" mount points on Solaris. Here's an example to
+ * illustrate this. If /ws was mounted on Solaris without the -nobrowse
+ * option and an auto_ws yp map was setup as the backing store for this
+ * mount point, then an "ls /ws" would list all the keys in the map as
+ * valid directories, but an "ls /ws" on Linux would list an empty
+ * directory.
+ *
+ * D) NFS mounts are performed by the automount process. When the automount
+ * process gets a redirected lookup request, it determines _all_ the
+ * possible remote mount points for that request, creates directory paths
+ * via mkdir, and mounts the remote filesystems on the newly created paths.
+ * So for example, if a machine called mcescher exported /var/crash and
+ * /var/core, an "ls /net/mcescher" would result in the following actions
+ * being done by the automounter:
+ * mkdir /net/mcescher
+ * mkdir /net/mcescher/var
+ * mkdir /net/mcescher/var/crash
+ * mkdir /net/mcescher/var/core
+ * mount mcescher:/var/crash /net/mcescher/var/crash
+ * mount mcescher:/var/core /net/mcescher/var/core
+ * Once the automounter completed the work above it would signal the autofs
+ * filesystem (via an ioctl) that the lookup could continue.
+ *
+ * E.1) Autofs only redirects vop lookup operations for path entries that
+ * don't already exist in the autofs filesystem. So for the example above,
+ * an initial (after the start of the automounter) "ls /net/mcescher" would
+ * result in a request to the automounter. A subsequent "ls /net/mcescher"
+ * would not result in a request to the automounter. Even if
+ * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
+ * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
+ * would not result in a new request to the automounter.
+ *
+ * E.2) Autofs lookup requests that are sent to the automounter only include
+ * the root directory path component. So for example, after starting up
+ * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
+ * lookup request actually sent to the automounter would just be for
+ * "mcescher". (The same request as if the user had done "ls /net/mcescher".)
+ *
+ * E.3) The two statements above aren't entirely true. The Linux
+ * autofs filesystem will also redirect lookup operations for leaf
+ * directories that don't have a filesystem mounted on them. Using the
+ * example above, if a user did a "ls /net/mcescher", then manually
+ * unmounted /net/mcescher/var/crash, and then did an "ls
+ * /net/mcescher/var/crash", this would result in a request for
+ * "mcescher/var/crash" being sent to the automounter. The strange thing
+ * (a Linux bug perhaps) is that the automounter won't do anything with this
+ * request and the lookup will fail.
+ *
+ * F) The autofs filesystem communication protocol (what ioctls it supports
+ * and what data it passes to the automount process) are versioned. The
+ * source for the userland automount daemon (i looked at version v3.1.7)
+ * seemed to support two versions of the Linux kernel autofs implementation.
+ * Both versions supported communication with a pipe and the format of the
+ * structure passed via this pipe was the same. The difference between the
+ * two versions was in the functionality supported. (The v3 version has
+ * additional ioctls to support automount timeouts.)
+ *
+ *
+ *
+ * +++ lx_autofs notes
+ *
+ * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
+ * Linux autofs filesystem with the following exceptions:
+ *
+ * 1.1) We don't bother to implement the E.3 functionality listed above
+ * since it doesn't appear to be of any use.
+ *
+ * 1.2) We only implement v2 of the automounter protocol since
+ * implementing v3 would take a _lot_ more work. If this proves to be a
+ * problem we can re-visit this decision later. (More details about v3
+ * support are included in comments below.)
+ *
+ * 2) In general, the approach taken for lx_autofs is to keep it as simple
+ * as possible and to minimize its memory usage. To do this all information
+ * about the contents of the lx_autofs filesystem are mirrored in the
+ * underlying filesystem that lx_autofs is mounted on and most vop operations
+ * are simply passed onto this underlying filesystem. This means we don't
+ * have to implement most the complex operations that a full filesystem
+ * normally has to implement. It also means that most of our filesystem state
+ * (wrt the contents of the filesystem) doesn't actually have to be stored
+ * in memory, we can simply go to the underlying filesystem to get it when
+ * it's requested. For the purposes of discussion, we'll call the underlying
+ * filesystem the "backing store."
+ *
+ * The backing store is a directory called ".lx_afs" which is created in
+ * the directory where the lx_autofs filesystem is mounted. When the lx_autofs
+ * filesystem is unmounted this backing store directory is deleted. If this
+ * directory exists at mount time (perhaps the system crashed while a previous
+ * lx_autofs instance was mounted at the same location) it will be deleted.
+ * There are a few implications of using a backing store worth mentioning.
+ *
+ * 2.1) lx_autofs can't be mounted on a read only filesystem. If this
+ * proves to be a problem we can probably move the location of the
+ * backing store.
+ *
+ * 2.2) If the backing store filesystem runs out of space then the
+ * automounter process won't be able to create more directories and mount
+ * new filesystems. Of course, strange failures usually happen when
+ * filesystems run out of space.
+ *
+ * 3) Why aren't we using gfs? gfs has two different usage models.
+ *
+ * 3.1) I'm my own filesystem but i'm using gfs to help with managing
+ * readdir operations.
+ *
+ * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes
+ *
+ * We're not using the 3.1 interfaces because we don't implement readdir
+ * ourselves. We pass all readdir operations onto the backing store
+ * filesystem and utilize its readdir implementation.
+ *
+ * We're not using the 3.2 interfaces because they are really designed for
+ * in memory filesystems where all of the filesystem state is stored in
+ * memory. They don't lend themselves to filesystems where part of the
+ * state is in memory and part of the state is on disk.
+ *
+ * For more information on gfs take a look at the block comments in the
+ * top of gfs.c
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note that the name of the actual Solaris filesystem is lx_afs and not
+ * lx_autofs. This is because filesystem names are stupidly limited to 8
+ * characters.
+ */
+#define LX_AUTOFS_NAME "lx_afs"
+
+/*
+ * Mount options supported.
+ */
+#define LX_MNTOPT_FD "fd"
+#define LX_MNTOPT_PGRP "pgrp"
+#define LX_MNTOPT_MINPROTO "minproto"
+#define LX_MNTOPT_MAXPROTO "maxproto"
+
+/* Version of the Linux kernel automount protocol we support. */
+#define LX_AUTOFS_PROTO_VERSION 2
+
+/*
+ * Command structure sent to automount process from lx_autofs via a pipe.
+ * This structure is the same for v2 and v3 of the automount protocol
+ * (the communication pipe is established at mount time).
+ */
+typedef struct lx_autofs_pkt {
+ int lap_protover; /* protocol version number */
+ int lap_constant; /* always set to 0 */
+ int lap_id; /* every pkt must have a unique id */
+ int lap_name_len; /* excludes any newline or NUL terminator */
+ char lap_name[256]; /* path component to look up */
+} lx_autofs_pkt_t;
+
+/*
+ * Ioctls supported (v2 protocol).
+ */
+#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */
+#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */
+#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */
+
+/*
+ * Ioctls not supported (v3 protocol).
+ *
+ * Initially we're only going to support v2 of the Linux kernel automount
+ * protocol. This means that we don't support the following ioctls.
+ *
+ * 1) The protocol version ioctl (by not supporting it the automounter
+ * will assume version 2).
+ *
+ * 2) Automounter timeout ioctls. For v3 and later the automounter can
+ * be started with a timeout option. It will notify the filesystem of
+ * this timeout and, if any automounter filesystem root directory entry
+ * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
+ * ioctl. For example, if the timeout is 60 seconds, the Linux
+ * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
+ * timeouts more often than that. (v3.1.7 of the automount daemon would
+ * perform this ioctl every <timeout>/4 seconds.) Then the autofs
+ * filesystem will
+ * report top level directories that aren't in use to the automounter
+ * via this ioctl. If /net was managed by the automounter and
+ * there were the following mount points:
+ * /net/jurassic/var/crash
+ * /net/mcescher/var/crash
+ * and no one was looking at any crash dumps on mcescher but someone
+ * was analyzing a crash dump on jurassic, then after <timeout> seconds
+ * had passed the autofs filesystem would let the automounter know that
+ * "mcescher" could be unmounted. (Note the granularity of notification
+ * is directories in the root of the autofs filesystem.) Here's two
+ * ideas for how this functionality could be implemented on Solaris:
+ *
+ * 2.1) The easy incomplete way. Don't do any in-use detection. Simply
+ * tell the automounter it can try to unmount the filesystem every time
+ * the specified timeout passes. If the filesystem is in use then the
+ * unmount will fail. This would break down for remote hosts with multiple
+ * mounts. For example, if the automounter had mounted the following
+ * filesystems:
+ * /net/jurassic/var/crash
+ * /net/jurassic/var/core
+ * and the user was looking at a core file, and the timeout expired, the
+ * automounter would receive notification to unmount "jurassic". Then
+ * it would unmount crash (which would succeed) and then try to unmount
+ * core (which would fail). After that (since the automounter only
+ * performs mounts for failed lookups in the root autofs directory)
+ * future access to /net/jurassic/var/crash would result in access
+ * to an empty autofs directory. We might be able to work around
+ * this by caching which root autofs directories we've timed out,
+ * then any access to paths that contain those directories could be
+ * stalled and we could resend another request to the automounter.
+ * This could work if the automounter ignores mount failures.
+ *
+ * 2.2) The hard correct way. The real difficulty here is detecting
+ * files in use on other filesystems (say NFS) that have been mounted
+ * on top of autofs. (Detecting in use autofs vnodes should be easy.)
+ * To do this we would probably have to create a new brand op to intercept
+ * mount/umount filesystem operations. Then using this entry point we
+ * could detect mounts of other filesystems on top of lx_autofs. When
+ * a successful mount finishes we would use the FEM (file event
+ * monitoring) framework to push a module onto that filesystem and
+ * intercept VOP operations that allocate/free vnodes in that filesystem.
+ * (We would also then have to track mount operations on top of that
+ * filesystem, etc.) this would allow us to properly detect any
+ * usage of subdirectories of an autofs directory.
+ */
+#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */
+#define LX_AUTOFS_IOC_EXPIRE 0x81109365 /* arg: lx_autofs_expire * */
+#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */
+
+typedef struct lx_autofs_expire {
+ int lap_protover; /* protocol version number */
+ int lap_constant; /* always set to 1 */
+ int lap_name_len; /* excludes any newline or NUL terminator */
+ char lap_name[256]; /* path component that has timed out */
+} lx_autofs_expire_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
new file mode 100644
index 0000000000..9c5517b8d5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_IMPL_H
+#define _LX_AUTOFS_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/modhash.h>
+#include <sys/vnode.h>
+
+#include <sys/lx_autofs.h>
+
+/*
+ * Space key.
+ * Used to persist data across lx_autofs filesystem module unloads.
+ */
+#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev"
+
+/*
+ * Name of the backing store directory.
+ */
+#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME
+
+#define LX_AUTOFS_VFS_ID_HASH_SIZE 15
+#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15
+#define LX_AUTOFS_VFS_VN_HASH_SIZE 15
+
+/*
+ * VFS data object.
+ */
+typedef struct lx_autofs_vfs {
+ /* Info about the underlying filesystem and backing store. */
+ vnode_t *lav_mvp;
+ char *lav_bs_name;
+ vnode_t *lav_bs_vp;
+
+ /* Info about the automounter process managing this filesystem. */
+ int lav_fd;
+ pid_t lav_pgrp;
+ file_t *lav_fifo_wr;
+ file_t *lav_fifo_rd;
+
+ /* Each automount request needs a unique id. */
+ id_space_t *lav_ids;
+
+ /* All remaining structure members are protected by lav_lock. */
+ kmutex_t lav_lock;
+
+ /* Hashes to keep track of outstanding automounter requests. */
+ mod_hash_t *lav_path_hash;
+ mod_hash_t *lav_id_hash;
+
+ /* We need to keep track of all our vnodes. */
+ vnode_t *lav_root;
+ mod_hash_t *lav_vn_hash;
+} lx_autofs_vfs_t;
+
+/*
+ * Structure to keep track of requests sent to the automounter.
+ */
+typedef struct lx_autofs_lookup_req {
+ /* Packet that gets sent to the automounter. */
+ lx_autofs_pkt_t lalr_pkt;
+
+ /* Reference count. Always updated atomically. */
+ uint_t lalr_ref;
+
+ /*
+ * Fields to keep track and sync threads waiting on a lookup.
+ * Fields are protected by lalr_lock.
+ */
+ kmutex_t lalr_lock;
+ kcondvar_t lalr_cv;
+ int lalr_complete;
+} lx_autofs_lookup_req_t;
+
+/*
+ * Generic stack structure.
+ */
+typedef struct stack_elem {
+ list_node_t se_list;
+ caddr_t se_ptr1;
+ caddr_t se_ptr2;
+ caddr_t se_ptr3;
+} stack_elem_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
new file mode 100644
index 0000000000..b489fabc9f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -0,0 +1,232 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_BRAND_H
+#define _LX_BRAND_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/cpuvar.h>
+#include <sys/zone.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_BRANDNAME "lx"
+
+/*
+ * Brand uname info
+ */
+#define LX_UNAME_SYSNAME "Linux"
+#define LX_UNAME_RELEASE_2_6 "2.6.18"
+#define LX_UNAME_RELEASE_2_4 "2.4.21"
+#define LX_UNAME_VERSION "BrandZ fake linux"
+#define LX_UNAME_MACHINE "i686"
+
+#define LX_LINKER_NAME "ld-linux.so.2"
+#define LX_LINKER "/lib/" LX_LINKER_NAME
+#define LX_LIBC_NAME "libc.so.6"
+#define LIB_PATH "/native/usr/lib/"
+#define LX_LIB "lx_brand.so.1"
+#define LX_LIB_PATH LIB_PATH LX_LIB
+
+#define LX_NSYSCALLS_2_4 270
+#define LX_NSYSCALLS_2_6 317
+#define LX_NSYSCALLS LX_NSYSCALLS_2_6
+
+#define LX_KERN_2_4 0
+#define LX_KERN_2_6 1
+
+/*
+ * brand(2) subcommands
+ *
+ * Everything >= 128 is a brand-specific subcommand.
+ * 192 to 462 are reserved for system calls, although most of that space is
+ * unused.
+ */
+#define B_LPID_TO_SPAIR 128
+#define B_SYSENTRY 129
+#define B_SYSRETURN 130
+#define B_PTRACE_SYSCALL 131
+#define B_SET_AFFINITY_MASK 132
+#define B_GET_AFFINITY_MASK 133
+
+#define B_EMULATE_SYSCALL 192
+
+#define LX_VERSION_1 1
+#define LX_VERSION LX_VERSION_1
+
+#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS
+#define LX_KERN_VERSION_NUM (ZONE_ATTR_BRAND_ATTRS + 1)
+
+/* Aux vector containing phdr of linux executable, used by lx_librtld_db */
+#define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1
+
+/* Aux vector containing hz value */
+#define AT_CLKTCK 17
+
+#ifndef _ASM
+
+typedef struct lx_brand_registration {
+ uint_t lxbr_version; /* version number */
+ void *lxbr_handler; /* base address of handler */
+ void *lxbr_tracehandler; /* base address of trace handler */
+ void *lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration_t;
+
+#ifdef _SYSCALL32
+typedef struct lx_brand_registration32 {
+ uint32_t lxbr_version; /* version number */
+ caddr32_t lxbr_handler; /* base address of handler */
+ caddr32_t lxbr_tracehandler; /* base address of trace handler */
+ caddr32_t lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration32_t;
+#endif
+
+typedef struct lx_regs {
+ long lxr_gs;
+ long lxr_edi;
+ long lxr_esi;
+ long lxr_ebp;
+ long lxr_esp;
+ long lxr_ebx;
+ long lxr_edx;
+ long lxr_ecx;
+ long lxr_eax;
+ long lxr_eip;
+
+ long lxr_orig_eax;
+} lx_regs_t;
+
+#endif /* _ASM */
+
+/*
+ * GDT usage
+ */
+#define GDT_TLSMIN (GDT_BRANDMIN)
+#define GDT_TLSMAX (GDT_TLSMIN + 2)
+#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN)
+
+#ifndef _ASM
+
+/*
+ * Stores information needed by the lx linker to launch the main
+ * lx executable.
+ */
+typedef struct lx_elf_data {
+ int ed_phdr;
+ int ed_phent;
+ int ed_phnum;
+ int ed_entry;
+ int ed_base;
+ int ed_ldentry;
+} lx_elf_data_t;
+
+#ifdef _KERNEL
+
+typedef struct lx_proc_data {
+ uintptr_t l_handler; /* address of user-space handler */
+ uintptr_t l_tracehandler; /* address of user-space traced handler */
+ uintptr_t l_traceflag; /* address of 32-bit tracing flag */
+ void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */
+ pid_t l_ppid; /* pid of originating parent proc */
+ uint64_t l_ptrace; /* process being observed with ptrace */
+ lx_elf_data_t l_elf_data; /* ELF data for linux executable */
+} lx_proc_data_t;
+
+#endif /* _KERNEL */
+
+/*
+ * A data type big enough to bitmap all possible Linux cpus.
+ * The bitmap size is defined as 1024 cpus in the Linux 2.4 and 2.6 man pages
+ * for sched_setaffinity() and sched_getaffinity().
+ */
+#define LX_NCPU (1024)
+#define LX_AFF_ULONGS (LX_NCPU / (8 * sizeof (ulong_t)))
+typedef ulong_t lx_affmask_t[LX_AFF_ULONGS];
+
+#ifdef _KERNEL
+
+/*
+ * lx-specific data in the klwp_t
+ */
+typedef struct lx_lwp_data {
+ uint_t br_lwp_flags; /* misc. flags */
+ klwp_t *br_lwp; /* back pointer to container lwp */
+ int br_signal; /* signal to send to parent when */
+ /* clone()'ed child terminates */
+ int br_exitwhy; /* reason for thread (process) exit */
+ int br_exitwhat; /* exit code / killing signal */
+ lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */
+ struct user_desc br_tls[LX_TLSNUM];
+ /* descriptors used by libc for TLS */
+ pid_t br_pid; /* converted pid for this thread */
+ pid_t br_tgid; /* thread group ID for this thread */
+ pid_t br_ppid; /* parent pid for this thread */
+ id_t br_ptid; /* parent tid for this thread */
+ void *br_clear_ctidp; /* clone thread id ptr */
+ void *br_set_ctidp; /* clone thread id ptr */
+
+ /*
+ * The following struct is used by lx_clone()
+ * to pass info into fork()
+ */
+ void *br_clone_args;
+
+ uint_t br_ptrace; /* ptrace is active for this LWP */
+} lx_lwp_data_t;
+
+/* brand specific data */
+typedef struct lx_zone_data {
+ int lxzd_kernel_version;
+ int lxzd_max_syscall;
+} lx_zone_data_t;
+
+#define BR_CPU_BOUND 0x0001
+
+#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
+#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
+#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data)
+
+void lx_brand_int80_callback(void);
+int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+
+extern int lx_get_zone_kern_version(zone_t *);
+extern int lx_get_kern_version(void);
+
+extern int lx_debug; /* lx_print() emits output only when this is non-zero */
+#define lx_print (!lx_debug) ? (void)0 : (void) printf /* if/else safe */
+
+#endif /* _KERNEL */
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_BRAND_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
new file mode 100644
index 0000000000..b5c5334bff
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -0,0 +1,53 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_FUTEX_H
+#define _SYS_LX_FUTEX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Futex command numbers accepted by lx_futex(). FUTEX_MAX_CMD is the
+ * highest valid command and is what lx_futex() range-checks against.
+ */
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_FD 2
+#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
+#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE
+
+#ifdef _KERNEL
+extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val2);
+extern void lx_futex_init(void);
+extern int lx_futex_fini(void);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FUTEX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h
new file mode 100644
index 0000000000..12f1aab2b3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_IMPL_H
+#define _LX_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Handler pointer type; declared without a prototype. */
+typedef int64_t (*llfcn_t)();
+
+/*
+ * One entry of the lx_sysent[] table.
+ * NOTE(review): the per-field notes are inferred from the field names --
+ * confirm against the table definition.
+ */
+typedef struct lx_sysent {
+ int sy_flags; /* presumably per-syscall flags */
+ char *sy_name; /* presumably the syscall's name */
+ llfcn_t sy_callc; /* handler function */
+ char sy_narg; /* presumably the argument count */
+} lx_sysent_t;
+
+typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t,
+ ulong_t, ulong_t);
+
+
+extern lx_sysent_t lx_sysent[];
+
+extern lx_systrace_f *lx_systrace_entry_ptr;
+extern lx_systrace_f *lx_systrace_return_ptr;
+
+extern void lx_brand_systrace_enable(void);
+extern void lx_brand_systrace_disable(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
new file mode 100644
index 0000000000..5080c3adae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_LDT_H
+#define _SYS_LINUX_LDT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/segments.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * User-supplied segment description. lx_clone() copies one of these in from
+ * user space when LX_CLONE_SETTLS is requested and converts it to a hardware
+ * descriptor with LDT_INFO_TO_DESC().
+ * NOTE(review): the layout presumably mirrors the structure Linux passes to
+ * modify_ldt()/set_thread_area() -- confirm before changing field order.
+ */
+struct ldt_info {
+ uint_t entry_number;
+ uint_t base_addr;
+ uint_t limit;
+ uint_t seg_32bit:1,
+ contents:2,
+ read_exec_only:1,
+ limit_in_pages:1,
+ seg_not_present:1,
+ useable:1;
+};
+
+/*
+ * True when info carries the "empty" pattern: base, limit, contents,
+ * seg_32bit, limit_in_pages and useable all zero, with read_exec_only and
+ * seg_not_present both set. lx_clone() rejects such an entry with EINVAL.
+ */
+#define LDT_INFO_EMPTY(info) \
+ ((info)->base_addr == 0 && (info)->limit == 0 && \
+ (info)->contents == 0 && (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && (info)->useable == 0)
+
+/*
+ * SETMODE() sets usd_long to SDP_SHORT on amd64 and is empty elsewhere.
+ * The amd64 expansion already ends in a semicolon.
+ */
+#if defined(__amd64)
+#define SETMODE(desc) (desc)->usd_long = SDP_SHORT;
+#else
+#define SETMODE(desc)
+#endif
+
+/*
+ * Fill the user segment descriptor *desc from the ldt_info *info.
+ *
+ * The type field is built from contents (shifted up two bits), the inverse
+ * of read_exec_only in bit 1, and the constant 0x10. The present bit is the
+ * inverse of seg_not_present, and the privilege level is always SEL_UPL.
+ * NOTE(review): 0x10 is presumably the x86 "code/data segment" (S) bit --
+ * confirm against <sys/segments.h>.
+ *
+ * The macro expands to a braced block, not a do/while(0), so it must be
+ * used as a full statement and never as the unbraced body of an if/else.
+ * Fields of *desc that are not listed here are left untouched.
+ */
+#define LDT_INFO_TO_DESC(info, desc) { \
+ USEGD_SETBASE(desc, (info)->base_addr); \
+ USEGD_SETLIMIT(desc, (info)->limit); \
+ (desc)->usd_type = ((info)->contents << 2) | \
+ ((info)->read_exec_only ^ 1) << 1 | 0x10; \
+ (desc)->usd_dpl = SEL_UPL; \
+ (desc)->usd_p = (info)->seg_not_present ^ 1; \
+ (desc)->usd_def32 = (info)->seg_32bit; \
+ (desc)->usd_gran = (info)->limit_in_pages; \
+ (desc)->usd_avl = (info)->useable; \
+ SETMODE(desc); \
+}
+
+/*
+ * Inverse of LDT_INFO_TO_DESC(): zero *info, then recover its fields from
+ * the user segment descriptor *desc. entry_number is not derivable from the
+ * descriptor and is left at zero for the caller to fill in.
+ *
+ * Like LDT_INFO_TO_DESC(), this expands to a braced block and must be used
+ * as a full statement.
+ */
+#define DESC_TO_LDT_INFO(desc, info) { \
+ bzero((info), sizeof (*(info))); \
+ (info)->base_addr = USEGD_GETBASE(desc); \
+ (info)->limit = USEGD_GETLIMIT(desc); \
+ (info)->seg_not_present = (desc)->usd_p ^ 1; \
+ (info)->contents = ((desc)->usd_type >> 2) & 3; \
+ (info)->read_exec_only = (((desc)->usd_type >> 1) & 1) ^ 1; \
+ (info)->seg_32bit = (desc)->usd_def32; \
+ (info)->limit_in_pages = (desc)->usd_gran; \
+ (info)->useable = (desc)->usd_avl; \
+}
+
+extern void lx_set_gdt(int, user_desc_t *);
+extern void lx_clear_gdt(int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_LDT_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h
new file mode 100644
index 0000000000..80c8079f0b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_PID_H
+#define _SYS_LX_PID_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/note.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+/*
+ * Mapping between a solaris (pid, tid) pair and the linux pid that
+ * represents it. Each entry is linked into two hash tables.
+ * NOTE(review): "stol" and "ltos" presumably stand for solaris-to-linux
+ * and linux-to-solaris lookups -- confirm against the implementation.
+ */
+struct lx_pid {
+ pid_t s_pid; /* the solaris pid and ... */
+ id_t s_tid; /* ... tid pair */
+ pid_t l_pid; /* the corresponding linux pid */
+ time_t l_start; /* birthday of this pid */
+ struct pid *l_pidp; /* presumably the struct pid behind l_pid */
+ struct lx_pid *stol_next; /* link in stol hash table */
+ struct lx_pid *ltos_next; /* link in ltos hash table */
+};
+
+extern int lx_pid_assign(kthread_t *);
+extern void lx_pid_reassign(kthread_t *);
+extern void lx_pid_rele(pid_t, id_t);
+extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *);
+extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *);
+extern void lx_pid_init(void);
+extern void lx_pid_fini(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_PID_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
new file mode 100644
index 0000000000..74bbc939a3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_PTM_LINUX_H
+#define _SYS_PTM_LINUX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Driver name and minor node name of the lx_ptm device. */
+#define LX_PTM_DRV "lx_ptm"
+#define LX_PTM_MINOR_NODE "lx_ptmajor"
+
+/*
+ * Yields the minor number of dev, minus one.
+ * NOTE(review): presumably the index of the matching pts device, which
+ * would make minor 0 reserved -- confirm against the driver.
+ */
+#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PTM_LINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
new file mode 100644
index 0000000000..b0ae748f3c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SCHED_H
+#define _SYS_LINUX_SCHED_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux scheduler policies.
+ */
+#define LX_SCHED_OTHER 0
+#define LX_SCHED_FIFO 1
+#define LX_SCHED_RR 2
+
+/* NOTE(review): presumably the highest Linux real-time priority -- confirm */
+#define LX_PRI_MAX 99
+
+typedef int l_pid_t;
+
+struct lx_sched_param {
+ int lx_sched_prio;
+};
+
+extern int sched_setprocset(procset_t *, l_pid_t);
+extern long do_priocntlsys(int, procset_t *, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
new file mode 100644
index 0000000000..b4d41d5241
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SYSCALLS_H
+#define _SYS_LINUX_SYSCALLS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+extern long lx_brk();
+extern long lx_getpid();
+extern long lx_getppid();
+extern long lx_clone();
+extern long lx_kill();
+extern long lx_tkill();
+extern long lx_modify_ldt();
+extern long lx_gettid();
+extern long lx_futex();
+extern long lx_get_thread_area();
+extern long lx_sched_getparam();
+extern long lx_sched_getscheduler();
+extern long lx_sched_rr_get_interval();
+extern long lx_sched_setparam();
+extern long lx_sched_setscheduler();
+extern long lx_set_thread_area();
+extern long lx_set_tid_address();
+extern long lx_setresgid();
+extern long lx_setresgid16();
+extern long lx_setresuid();
+extern long lx_setresuid16();
+extern long lx_sysinfo();
+extern long lx_setgroups();
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SYSCALLS_H */
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
new file mode 100644
index 0000000000..19a7577ac0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+
+/*
+ * The brk() system call needs to be in-kernel because Linux expects a call to
+ * brk(0) to return the current breakpoint. In Solaris, the process breakpoint
+ * is set up and managed by libc. Due to the way we link our libraries and the
+ * need for Linux to manage its own breakpoint, this has to remain in the
+ * kernel.
+ */
+extern int brk(caddr_t);
+
+/*
+ * Emulate Linux brk(2). A non-zero nva asks the native brk() to move the
+ * break; whether or not that works, the lwp's errno is cleared and the
+ * (possibly unchanged) end of the heap is returned. A zero nva is a pure
+ * query of the current break.
+ */
+long
+lx_brk(caddr_t nva)
+{
+	proc_t *procp = curproc;
+
+	if (nva != 0) {
+		klwp_t *lwpp = ttolwp(curthread);
+
+		(void) brk(nva);
+
+		/*
+		 * The manpage says otherwise, but a failing Linux brk()
+		 * leaves errno alone, so discard whatever brk() recorded.
+		 */
+		lwpp->lwp_errno = 0;
+	}
+
+	return ((long)(procp->p_brkbase + procp->p_brksize));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
new file mode 100644
index 0000000000..2af3c00bae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+/*
+ * Finish setting up a freshly cloned linux thread. The lwp itself already
+ * exists when we get here; what remains is recording the clone() options
+ * in the lwp's lx brand data:
+ *
+ *	- the exit signal encoded in the low byte of flags,
+ *	- an optional TLS descriptor (LX_CLONE_SETTLS),
+ *	- the child-cleartid address, thread group id and parent pid,
+ *	- optional stores of the new tid to ctidp and/or ptidp.
+ *
+ * Returns the new thread's linux pid, or the result of set_errno() with
+ * EINVAL (bad signal or TLS entry) or EFAULT (bad user address).
+ */
+/* ARGSUSED */
+long
+lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+	int gdt_entry = -1;	/* stays -1 unless a TLS entry is loaded */
+	int signo = flags & LX_CSIGNAL;
+
+	if (signo < 0 || signo > MAXSIG)
+		return (set_errno(EINVAL));
+
+	if (flags & LX_CLONE_SETTLS) {
+		struct ldt_info ldt;
+		struct user_desc desc;
+		struct user_desc *slotp;
+
+		if (copyin((caddr_t)ldtinfo, &ldt, sizeof (ldt)))
+			return (set_errno(EFAULT));
+
+		if (LDT_INFO_EMPTY(&ldt))
+			return (set_errno(EINVAL));
+
+		gdt_entry = ldt.entry_number;
+		if (gdt_entry < GDT_TLSMIN || gdt_entry > GDT_TLSMAX)
+			return (set_errno(EINVAL));
+
+		/*
+		 * Turn the user-level description into a real x86
+		 * descriptor, stash it in this LWP's TLS array, and load
+		 * that copy into the GDT.
+		 */
+		slotp = &lwpd->br_tls[gdt_entry - GDT_TLSMIN];
+		LDT_INFO_TO_DESC(&ldt, &desc);
+		bcopy(&desc, slotp, sizeof (desc));
+		lx_set_gdt(gdt_entry, slotp);
+	}
+
+	if (flags & LX_CLONE_CHILD_CLEARTID)
+		lwpd->br_clear_ctidp = ctidp;
+	else
+		lwpd->br_clear_ctidp = NULL;
+
+	/* A detached clone never signals its parent. */
+	lwpd->br_signal =
+	    (signo != 0 && !(flags & LX_CLONE_DETACH)) ? signo : 0;
+
+	if (flags & LX_CLONE_THREAD)
+		lwpd->br_tgid = curthread->t_procp->p_pid;
+
+	if (flags & LX_CLONE_PARENT)
+		lwpd->br_ppid = 0;
+
+	if ((flags & LX_CLONE_CHILD_SETTID) && ctidp != NULL) {
+		if (suword32(ctidp, lwpd->br_pid) != 0)
+			goto fault;
+	}
+	if ((flags & LX_CLONE_PARENT_SETTID) && ptidp != NULL) {
+		if (suword32(ptidp, lwpd->br_pid) != 0)
+			goto fault;
+	}
+
+	return (lwpd->br_pid);
+
+fault:
+	/* Undo the GDT load, if there was one, before failing. */
+	if (gdt_entry >= 0)
+		lx_clear_gdt(gdt_entry);
+	return (set_errno(EFAULT));
+}
+
+/*
+ * Emulate Linux set_tid_address(2): remember tidp as the calling thread's
+ * clear-child-tid address and return the caller's linux thread id.
+ */
+long
+lx_set_tid_address(int *tidp)
+{
+	lx_lwp_data_t *self = ttolxlwp(curthread);
+
+	self->br_clear_ctidp = tidp;
+	return (self->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
new file mode 100644
index 0000000000..ee5fa7993d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
@@ -0,0 +1,471 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/lx_futex.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper. This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex is itself a 4-byte integer, which must be 4-byte aligned. The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations. The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call. There are 5 different operations
+ * that can be performed on a futex, using this system call. Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user. Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ * long sys_futex(void *futex1, int cmd, int val1,
+ * struct timespec *timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ * Atomically verify that futex1 contains the value val1. If it
+ * doesn't, return EWOULDBLOCK. If it does contain the expected
+ * value, the thread will sleep until somebody performs a FUTEX_WAKE
+ * on the futex. The caller may also specify a timeout, indicating
+ * the maximum time the thread should sleep. If the timer expires,
+ * the call returns ETIMEDOUT. If the thread is awoken with a signal,
+ * the call returns EINTR. Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ * Wake up val1 processes that are waiting on futex1. The call
+ * returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ * If the value stored in futex1 matches the value passed in val2, wake
+ * up val1 processes that are waiting on futex1. Otherwise, return
+ * EAGAIN.
+ *
+ * If there are more than val1 threads waiting on the futex, remove
+ * the remaining threads from this futex, and requeue them on futex2.
+ * The caller can limit the number of threads being requeued by
+ * encoding an integral numerical value in the position usually used
+ * for the timeout pointer.
+ *
+ * The call returns the number of blocked threads that were woken up
+ * or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ * Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ * This command has been declared broken and obsolete, but we still
+ * need to support it.
+ *
+ * FUTEX_FD
+ *
+ * Return a file descriptor, which can be used to refer to the futex.
+ * We don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex. There is one fwaiter_t for each blocked thread. We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket. If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ */
+typedef struct fwaiter {
+ memid_t fw_memid; /* memid of the user-space futex */
+ kcondvar_t fw_cv; /* cond var */
+ struct fwaiter *fw_next; /* hash queue */
+ struct fwaiter *fw_prev; /* hash queue */
+ /*
+ * Set to 1 by the waker, with the bucket lock held, after it has
+ * hashed the waiter out and before it signals fw_cv. futex_wait()
+ * uses it to tell a real wakeup from a timeout or signal.
+ */
+ volatile int fw_woken;
+} fwaiter_t;
+
+/*
+ * Copy memid *s into *d, one val[] word at a time. Expands to a braced
+ * block, so use it only as a full statement.
+ */
+#define MEMID_COPY(s, d) \
+ { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; }
+/* True when both val[] words of the two memids are equal. */
+#define MEMID_EQUAL(s, d) \
+ ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1])
+
+/* Borrowed from the page freelist hash code. */
+#define HASH_SHIFT_SZ 7
+#define HASH_SIZE (1 << HASH_SHIFT_SZ)
+/* Fold both words of memid *id into a bucket index in [0, HASH_SIZE). */
+#define HASH_FUNC(id) \
+ ((((uintptr_t)((id)->val[1]) >> PAGESHIFT) + \
+ ((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> 3) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \
+ (HASH_SIZE - 1))
+
+/*
+ * Bucket heads and their locks: futex_hash_lock[i] must be held while the
+ * waiter list rooted at futex_hash[i] is read or modified.
+ */
+static fwaiter_t *futex_hash[HASH_SIZE];
+static kmutex_t futex_hash_lock[HASH_SIZE];
+
+/*
+ * Push a waiter onto the front of the hash bucket selected by its memid.
+ * The caller must hold that bucket's lock.
+ */
+static void
+futex_hashin(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *head;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	head = futex_hash[bucket];
+
+	fwp->fw_prev = NULL;
+	fwp->fw_next = head;
+	if (head != NULL)
+		head->fw_prev = fwp;
+
+	futex_hash[bucket] = fwp;
+}
+
+/*
+ * Unlink a waiter from the hash bucket selected by its memid and clear its
+ * list pointers. The caller must hold that bucket's lock.
+ */
+static void
+futex_hashout(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *before = fwp->fw_prev;
+	fwaiter_t *after = fwp->fw_next;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	if (before != NULL)
+		before->fw_next = after;
+	if (after != NULL)
+		after->fw_prev = before;
+	if (futex_hash[bucket] == fwp)
+		futex_hash[bucket] = after;
+
+	fwp->fw_next = NULL;
+	fwp->fw_prev = NULL;
+}
+
+/*
+ * Go to sleep until somebody does a WAKE operation on this futex, we get a
+ * signal, or the timeout expires.
+ *
+ *	memid	- memid of the futex word, used as the hash key
+ *	addr	- user address of the futex word
+ *	val	- value the futex word must still hold for us to sleep
+ *	timeout	- absolute timeout, or NULL to wait indefinitely
+ *
+ * Returns 0 if woken by a wake or requeue operation. Otherwise returns the
+ * result of set_errno() with EFAULT (addr unreadable), EWOULDBLOCK (the
+ * futex word no longer holds val), ETIMEDOUT, or EINTR.
+ */
+static int
+futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout)
+{
+	int err, ret;
+	int32_t curval;
+	fwaiter_t fw;
+	int index;
+
+	fw.fw_woken = 0;
+	MEMID_COPY(memid, &fw.fw_memid);
+	cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL);
+
+	index = HASH_FUNC(&fw.fw_memid);
+	mutex_enter(&futex_hash_lock[index]);
+
+	/*
+	 * The value check and the enqueue both happen under the bucket
+	 * lock, so a waker that changes the word and then takes the lock
+	 * cannot slip in between them.
+	 */
+	if (fuword32(addr, (uint32_t *)&curval)) {
+		err = set_errno(EFAULT);
+		goto out;
+	}
+	if (curval != val) {
+		err = set_errno(EWOULDBLOCK);
+		goto out;
+	}
+
+	futex_hashin(&fw);
+
+	err = 0;
+	while ((fw.fw_woken == 0) && (err == 0)) {
+		ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index],
+		    timeout, timechanged);
+		if (ret < 0)
+			err = set_errno(ETIMEDOUT);
+		else if (ret == 0)
+			err = set_errno(EINTR);
+	}
+
+	/*
+	 * The futex is normally hashed out in wakeup.  If we timed out or
+	 * got a signal, we need to hash it out here instead.
+	 */
+	if (fw.fw_woken == 0)
+		futex_hashout(&fw);
+
+out:
+	mutex_exit(&futex_hash_lock[index]);
+
+	/*
+	 * fw is off the hash and the bucket lock is dropped, so nothing can
+	 * reach the condition variable any more.  Pair the cv_init() above
+	 * with a cv_destroy() before the stack frame goes away.
+	 */
+	cv_destroy(&fw.fw_cv);
+
+	return (err);
+}
+
+/*
+ * Wake at most wake_threads of the threads blocked on the futex identified
+ * by memid, and return how many were actually woken.
+ */
+static int
+futex_wake(memid_t *memid, int wake_threads)
+{
+	int bucket = HASH_FUNC(memid);
+	kmutex_t *lockp = &futex_hash_lock[bucket];
+	fwaiter_t *cur, *following;
+	int nwoken = 0;
+
+	mutex_enter(lockp);
+
+	cur = futex_hash[bucket];
+	while (cur != NULL && nwoken < wake_threads) {
+		/* Grab the successor first; hashing out clears fw_next. */
+		following = cur->fw_next;
+
+		if (MEMID_EQUAL(&cur->fw_memid, memid)) {
+			futex_hashout(cur);
+			cur->fw_woken = 1;
+			cv_signal(&cur->fw_cv);
+			nwoken++;
+		}
+
+		cur = following;
+	}
+
+	mutex_exit(lockp);
+
+	return (nwoken);
+}
+
+/*
+ * Wake up to wake_threads waiting on the futex at memid.  If there are
+ * more than that many threads waiting, requeue up to requeue_threads of
+ * the remaining threads on the futex at requeue_memid.
+ *
+ * If cmpval is non-NULL, the 32-bit word at user address addr must equal
+ * *cmpval or nothing is woken or requeued.
+ *
+ * Returns the number of threads woken plus the number requeued.  On
+ * failure returns the result of set_errno() with EFAULT (addr unreadable)
+ * or EAGAIN (comparison failed).
+ */
+static int
+futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads,
+    ulong_t requeue_threads, caddr_t addr, int *cmpval)
+{
+	fwaiter_t *fwp, *next;
+	int index1, index2;
+	int ret = 0;
+	ulong_t requeued = 0;
+	int32_t curval;
+	kmutex_t *l1, *l2;
+
+	/*
+	 * To ensure that we don't miss a wakeup if the value of cmpval
+	 * changes, we need to grab locks on both the original and new hash
+	 * buckets.  To avoid deadlock, we always grab the lower-indexed
+	 * lock first.
+	 */
+	index1 = HASH_FUNC(memid);
+	index2 = HASH_FUNC(requeue_memid);
+
+	if (index1 == index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = NULL;
+	} else if (index1 < index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = &futex_hash_lock[index2];
+	} else {
+		l1 = &futex_hash_lock[index2];
+		l2 = &futex_hash_lock[index1];
+	}
+
+	mutex_enter(l1);
+	if (l2 != NULL)
+		mutex_enter(l2);
+
+	if (cmpval != NULL) {
+		if (fuword32(addr, (uint32_t *)&curval)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (curval != *cmpval) {
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
+
+	for (fwp = futex_hash[index1]; fwp; fwp = next) {
+		/* Save the successor; hashing out clears fw_next. */
+		next = fwp->fw_next;
+		if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+			continue;
+
+		if (ret < wake_threads) {
+			futex_hashout(fwp);
+			fwp->fw_woken = 1;
+			cv_signal(&fwp->fw_cv);
+		} else if (requeued < requeue_threads) {
+			/*
+			 * Rekey the waiter to the new futex.  It lands at
+			 * the head of its new bucket, so even when both
+			 * futexes share a bucket this walk never sees it
+			 * again.
+			 */
+			futex_hashout(fwp);
+			MEMID_COPY(requeue_memid, &fwp->fw_memid);
+			futex_hashin(fwp);
+			requeued++;
+		} else {
+			/*
+			 * Both quotas are used up.  Check the requeue limit
+			 * before touching the waiter, so that a limit of
+			 * zero requeues nobody.
+			 */
+			break;
+		}
+		ret++;
+	}
+
+out:
+	if (l2 != NULL)
+		mutex_exit(l2);
+	mutex_exit(l1);
+
+	if (ret < 0)
+		return (set_errno(-ret));
+	return (ret);
+}
+
+/*
+ * Copy in the relative timeout provided by the application and convert it
+ * to an absolute timeout.
+ *
+ * lx_timeout is the user address of the timespec; *timeout receives the
+ * absolute result. The user structure is read in the layout matching the
+ * caller's data model.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, or EINVAL if
+ * itimerspecfix() rejects the value. These are plain errno values; calling
+ * set_errno() is left to the caller.
+ */
+static int
+get_timeout(void *lx_timeout, timestruc_t *timeout)
+{
+ timestruc_t now;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(lx_timeout, timeout, sizeof (timestruc_t)))
+ return (EFAULT);
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ /* 32-bit caller: read the 32-bit layout and widen each field */
+ timestruc32_t timeout32;
+ if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t)))
+ return (EFAULT);
+ timeout->tv_sec = (time_t)timeout32.tv_sec;
+ timeout->tv_nsec = timeout32.tv_nsec;
+ }
+#endif
+ gethrestime(&now);
+
+ if (itimerspecfix(timeout))
+ return (EINVAL);
+
+ /* relative + now = absolute */
+ timespecadd(timeout, &now);
+ return (0);
+}
+
+/*
+ * Entry point for the Linux futex(2) system call.
+ *
+ *	addr		- user address of the futex word (futex1)
+ *	cmd		- one of the FUTEX_* commands
+ *	val		- value to compare against (WAIT) or number of
+ *			  threads to wake (WAKE, REQUEUE, CMP_REQUEUE)
+ *	lx_timeout	- for WAIT, user address of a relative timespec, or
+ *			  0 for none; for the requeue commands, the maximum
+ *			  number of threads to requeue
+ *	addr2		- for the requeue commands, user address of the
+ *			  futex to requeue onto (futex2)
+ *	val2		- for CMP_REQUEUE, the value futex1 must hold
+ *
+ * Returns 0 for a successful WAIT and the number of threads woken (or woken
+ * plus requeued) for the other commands.  On failure returns the result of
+ * set_errno().
+ */
+long
+lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+    uintptr_t addr2, int val2)
+{
+	struct as *as = curproc->p_as;
+	memid_t memid, requeue_memid;
+	timestruc_t timeout;
+	timestruc_t *tptr = NULL;
+	int requeue_threads = 0;
+	int *requeue_cmp = NULL;
+	int rval = 0;
+
+	/* must be aligned on int boundary */
+	if (addr & 0x3)
+		return (set_errno(EINVAL));
+
+	/* Sanity check the futex command */
+	if (cmd < 0 || cmd > FUTEX_MAX_CMD)
+		return (set_errno(EINVAL));
+
+	/*
+	 * FUTEX_FD is a valid command number but is not supported.  Fail
+	 * it explicitly instead of falling through the switch below and
+	 * reporting success.
+	 */
+	if (cmd == FUTEX_FD)
+		return (set_errno(ENOSYS));
+
+	/* Copy in the timeout structure from userspace. */
+	if (cmd == FUTEX_WAIT && lx_timeout != 0) {
+		rval = get_timeout((timespec_t *)lx_timeout, &timeout);
+		if (rval != 0)
+			return (set_errno(rval));
+		tptr = &timeout;
+	}
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		if (cmd == FUTEX_CMP_REQUEUE)
+			requeue_cmp = &val2;
+
+		/*
+		 * lx_timeout is nominally a pointer to a userspace
+		 * address.  For these two commands, it actually contains
+		 * an integer which indicates the maximum number of threads
+		 * to requeue.  This is horrible, and I'm sorry.
+		 */
+		requeue_threads = (int)lx_timeout;
+	}
+
+	/*
+	 * Translate the process-specific, user-space futex virtual
+	 * address(es) to universal memid.
+	 */
+	rval = as_getmemid(as, (void *)addr, &memid);
+	if (rval != 0)
+		return (set_errno(rval));
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		rval = as_getmemid(as, (void *)addr2, &requeue_memid);
+		if (rval)
+			return (set_errno(rval));
+	}
+
+	switch (cmd) {
+	case FUTEX_WAIT:
+		rval = futex_wait(&memid, (void *)addr, val, tptr);
+		break;
+
+	case FUTEX_WAKE:
+		rval = futex_wake(&memid, val);
+		break;
+
+	case FUTEX_CMP_REQUEUE:
+	case FUTEX_REQUEUE:
+		/*
+		 * The CMP_REQUEUE comparison is made against the word at
+		 * futex1 (addr), not at the requeue target (addr2).
+		 */
+		rval = futex_requeue(&memid, &requeue_memid, val,
+		    requeue_threads, (void *)addr, requeue_cmp);
+
+		break;
+	}
+
+	return (rval);
+}
+
+/*
+ * Set up the futex hash: initialize every bucket lock and empty every
+ * bucket.
+ */
+void
+lx_futex_init(void)
+{
+	kmutex_t *lockp;
+
+	for (lockp = &futex_hash_lock[0];
+	    lockp < &futex_hash_lock[HASH_SIZE]; lockp++) {
+		mutex_init(lockp, NULL, MUTEX_DEFAULT, NULL);
+	}
+
+	bzero(futex_hash, sizeof (futex_hash));
+}
+
+/*
+ * Check whether the futex hash can be torn down.  Returns EBUSY as soon as
+ * a bucket with a waiter on it is found, and 0 if every bucket is empty.
+ */
+int
+lx_futex_fini(void)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		int busy;
+
+		mutex_enter(&futex_hash_lock[bucket]);
+		busy = (futex_hash[bucket] != NULL);
+		mutex_exit(&futex_hash_lock[bucket]);
+
+		if (busy)
+			return (EBUSY);
+	}
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
new file mode 100644
index 0000000000..aa8b2b40e1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * Return the process id as seen by the Linux caller.
+ *
+ * The zone's init process is always reported as pid 1.  Every other caller
+ * gets the br_tgid value stored in its lx per-lwp brand data.
+ */
+long
+lx_getpid(void)
+{
+	lx_lwp_data_t *lwpd;
+
+	if (curproc->p_pid == curproc->p_zone->zone_proc_initpid)
+		return (1);
+
+	/* Only non-init callers need (and are required to have) brand data. */
+	lwpd = ttolxlwp(curthread);
+	ASSERT(lwpd != NULL);
+
+	return (lwpd->br_tgid);
+}
+
+/*
+ * Return the parent process id, as computed by lx_lwp_ppid() for the
+ * calling lwp.
+ */
+long
+lx_getppid(void)
+{
+	klwp_t *lwp = ttolwp(curthread);
+
+	return (lx_lwp_ppid(lwp, NULL, NULL));
+}
+
+/*
+ * Return the thread id: the br_pid field of the calling lwp's lx brand
+ * data.
+ */
+long
+lx_gettid(void)
+{
+	lx_lwp_data_t *self;
+
+	self = ttolxlwp(curthread);
+	return (self->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c
new file mode 100644
index 0000000000..5ca18b7556
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+/* 16-bit and 32-bit flavours of the user/group id types used by the callers. */
+typedef ushort_t l_uid16_t;
+typedef ushort_t l_gid16_t;
+typedef uint_t l_uid_t;
+typedef uint_t l_gid_t;
+
+/*
+ * Widen a 16-bit uid to 32 bits.  A 16-bit -1 is mapped to a 32-bit -1
+ * rather than to 0xffff; every other value is zero-extended.
+ */
+#define LINUX_UID16_TO_UID32(uid16) \
+ (((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+/* Same widening rule as LINUX_UID16_TO_UID32, for group ids. */
+#define LINUX_GID16_TO_GID32(gid16) \
+ (((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+/* Largest group count lx_setgroups() will clamp (DEBUG kernels only). */
+#define LX_NGROUPS_MAX 32
+extern int setgroups(int, gid_t *);
+
+/*
+ * This function is based on setreuid in common/syscall/uid.c and exists
+ * because Solaris does not have a way to explicitly set the saved uid (suid)
+ * from any other system call.
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+ proc_t *p;
+ int error = 0;
+ /* do_nocd: set when any of the three uids ends up different */
+ int do_nocd = 0;
+ /* uidchge: set once upcount_inc() has been done for the new ruid */
+ int uidchge = 0;
+ uid_t oldruid = ruid;
+ cred_t *cr, *newcr;
+ zoneid_t zoneid = getzoneid();
+
+ /* -1 means "leave this id alone"; anything else must be <= MAXUID. */
+ if ((ruid != -1 && (ruid > MAXUID)) ||
+ (euid != -1 && (euid > MAXUID)) ||
+ (suid != -1 && (suid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+
+retry:
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ /*
+ * Each requested id must either match one of the current real,
+ * effective or saved uids, or be allowed by secpolicy_allow_setid()
+ * (a non-zero return from it is treated as denial).
+ */
+ if (ruid != -1 &&
+ ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+ ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+ error = EPERM;
+ } else if (euid != -1 &&
+ euid != cr->cr_ruid && euid != cr->cr_uid &&
+ euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+ error = EPERM;
+ } else if (suid != -1 &&
+ suid != cr->cr_ruid && suid != cr->cr_uid &&
+ suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+ error = EPERM;
+ } else {
+ if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+ /*
+ * The ruid of the process is going to change. In order
+ * to avoid a race condition involving the
+ * process count associated with the newly given ruid,
+ * we increment the count before assigning the
+ * credential to the process.
+ * To do that, we'll have to take pidlock, so we first
+ * release p_crlock.
+ */
+ mutex_exit(&p->p_crlock);
+ uidchge = 1;
+ mutex_enter(&pidlock);
+ upcount_inc(ruid, zoneid);
+ mutex_exit(&pidlock);
+ /*
+ * As we released p_crlock we can't rely on the cr
+ * we read. So retry the whole thing.
+ */
+ goto retry;
+ }
+ /*
+ * Hold the old cred while it is copied into newcr and newcr is
+ * installed as the process cred; the hold is dropped by the
+ * crfree(cr) at the end of this branch.
+ */
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (euid != -1)
+ newcr->cr_uid = euid;
+ if (suid != -1)
+ newcr->cr_suid = suid;
+ if (ruid != -1) {
+ oldruid = newcr->cr_ruid;
+ newcr->cr_ruid = ruid;
+ ASSERT(ruid != oldruid ? uidchge : 1);
+ }
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_uid != newcr->cr_uid ||
+ cr->cr_ruid != newcr->cr_ruid ||
+ cr->cr_suid != newcr->cr_suid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ /*
+ * We decrement the number of processes associated with the oldruid
+ * to match the increment above, even if the ruid of the process
+ * did not change or an error occurred (oldruid == uid).
+ */
+ if (uidchge) {
+ ASSERT(oldruid != -1 && ruid != -1);
+ mutex_enter(&pidlock);
+ upcount_dec(oldruid, zoneid);
+ mutex_exit(&pidlock);
+ }
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ /* Failure after allocation: the unused pre-allocated cred is released. */
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+/*
+ * 16-bit uid flavour of lx_setresuid(): each argument is widened with
+ * LINUX_UID16_TO_UID32 (so a 16-bit -1 stays -1) and the call is forwarded.
+ */
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+	l_uid_t ruid = LINUX_UID16_TO_UID32(ruid16);
+	l_uid_t euid = LINUX_UID16_TO_UID32(euid16);
+	l_uid_t suid = LINUX_UID16_TO_UID32(suid16);
+
+	return (lx_setresuid(ruid, euid, suid));
+}
+
+/*
+ * This function is based on setregid in common/syscall/gid.c
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+ proc_t *p;
+ int error = 0;
+ /* do_nocd: set when any of the three gids ends up different */
+ int do_nocd = 0;
+ cred_t *cr, *newcr;
+
+ /* -1 means "leave this id alone"; anything else must be <= MAXUID. */
+ if ((rgid != -1 && (rgid > MAXUID)) ||
+ (egid != -1 && (egid > MAXUID)) ||
+ (sgid != -1 && (sgid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ /*
+ * Each requested id must either match one of the current real,
+ * effective or saved gids, or be allowed by secpolicy_allow_setid()
+ * (a non-zero return from it is treated as denial).
+ */
+ if (rgid != -1 &&
+ rgid != cr->cr_rgid && rgid != cr->cr_gid &&
+ rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (egid != -1 &&
+ egid != cr->cr_rgid && egid != cr->cr_gid &&
+ egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (sgid != -1 &&
+ sgid != cr->cr_rgid && sgid != cr->cr_gid &&
+ sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else {
+ /*
+ * Hold the old cred while it is copied into newcr and newcr is
+ * installed as the process cred; the hold is dropped by the
+ * crfree(cr) at the end of this branch.
+ */
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (egid != -1)
+ newcr->cr_gid = egid;
+ if (sgid != -1)
+ newcr->cr_sgid = sgid;
+ if (rgid != -1)
+ newcr->cr_rgid = rgid;
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_gid != newcr->cr_gid ||
+ cr->cr_rgid != newcr->cr_rgid ||
+ cr->cr_sgid != newcr->cr_sgid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ /* Failure after allocation: the unused pre-allocated cred is released. */
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+/*
+ * 16-bit gid flavour of lx_setresgid(): each argument is widened with
+ * LINUX_GID16_TO_GID32 (so a 16-bit -1 stays -1) and the call is forwarded.
+ */
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+	l_gid_t rgid = LINUX_GID16_TO_GID32(rgid16);
+	l_gid_t egid = LINUX_GID16_TO_GID32(egid16);
+	l_gid_t sgid = LINUX_GID16_TO_GID32(sgid16);
+
+	return (lx_setresgid(rgid, egid, sgid));
+}
+
+/*
+ * Linux defines NGROUPS_MAX to be 32, but on Solaris it is only 16. We employ
+ * the terrible hack below so that tests may proceed, if only on DEBUG kernels.
+ */
+long
+lx_setgroups(int ngroups, gid_t *grouplist)
+{
+	int count = ngroups;
+
+#ifdef DEBUG
+	/*
+	 * On DEBUG kernels only: a request larger than ngroups_max but within
+	 * LX_NGROUPS_MAX is quietly truncated to ngroups_max.
+	 */
+	if (count <= LX_NGROUPS_MAX && count > ngroups_max)
+		count = ngroups_max;
+#endif /* DEBUG */
+
+	return (setgroups(count, grouplist));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
new file mode 100644
index 0000000000..a3983eeb75
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
@@ -0,0 +1,253 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/thread.h>
+#include <sys/signal.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <lx_signum.h>
+
+extern int kill(pid_t, int);
+
+/*
+ * Check if it is legal to send this signal to the init process. Linux
+ * kill(2) semantics dictate that no _unhandled_ signal may be sent to pid
+ * 1.
+ */
+static int
+init_sig_check(int sig, pid_t pid)
+{
+	proc_t *initp;
+	int err = 0;
+
+	mutex_enter(&pidlock);
+
+	initp = prfind(pid);
+	if (initp == NULL || initp->p_stat == SIDL) {
+		/* No such process, or it is still being created. */
+		err = ESRCH;
+	} else if (sig != 0) {
+		/*
+		 * Signal 0 is always acceptable.  Any other signal is refused
+		 * when it cannot be masked or when the target leaves it at
+		 * its default disposition or ignores it.
+		 */
+		if (sigismember(&cantmask, sig) ||
+		    PTOU(initp)->u_signal[sig - 1] == SIG_DFL ||
+		    PTOU(initp)->u_signal[sig - 1] == SIG_IGN)
+			err = EPERM;
+	}
+
+	mutex_exit(&pidlock);
+
+	return (err);
+}
+
+/*
+ * Send Linux signal lx_sig to the single thread identified by the Linux
+ * pid 'pid'.  A signal of 0 only checks that the thread exists.
+ *
+ * Returns 0 on success, otherwise the result of set_errno() with EINVAL
+ * (bad pid or signal), ESRCH (no such process/thread) or EPERM.
+ */
+long
+lx_tkill(pid_t pid, int lx_sig)
+{
+ kthread_t *t;
+ proc_t *pp;
+ pid_t initpid;
+ sigqueue_t *sqp;
+ struct lx_lwp_data *br = ttolxlwp(curthread);
+ int tid = 1; /* default tid */
+ int sig, rv;
+
+ /*
+ * Unlike kill(2), Linux tkill(2) doesn't allow signals to
+ * be sent to process IDs <= 0 as it doesn't overlay any special
+ * semantics on the pid.
+ */
+ if ((pid <= 0) || ((lx_sig < 0) || (lx_sig >= LX_NSIG)) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * If the Linux pid is 1, translate the pid to the actual init
+ * pid for the zone. Note that Linux dictates that no unhandled
+ * signals may be sent to init, so check for that, too.
+ *
+ * Otherwise, extract the tid and real pid from the Linux pid.
+ */
+ initpid = curproc->p_zone->zone_proc_initpid;
+ if (pid == 1)
+ pid = initpid;
+ /*
+ * NOTE(review): when pid == initpid and init_sig_check() succeeds, the
+ * else-if below still runs lx_lpid_to_spair() on the translated pid --
+ * confirm that this is intended.
+ */
+ if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0))
+ return (set_errno(rv));
+ else if (lx_lpid_to_spair(pid, &pid, &tid) < 0)
+ return (set_errno(ESRCH));
+
+ /* Allocated up front (KM_SLEEP), before any lock is taken below. */
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+ /*
+ * Find the process for the passed pid...
+ */
+ mutex_enter(&pidlock);
+ if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) {
+ mutex_exit(&pidlock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+ /* Take the process lock before dropping pidlock. */
+ mutex_enter(&pp->p_lock);
+ mutex_exit(&pidlock);
+
+ /*
+ * Deny permission to send the signal if either of the following
+ * is true:
+ *
+ * + The signal is SIGCONT and the target pid is not in the same
+ * session as the sender
+ *
+ * + prochasprocperm() shows the user lacks sufficient permission
+ * to send the signal to the target pid
+ */
+ if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) ||
+ (!prochasprocperm(pp, curproc, CRED()))) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(EPERM);
+ goto free_and_exit;
+ }
+
+ /* check for the tid */
+ if ((t = idtot(pp, tid)) == NULL) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+
+ /* a signal of 0 means just check for the existence of the thread */
+ if (lx_sig == 0) {
+ mutex_exit(&pp->p_lock);
+ rv = 0;
+ goto free_and_exit;
+ }
+
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = SI_LWP;
+ sqp->sq_info.si_pid = br->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ /*
+ * sqp is not freed on this path; presumably sigaddqa() takes
+ * ownership of it -- confirm.
+ */
+ sigaddqa(pp, t, sqp);
+
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+
+free_and_exit:
+ kmem_free(sqp, sizeof (sigqueue_t));
+ return (rv);
+}
+
+/*
+ * Linux kill(2).
+ *
+ * lx_pid selects the target(s):
+ *    > 0   a single process (1 is translated to the zone's init process)
+ *    0     passed to the native kill() as pid 0
+ *    -1    every process in the zone except init and the caller
+ *    < -1  every process in process group -lx_pid, except init
+ * lx_sig is a Linux signal number; it is translated through ltos_signo[].
+ *
+ * Returns 0 on success.  Failures are reported through set_errno(): EINVAL
+ * for a bad signal, ESRCH when nothing matched, EPERM when the signal was
+ * not permitted, or whatever the native kill()/sigsendproc() reported.
+ */
+long
+lx_kill(pid_t lx_pid, int lx_sig)
+{
+	pid_t s_pid, initpid;
+	sigsend_t v;
+	zone_t *zone = curproc->p_zone;
+	struct proc *p;
+	int err, sig, nfound;
+
+	if ((lx_sig < 0) || (lx_sig >= LX_NSIG) ||
+	    ((sig = ltos_signo[lx_sig]) < 0))
+		return (set_errno(EINVAL));
+
+	/*
+	 * Since some linux apps rely on init(1M) having PID 1, we
+	 * transparently translate 1 to the real init(1M)'s pid.
+	 */
+	initpid = zone->zone_proc_initpid;
+	if (lx_pid == 1 || lx_pid == -1) {
+		s_pid = initpid;
+	} else if (lx_pid == 0) {
+		s_pid = 0;
+	} else if (lx_pid > 0) {
+		if (lx_lpid_to_spair(lx_pid, &s_pid, NULL) != 0) {
+			/*
+			 * If we didn't find this pid that means it doesn't
+			 * exist in this zone.
+			 */
+			return (set_errno(ESRCH));
+		}
+	} else {
+		ASSERT(lx_pid < 0);
+		if (lx_lpid_to_spair(-lx_pid, &s_pid, NULL) != 0) {
+			/*
+			 * If we didn't find this pid it means that the
+			 * process group leader doesn't exist in this zone.
+			 * In this case assuming that the Linux pid is
+			 * the same as the Solaris pid will get us the
+			 * correct behavior.
+			 */
+			s_pid = -lx_pid;
+		}
+	}
+
+	/*
+	 * Check that it is legal to send this signal to init(1M), but only
+	 * when init itself is the target.  For lx_pid == -1 and for process
+	 * groups (lx_pid < -1) the loop below never signals init, so init's
+	 * signal dispositions must not cause those requests to fail.
+	 */
+	if ((lx_pid > 0) && (s_pid == initpid) &&
+	    ((err = init_sig_check(sig, s_pid)) != 0))
+		return (set_errno(err));
+
+	/*
+	 * For individual processes, kill() semantics are the same between
+	 * Solaris and Linux.
+	 */
+	if (lx_pid >= 0)
+		return (kill(s_pid, sig));
+
+	/*
+	 * In Solaris, sending a signal to -pid means "send a signal to
+	 * everyone in process group pid."  In Linux it means "send a
+	 * signal to everyone in the group other than init."  Sending a
+	 * signal to -1 means "send a signal to every process except init
+	 * and myself."
+	 */
+
+	bzero(&v, sizeof (v));
+	v.sig = sig;
+	v.checkperm = 1;
+	v.sicode = SI_USER;
+	err = 0;
+
+	mutex_enter(&pidlock);
+
+	/* Walk all active processes for -1, else the process group chain. */
+	p = (lx_pid == -1) ? practive : pgfind(s_pid);
+	nfound = 0;
+	while (err == 0 && p != NULL) {
+		/*
+		 * Skip other zones, half-created processes and init; for
+		 * lx_pid == -1 the caller is skipped as well.
+		 */
+		if ((p->p_zone == zone) && (p->p_stat != SIDL) &&
+		    (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) {
+			nfound++;
+			err = sigsendproc(p, &v);
+		}
+
+		p = (lx_pid == -1) ? p->p_next : p->p_pglink;
+	}
+	mutex_exit(&pidlock);
+
+	if (nfound == 0)
+		err = ESRCH;
+	else if (err == 0 && v.perm == 0)
+		err = EPERM;
+	return (err ? set_errno(err) : 0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
new file mode 100644
index 0000000000..aa6e12a7d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/segments.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/sysi86.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Read the ldt_info structure in from the Linux app, convert it to an ssd
+ * structure, and then call setdscr() to do all the heavy lifting.
+ */
+static int
+write_ldt(void *data, ulong_t count)
+{
+	struct ldt_info info;
+	user_desc_t desc;
+	struct ssd ssd;
+	proc_t *p = curthread->t_procp;
+	int error;
+
+	/* The caller must hand us exactly one ldt_info structure. */
+	if (count != sizeof (info))
+		return (set_errno(EINVAL));
+
+	if (copyin(data, &info, sizeof (info)) != 0)
+		return (set_errno(EFAULT));
+
+	if (info.entry_number >= MAXNLDT)
+		return (set_errno(EINVAL));
+
+	/* ldt_info -> user descriptor -> ssd, which is what setdscr() takes. */
+	LDT_INFO_TO_DESC(&info, &desc);
+	usd_to_ssd(&desc, &ssd, SEL_LDT(info.entry_number));
+
+	/*
+	 * Hold the process's lwps for the duration of the LDT change; they
+	 * are released again below whether or not setdscr() succeeded.
+	 */
+	if (!holdlwps(SHOLDFORK1))
+		return (set_errno(EINTR));
+
+	error = setdscr(&ssd);
+
+	mutex_enter(&p->p_lock);
+	continuelwps(p);
+	mutex_exit(&p->p_lock);
+
+	if (error != 0)
+		return (set_errno(error));
+
+	return (0);
+}
+
+/*
+ * Copy the calling process's LDT out to the user buffer at uptr.  At most
+ * 'count' bytes are copied.
+ *
+ * Returns the number of bytes copied (0 if the process has no LDT), or the
+ * result of set_errno(EFAULT) if the copyout fails.
+ */
+static int
+read_ldt(void *uptr, ulong_t count)
+{
+	proc_t *p = curproc;
+	int nbytes;
+
+	if (p->p_ldt == NULL)
+		return (0);
+
+	/* Size of the whole table, clipped to what the caller can accept. */
+	nbytes = (p->p_ldtlimit + 1) * sizeof (user_desc_t);
+	if (count < nbytes)
+		nbytes = count;
+
+	if (copyout(p->p_ldt, uptr, nbytes) != 0)
+		return (set_errno(EFAULT));
+
+	return (nbytes);
+}
+
+/*
+ * Dispatch on 'op': 0 reads the LDT into 'data', 1 writes one entry
+ * described by 'data'; anything else fails with ENOSYS.
+ */
+long
+lx_modify_ldt(int op, void *data, ulong_t count)
+{
+	if (op == 0)
+		return (read_ldt(data, count));
+
+	if (op == 1)
+		return (write_ldt(data, count));
+
+	return (set_errno(ENOSYS));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
new file mode 100644
index 0000000000..bb91a752d2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -0,0 +1,513 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/rtpriocntl.h>
+#include <sys/tspriocntl.h>
+#include <sys/processor.h>
+#include <sys/brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_sched.h>
+#include <sys/lx_brand.h>
+
+extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+
+/*
+ * Get or set the CPU affinity mask recorded in a thread's lx brand data.
+ *
+ * cmd    B_GET_AFFINITY_MASK or B_SET_AFFINITY_MASK; anything else is EINVAL.
+ * pid    Linux pid of the target thread, or 0 for the calling thread.  Only
+ *        threads of the calling process are supported (EPERM otherwise).
+ * len    size of the user buffer.  For a get, 0 asks for the required size
+ *        and any other value below sizeof (lx_affmask_t) is EINVAL.
+ *        NOTE(review): len is not checked for a set; sizeof (lx_affmask_t)
+ *        bytes are always read from maskp -- confirm callers guarantee this.
+ * maskp  user address of the mask buffer.
+ * rval   result value: the mask size for a get, 0 for a set, -1 when a get
+ *        buffer is too small.
+ *
+ * Returns 0 on success, otherwise the result of set_errno().
+ */
+int
+lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp,
+    int64_t *rval)
+{
+	pid_t s_pid;
+	id_t s_tid;
+	kthread_t *t = curthread;
+	lx_lwp_data_t *lx_lwp;
+	lx_affmask_t mask;
+
+	if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK)
+		return (set_errno(EINVAL));
+
+	/*
+	 * The caller wants to know how large the mask should be.
+	 */
+	if (cmd == B_GET_AFFINITY_MASK && len == 0) {
+		*rval = sizeof (lx_affmask_t);
+		return (0);
+	}
+
+	/*
+	 * Otherwise, ensure they have a large enough mask.
+	 */
+	if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) {
+		*rval = -1;
+		return (set_errno(EINVAL));
+	}
+
+	if (pid == 0) {
+		s_pid = curproc->p_pid;
+		s_tid = curthread->t_tid;
+	} else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) {
+		return (set_errno(ESRCH));
+	}
+
+	/*
+	 * For now, we only support manipulating threads in the
+	 * same process.
+	 */
+	if (curproc->p_pid != s_pid)
+		return (set_errno(EPERM));
+
+	/*
+	 * User memory is never touched while p_lock is held, because the
+	 * copy may fault and block.  For a set, the new mask is staged in
+	 * a local buffer first; for a get, the mask is snapshotted under the
+	 * lock and copied out after the lock has been dropped.
+	 */
+	if (cmd == B_SET_AFFINITY_MASK &&
+	    copyin_nowatch((void *)maskp, &mask, sizeof (mask)) != 0)
+		return (set_errno(EFAULT));
+
+	/*
+	 * We must hold the process lock so that the thread list
+	 * doesn't change while we're looking at it.  We'll hold
+	 * the lock until we no longer reference the
+	 * corresponding lwp.
+	 */
+	mutex_enter(&curproc->p_lock);
+
+	do {
+		if (t->t_tid == s_tid)
+			break;
+		t = t->t_forw;
+	} while (t != curthread);
+
+	/*
+	 * The pid lookup above was done without p_lock, so the thread may
+	 * no longer be on the list.  Report that instead of silently
+	 * operating on the calling thread.
+	 */
+	if (t->t_tid != s_tid) {
+		mutex_exit(&curproc->p_lock);
+		return (set_errno(ESRCH));
+	}
+
+	lx_lwp = t->t_lwp->lwp_brand;
+
+	if (cmd == B_SET_AFFINITY_MASK)
+		bcopy(&mask, &lx_lwp->br_affinitymask, sizeof (mask));
+	else
+		bcopy(&lx_lwp->br_affinitymask, &mask, sizeof (mask));
+
+	mutex_exit(&curproc->p_lock);
+
+	if (cmd == B_SET_AFFINITY_MASK) {
+		*rval = 0;
+		return (0);
+	}
+
+	if (copyout_nowatch(&mask, (void *)maskp, sizeof (mask)) != 0)
+		return (set_errno(EFAULT));
+
+	*rval = sizeof (lx_affmask_t);
+	return (0);
+}
+
+/*
+ * Set the scheduling policy and priority of the thread named by the Linux
+ * pid 'pid' (0 selects the caller; see sched_setprocset()).  'param' is a
+ * user pointer to the requested priority.  A negative 'policy' means "keep
+ * the thread's current policy", which is looked up first.
+ *
+ * LX_SCHED_FIFO and LX_SCHED_RR are mapped onto the "RT" class, and
+ * LX_SCHED_OTHER onto the "TS" class; any other policy is EINVAL.
+ */
+long
+lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ if (policy < 0) {
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ /*
+ * The return value of each do_priocntlsys() call is discarded;
+ * failure is detected by looking at lwp_errno afterwards.
+ */
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* For "RT", an infinite time quantum is reported as FIFO, else RR. */
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+ }
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ /* procset_cid is only used for the PC_GETCID class-name lookups below. */
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within 0..rt_maxpri of the RT class. */
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within -ts_maxupri..ts_maxupri. */
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ /*
+ * NOTE(review): the result of PC_SETPARMS is discarded and 0 is
+ * returned unconditionally; presumably a failure is still surfaced
+ * through the errno recorded by set_errno() -- confirm.
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+/*
+ * Report the Linux scheduling policy of the thread named by the Linux pid
+ * 'pid' (0 selects the caller).  The "TS" class is reported as
+ * LX_SCHED_OTHER; the "RT" class as LX_SCHED_FIFO when its time quantum is
+ * RT_TQINF and as LX_SCHED_RR otherwise.  Any other class is EINVAL.
+ */
+long
+lx_sched_getscheduler(l_pid_t pid)
+{
+	klwp_t *self = ttolwp(curthread);
+	procset_t target;
+	pcparms_t parms;
+	pcinfo_t clinfo;
+	rtparms_t *rtp;
+	int err;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	err = sched_setprocset(&target, pid);
+	if (err != 0)
+		return (err);
+
+	/* Fetch the target's class id and class-specific parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &target, &parms);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	/* Turn the class id into a class name. */
+	bzero(&clinfo, sizeof (clinfo));
+	clinfo.pc_cid = parms.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &target, &clinfo);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	if (strcmp(clinfo.pc_clname, "TS") == 0)
+		return (LX_SCHED_OTHER);
+
+	if (strcmp(clinfo.pc_clname, "RT") != 0)
+		return (set_errno(EINVAL));
+
+	rtp = (rtparms_t *)parms.pc_clparms;
+	if (rtp->rt_tqnsecs == RT_TQINF)
+		return (LX_SCHED_FIFO);
+
+	return (LX_SCHED_RR);
+}
+
+/*
+ * Set the scheduling priority of the thread named by the Linux pid 'pid'
+ * (0 selects the caller; see sched_setprocset()) while keeping its current
+ * policy.  'param' is a user pointer to the requested priority.
+ *
+ * The current class is looked up first ("TS" -> LX_SCHED_OTHER, "RT" ->
+ * LX_SCHED_FIFO or LX_SCHED_RR); any other class is EINVAL.
+ */
+long
+lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int policy;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ /*
+ * The return value of each do_priocntlsys() call is discarded;
+ * failure is detected by looking at lwp_errno afterwards.
+ */
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* For "RT", an infinite time quantum is reported as FIFO, else RR. */
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ /* procset_cid is only used for the PC_GETCID class-name lookups below. */
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within 0..rt_maxpri of the RT class. */
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within -ts_maxupri..ts_maxupri. */
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ /*
+ * NOTE(review): the result of PC_SETPARMS is discarded and 0 is
+ * returned unconditionally; presumably a failure is still surfaced
+ * through the errno recorded by set_errno() -- confirm.
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+/*
+ * Copy the scheduling priority of the thread named by the Linux pid 'pid'
+ * (0 selects the caller) out to the user buffer 'param'.
+ *
+ * For the "TS" class the user priority is rescaled as
+ * -(ts_upri * 20) / ts_maxupri (0 when ts_maxupri is 0); for the "RT" class
+ * rt_pri is reported as is.  Any other class is EINVAL.
+ */
+long
+lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
+{
+	klwp_t *self = ttolwp(curthread);
+	struct lx_sched_param out;
+	procset_t target;
+	pcparms_t parms;
+	pcinfo_t clinfo;
+	int err;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	err = sched_setprocset(&target, pid);
+	if (err != 0)
+		return (err);
+
+	/* Fetch the target's class id and class-specific parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &target, &parms);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	/* Turn the class id into a class name plus class-wide limits. */
+	bzero(&clinfo, sizeof (clinfo));
+	clinfo.pc_cid = parms.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &target, &clinfo);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	bzero(&out, sizeof (out));
+
+	if (strcmp(clinfo.pc_clname, "TS") == 0) {
+		int upri = ((tsparms_t *)parms.pc_clparms)->ts_upri;
+		int maxupri = ((tsinfo_t *)clinfo.pc_clinfo)->ts_maxupri;
+
+		if (maxupri == 0)
+			out.lx_sched_prio = 0;
+		else
+			out.lx_sched_prio = -(upri * 20) / maxupri;
+	} else if (strcmp(clinfo.pc_clname, "RT") == 0) {
+		out.lx_sched_prio = ((rtparms_t *)parms.pc_clparms)->rt_pri;
+	} else {
+		return (set_errno(EINVAL));
+	}
+
+	if (copyout(&out, param, sizeof (out)) != 0)
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Copy the round-robin time quantum of the thread named by the Linux pid
+ * 'pid' (0 selects the caller) out to the user buffer 'ival'.
+ *
+ * Fails with EINVAL unless the thread is in the "RT" class with a time
+ * quantum other than RT_TQINF.
+ */
+long
+lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
+{
+	klwp_t *self = ttolwp(curthread);
+	struct timespec quantum;
+	procset_t target;
+	pcparms_t parms;
+	pcinfo_t rtclass;
+	rtparms_t *rtp;
+	int err;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	err = sched_setprocset(&target, pid);
+	if (err != 0)
+		return (err);
+
+	/* Fetch the target's class id and class-specific parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &target, &parms);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	/* Look up the class id of "RT" so it can be compared below. */
+	setprocset(&target, POP_AND, P_PID, 0, P_ALL, 0);
+	bzero(&rtclass, sizeof (rtclass));
+	(void) strcpy(rtclass.pc_clname, "RT");
+	(void) do_priocntlsys(PC_GETCID, &target, &rtclass);
+	if (self->lwp_errno != 0)
+		return (self->lwp_errno);
+
+	rtp = (rtparms_t *)parms.pc_clparms;
+	if (parms.pc_cid != rtclass.pc_cid || rtp->rt_tqnsecs == RT_TQINF)
+		return (set_errno(EINVAL));
+
+	quantum.tv_sec = rtp->rt_tqsecs;
+	quantum.tv_nsec = rtp->rt_tqnsecs;
+
+	if (copyout(&quantum, ival, sizeof (quantum)) != 0)
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Fill in *procset so that it names the scheduling target for the Linux
+ * pid 'pid'.  A pid of 0 selects P_PID/P_MYID.  Any other pid is translated
+ * with lx_lpid_to_spair() and must belong to the calling process, in which
+ * case the resulting lwp id is selected with P_LWPID.
+ *
+ * Returns 0 on success or the result of set_errno(ESRCH).
+ */
+int
+sched_setprocset(procset_t *procset, l_pid_t pid)
+{
+	id_t lwpid;
+
+	if (pid == 0) {
+		setprocset(procset, POP_AND, P_PID, P_MYID, P_ALL, 0);
+		return (0);
+	}
+
+	if (lx_lpid_to_spair(pid, &pid, &lwpid) < 0 ||
+	    pid != curproc->p_pid)
+		return (set_errno(ESRCH));
+
+	setprocset(procset, POP_AND, P_LWPID, lwpid, P_ALL, 0);
+	return (0);
+}
+
+/*
+ * Thin wrapper around priocntl_common(): always passes PC_VERSION and
+ * UIO_SYSSPACE, with 'arg' as the first of the two caddr_t arguments and 0
+ * as the second.
+ */
+long
+do_priocntlsys(int cmd, procset_t *procset, void *arg)
+{
+	caddr_t karg = (caddr_t)arg;
+
+	return (priocntl_common(PC_VERSION, procset, cmd, karg, 0,
+	    UIO_SYSSPACE));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
new file mode 100644
index 0000000000..9fdb734805
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <vm/anon.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+
+/*
+ * Result buffer filled in by lx_sysinfo() and copied out, in its entirety,
+ * to the caller-supplied address.
+ *
+ * NOTE(review): the field order and widths presumably mirror the 32-bit
+ * Linux struct sysinfo -- confirm against the Linux definition before
+ * changing anything here.  si_procs is 16 bits wide and is followed by a
+ * 32-bit field, so the compiler is expected to insert padding after it.
+ */
+struct lx_sysinfo {
+ int32_t si_uptime; /* Seconds since boot */
+ uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint32_t si_totalram; /* Total memory size */
+ uint32_t si_freeram; /* Available memory */
+ uint32_t si_sharedram; /* Shared memory */
+ uint32_t si_bufferram; /* Buffer memory */
+ uint32_t si_totalswap; /* Total swap space */
+ uint32_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint32_t si_totalhigh; /* High memory size */
+ uint32_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+};
+
+/*
+ * Emulation of the Linux sysinfo system call.
+ *
+ * Fills in a struct lx_sysinfo describing uptime, load averages, memory,
+ * swap and lwp count, then copies it out to 'sip'.
+ *
+ * Returns 0 on success, or the result of set_errno(EFAULT) if the copyout
+ * to 'sip' fails.
+ */
+long
+lx_sysinfo(struct lx_sysinfo *sip)
+{
+	struct lx_sysinfo si;
+	hrtime_t birthtime;
+	zone_t *zone = curthread->t_procp->p_zone;
+	proc_t *init_proc;
+
+	/*
+	 * The entire structure is copied out to userland below, including
+	 * any padding the compiler inserts (e.g. after the 16-bit si_procs
+	 * field).  Zero it up front so that no stale kernel stack contents
+	 * are disclosed to the caller.
+	 */
+	bzero(&si, sizeof (si));
+
+	/*
+	 * We don't record the time a zone was booted, so we use the
+	 * birthtime of that zone's init process instead.  If the init
+	 * process cannot be found, fall back to the start time of p0.
+	 */
+	mutex_enter(&pidlock);
+	init_proc = prfind(zone->zone_proc_initpid);
+	if (init_proc != NULL)
+		birthtime = init_proc->p_mstart;
+	else
+		birthtime = p0.p_mstart;
+	mutex_exit(&pidlock);
+	si.si_uptime = (gethrtime() - birthtime) / NANOSEC;
+
+	/*
+	 * We scale down the load in avenrun to allow larger load averages
+	 * to fit in 32 bits.  Linux doesn't, so we remove the scaling
+	 * here.
+	 */
+	si.si_loads[0] = avenrun[0] << FSHIFT;
+	si.si_loads[1] = avenrun[1] << FSHIFT;
+	si.si_loads[2] = avenrun[2] << FSHIFT;
+
+	/*
+	 * In linux each thread looks like a process, so we conflate the
+	 * two in this stat as well.  Note that si_procs is only 16 bits
+	 * wide, so larger lwp counts are truncated by the assignment.
+	 */
+	si.si_procs = (int32_t)zone->zone_nlwps;
+
+	/*
+	 * If the maximum memory stat is less than 2^20 pages (i.e. 4GB),
+	 * then we report the result in bytes.  Otherwise we use pages.
+	 * Once we start supporting >1TB x86 systems, we'll need a third
+	 * option.
+	 */
+	if (MAX(physmem, k_anoninfo.ani_max) < 1024 * 1024) {
+		si.si_totalram = physmem * PAGESIZE;
+		si.si_freeram = freemem * PAGESIZE;
+		si.si_totalswap = k_anoninfo.ani_max * PAGESIZE;
+		si.si_freeswap = k_anoninfo.ani_free * PAGESIZE;
+		si.si_mem_unit = 1;
+	} else {
+		si.si_totalram = physmem;
+		si.si_freeram = freemem;
+		si.si_totalswap = k_anoninfo.ani_max;
+		si.si_freeswap = k_anoninfo.ani_free;
+		si.si_mem_unit = PAGESIZE;
+	}
+	si.si_bufferram = 0;
+	si.si_sharedram = 0;
+
+	/*
+	 * These two stats refer to high physical memory.  If an
+	 * application running in a Linux zone cares about this, then
+	 * either it or we are broken.
+	 */
+	si.si_totalhigh = 0;
+	si.si_freehigh = 0;
+
+	if (copyout(&si, sip, sizeof (si)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
new file mode 100644
index 0000000000..f9751819f9
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Emulation of the Linux get_thread_area system call.
+ *
+ * Reads the requested entry number from the user's ldt_info, converts the
+ * matching descriptor in the calling lwp's br_tls array to Linux ldt_info
+ * form and copies the result back out to 'inf'.
+ *
+ * Fails with EFAULT if 'inf' cannot be read or written, and with EINVAL if
+ * the entry number lies outside [GDT_TLSMIN, GDT_TLSMAX].
+ */
+long
+lx_get_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+	struct ldt_info out;
+	user_desc_t *slot;
+	int entry;
+
+	if (fuword32(&inf->entry_number, (uint32_t *)&entry) != 0)
+		return (set_errno(EFAULT));
+
+	if (!(entry >= GDT_TLSMIN && entry <= GDT_TLSMAX))
+		return (set_errno(EINVAL));
+
+	/* br_tls is indexed relative to the first TLS slot in the GDT */
+	slot = &lwpd->br_tls[entry - GDT_TLSMIN];
+
+	/* translate the native descriptor into the layout Linux expects */
+	DESC_TO_LDT_INFO(slot, &out);
+	out.entry_number = entry;
+
+	if (copyout(&out, inf, sizeof (out)) != 0)
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Return non-zero if every byte of the given descriptor is zero, i.e. the
+ * TLS slot is unused.  The test is sized by the descriptor itself rather
+ * than by a fixed number of machine words.
+ */
+static int
+lx_tls_desc_is_empty(const user_desc_t *dscrp)
+{
+	const uint8_t *bp = (const uint8_t *)dscrp;
+	size_t i;
+
+	for (i = 0; i < sizeof (*dscrp); i++) {
+		if (bp[i] != 0)
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Emulation of the Linux set_thread_area system call.
+ *
+ * Copies in the caller's ldt_info.  An entry number of -1 asks us to pick
+ * the first unused slot in the lwp's br_tls array; the chosen entry number
+ * is written back to the user's structure.  The selected slot is then
+ * either cleared (if the request describes an empty descriptor) or loaded
+ * from the ldt_info, and all of the lwp's TLS descriptors are reinstalled
+ * in the GDT.
+ *
+ * Fails with EFAULT if 'inf' cannot be read or written, ESRCH if no free
+ * slot is available, and EINVAL if the entry number lies outside
+ * [GDT_TLSMIN, GDT_TLSMAX].
+ */
+long
+lx_set_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+	struct ldt_info ldt_inf;
+	user_desc_t *dscrp;
+	int entry;
+	int i;
+
+	if (copyin(inf, &ldt_inf, sizeof (ldt_inf)))
+		return (set_errno(EFAULT));
+
+	entry = ldt_inf.entry_number;
+	if (entry == -1) {
+		/*
+		 * find an empty entry in the tls for this thread
+		 *
+		 * The previous implementation tested two 'unsigned long'
+		 * words per slot, which spans more than one descriptor
+		 * whenever 2 * sizeof (unsigned long) exceeds
+		 * sizeof (user_desc_t), and reads past the end of br_tls
+		 * for the last slot.
+		 */
+		for (i = 0, dscrp = jlwp->br_tls;
+		    i < LX_TLSNUM; i++, dscrp++) {
+			if (lx_tls_desc_is_empty(dscrp))
+				break;
+		}
+
+		if (i < LX_TLSNUM) {
+			/*
+			 * found one
+			 */
+			entry = i + GDT_TLSMIN;
+			if (suword32(&inf->entry_number, entry))
+				return (set_errno(EFAULT));
+		} else {
+			return (set_errno(ESRCH));
+		}
+	}
+
+	if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+		return (set_errno(EINVAL));
+
+	/*
+	 * convert the linux ldt info to standard intel descriptor
+	 */
+	dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+	if (LDT_INFO_EMPTY(&ldt_inf)) {
+		/*
+		 * Clear exactly one descriptor; writing two 'unsigned long'
+		 * words here could clobber the neighbouring slot (or memory
+		 * beyond br_tls) where unsigned long is 64 bits wide.
+		 */
+		bzero(dscrp, sizeof (*dscrp));
+	} else {
+		LDT_INFO_TO_DESC(&ldt_inf, dscrp);
+	}
+
+	/*
+	 * update the gdt with the new descriptor
+	 *
+	 * Preemption is disabled across the reload of all of this lwp's
+	 * TLS slots.
+	 */
+	kpreempt_disable();
+
+	for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++)
+		lx_set_gdt(GDT_TLSMIN + i, dscrp);
+
+	kpreempt_enable();
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
index d61928d578..ab733a07cc 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.c
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -94,9 +94,11 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
+ NULL,
sn1_brand_int91_callback,
sn1_brand_syscall_callback,
- sn1_brand_syscall32_callback
+ sn1_brand_syscall32_callback,
+ NULL
};
#else /* ! __amd64 */
@@ -104,7 +106,9 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
NULL,
+ NULL,
sn1_brand_syscall_callback,
+ NULL,
NULL
};
#endif /* __amd64 */
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c
index f24b864eef..2e2309a33e 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.c
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.c
@@ -99,9 +99,11 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
+ NULL,
s10_brand_int91_callback,
s10_brand_syscall_callback,
- s10_brand_syscall32_callback
+ s10_brand_syscall32_callback,
+ NULL
};
#else /* ! __amd64 */
@@ -109,7 +111,9 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
NULL,
+ NULL,
s10_brand_syscall_callback,
+ NULL,
NULL
};
#endif /* __amd64 */
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index 400e9ffd10..07ffddc123 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -447,6 +447,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp)
return (0);
}
+/*
+ * Callback handed out by the PTMPTSOPENCB ioctl.  'arg' is the opaque
+ * pointer to this master's pt_ttys structure that was returned together
+ * with the callback.  Returns B_TRUE when pt_nullmsg is non-NULL at the
+ * time of the call (sampled between PT_ENTER_READ and PT_EXIT_READ), and
+ * B_FALSE otherwise.
+ */
+static boolean_t
+ptmptsopencb(ptmptsopencb_arg_t arg)
+{
+	struct pt_ttys *ptmp = (struct pt_ttys *)arg;
+	boolean_t have_nullmsg = B_FALSE;
+
+	PT_ENTER_READ(ptmp);
+	if (ptmp->pt_nullmsg != NULL)
+		have_nullmsg = B_TRUE;
+	PT_EXIT_READ(ptmp);
+
+	return (have_nullmsg);
+}
+
/*
* The wput procedure will only handle ioctl and flush messages.
*/
@@ -574,6 +586,41 @@ ptmwput(queue_t *qp, mblk_t *mp)
miocack(qp, mp, 0, 0);
break;
}
+ case PTMPTSOPENCB:
+ {
+ mblk_t *dp; /* ioctl reply data */
+ ptmptsopencb_t *ppocb;
+
+ /* only allow the kernel to invoke this ioctl */
+ if (iocp->ioc_cr != kcred) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* we don't support transparent ioctls */
+ ASSERT(iocp->ioc_count != TRANSPARENT);
+ if (iocp->ioc_count == TRANSPARENT) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* allocate a response message */
+ dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED);
+ if (dp == NULL) {
+ miocnak(qp, mp, 0, EAGAIN);
+ break;
+ }
+
+ /* initialize the ioctl results */
+ ppocb = (ptmptsopencb_t *)dp->b_rptr;
+ ppocb->ppocb_func = ptmptsopencb;
+ ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp;
+
+ /* send the reply data */
+ mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0);
+ qreply(qp, mp);
+ break;
+ }
}
break;
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
index eb8c6e730a..532b74ec9b 100644
--- a/usr/src/uts/common/os/brand.c
+++ b/usr/src/uts/common/os/brand.c
@@ -45,7 +45,7 @@ struct brand_mach_ops native_mach_ops = {
};
#else /* !__sparcv9 */
struct brand_mach_ops native_mach_ops = {
- NULL, NULL, NULL, NULL
+ NULL, NULL, NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index b555bb82b7..39db5cb27d 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -112,6 +112,18 @@ pid_lookup(pid_t pid)
return (pidp);
}
+/*
+ * Locking wrapper around pid_lookup(): performs the lookup with
+ * pidlinklock held and returns its result (as returned by pid_lookup()).
+ * NOTE(review): the lock is dropped before returning, so the caller is
+ * presumably responsible for keeping the returned entry valid -- confirm
+ * against callers.
+ */
+struct pid *
+pid_find(pid_t pid)
+{
+	struct pid *found;
+
+	mutex_enter(&pidlinklock);
+	found = pid_lookup(pid);
+	mutex_exit(&pidlinklock);
+
+	return (found);
+}
+
void
pid_setmin(void)
{
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index c6ebe8b110..f9df89923f 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -3177,6 +3177,7 @@ job_control_type(int cmd)
case JAGENT: /* Obsolete */
case JTRUN: /* Obsolete */
case JXTPROTO: /* Obsolete */
+ case TIOCSETLD:
return (JCSETP);
}
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 6c79ee266d..ba8b2b1210 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -126,6 +126,12 @@ extern void ptms_logp(char *, uintptr_t);
#define DDBGP(a, b)
#endif
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+ boolean_t (*ppocb_func)(ptmptsopencb_arg_t);
+ ptmptsopencb_arg_t ppocb_arg;
+} ptmptsopencb_t;
+
#endif /* _KERNEL */
typedef struct pt_own {
@@ -157,6 +163,19 @@ typedef struct pt_own {
#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */
#define OWNERPT (('P'<<8)|5) /* set owner/group for slave device */
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ * The return value of the callback function when it's invoked
+ * with the opaque argument passed to it will indicate if the
+ * pts slave device is currently open.
+ */
+#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */
+
+#endif /* _KERNEL */
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index e66ba0bc6b..1e1124d554 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -380,6 +380,24 @@ extern pid_t tcgetsid();
#define TCSETSF (_TIOC|16)
/*
+ * linux terminal ioctls we need to be aware of
+ */
+#define TIOCSETLD (_TIOC|123) /* set line discipline parms */
+#define TIOCGETLD (_TIOC|124) /* get line discipline parms */
+
+/*
+ * On Solaris, VMIN and VTIME overlap with VEOF and VEOL. This is
+ * perfectly legal, but Linux expects them to be separate, so we keep
+ * them separately.
+ */
+struct lx_cc {
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/*
* NTP PPS ioctls
*/
#define TIOCGPPS (_TIOC|125)
diff --git a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s
index 61ef4ac6c3..68181be28a 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm.s
@@ -631,6 +631,36 @@ _sysenter_done:
sysexit
SET_SIZE(sys_sysenter)
SET_SIZE(brand_sys_sysenter)
+#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the sysenter path, there is no default
+ * action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ BRAND_CALLBACK(BRAND_CB_INT80)
+
+ ALTENTRY(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %eip to point before the INT, push the expected error
+ * code and fake a GP fault.
+ *
+ */
+ subl $2, (%esp) /* int insn 2-bytes */
+ pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
/*
* Declare a uintptr_t which covers the entire pc range of syscall
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index 443689cec3..fec8301bbc 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -1159,6 +1159,48 @@ sys_sysenter()
SET_SIZE(brand_sys_sysenter)
#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the other entry points, there is no
+ * default action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ SWAPGS /* kernel gsbase */
+ XPV_TRAP_POP
+ BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
+ SWAPGS /* user gsbase */
+ jmp nopop_int80
+
+ ENTRY_NP(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %rip to point before the INT, push the expected error
+ * code and fake a GP fault. Note on 64-bit hypervisor we need
+ * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
+ * because gptrap will pop them again with its own XPV_TRAP_POP.
+ */
+ XPV_TRAP_POP
+nopop_int80:
+ subq $2, (%rsp) /* int insn 2-bytes */
+ pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+#if defined(__xpv)
+ push %r11
+ push %rcx
+#endif
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
+#endif /* __lint */
+
/*
* This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index b632cea09c..8f9803290c 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -380,7 +380,7 @@ struct apic_io_intr {
/* special or reserve vectors */
#define APIC_CHECK_RESERVE_VECTORS(v) \
(((v) == T_FASTTRAP) || ((v) == APIC_SPUR_INTR) || \
- ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET))
+ ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET) || ((v) == 0x80))
/* cmos shutdown code for BIOS */
#define BIOS_SHUTDOWN 0x0a
diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile
index 72b2d89989..37e10e011e 100644
--- a/usr/src/uts/intel/Makefile
+++ b/usr/src/uts/intel/Makefile
@@ -64,7 +64,7 @@ install_h.prereq := TARGET= install_h
.PARALLEL: $(PARALLEL_KMODS) $(XMODS) config $(LINT_DEPS)
-def all install clean clobber modlist: $(KMODS) $(XMODS) config
+def all install clean clobber modlist: genassym $(KMODS) $(XMODS) config
clobber: clobber.targ
@@ -106,7 +106,7 @@ CLOBBERFILES += $(PRIVS_C)
# intel/dtrace depends on i86pc/genassym, so we need to build both
# i86pc/genassym and intel/genassym.
#
-all.prereq install.prereq def.prereq: genunix FRC
+all.prereq install.prereq def.prereq: genassym genunix FRC
@cd ../i86pc/genassym; pwd; $(MAKE) $(@:%.prereq=%)
#
@@ -124,7 +124,7 @@ genunix: $(PRIVS_C)
modlintlib clean.lint: $(LINT_KMODS) $(XMODS)
-$(KMODS) $(SUBDIRS) config: FRC
+genassym $(KMODS) $(SUBDIRS) config: FRC
@cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET)
$(XMODS): FRC
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 1661f3ff93..4321c3586e 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -99,6 +99,14 @@ GENUNIX_OBJS += \
#
CORE_OBJS += \
prmachdep.o
+
+LX_PROC_OBJS += \
+ lx_prsubr.o \
+ lx_prvfsops.o \
+ lx_prvnops.o
+
+LX_AUTOFS_OBJS += \
+ lx_autofs.o
#
# ZFS file system module
@@ -267,6 +275,23 @@ IOMMULIB_OBJS = iommulib.o
SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o
SNGL_BRAND_OBJS = sngl_brand.o sngl_brand_asm.o
S10_BRAND_OBJS = s10_brand.o s10_brand_asm.o
+LX_BRAND_OBJS = \
+ lx_brand.o \
+ lx_brand_asm.o \
+ lx_brk.o \
+ lx_clone.o \
+ lx_futex.o \
+ lx_getpid.o \
+ lx_id.o \
+ lx_kill.o \
+ lx_misc.o \
+ lx_modify_ldt.o \
+ lx_pid.o \
+ lx_sched.o \
+ lx_signum.o \
+ lx_syscall.o \
+ lx_sysinfo.o \
+ lx_thread_area.o
#
# special files
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 7e95f5fa23..b818b1860c 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -42,6 +42,7 @@ PLATFORM = i86pc
#
UNIX_DIR = $(UTSBASE)/i86pc/unix
GENLIB_DIR = $(UTSBASE)/intel/genunix
+GENASSYM_DIR = $(UTSBASE)/intel/genassym
IPDRV_DIR = $(UTSBASE)/intel/ip
MODSTUBS_DIR = $(UNIX_DIR)
DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym
@@ -134,6 +135,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS)
ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS))
ASFLAGS += $(ASFLAGS_XARCH)
+AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR)
#
# Define the base directory for installation.
@@ -513,7 +515,9 @@ MISC_KMODS += md_sp
#
# Brand modules
#
-BRAND_KMODS += sn1_brand sngl_brand s10_brand
+BRAND_KMODS += sn1_brand sngl_brand s10_brand lx_brand
+DRV_KMODS += lx_systrace lx_ptm lx_audio
+STRMOD_KMODS += ldlinux
#
# Exec Class Modules (/kernel/exec):
@@ -529,7 +533,7 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC
# File System Modules (/kernel/fs):
#
FS_KMODS += autofs cachefs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs
-FS_KMODS += lofs lxprocfs mntfs namefs nfs objfs zfs zut
+FS_KMODS += lofs lx_afs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut
FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs
FS_KMODS += smbfs
diff --git a/usr/src/uts/intel/brand/lx/lx_brand_asm.s b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
new file mode 100644
index 0000000000..7f3ed908f4
--- /dev/null
+++ b/usr/src/uts/intel/brand/lx/lx_brand_asm.s
@@ -0,0 +1,162 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#if defined(__lint)
+
+#include <sys/systm.h>
+
+#else /* __lint */
+
+#include "genassym.h"
+#include "../common/brand_asm.h"
+
+#endif /* __lint */
+
+#ifdef __lint
+
+void
+lx_brand_int80_callback(void)
+{
+}
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+/*
+ * int80 brand callback (amd64).
+ *
+ * See "64-BIT INTERPOSITION STACK" in brand_asm.h.
+ *
+ * The system call number in %rax is compared (unsigned) against the zone's
+ * lxzd_max_syscall value and replaced with 0 if it is larger.  A table
+ * address is then derived with CALC_TABLE_ADDR from either L_HANDLER or,
+ * when the jmp at .lx_brand_int80_patch_point has been redirected,
+ * L_TRACEHANDLER.  That address is exchanged with the return address on
+ * the interrupt stack before leaving through sys_sysint_swapgs_iret.
+ * NOTE(review): the exact semantics of GET_PROCP, CALC_TABLE_ADDR and GET_V
+ * are defined in brand_asm.h and are not visible here -- confirm there.
+ */
+ENTRY(lx_brand_int80_callback)
+ GET_PROCP(SP_REG, 0, %r15)
+ movq P_ZONE(%r15), %r15 /* grab the zone pointer */
+ /* grab the 'max syscall num' for this process from 'zone brand data' */
+ movq ZONE_BRAND_DATA(%r15), %r15 /* grab the zone brand ptr */
+ movl LXZD_MAX_SYSCALL(%r15), %r15d /* get the 'max sysnum' word */
+ cmpq %r15, %rax /* is 0 <= syscall <= MAX? */
+ jbe 0f /* yes, syscall is OK */
+ xorl %eax, %eax /* no, zero syscall number */
+0:
+
+/*
+ * The byte at .lx_brand_int80_patch_point + 1 is rewritten at run time by
+ * lx_brand_int80_enable() and lx_brand_int80_disable() to steer this jmp
+ * to the trace or no-trace path.
+ * NOTE(review): this presumably relies on the assembler emitting a 2-byte
+ * short jmp whose second byte is the displacement -- confirm.
+ */
+.lx_brand_int80_patch_point:
+ jmp .lx_brand_int80_notrace
+
+.lx_brand_int80_notrace:
+ CALC_TABLE_ADDR(%r15, L_HANDLER)
+1:
+ movq %r15, %rax
+ GET_V(%rsp, 0, V_SSP, %rsp) /* restore intr. stack pointer */
+ xchgq (%rsp), %rax /* swap %rax and return addr */
+ jmp sys_sysint_swapgs_iret
+
+.lx_brand_int80_trace:
+ /*
+ * If tracing is active, we vector to an alternate trace-enabling
+ * handler table instead.
+ */
+ CALC_TABLE_ADDR(%r15, L_TRACEHANDLER)
+ jmp 1b
+SET_SIZE(lx_brand_int80_callback)
+
+/*
+ * PATCH_POINT is the address of the byte following the first byte of the
+ * jmp at .lx_brand_int80_patch_point; PATCH_VAL is the distance from the
+ * no-trace path to the trace path.  Storing PATCH_VAL at PATCH_POINT
+ * redirects the jmp to .lx_brand_int80_trace; storing 0 restores the jump
+ * to .lx_brand_int80_notrace.
+ * NOTE(review): PATCH_VAL is stored as a single byte, so the two labels are
+ * presumably required to stay within short-jump range -- confirm.
+ */
+#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1)
+#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace)
+
+/*
+ * Turn on int80 tracing: set lx_systrace_brand_enabled to 1, then patch
+ * the callback's jmp to take the trace path.  Uses %r8 as scratch.
+ */
+ENTRY(lx_brand_int80_enable)
+ movl $1, lx_systrace_brand_enabled(%rip)
+ movq $PATCH_POINT, %r8
+ movb $PATCH_VAL, (%r8)
+ ret
+SET_SIZE(lx_brand_int80_enable)
+
+/*
+ * Turn off int80 tracing: patch the callback's jmp back to the no-trace
+ * path first, then clear lx_systrace_brand_enabled.  Uses %r8 as scratch.
+ */
+ENTRY(lx_brand_int80_disable)
+ movq $PATCH_POINT, %r8
+ movb $0, (%r8)
+ movl $0, lx_systrace_brand_enabled(%rip)
+ ret
+SET_SIZE(lx_brand_int80_disable)
+
+
+#elif defined(__i386)
+
+/*
+ * int80 brand callback (i386).
+ *
+ * See "32-BIT INTERPOSITION STACK" in brand_asm.h.
+ *
+ * The system call number in %eax is compared (unsigned) against the zone's
+ * lxzd_max_syscall value and replaced with 0 if it is larger.  A table
+ * address is then derived with CALC_TABLE_ADDR from either L_HANDLER or,
+ * when the jmp at .lx_brand_int80_patch_point has been redirected,
+ * L_TRACEHANDLER.  %ebx is used as the scratch register and is reloaded
+ * from the V_U_EBX slot before the new address is exchanged with the
+ * original return address and control leaves via nopop_sys_rtt_syscall.
+ * NOTE(review): the exact semantics of GET_PROCP, CALC_TABLE_ADDR and GET_V
+ * are defined in brand_asm.h and are not visible here -- confirm there.
+ */
+ENTRY(lx_brand_int80_callback)
+ GET_PROCP(SP_REG, 0, %ebx)
+ movl P_ZONE(%ebx), %ebx /* grab the zone pointer */
+ /* grab the 'max syscall num' for this process from 'zone brand data' */
+ movl ZONE_BRAND_DATA(%ebx), %ebx /* grab the zone brand data */
+ movl LXZD_MAX_SYSCALL(%ebx), %ebx /* get the max sysnum */
+
+ cmpl %ebx, %eax /* is 0 <= syscall <= MAX? */
+ jbe 0f /* yes, syscall is OK */
+ xorl %eax, %eax /* no, zero syscall number */
+0:
+
+/*
+ * The byte at .lx_brand_int80_patch_point + 1 is rewritten at run time by
+ * lx_brand_int80_enable() and lx_brand_int80_disable() to steer this jmp
+ * to the trace or no-trace path.
+ * NOTE(review): this presumably relies on the assembler emitting a 2-byte
+ * short jmp whose second byte is the displacement -- confirm.
+ */
+.lx_brand_int80_patch_point:
+ jmp .lx_brand_int80_notrace
+
+.lx_brand_int80_notrace:
+ CALC_TABLE_ADDR(%ebx, L_HANDLER)
+
+1:
+ movl %ebx, %eax
+ GET_V(%esp, 0, V_U_EBX, %ebx) /* restore scratch register */
+ addl $V_END, %esp /* restore intr. stack ptr */
+ xchgl (%esp), %eax /* swap new and orig. return addrs */
+ jmp nopop_sys_rtt_syscall
+
+.lx_brand_int80_trace:
+ /* tracing is enabled: use the alternate trace handler table entry */
+ CALC_TABLE_ADDR(%ebx, L_TRACEHANDLER)
+ jmp 1b
+SET_SIZE(lx_brand_int80_callback)
+
+
+/*
+ * PATCH_POINT is the address of the byte following the first byte of the
+ * jmp at .lx_brand_int80_patch_point; PATCH_VAL is the distance from the
+ * no-trace path to the trace path.  Storing PATCH_VAL at PATCH_POINT
+ * redirects the jmp to .lx_brand_int80_trace; storing 0 restores the jump
+ * to .lx_brand_int80_notrace.
+ * NOTE(review): PATCH_VAL is stored as a single byte, so the two labels are
+ * presumably required to stay within short-jump range -- confirm.
+ */
+#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1)
+#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace)
+
+/*
+ * Turn on int80 tracing: set lx_systrace_brand_enabled to 1, then patch
+ * the callback's jmp to take the trace path.  %ebx and %eax are used as
+ * scratch and are saved and restored around the patch.
+ */
+ENTRY(lx_brand_int80_enable)
+ pushl %ebx
+ pushl %eax
+ movl $1, lx_systrace_brand_enabled
+ movl $PATCH_POINT, %ebx
+ movl $PATCH_VAL, %eax
+ movb %al, (%ebx)
+ popl %eax
+ popl %ebx
+ ret
+SET_SIZE(lx_brand_int80_enable)
+
+/*
+ * Turn off int80 tracing: patch the callback's jmp back to the no-trace
+ * path first, then clear lx_systrace_brand_enabled.  %ebx is used as
+ * scratch and is saved and restored.
+ */
+ENTRY(lx_brand_int80_disable)
+ pushl %ebx
+ movl $PATCH_POINT, %ebx
+ movb $0, (%ebx)
+ movl $0, lx_systrace_brand_enabled
+ popl %ebx
+ ret
+SET_SIZE(lx_brand_int80_disable)
+
+#endif /* __i386 */
+#endif /* __lint */
diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile
new file mode 100644
index 0000000000..ce01dc8610
--- /dev/null
+++ b/usr/src/uts/intel/genassym/Makefile
@@ -0,0 +1,85 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of genassym.h through
+# compile time initialized data.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h
+OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(GENASSYM_H)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Overrides
+#
+CLEANFILES = Nothing_to_remove
+CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+clean.lint:
+
+install: def
+
+#
+# Create genassym.h
+#
+$(GENASSYM_H): $(OFFSETS_SRC)
+ $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in
new file mode 100644
index 0000000000..f389fe6da3
--- /dev/null
+++ b/usr/src/uts/intel/genassym/offsets.in
@@ -0,0 +1,43 @@
+\
+\ CDDL HEADER START
+\
+\ The contents of this file are subject to the terms of the
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
+\
+\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+\ or http://www.opensolaris.org/os/licensing.
+\ See the License for the specific language governing permissions
+\ and limitations under the License.
+\
+\ When distributing Covered Code, include this CDDL HEADER in each
+\ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+\ If applicable, add the following below this CDDL HEADER, with the
+\ fields enclosed by brackets "[]" replaced with your own identifying
+\ information: Portions Copyright [yyyy] [name of copyright owner]
+\
+\ CDDL HEADER END
+\
+\
+\ Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+\ Use is subject to license terms.
+\
+
+\
+\ offsets.in: input file to produce the architecture-dependent genassym.h
+\ using the ctfstabs program
+\
+
+#ifndef _GENASSYM
+#define _GENASSYM
+#endif
+
+#include <sys/lx_brand.h>
+
+lx_proc_data
+ l_handler
+ l_tracehandler
+ l_traceflag
+
+lx_zone_data
+ lxzd_max_syscall
diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c
index d83b16d673..42cc0d4d10 100644
--- a/usr/src/uts/intel/ia32/os/archdep.c
+++ b/usr/src/uts/intel/ia32/os/archdep.c
@@ -632,6 +632,8 @@ getuserpc()
static greg_t
fix_segreg(greg_t sr, int iscs, model_t datamodel)
{
+ kthread_t *t = curthread;
+
switch (sr &= 0xffff) {
case 0:
@@ -667,6 +669,19 @@ fix_segreg(greg_t sr, int iscs, model_t datamodel)
break;
}
+ /*
+ * Allow this process's brand to do any necessary segment register
+ * manipulation.
+ */
+ if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) {
+ greg_t bsr = BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel);
+
+ if (bsr == 0 && iscs == IS_CS)
+ return (0 | SEL_UPL);
+ else
+ return (bsr);
+ }
+
/*
* Force it into the LDT in ring 3 for 32-bit processes, which by
* default do not have an LDT, so that any attempt to use an invalid
diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c
index a05137eee6..97024b7b59 100644
--- a/usr/src/uts/intel/ia32/os/desctbls.c
+++ b/usr/src/uts/intel/ia32/os/desctbls.c
@@ -161,7 +161,7 @@ struct interposing_handler {
* The brand infrastructure interposes on two handlers, and we use one as a
* NULL signpost.
*/
-static struct interposing_handler brand_tbl[2];
+static struct interposing_handler brand_tbl[3];
/*
* software prototypes for default local descriptor table
@@ -976,6 +976,12 @@ init_idt_common(gate_desc_t *idt)
set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
+ /*
+ * install "int80" handler at, well, 0x80.
+ */
+ set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
+ 0);
+
/*
* install fast trap handler at 210.
*/
@@ -1001,21 +1007,27 @@ init_idt_common(gate_desc_t *idt)
SDT_SYSIGT, TRP_UPL, 0);
/*
- * Prepare interposing descriptor for the syscall handler
- * and cache copy of the default descriptor.
+ * Prepare interposing descriptors for the branded "int80"
+ * and syscall handlers and cache copies of the default
+ * descriptors.
*/
- brand_tbl[0].ih_inum = T_SYSCALLINT;
- brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
+ brand_tbl[0].ih_inum = T_INT80;
+ brand_tbl[0].ih_default_desc = idt0[T_INT80];
+ set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
+ SDT_SYSIGT, TRP_UPL, 0);
+
+ brand_tbl[1].ih_inum = T_SYSCALLINT;
+ brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
#if defined(__amd64)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif /* __i386 */
- brand_tbl[1].ih_inum = 0;
+ brand_tbl[2].ih_inum = 0;
}
#if defined(__xpv)
diff --git a/usr/src/uts/intel/ldlinux/Makefile b/usr/src/uts/intel/ldlinux/Makefile
new file mode 100644
index 0000000000..5177fc5799
--- /dev/null
+++ b/usr/src/uts/intel/ldlinux/Makefile
@@ -0,0 +1,103 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/ldlinux/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the ldlinux streams kernel
+# module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = ldlinux
+OBJECTS = $(LDLINUX_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LDLINUX_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_STRMOD_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_afs/Makefile b/usr/src/uts/intel/lx_afs/Makefile
new file mode 100644
index 0000000000..657ce7f4f0
--- /dev/null
+++ b/usr/src/uts/intel/lx_afs/Makefile
@@ -0,0 +1,108 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This makefile drives the production of the lx_afs (lx_autofs) file
+# system kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# Note that the name of the actual filesystem is lx_afs and
+# not lx_autofs. This is because filesystem names are stupidly
+# limited to 8 characters.
+#
+MODULE = lx_afs
+OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_afs/Makefile.rules
diff --git a/usr/src/uts/intel/lx_afs/Makefile.rules b/usr/src/uts/intel/lx_afs/Makefile.rules
new file mode 100644
index 0000000000..2793fedaa4
--- /dev/null
+++ b/usr/src/uts/intel/lx_afs/Makefile.rules
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_audio/Makefile b/usr/src/uts/intel/lx_audio/Makefile
new file mode 100644
index 0000000000..9341fc7def
--- /dev/null
+++ b/usr/src/uts/intel/lx_audio/Makefile
@@ -0,0 +1,100 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_audio/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lx_audio driver
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_audio
+OBJECTS = $(LX_AUDIO_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_AUDIO_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile
new file mode 100644
index 0000000000..b2a430de51
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile
@@ -0,0 +1,107 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the kernel component of
+# the lx brand
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_brand
+OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nexec/elfexec
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_brand/Makefile.rules
diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules
new file mode 100644
index 0000000000..0862baef84
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile.rules
@@ -0,0 +1,85 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -D_ELF32_COMPAT -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -D_ELF32_COMPAT -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -D_ELF32_COMPAT -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -D_ELF32_COMPAT -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -D_ELF32_COMPAT -o $@ $<
+
+$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -D_ELF32_COMPAT -o $@ $<
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -o $@ $<
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s
+ @($(LHEAD) $(LINT.s) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile
new file mode 100644
index 0000000000..0aaf2cabfa
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile
@@ -0,0 +1,113 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_proc/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lxproc file system
+# kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_proc
+OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# Depends on procfs and lx_brand
+#
+LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_proc/Makefile.rules
diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules
new file mode 100644
index 0000000000..b8592d2fdd
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile.rules
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile
new file mode 100644
index 0000000000..dcead27da7
--- /dev/null
+++ b/usr/src/uts/intel/lx_ptm/Makefile
@@ -0,0 +1,91 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_ptm/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lx_ptm driver
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_ptm
+OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile
new file mode 100644
index 0000000000..20c4a6a3a3
--- /dev/null
+++ b/usr/src/uts/intel/lx_systrace/Makefile
@@ -0,0 +1,80 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+UTSBASE = ../..
+
+MODULE = lx_systrace
+OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+ROOTLINK = $(USR_DTRACE_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_STATIC_UNUSED
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+$(ROOTLINK): $(USR_DTRACE_DIR) $(ROOTMODULE)
+ -$(RM) $@; ln $(ROOTMODULE) $@
+
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h
index 3f9ebdb6b7..c1d045515c 100644
--- a/usr/src/uts/intel/sys/machbrand.h
+++ b/usr/src/uts/intel/sys/machbrand.h
@@ -35,17 +35,20 @@ extern "C" {
struct brand_mach_ops {
void (*b_sysenter)(void);
+ void (*b_int80)(void);
void (*b_int91)(void);
void (*b_syscall)(void);
void (*b_syscall32)(void);
+ greg_t (*b_fixsegreg)(greg_t, model_t);
};
#endif /* _ASM */
#define BRAND_CB_SYSENTER 0
-#define BRAND_CB_INT91 1
-#define BRAND_CB_SYSCALL 2
-#define BRAND_CB_SYSCALL32 3
+#define BRAND_CB_INT80 1
+#define BRAND_CB_INT91 2
+#define BRAND_CB_SYSCALL 3
+#define BRAND_CB_SYSCALL32 4
#ifdef __cplusplus
}
diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h
index c4b194fcd8..8a6e398eec 100644
--- a/usr/src/uts/intel/sys/segments.h
+++ b/usr/src/uts/intel/sys/segments.h
@@ -683,6 +683,8 @@ extern void _start(), cmnint();
extern void achktrap(), mcetrap();
extern void xmtrap();
extern void fasttrap();
+extern void sys_int80();
+extern void brand_sys_int80();
extern void dtrace_ret();
#if !defined(__amd64)