diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2014-03-20 19:10:46 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2014-03-20 19:10:59 +0000 |
commit | e8facfd99e91cf5fefa4291a3ba0b6a0710eea09 (patch) | |
tree | df73671fcd2421ade9cb711f1f67e4241c5493bc /usr/src | |
parent | 76436dc0854d1e18a76ccb58a9b496a0e93ae7c7 (diff) | |
download | illumos-joyent-e8facfd99e91cf5fefa4291a3ba0b6a0710eea09.tar.gz |
OS-2834 ship lx brand
Diffstat (limited to 'usr/src')
188 files changed, 49502 insertions, 37 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 03850d711c..96faacb9e9 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -486,6 +486,7 @@ i386_SUBDIRS= \ cmd/biosdev \ cmd/rtc \ cmd/ucodeadm \ + lib/brand/lx \ lib/cfgadm_plugins/sata \ lib/cfgadm_plugins/sbd \ lib/libfdisk diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs index b81bfd4517..0374d13e5e 100644 --- a/usr/src/Targetdirs +++ b/usr/src/Targetdirs @@ -50,6 +50,9 @@ i386_DIRS= \ /boot/grub \ /boot/grub/bin \ /platform/i86pc \ + /usr/lib/brand/lx \ + /usr/lib/brand/lx/amd64 \ + /usr/lib/brand/lx/distros \ /usr/lib/xen \ /usr/lib/xen/bin @@ -594,6 +597,9 @@ SYM.DIRS= \ /usr/ucblib/32 \ /var/ld/32 +i386_SYM.DIRS64= \ + /usr/lib/brand/lx/64 + sparc_SYM.DIRS64= SYM.DIRS64= \ @@ -711,6 +717,7 @@ $(BUILD64) $(ROOT)/lib/crypto/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/lib/secure/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/usr/lib/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/usr/lib/elfedit/64:= LINKDEST=$(MACH64) +$(BUILD64) $(ROOT)/usr/lib/brand/lx/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/usr/lib/brand/sn1/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/usr/lib/brand/sngl/64:= LINKDEST=$(MACH64) $(BUILD64) $(ROOT)/usr/lib/brand/solaris10/64:= LINKDEST=$(MACH64) diff --git a/usr/src/cmd/devfsadm/i386/Makefile b/usr/src/cmd/devfsadm/i386/Makefile index 1f14c93dad..75f2da3436 100644 --- a/usr/src/cmd/devfsadm/i386/Makefile +++ b/usr/src/cmd/devfsadm/i386/Makefile @@ -24,8 +24,11 @@ LINK_OBJS_i386 = \ misc_link_i386.o \ + lx_link_i386.o \ xen_link.o +lx_link_i386.o lx_link_i386.po lx_link_i386.ln := CPPFLAGS += -I$(UTSBASE)/common/brand/lx + xen_link.o xen_link.ln xen_link.po := CPPFLAGS += -I$(UTSBASE)/i86xpv include ../Makefile.com diff --git a/usr/src/cmd/devfsadm/i386/lx_link_i386.c b/usr/src/cmd/devfsadm/i386/lx_link_i386.c new file mode 100644 index 0000000000..855f4f7383 --- /dev/null +++ b/usr/src/cmd/devfsadm/i386/lx_link_i386.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * 
The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <devfsadm.h> +#include <strings.h> +#include <stdio.h> +#include <sys/lx_ptm.h> +#include <sys/lx_audio.h> + +static int lx_ptm(di_minor_t minor, di_node_t node); +static int lx_audio(di_minor_t minor, di_node_t node); +static int lx_systrace(di_minor_t minor, di_node_t node); + +static devfsadm_create_t lx_create_cbt[] = { + { "pseudo", "ddi_pseudo", LX_PTM_DRV, + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_ptm }, + { "pseudo", "ddi_pseudo", LX_AUDIO_DRV, + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_audio }, + { "pseudo", "ddi_pseudo", "lx_systrace", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_systrace }, +}; + +DEVFSADM_CREATE_INIT_V0(lx_create_cbt); + +static int +lx_ptm(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + + if (strcmp(LX_PTM_MINOR_NODE, mname) == 0) + (void) devfsadm_mklink("brand/lx/ptmx", node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + +static int +lx_audio(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + + if (strcmp(LXA_MINORNAME_DEVCTL, mname) == 0) + 
(void) devfsadm_mklink("brand/lx/audio_devctl", node, minor, 0); + if (strcmp(LXA_MINORNAME_DSP, mname) == 0) + (void) devfsadm_mklink("brand/lx/dsp", node, minor, 0); + if (strcmp(LXA_MINORNAME_MIXER, mname) == 0) + (void) devfsadm_mklink("brand/lx/mixer", node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + +static int +lx_systrace(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "dtrace/provider/%s", mname); + (void) devfsadm_mklink(path, node, minor, 0); + + return (DEVFSADM_CONTINUE); +} diff --git a/usr/src/cmd/zlogin/zlogin.c b/usr/src/cmd/zlogin/zlogin.c index f8d2656ee1..cadbda4a0b 100644 --- a/usr/src/cmd/zlogin/zlogin.c +++ b/usr/src/cmd/zlogin/zlogin.c @@ -2233,8 +2233,18 @@ main(int argc, char **argv) /* * In failsafe mode, we don't use login(1), so don't try * setting up a utmpx entry. - */ - if (!failsafe) + * + * A branded zone may have very different utmpx semantics. + * At the moment, we only have two brand types: + * Solaris-like (native, sn1) and Linux. In the Solaris + * case, we know exactly how to do the necessary utmpx + * setup. Fortunately for us, the Linux /bin/login is + * prepared to deal with a non-initialized utmpx entry, so + * we can simply skip it. If future brands don't fall into + * either category, we'll have to add a per-brand utmpx + * setup hook. 
+ */ + if (!failsafe && (strcmp(zonebrand, "lx") != 0)) if (setup_utmpx(slaveshortname) == -1) return (1); diff --git a/usr/src/cmd/zoneadm/svc-zones b/usr/src/cmd/zoneadm/svc-zones index 9d307835bd..30d54f5272 100644 --- a/usr/src/cmd/zoneadm/svc-zones +++ b/usr/src/cmd/zoneadm/svc-zones @@ -32,7 +32,7 @@ shutdown_zones() { zoneadm list -p | nawk -F: '{ - if ($2 != "global") { + if (($5 != "lx") && ($2 != "global")) { print $2 } }' diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c new file mode 100644 index 0000000000..5554750874 --- /dev/null +++ b/usr/src/common/brand/lx/lx_signum.c @@ -0,0 +1,242 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/signal.h> +#include <lx_signum.h> + +/* + * Delivering signals to a Linux process is complicated by differences in + * signal numbering, stack structure and contents, and the action taken when a + * signal handler exits. In addition, many signal-related structures, such as + * sigset_ts, vary between Solaris and Linux. 
+ * + * The simplest transformation that must be done when sending signals is to + * translate between Linux and Solaris signal numbers. + * + * These are the major signal number differences between Linux and Solaris: + * + * ==================================== + * | Number | Linux | Solaris | + * | ====== | ========= | ========== | + * | 7 | SIGBUS | SIGEMT | + * | 10 | SIGUSR1 | SIGBUS | + * | 12 | SIGUSR2 | SIGSYS | + * | 16 | SIGSTKFLT | SIGUSR1 | + * | 17 | SIGCHLD | SIGUSR2 | + * | 18 | SIGCONT | SIGCHLD | + * | 19 | SIGSTOP | SIGPWR | + * | 20 | SIGTSTP | SIGWINCH | + * | 21 | SIGTTIN | SIGURG | + * | 22 | SIGTTOU | SIGPOLL | + * | 23 | SIGURG | SIGSTOP | + * | 24 | SIGXCPU | SIGTSTP | + * | 25 | SIGXFSZ | SIGCONT | + * | 26 | SIGVTALRM | SIGTTIN | + * | 27 | SIGPROF | SIGTTOU | + * | 28 | SIGWINCH | SIGVTALRM | + * | 29 | SIGPOLL | SIGPROF | + * | 30 | SIGPWR | SIGXCPU | + * | 31 | SIGSYS | SIGXFSZ | + * ==================================== + * + * Not every Linux signal maps to a Solaris signal, nor does every Solaris + * signal map to a Linux counterpart. However, when signals do map, the + * mapping is unique. + * + * One mapping issue is that Linux supports 33 real time signals, with the range + * typically starting at or near 32 (SIGRTMIN) and proceeding to 64 (SIGRTMAX) + * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one or more of + * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as + * needed.) Conversely, Solaris actively uses signals 32-40 for other purposes + * and supports exactly 32 real time signals, in the range 41 (SIGRTMIN) + * to 72 (SIGRTMAX). + * + * At present, attempting to translate a Linux signal equal to 63 + * will generate an error (we allow SIGRTMAX because a program + * should be able to send SIGRTMAX without getting an EINVAL, though obviously + * anything that loops through the signals from SIGRTMIN to SIGRTMAX will + * fail.) 
+ * + * Similarly, attempting to translate a native Solaris signal in the range + * 32-40 will also generate an error as we don't want to support the receipt of + * those signals from the Solaris global zone. + */ + +/* + * Linux to Solaris signal map + * + * Usage: solaris_signal = ltos_signo[lx_signal]; + */ +const int +ltos_signo[LX_NSIG] = { + 0, + SIGHUP, + SIGINT, + SIGQUIT, + SIGILL, + SIGTRAP, + SIGABRT, + SIGBUS, + SIGFPE, + SIGKILL, + SIGUSR1, + SIGSEGV, + SIGUSR2, + SIGPIPE, + SIGALRM, + SIGTERM, + SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */ + SIGCHLD, + SIGCONT, + SIGSTOP, + SIGTSTP, + SIGTTIN, + SIGTTOU, + SIGURG, + SIGXCPU, + SIGXFSZ, + SIGVTALRM, + SIGPROF, + SIGWINCH, + SIGPOLL, + SIGPWR, + SIGSYS, + _SIGRTMIN, /* 32: Linux SIGRTMIN */ + _SIGRTMIN + 1, + _SIGRTMIN + 2, + _SIGRTMIN + 3, + _SIGRTMIN + 4, + _SIGRTMIN + 5, + _SIGRTMIN + 6, + _SIGRTMIN + 7, + _SIGRTMIN + 8, + _SIGRTMIN + 9, + _SIGRTMIN + 10, + _SIGRTMIN + 11, + _SIGRTMIN + 12, + _SIGRTMIN + 13, + _SIGRTMIN + 14, + _SIGRTMIN + 15, + _SIGRTMIN + 16, + _SIGRTMIN + 17, + _SIGRTMIN + 18, + _SIGRTMIN + 19, + _SIGRTMIN + 20, + _SIGRTMIN + 21, + _SIGRTMIN + 22, + _SIGRTMIN + 23, + _SIGRTMIN + 24, + _SIGRTMIN + 25, + _SIGRTMIN + 26, + _SIGRTMIN + 27, + _SIGRTMIN + 28, + _SIGRTMIN + 29, + _SIGRTMIN + 30, + -1, /* 63: Linux SIGRTMIN + 31, or SIGRTMAX - 1 */ + _SIGRTMAX, /* 64: Linux SIGRTMAX */ +}; + +/* + * Solaris to Linux signal map + * + * Usage: lx_signal = stol_signo[solaris_signal]; + */ +const int +stol_signo[NSIG] = { + 0, + LX_SIGHUP, + LX_SIGINT, + LX_SIGQUIT, + LX_SIGILL, + LX_SIGTRAP, + LX_SIGABRT, + LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */ + LX_SIGFPE, + LX_SIGKILL, + LX_SIGBUS, + LX_SIGSEGV, + LX_SIGSYS, + LX_SIGPIPE, + LX_SIGALRM, + LX_SIGTERM, + LX_SIGUSR1, + LX_SIGUSR2, + LX_SIGCHLD, + LX_SIGPWR, + LX_SIGWINCH, + LX_SIGURG, + LX_SIGPOLL, + LX_SIGSTOP, + LX_SIGTSTP, + LX_SIGCONT, + LX_SIGTTIN, + LX_SIGTTOU, + LX_SIGVTALRM, + LX_SIGPROF, + LX_SIGXCPU, + 
LX_SIGXFSZ, + -1, /* 32: Solaris SIGWAITING */ + -1, /* 33: Solaris SIGLWP */ + -1, /* 34: Solaris SIGFREEZE */ + -1, /* 35: Solaris SIGTHAW */ + -1, /* 36: Solaris SIGCANCEL */ + -1, /* 37: Solaris SIGLOST */ + -1, /* 38: Solaris SIGXRES */ + -1, /* 39: Solaris SIGJVM1 */ + -1, /* 40: Solaris SIGJVM2 */ + LX_SIGRTMIN, /* 41: Solaris _SIGRTMIN */ + LX_SIGRTMIN + 1, + LX_SIGRTMIN + 2, + LX_SIGRTMIN + 3, + LX_SIGRTMIN + 4, + LX_SIGRTMIN + 5, + LX_SIGRTMIN + 6, + LX_SIGRTMIN + 7, + LX_SIGRTMIN + 8, + LX_SIGRTMIN + 9, + LX_SIGRTMIN + 10, + LX_SIGRTMIN + 11, + LX_SIGRTMIN + 12, + LX_SIGRTMIN + 13, + LX_SIGRTMIN + 14, + LX_SIGRTMIN + 15, + LX_SIGRTMIN + 16, + LX_SIGRTMIN + 17, + LX_SIGRTMIN + 18, + LX_SIGRTMIN + 19, + LX_SIGRTMIN + 20, + LX_SIGRTMIN + 21, + LX_SIGRTMIN + 22, + LX_SIGRTMIN + 23, + LX_SIGRTMIN + 24, + LX_SIGRTMIN + 25, + LX_SIGRTMIN + 26, + LX_SIGRTMIN + 27, + LX_SIGRTMIN + 28, + LX_SIGRTMIN + 29, + LX_SIGRTMIN + 30, + LX_SIGRTMAX, /* 72: Solaris _SIGRTMAX */ +}; diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h new file mode 100644 index 0000000000..1ec6fa09c9 --- /dev/null +++ b/usr/src/common/brand/lx/lx_signum.h @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_SIGNUM_H +#define _LX_SIGNUM_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_SIGHUP 1 +#define LX_SIGINT 2 +#define LX_SIGQUIT 3 +#define LX_SIGILL 4 +#define LX_SIGTRAP 5 +#define LX_SIGABRT 6 +#define LX_SIGIOT 6 +#define LX_SIGBUS 7 +#define LX_SIGFPE 8 +#define LX_SIGKILL 9 +#define LX_SIGUSR1 10 +#define LX_SIGSEGV 11 +#define LX_SIGUSR2 12 +#define LX_SIGPIPE 13 +#define LX_SIGALRM 14 +#define LX_SIGTERM 15 +#define LX_SIGSTKFLT 16 +#define LX_SIGCHLD 17 +#define LX_SIGCONT 18 +#define LX_SIGSTOP 19 +#define LX_SIGTSTP 20 +#define LX_SIGTTIN 21 +#define LX_SIGTTOU 22 +#define LX_SIGURG 23 +#define LX_SIGXCPU 24 +#define LX_SIGXFSZ 25 +#define LX_SIGVTALRM 26 +#define LX_SIGPROF 27 +#define LX_SIGWINCH 28 +#define LX_SIGIO 29 +#define LX_SIGPOLL LX_SIGIO +#define LX_SIGPWR 30 +#define LX_SIGSYS 31 +#define LX_SIGUNUSED 31 + +#define LX_NSIG_WORDS 2 +#define LX_NBPW 32 +#define LX_NSIG ((LX_NBPW * LX_NSIG_WORDS) + 1) + +#define LX_SIGRTMIN 32 +#define LX_SIGRTMAX (LX_NSIG - 1) /* parenthesized: safe in expressions */ + +extern const int ltos_signo[]; +extern const int stol_signo[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SIGNUM_H */ diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h index c8a58a1981..cceb6c7bed 100644 --- a/usr/src/head/regexp.h +++ b/usr/src/head/regexp.h @@ -422,12 +422,12 @@ register char *lp, *ep; /*FALLTHRU*/ case CBRA: - braslist[*ep++] = (char *)lp; + braslist[(int)*ep++] = (char *)lp; continue; /*FALLTHRU*/ case CKET: - braelist[*ep++] = (char *)lp; + braelist[(int)*ep++] = (char *)lp; continue; /*FALLTHRU*/ @@ -505,8 +505,8 @@ register char *lp, *ep; 
/*FALLTHRU*/ case CBACK: - bbeg = braslist[*ep]; - ct = braelist[*ep++] - bbeg; + bbeg = braslist[(int)*ep]; + ct = braelist[(int)*ep++] - bbeg; if (ecmp(bbeg, lp, ct)) { lp += ct; @@ -516,8 +516,8 @@ register char *lp, *ep; /*FALLTHRU*/ case CBACK | STAR: - bbeg = braslist[*ep]; - ct = braelist[*ep++] - bbeg; + bbeg = braslist[(int)*ep]; + ct = braelist[(int)*ep++] - bbeg; curlp = lp; while (ecmp(bbeg, lp, ct)) lp += ct; diff --git a/usr/src/lib/brand/Makefile b/usr/src/lib/brand/Makefile index db59df2efc..bd766f3f9d 100644 --- a/usr/src/lib/brand/Makefile +++ b/usr/src/lib/brand/Makefile @@ -30,6 +30,9 @@ include ../../Makefile.master # Build everything in parallel; use .WAIT for dependencies .PARALLEL: +i386_SUBDIRS= lx +i386_MSGSUBDIRS= lx + SUBDIRS= shared .WAIT sn1 sngl solaris10 ipkg labeled $($(MACH)_SUBDIRS) MSGSUBDIRS= solaris10 shared $($(MACH)_MSGSUBDIRS) diff --git a/usr/src/lib/brand/lx/Makefile b/usr/src/lib/brand/lx/Makefile new file mode 100644 index 0000000000..7fafad20da --- /dev/null +++ b/usr/src/lib/brand/lx/Makefile @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +default: all + +include Makefile.lx + +# Build everything in parallel; use .WAIT for dependencies +.PARALLEL: + +SUBDIRS= cmd librtld_db lx_support lx_brand lx_thunk netfiles zone \ + .WAIT lx_nametoaddr +MSGSUBDIRS= lx_brand lx_support zone + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +_msg := TARGET= _msg + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +_msg: $(MSGSUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/Makefile.lx b/usr/src/lib/brand/lx/Makefile.lx new file mode 100644 index 0000000000..4db4679cef --- /dev/null +++ b/usr/src/lib/brand/lx/Makefile.lx @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/Makefile.lx +# +# include global definitions + +BRAND= lx + +include $(SRC)/lib/brand/Makefile.brand + diff --git a/usr/src/lib/brand/lx/cmd/Makefile b/usr/src/lib/brand/lx/cmd/Makefile new file mode 100644 index 0000000000..1519961954 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/Makefile @@ -0,0 +1,48 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +PROGS = lx_lockd lx_native lx_statd lx_thunk + +include ../Makefile.lx + +# override the install directory +ROOTBIN = $(ROOTBRANDDIR) +CLOBBERFILES = $(ROOTPROGS) + +.KEEP_STATE: + +lint: + +all: $(PROGS) + +install: all $(ROOTPROGS) + +clean: + $(RM) $(PROGS) + +clobber: clean + $(RM) $(ROOTPROGS) diff --git a/usr/src/lib/brand/lx/cmd/lx_lockd.sh b/usr/src/lib/brand/lx/cmd/lx_lockd.sh new file mode 100644 index 0000000000..cb60d19749 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_lockd.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LD_LIBRARY_PATH=/usr/lib/brand/lx +LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1 +LD_BIND_NOW=1 +export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW + +exec /native/usr/lib/brand/lx/lx_native \ + /native/usr/lib/nfs/lockd -P -U 29 -G 29 diff --git a/usr/src/lib/brand/lx/cmd/lx_native.sh b/usr/src/lib/brand/lx/cmd/lx_native.sh new file mode 100644 index 0000000000..8e8344a375 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_native.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +exit 0 diff --git a/usr/src/lib/brand/lx/cmd/lx_statd.sh b/usr/src/lib/brand/lx/cmd/lx_statd.sh new file mode 100644 index 0000000000..998fd90af2 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_statd.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LD_LIBRARY_PATH=/usr/lib/brand/lx +LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1 +LD_BIND_NOW=1 +export LD_LIBRARY_PATH LD_PRELOAD LD_BIND_NOW + +exec /native/usr/lib/brand/lx/lx_native \ + /native/usr/lib/nfs/statd -P -U 29 -G 29 diff --git a/usr/src/lib/brand/lx/cmd/lx_thunk.sh b/usr/src/lib/brand/lx/cmd/lx_thunk.sh new file mode 100644 index 0000000000..4e1e6cbc03 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_thunk.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +exec /native/usr/lib/brand/lx/lx_thunk diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile b/usr/src/lib/brand/lx/librtld_db/Makefile new file mode 100644 index 0000000000..2fc0a818f6 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/Makefile @@ -0,0 +1,54 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +default: all + +include $(SRC)/lib/Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +LINT_SUBDIRS= $(MACH) +$(BUILD64)LINT_SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber: $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile.com b/usr/src/lib/brand/lx/librtld_db/Makefile.com new file mode 100644 index 0000000000..202cc0fe7b --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/Makefile.com @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = lx_librtld_db.a +VERS = .1 +COBJS = lx_librtld_db.o +OBJECTS = $(COBJS) $(COBJS64) + +include $(SRC)/lib/Makefile.lib +include ../../Makefile.lx + +CSRCS = $(COBJS:%o=../common/%c) +SRCS = $(CSRCS) + +SRCDIR = ../common +UTSBASE = $(SRC)/uts + +# +# ATTENTION: +# Librtl_db brand plugin libraries should NOT directly invoke any +# libproc.so interfaces or be linked against libproc. If a librtl_db +# brand plugin library uses libproc.so interfaces then it may break +# any other librtld_db consumers (like mdb) that tries to attach +# to a branded process. The only safe interfaces that the a librtld_db +# brand plugin library can use to access a target process are the +# proc_service(3PROC) apis. +# +DYNFLAGS += $(VERSREF) -M../common/mapfile-vers +LIBS = $(DYNLIB) +LDLIBS += -lc -lrtld_db +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx \ + -I$(SRC)/cmd/sgs/librtld_db/common \ + -I$(SRC)/cmd/sgs/include \ + -I$(SRC)/cmd/sgs/include/$(MACH) + +ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx +ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64) + +# +# The top level Makefiles define define TEXT_DOMAIN. But librtld_db.so.1 +# isn't internationalized and this library won't be either. The only +# messages that this library can generate are messages used for debugging +# the operation of the library itself. 
+# +DTEXTDOM = + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +pics/%64.o: ../common/%.c + $(COMPILE.c) -D_ELF64 $(PICFLAGS) -o $@ $< + $(POST_PROCESS_O) + +include $(SRC)/lib/Makefile.targ diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/Makefile b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile new file mode 100644 index 0000000000..726e7ef6d3 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +COBJS64 = lx_librtld_db64.o + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +DYNFLAGS += -Mmapfile-vers + +CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) + +install: all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers new file mode 100644 index 0000000000..4893b02998 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. 
+# +# MAPFILE HEADER END +# + +SUNWprivate_1.1 { + global: + rtld_db_brand_ops64; +}; diff --git a/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c new file mode 100644 index 0000000000..50645b7780 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c @@ -0,0 +1,575 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/link.h> +#include <libproc.h> +#include <proc_service.h> +#include <rtld_db.h> +#include <synch.h> + +#include <sys/lx_brand.h> + +/* + * ATTENTION: + * Librtl_db brand plugin libraries should NOT directly invoke any + * libproc.so interfaces or be linked against libproc. If a librtl_db + * brand plugin library uses libproc.so interfaces then it may break + * any other librtld_db consumers (like mdb) that tries to attach + * to a branded process. 
The only safe interfaces that a librtld_db + * brand plugin library can use to access a target process are the + * proc_service(3PROC) APIs. + */ + +/* + * M_DATA comes from some streams header file but is also redefined in + * _rtld_db.h, so nuke the old streams definition here. + */ +#ifdef M_DATA +#undef M_DATA +#endif /* M_DATA */ + +/* + * For 32-bit versions of this library, this file gets compiled once. + * For 64-bit versions of this library, this file gets compiled twice, + * once with _ELF64 defined and once without. The expectation is that + * the 64-bit version of the library can properly deal with both 32-bit + * and 64-bit elf files, hence in the 64-bit library there are two copies + * of all the interfaces in this file, one set named *32 and one named *64. + * + * This also means that we need to be careful when declaring local pointers + * that point to objects in another process's address space, since these + * pointers may not match the current process's pointer width. Basically, + * we should avoid using data types that change size between 32 and 64 bit + * modes like: long, void *, uintptr_t, caddr_t, psaddr_t, size_t, etc. + * Instead we should declare all pointers as uint32_t. Then when we + * are compiled to deal with 64-bit targets we'll re-define uint32_t + * to be a uint64_t. + * + * Finally, one last important note. All the 64-bit elf file code + * is never used and can't be tested. This is because we don't actually + * support 64-bit Linux processes yet. The reason that we have it here + * is because we want to support debugging 32-bit elf targets with the + * 64-bit version of this library, so we need to have a 64-bit version + * of this library. But a 64-bit version of this library is expected + * to provide debugging interfaces for both 32 and 64-bit elf targets. + * So we provide the 64-bit elf target interfaces, but they will never + * be invoked and are untested. 
If we ever add support for 64-bit elf + * Linux processes, we'll need to verify that this code works correctly + * for those targets. + */ +#ifdef _LP64 +#ifdef _ELF64 +#define lx_ldb_get_dyns32 lx_ldb_get_dyns64 +#define lx_ldb_init32 lx_ldb_init64 +#define lx_ldb_fini32 lx_ldb_fini64 +#define lx_ldb_loadobj_iter32 lx_ldb_loadobj_iter64 +#define lx_ldb_getauxval32 lx_ldb_getauxval64 +#define lx_elf_props32 lx_elf_props64 +#define _rd_get_dyns32 _rd_get_dyns64 +#define _rd_get_ehdr32 _rd_get_ehdr64 +#define uint32_t uint64_t +#define Elf32_Dyn Elf64_Dyn +#define Elf32_Ehdr Elf64_Ehdr +#define Elf32_Phdr Elf64_Phdr +#endif /* _ELF64 */ +#endif /* _LP64 */ + +/* Included from usr/src/cmd/sgs/librtld_db/common */ +#include <_rtld_db.h> + +typedef struct lx_rd { + rd_agent_t *lr_rap; + struct ps_prochandle *lr_php; /* proc handle pointer */ + uint32_t lr_rdebug; /* address of lx r_debug */ + uint32_t lr_exec; /* base address of executable */ +} lx_rd_t; + +typedef struct lx_link_map { + uint32_t lxm_addr; /* Base address shared object is loaded at. */ + uint32_t lxm_name; /* Absolute file name object was found in. */ + uint32_t lxm_ld; /* Dynamic section of the shared object. */ + uint32_t lxm_next; /* Chain of loaded objects. */ +} lx_link_map_t; + +typedef struct lx_r_debug { + int r_version; /* Version number for this protocol. */ + uint32_t r_map; /* Head of the chain of loaded objects. */ + + /* + * This is the address of a function internal to the run-time linker, + * that will always be called when the linker begins to map in a + * library or unmap it, and again when the mapping change is complete. + * The debugger can set a breakpoint at this address if it wants to + * notice shared object mapping changes. + */ + uint32_t r_brk; + r_state_e r_state; /* defined the same way between lx/solaris */ + uint32_t r_ldbase; /* Base address the linker is loaded at. 
*/ +} lx_r_debug_t; + +/* + * Return the value of the target's aux vector entry of the given type, or + * (uint32_t)-1 if the aux vector cannot be read or holds no such entry. + */ +static uint32_t +lx_ldb_getauxval32(struct ps_prochandle *php, int type) +{ + const auxv_t *auxvp = NULL; + + if (ps_pauxv(php, &auxvp) != PS_OK) + return ((uint32_t)-1); + + while (auxvp->a_type != AT_NULL) { + if (auxvp->a_type == type) + return ((uint32_t)(uintptr_t)auxvp->a_un.a_ptr); + auxvp++; + } + return ((uint32_t)-1); +} + +/* + * A key difference between the linux linker and ours is that the linux + * linker adds the base address of segments to certain values in the + * segments' ELF header. As an example, look at the address of the + * DT_HASH hash table in a Solaris section - it is a relative address + * which locates the start of the hash table, relative to the beginning + * of the ELF file. However, when the linux linker loads a section, it + * modifies the in-memory ELF image by changing address of the hash + * table to be an absolute address. This is only done for libraries - not for + * executables. + * + * Solaris tools expect the relative address to remain relative, so + * here we will patch the dynamic entries that we read out and hand back + * so that they once again contain relative addresses. + * + * To accomplish this, we look at the dynamic section of each ET_DYN object. + * A linux object is recognized heuristically: its DT_STRTAB value looks like + * an absolute address rather than an offset. For matching objects, we + * subtract the object's base address from the address-valued dynamic + * entries to get back to relative addresses. 
+ */ +static rd_err_e +lx_ldb_get_dyns32(rd_helper_data_t rhd, + psaddr_t addr, void **dynpp, size_t *dynpp_sz) +{ + lx_rd_t *lx_rd = (lx_rd_t *)rhd; + rd_agent_t *rap = lx_rd->lr_rap; + Elf32_Ehdr ehdr; + Elf32_Dyn *dynp = NULL; + size_t dynp_sz; + uint_t ndyns; + int i; + + ps_plog("lx_ldb_get_dyns: invoked for object at 0x%p", addr); + + /* Read in a copy of the ehdr */ + if (_rd_get_ehdr32(rap, addr, &ehdr, NULL) != RD_OK) { + ps_plog("lx_ldb_get_dyns: _rd_get_ehdr() failed"); + return (RD_ERR); + } + + /* read out the PT_DYNAMIC elements for this object */ + if (_rd_get_dyns32(rap, addr, &dynp, &dynp_sz) != RD_OK) { + ps_plog("lx_ldb_get_dyns: _rd_get_dyns() failed"); + return (RD_ERR); + } + + /* + * From here on out if we encounter an error we'll just return + * success and pass back the unmolested dynamic elements that + * we've already obtained. + */ + *dynpp = dynp; + *dynpp_sz = dynp_sz; + ndyns = dynp_sz / sizeof (Elf32_Dyn); + + /* If this isn't a dynamic object, there's nothing left to do */ + if (ehdr.e_type != ET_DYN) { + ps_plog("lx_ldb_get_dyns: done: not a shared object"); + return (RD_OK); + } + + /* + * Before we blindly start changing dynamic section addresses + * we need to figure out if the current object that we're looking + * at is a linux object or a solaris object. To do this first + * we need to find the string tab dynamic section element. + */ + for (i = 0; i < ndyns; i++) { + if (dynp[i].d_tag == DT_STRTAB) + break; + } + if (i == ndyns) { + ps_plog("lx_ldb_get_dyns: " + "failed to find string tab in the dynamic section"); + return (RD_OK); + } + + /* + * Check if the strtab value looks like an offset or an address. + * It's an offset if the value is less than the base address that + * the object is loaded at, or if the value is less than the offset + * of the section headers in the same elf object. This check isn't + * perfect, but in practice it's good enough. 
+ */ + if ((dynp[i].d_un.d_ptr < addr) || + (dynp[i].d_un.d_ptr < ehdr.e_shoff)) { + ps_plog("lx_ldb_get_dyns: " + "doesn't appear to be an lx object"); + return (RD_OK); + } + + /* + * This seems to be a linux object, so we'll patch up the dynamic + * section addresses + */ + ps_plog("lx_ldb_get_dyns: " + "patching up lx object dynamic section addresses"); + for (i = 0; i < ndyns; i++) { + switch (dynp[i].d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_REL: + case DT_DEBUG: + case DT_JMPREL: + case DT_VERSYM: + if (dynp[i].d_un.d_val > addr) { + dynp[i].d_un.d_ptr -= addr; + } + break; + default: + break; + } + } + return (RD_OK); +} + +static void +lx_ldb_fini32(rd_helper_data_t rhd) +{ + lx_rd_t *lx_rd = (lx_rd_t *)rhd; + ps_plog("lx_ldb_fini: cleaning up lx helper"); + free(lx_rd); +} + +/* + * The linux linker has an r_debug structure somewhere in its data section that + * contains the address of the head of the link map list. To find this, we will + * use the DT_DEBUG token in the executable's dynamic section. The linux linker + * wrote the address of its r_debug structure to the DT_DEBUG dynamic entry. We + * get the address of the executable's program headers from the + * AT_SUN_BRAND_LX_PHDR aux vector entry. From there, we calculate the + * address of the Elf header, and from there we can easily get to the DT_DEBUG + * entry. 
+ */ +static rd_helper_data_t +lx_ldb_init32(rd_agent_t *rap, struct ps_prochandle *php) +{ + lx_rd_t *lx_rd; + uint32_t addr, phdr_addr, dyn_addr; + Elf32_Dyn *dyn; + Elf32_Phdr phdr, *ph, *phdrs; + Elf32_Ehdr ehdr; + int i, dyn_count; + + /* zeroed so that lr_rdebug == 0 means "DT_DEBUG not found" below */ + lx_rd = calloc(1, sizeof (lx_rd_t)); + if (lx_rd == NULL) { + ps_plog("lx_ldb_init: cannot allocate memory"); + return (NULL); + } + lx_rd->lr_rap = rap; + lx_rd->lr_php = php; + + phdr_addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_PHDR); + if (phdr_addr == (uint32_t)-1) { + ps_plog("lx_ldb_init: no LX_PHDR found in aux vector"); + /* release the helper state, as every other error path does */ + free(lx_rd); + return (NULL); + } + ps_plog("lx_ldb_init: found LX_PHDR auxv phdr at: 0x%p", + phdr_addr); + + if (ps_pread(php, phdr_addr, &phdr, sizeof (phdr)) != PS_OK) { + ps_plog("lx_ldb_init: couldn't read phdr at 0x%p", + phdr_addr); + free(lx_rd); + return (NULL); + } + + /* The ELF header should be before the program header in memory */ + lx_rd->lr_exec = addr = phdr_addr - phdr.p_offset; + if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) { + ps_plog("lx_ldb_init: couldn't read ehdr at 0x%p", + lx_rd->lr_exec); + free(lx_rd); + return (NULL); + } + ps_plog("lx_ldb_init: read ehdr at: 0x%p", addr); + + if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) { + ps_plog("lx_ldb_init: couldn't alloc phdrs memory"); + free(lx_rd); + return (NULL); + } + + if (ps_pread(php, phdr_addr, phdrs, ehdr.e_phnum * ehdr.e_phentsize) != + PS_OK) { + ps_plog("lx_ldb_init: couldn't read phdrs at 0x%p", + phdr_addr); + free(lx_rd); + free(phdrs); + return (NULL); + } + ps_plog("lx_ldb_init: read %d phdrs at: 0x%p", + ehdr.e_phnum, phdr_addr); + + /* step by e_phentsize, which is what the target's ELF header declares */ + for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++, + /*LINTED */ + ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) { + if (ph->p_type == PT_DYNAMIC) + break; + } + if (i == ehdr.e_phnum) { + ps_plog("lx_ldb_init: no PT_DYNAMIC in executable"); + free(lx_rd); + free(phdrs); + return (NULL); + } + ps_plog("lx_ldb_init: found PT_DYNAMIC phdr[%d] at: 0x%p", + i, 
(phdr_addr + ((char *)ph - (char *)phdrs))); + + if ((dyn = malloc(ph->p_filesz)) == NULL) { + ps_plog("lx_ldb_init: couldn't alloc for PT_DYNAMIC"); + free(lx_rd); + free(phdrs); + return (NULL); + } + + dyn_addr = addr + ph->p_offset; + dyn_count = ph->p_filesz / sizeof (Elf32_Dyn); + if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) { + ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p", + dyn_addr); + free(lx_rd); + free(phdrs); + free(dyn); + return (NULL); + } + ps_plog("lx_ldb_init: read %d dynamic headers at: 0x%p", + dyn_count, dyn_addr); + + for (i = 0; i < dyn_count; i++) { + if (dyn[i].d_tag == DT_DEBUG) { + lx_rd->lr_rdebug = dyn[i].d_un.d_ptr; + break; + } + } + free(phdrs); + free(dyn); + + if (lx_rd->lr_rdebug == 0) { + ps_plog("lx_ldb_init: no DT_DEBUG found in exe"); + free(lx_rd); + return (NULL); + } + ps_plog("lx_ldb_init: found DT_DEBUG: 0x%p", lx_rd->lr_rdebug); + + return ((rd_helper_data_t)lx_rd); +} + +/* + * Given the address of an ELF object in the target, return its size and, + * through data_addr, the aligned start of its read/write PT_LOAD segment. 
+ */ +static size_t +lx_elf_props32(struct ps_prochandle *php, uint32_t addr, psaddr_t *data_addr) +{ + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *ph; + int i; + uint32_t min = (uint32_t)-1; + uint32_t max = 0; + size_t sz = 0; + + if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) { + ps_plog("lx_elf_props: Couldn't read ELF header at 0x%p", + addr); + return (0); + } + + if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) + return (0); + + if (ps_pread(php, addr + ehdr.e_phoff, phdrs, ehdr.e_phnum * + ehdr.e_phentsize) != PS_OK) { + ps_plog("lx_elf_props: Couldn't read program headers at 0x%p", + addr + ehdr.e_phoff); + /* don't leak the program header buffer on this error path */ + free(phdrs); + return (0); + } + + for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++, + /*LINTED */ + ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) { + + if (ph->p_type != PT_LOAD) + continue; + + /* a readable and writable PT_LOAD segment is taken as the data */ + if ((ph->p_flags & (PF_W | PF_R)) == (PF_W | PF_R)) { + *data_addr = ph->p_vaddr; + if (ehdr.e_type == ET_DYN) + *data_addr += addr; + if (*data_addr & (ph->p_align - 1)) + *data_addr = *data_addr & (~(ph->p_align -1)); + } + + if (ph->p_vaddr < min) + min = ph->p_vaddr; + + /* >= so that a lone PT_LOAD segment at vaddr 0 is still counted */ + if (ph->p_vaddr >= max) { + max = ph->p_vaddr; + sz = ph->p_memsz + max - min; + if (sz & (ph->p_align - 1)) + sz = (sz & (~(ph->p_align - 1))) + ph->p_align; + } + } + + free(phdrs); + return (sz); +} + +/* + * Walk the linux link map list found through r_debug and invoke cb once + * for the executable and then once for every other object on the list. + */ +static int +lx_ldb_loadobj_iter32(rd_helper_data_t rhd, rl_iter_f *cb, void *client_data) +{ + lx_rd_t *lx_rd = (lx_rd_t *)rhd; + struct ps_prochandle *php = lx_rd->lr_php; + lx_r_debug_t r_debug; + lx_link_map_t map; + uint32_t p = 0; + int rc; + rd_loadobj_t exec; + + if ((rc = ps_pread(php, (psaddr_t)lx_rd->lr_rdebug, &r_debug, + sizeof (r_debug))) != PS_OK) { + ps_plog("lx_ldb_loadobj_iter: " + "Couldn't read linux r_debug at 0x%p", lx_rd->lr_rdebug); + return (rc); + } + + p = r_debug.r_map; + + /* + * The first item on the link map list is for the executable, but it + * doesn't give us any useful information about it. 
We need to + * synthesize a rd_loadobj_t for the client. + * + * Linux doesn't give us the executable name, so we'll get it from + * the AT_SUN_EXECNAME aux vector entry instead. + */ + if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != PS_OK) { + ps_plog("lx_ldb_loadobj_iter: " + "Couldn't read linux link map at 0x%p", p); + return (rc); + } + + bzero(&exec, sizeof (exec)); + exec.rl_base = lx_rd->lr_exec; + exec.rl_dynamic = map.lxm_ld; + exec.rl_nameaddr = lx_ldb_getauxval32(php, AT_SUN_EXECNAME); + exec.rl_lmident = LM_ID_BASE; + + exec.rl_bend = exec.rl_base + + lx_elf_props32(php, lx_rd->lr_exec, &exec.rl_data_base); + + if ((*cb)(&exec, client_data) == 0) { + ps_plog("lx_ldb_loadobj_iter: " + "client callb failed for executable"); + return (PS_ERR); + } + + /* p and lxm_next are target addresses held in integers; 0 ends the list */ + for (p = map.lxm_next; p != 0; p = map.lxm_next) { + rd_loadobj_t obj; + + /* zero every field, as is done for the executable's entry above */ + bzero(&obj, sizeof (obj)); + + if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != + PS_OK) { + ps_plog("lx_ldb_loadobj_iter: " + "Couldn't read lk map at %p", p); + return (rc); + } + + /* + * The linux link map has less information than the Solaris one. + * We need to go fetch the missing information from the ELF + * headers. + */ + + obj.rl_nameaddr = (psaddr_t)map.lxm_name; + obj.rl_base = map.lxm_addr; + obj.rl_refnameaddr = (psaddr_t)map.lxm_name; + obj.rl_plt_base = 0; + obj.rl_plt_size = 0; + obj.rl_lmident = LM_ID_BASE; + + /* + * Ugh - we have to walk the ELF stuff, find the PT_LOAD + * sections, and calculate the end of the file's mappings + * ourselves. 
+ */ + + obj.rl_bend = map.lxm_addr + + lx_elf_props32(php, map.lxm_addr, &obj.rl_data_base); + obj.rl_padstart = obj.rl_base; + obj.rl_padend = obj.rl_bend; + obj.rl_dynamic = map.lxm_ld; + obj.rl_tlsmodid = 0; + + ps_plog("lx_ldb_loadobj_iter: 0x%p to 0x%p", + obj.rl_base, obj.rl_bend); + + if ((*cb)(&obj, client_data) == 0) { + /* + * lxm_name is an address in the target, not a string we + * can print, so log it as an address. + */ + ps_plog("lx_ldb_loadobj_iter: " + "Client callback failed on object named at 0x%p", + map.lxm_name); + return (rc); + } + } + return (RD_OK); +} + +/* + * Librtld_db plugin linkage struct. + * + * When we get loaded by librtld_db, it will look for the symbol below + * to find our plugin entry points. + */ +rd_helper_ops_t RTLD_DB_BRAND_OPS = { + LM_ID_BRAND, + lx_ldb_init32, + lx_ldb_fini32, + lx_ldb_loadobj_iter32, + lx_ldb_get_dyns32 +}; diff --git a/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers new file mode 100644 index 0000000000..5e328d6075 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. 
DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +{ + global: + rtld_db_brand_ops32; + local: + *; +}; + +#Externally defined symbols +{ + global: + ps_pauxv = NODIRECT PARENT; + ps_pdmodel = NODIRECT PARENT; + ps_pglobal_lookup = NODIRECT PARENT; + ps_pglobal_sym = NODIRECT PARENT; + ps_plog = NODIRECT PARENT; + ps_pread = NODIRECT PARENT; + ps_pwrite = NODIRECT PARENT; +}; diff --git a/usr/src/lib/brand/lx/librtld_db/i386/Makefile b/usr/src/lib/brand/lx/librtld_db/i386/Makefile new file mode 100644 index 0000000000..b5f780c072 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/i386/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile b/usr/src/lib/brand/lx/lx_brand/Makefile new file mode 100644 index 0000000000..de4fa338a0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/Makefile @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../../Makefile.lib + +default: all + +SUBDIRS= $(MACH) + +LINT_SUBDIRS= $(MACH) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint +_msg := TARGET= _msg + +.KEEP_STATE: + +all install clean clobber _msg: $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com new file mode 100644 index 0000000000..1b58e78ba0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com @@ -0,0 +1,102 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LX_CMN = $(SRC)/common/brand/lx + +LIBRARY = lx_brand.a +VERS = .1 +COBJS = clock.o \ + clone.o \ + debug.o \ + dir.o \ + file.o \ + fcntl.o \ + fork.o \ + id.o \ + ioctl.o \ + iovec.o \ + lx_brand.o \ + lx_thunk_server.o \ + mem.o \ + misc.o \ + module.o \ + mount.o \ + open.o \ + pgrp.o \ + poll_select.o \ + priority.o \ + ptrace.o \ + rlimit.o \ + sched.o \ + sendfile.o \ + signal.o \ + socket.o \ + stat.o \ + statfs.o \ + sysctl.o \ + sysv_ipc.o \ + time.o \ + truncate.o \ + wait.o + +CMNOBJS = lx_signum.o +ASOBJS = lx_handler.o lx_runexe.o lx_crt.o +OBJECTS = $(CMNOBJS) $(COBJS) $(ASOBJS) + +include ../../Makefile.lx +include ../../../../Makefile.lib + +CSRCS = $(COBJS:%o=../common/%c) $(CMNOBJS:%o=$(LX_CMN)/%c) +ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s) +SRCS = $(CSRCS) $(ASSRCS) + +SRCDIR = ../common +UTSBASE = ../../../../../uts + +LIBS = $(DYNLIB) +LDLIBS += -lc -lsocket -lmapmalloc -lproc -lrtld_db +DYNFLAGS += -Wl,-e_start -Wl,-I/native/lib/ld.so.1 -M../common/mapfile +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) +ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../ \ + -I$(UTSBASE)/common/brand/lx + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../../Makefile.targ + +pics/%.o: $(ISASRCDIR)/%.s + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) diff --git a/usr/src/lib/brand/lx/lx_brand/common/clock.c b/usr/src/lib/brand/lx/lx_brand/common/clock.c new file mode 100644 index 0000000000..3880bcbd5c --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/clock.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <string.h> +#include <time.h> +#include <sys/lx_misc.h> + +/* + * Linux uses different values for its clock identifiers, so we have to do basic + * translations between the two. Thankfully, both Linux and Solaris implement + * the same POSIX SUSv3 clock types, so the semantics should be identical. + */ + +static int ltos_clock[] = { + CLOCK_REALTIME, + CLOCK_MONOTONIC, + CLOCK_PROCESS_CPUTIME_ID, + CLOCK_THREAD_CPUTIME_ID +}; + +/* + * Number of entries in ltos_clock[]; valid Linux clock ids are + * 0 through LX_CLOCK_MAX - 1. + */ +#define LX_CLOCK_MAX (sizeof (ltos_clock) / sizeof (ltos_clock[0])) + +/* + * Read the given clock and copy the result out to tp. Returns 0 or a + * negated errno value. + */ +int +lx_clock_gettime(int clock, struct timespec *tp) +{ + struct timespec ts; + + if (clock < 0 || clock >= LX_CLOCK_MAX) + return (-EINVAL); + + if (clock_gettime(ltos_clock[clock], &ts) < 0) + return (-errno); + + return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0); +} + +/* + * Set the given clock from the timespec at tp. Returns 0 or a negated + * errno value. + */ +int +lx_clock_settime(int clock, struct timespec *tp) +{ + struct timespec ts; + + if (clock < 0 || clock >= LX_CLOCK_MAX) + return (-EINVAL); + + if (uucopy(tp, &ts, sizeof (struct timespec)) < 0) + return (-EFAULT); + + return ((clock_settime(ltos_clock[clock], &ts) < 0) ? 
-errno : 0); +} + +/* + * Fetch the resolution of the given clock and copy it out to tp. Returns + * 0 or a negated errno value. + */ +int +lx_clock_getres(int clock, struct timespec *tp) +{ + struct timespec ts; + + if (clock < 0 || clock >= LX_CLOCK_MAX) + return (-EINVAL); + + if (clock_getres(ltos_clock[clock], &ts) < 0) + return (-errno); + + return ((uucopy(&ts, tp, sizeof (struct timespec)) < 0) ? -EFAULT : 0); +} + +/* + * Sleep on the given clock. Returns 0 or a negated errno value; when a + * relative sleep is interrupted the remaining time is copied out to rmtp. + */ +int +lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp, + struct timespec *rmtp) +{ + struct timespec rqt, rmt; + int err; + + if (clock < 0 || clock >= LX_CLOCK_MAX) + return (-EINVAL); + + if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0) + return (-EFAULT); + + /* + * the TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux. + * + * clock_nanosleep() reports failure through its return value (an + * error number), not through errno. + */ + err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt); + if (err != 0 && err != EINTR) + return (-err); + + /* + * Only copy values to rmtp if the sleep was interrupted (rmt is not + * filled in otherwise), the timer is TIMER_RELTIME and rmtp is + * non-NULL. + */ + if ((err == EINTR) && + ((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) && + (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0)) + return (-EFAULT); + + return (-err); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c new file mode 100644 index 0000000000..f271616f49 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c @@ -0,0 +1,546 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <ucontext.h> +#include <thread.h> +#include <strings.h> +#include <libintl.h> +#include <sys/regset.h> +#include <sys/syscall.h> +#include <sys/inttypes.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/segments.h> +#include <signal.h> +#include <sys/lx_misc.h> +#include <sys/lx_types.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_brand.h> +#include <sys/lx_debug.h> +#include <sys/lx_thread.h> + +#define LX_CSIGNAL 0x000000ff +#define LX_CLONE_VM 0x00000100 +#define LX_CLONE_FS 0x00000200 +#define LX_CLONE_FILES 0x00000400 +#define LX_CLONE_SIGHAND 0x00000800 +#define LX_CLONE_PID 0x00001000 +#define LX_CLONE_PTRACE 0x00002000 +#define LX_CLONE_VFORK 0x00004000 +#define LX_CLONE_PARENT 0x00008000 +#define LX_CLONE_THREAD 0x00010000 +#define LX_CLONE_SYSVSEM 0x00040000 +#define LX_CLONE_SETTLS 0x00080000 +#define LX_CLONE_PARENT_SETTID 0x00100000 +#define LX_CLONE_CHILD_CLEARTID 0x00200000 +#define LX_CLONE_DETACH 0x00400000 +#define LX_CLONE_CHILD_SETTID 0x01000000 + +#define SHARED_AS \ + (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND) +#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK) +#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH) + +#define IS_FORK(f) (((f) & SHARED_AS) == 0) +#define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK) + +#define LX_EXIT 1 +#define LX_EXIT_GROUP 2 + +/* + * This is dicey. This seems to be an internal glibc structure, and not + * part of any external interface. 
Thus, it is subject to change without + * notice. FWIW, clone(2) itself seems to be an internal (or at least + * unstable) interface, since strace(1) shows it differently than the man + * page. + */ +struct lx_desc +{ + uint32_t entry_number; + uint32_t base_addr; + uint32_t limit; + uint32_t seg_32bit:1; + uint32_t contents:2; + uint32_t read_exec_only:1; + uint32_t limit_in_pages:1; + uint32_t seg_not_present:1; + uint32_t useable:1; + uint32_t empty:25; +}; + +struct clone_state { + void *c_retaddr; /* instr after clone()'s int80 */ + int c_flags; /* flags to clone(2) */ + int c_sig; /* signal to send on thread exit */ + void *c_stk; /* %esp of new thread */ + void *c_ptidp; + struct lx_desc *c_ldtinfo; /* thread-specific segment */ + void *c_ctidp; + uintptr_t c_gs; /* Linux's %gs */ + sigset_t c_sigmask; /* signal mask */ + lx_affmask_t c_affmask; /* CPU affinity mask */ + volatile int *c_clone_res; /* pid/error returned to cloner */ +}; + +extern void lx_setup_clone(uintptr_t, void *, void *); + +/* + * Counter incremented when we vfork(2) ourselves, and decremented when the + * vfork(2)ed child exit(2)s or exec(2)s. + */ +static int is_vforked = 0; + +int +lx_exit(uintptr_t p1) +{ + int ret, status = (int)p1; + lx_tsd_t *lx_tsd; + + /* + * If we are a vfork(2)ed child, we need to exit as quickly and + * cleanly as possible to avoid corrupting our parent. + */ + if (is_vforked != 0) { + is_vforked--; + _exit(status); + } + + if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "exit", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_exit = LX_EXIT; + lx_tsd->lxtsd_exit_status = status; + + /* + * Block all signals in the exit context to avoid taking any signals + * (to the degree possible) while exiting. + */ + (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask); + + /* + * This thread is exiting. 
Restore the state of the thread to + * what it was before we started running linux code. + */ + (void) setcontext(&lx_tsd->lxtsd_exit_context); + + /* + * If we returned from the setcontext(2), something is very wrong. + */ + lx_err_fatal(gettext("%s: unable to set exit context: %s"), + "exit", strerror(errno)); + + /*NOTREACHED*/ + return (0); +} + +int +lx_group_exit(uintptr_t p1) +{ + int ret, status = (int)p1; + lx_tsd_t *lx_tsd; + + /* + * If we are a vfork(2)ed child, we need to exit as quickly and + * cleanly as possible to avoid corrupting our parent. + */ + if (is_vforked != 0) { + is_vforked--; + _exit(status); + } + + if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "group_exit", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_exit = LX_EXIT_GROUP; + lx_tsd->lxtsd_exit_status = status; + + /* + * Block all signals in the exit context to avoid taking any signals + * (to the degree possible) while exiting. + */ + (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask); + + /* + * This thread is exiting. Restore the state of the thread to + * what it was before we started running linux code. + */ + (void) setcontext(&lx_tsd->lxtsd_exit_context); + + /* + * If we returned from the setcontext(2), something is very wrong. + */ + lx_err_fatal(gettext("%s: unable to set exit context: %s"), + "group_exit", strerror(errno)); + + /*NOTREACHED*/ + return (0); +} + +static void * +clone_start(void *arg) +{ + int rval; + struct clone_state *cs = (struct clone_state *)arg; + lx_tsd_t lx_tsd; + + /* + * Let the kernel finish setting up all the needed state for this + * new thread. + * + * We already created the thread using the thr_create(3C) library + * call, so most of the work required to emulate lx_clone(2) has + * been done by the time we get to this point. 
Instead of creating + * a new brandsys(2) subcommand to perform the last few bits of + * bookkeeping, we just use the lx_clone() slot in the syscall + * table. + */ + lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()"); + lx_debug("\tLX_SYS_clone(0x%x, 0x%p, 0x%p, 0x%p, 0x%p)", + cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp); + + rval = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_clone, + cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp, + NULL); + + /* + * At this point the parent is waiting for cs->c_clone_res to go + * non-zero to indicate the thread has been cloned. The value set + * in cs->c_clone_res will be used for the return value from + * clone(). + */ + if (rval < 0) { + *(cs->c_clone_res) = -errno; + lx_debug("\tkernel clone failed, errno %d\n", errno); + return (NULL); + } + + if (lx_sched_setaffinity(0, sizeof (cs->c_affmask), + (uintptr_t)&cs->c_affmask) != 0) { + *(cs->c_clone_res) = -errno; + + lx_err_fatal(gettext( + "Unable to set affinity mask in child thread: %s"), + strerror(errno)); + } + + /* Initialize the thread specific data for this thread. */ + bzero(&lx_tsd, sizeof (lx_tsd)); + lx_tsd.lxtsd_gs = cs->c_gs; + + /* + * Use the address of the stack-allocated lx_tsd as the + * per-thread storage area to cache various values for later + * use. + * + * This address is only used by this thread, so there is no + * danger of other threads using this storage area, nor of it + * being accessed once this stack frame has been freed. + */ + if (thr_setspecific(lx_tsd_key, &lx_tsd) != 0) { + *(cs->c_clone_res) = -errno; + lx_err_fatal( + gettext("Unable to set thread-specific ptr for clone: %s"), + strerror(rval)); + } + + /* + * Save the current context of this thread. + * + * We'll restore this context when this thread attempts to exit. 
+ */ + if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) { + *(cs->c_clone_res) = -errno; + + lx_err_fatal(gettext( + "Unable to initialize thread-specific exit context: %s"), + strerror(errno)); + } + + /* + * Do the final stack twiddling, reset %gs, and return to the + * clone(2) path. + */ + if (lx_tsd.lxtsd_exit == 0) { + if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) { + *(cs->c_clone_res) = -errno; + + lx_err_fatal(gettext( + "Unable to release held signals for child " + "thread: %s"), strerror(errno)); + } + + /* + * Let the parent know that the clone has (effectively) been + * completed. + */ + *(cs->c_clone_res) = rval; + + lx_setup_clone(cs->c_gs, cs->c_retaddr, cs->c_stk); + + /* lx_setup_clone() should never return. */ + assert(0); + } + + /* + * We are here because the Linux application called the exit() or + * exit_group() system call. In turn the brand library did a + * setcontext() to jump to the thread context state saved in + * getcontext(), above. + */ + if (lx_tsd.lxtsd_exit == LX_EXIT) + thr_exit((void *)lx_tsd.lxtsd_exit_status); + else + exit(lx_tsd.lxtsd_exit_status); + + assert(0); + /*NOTREACHED*/ +} + +int +lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + struct clone_state *cs; + int flags = (int)p1; + void *cldstk = (void *)p2; + void *ptidp = (void *)p3; + struct lx_desc *ldtinfo = (void *)p4; + void *ctidp = (void *)p5; + thread_t tid; + volatile int clone_res; + int sig; + int rval; + int pid; + lx_regs_t *rp; + sigset_t sigmask; + + if (flags & LX_CLONE_SETTLS) { + lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p " + "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp); + } else { + lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)", + flags, cldstk, ptidp); + } + + /* + * Only supported for pid 0 on Linux + */ + if (flags & LX_CLONE_PID) + return (-EINVAL); + + /* + * CLONE_THREAD requires CLONE_SIGHAND. 
+ * + * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared + * in kernel 2.4 and prior. + * In kernel 2.6 CLONE_DETACHED was dropped completely, so we no + * longer have this requirement. + */ + + if (flags & CLONE_TD) { + if (!(flags & LX_CLONE_SIGHAND)) + return (-EINVAL); + if ((lx_get_kern_version() <= LX_KERN_2_4) && + (flags & CLONE_TD) != CLONE_TD) + return (-EINVAL); + } + + rp = lx_syscall_regs(); + + /* test if pointer passed by user are writable */ + if (flags & LX_CLONE_PARENT_SETTID) { + if (uucopy(ptidp, &pid, sizeof (int)) != 0) + return (-EFAULT); + if (uucopy(&pid, ptidp, sizeof (int)) != 0) + return (-EFAULT); + } + if (flags & LX_CLONE_CHILD_SETTID) { + if (uucopy(ctidp, &pid, sizeof (int)) != 0) + return (-EFAULT); + if (uucopy(&pid, ctidp, sizeof (int)) != 0) + return (-EFAULT); + } + + /* See if this is a fork() operation or a thr_create(). */ + if (IS_FORK(flags) || IS_VFORK(flags)) { + if (flags & LX_CLONE_PARENT) { + lx_unsupported(gettext( + "clone(2) only supports CLONE_PARENT " + "for threads.\n")); + return (-ENOTSUP); + } + + if (flags & LX_CLONE_PTRACE) + lx_ptrace_fork(); + + if (flags & LX_CLONE_VFORK) { + is_vforked++; + rval = vfork(); + if (rval != 0) + is_vforked--; + } else { + rval = fork1(); + if (rval == 0 && lx_is_rpm) + (void) sleep(lx_rpm_delay); + } + + /* + * Since we've already forked, we can't do much if uucopy fails, + * so we just ignore failure. Failure is unlikely since we've + * tested the memory before we did the fork. + */ + if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) { + (void) uucopy(&rval, ptidp, sizeof (int)); + } + + if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) { + /* + * lx_getpid should not fail, and if it does, there's + * not much we can do about it since we've already + * forked, so on failure, we just don't copy the + * memory. 
+ */ + pid = lx_getpid(); + if (pid >= 0) + (void) uucopy(&pid, ctidp, sizeof (int)); + } + + /* Parent just returns */ + if (rval != 0) + return ((rval < 0) ? -errno : rval); + + /* + * If provided, the child needs its new stack set up. + */ + if (cldstk) + lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk); + + return (0); + } + + /* + * We have very restricted support.... only exactly these flags are + * supported + */ + if (((flags & SHARED_AS) != SHARED_AS)) { + lx_unsupported(gettext( + "clone(2) requires that all or none of CLONE_VM " + "CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n")); + return (-ENOTSUP); + } + + if (cldstk == NULL) { + lx_unsupported(gettext( + "clone(2) requires the caller to allocate the " + "child's stack.\n")); + return (-ENOTSUP); + } + + /* + * If we want a signal-on-exit, ensure that the signal is valid. + */ + if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) { + lx_unsupported(gettext( + "clone(2) passed unsupported signal: %d"), sig); + return (-ENOTSUP); + } + + /* + * To avoid malloc() here, we steal a part of the new thread's + * stack to store all the info that thread might need for + * initialization. We also make it 64-bit aligned for good + * measure. + */ + cs = (struct clone_state *) + ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8)); + cs->c_flags = flags; + cs->c_sig = sig; + cs->c_stk = cldstk; + cs->c_ptidp = ptidp; + cs->c_ldtinfo = ldtinfo; + cs->c_ctidp = ctidp; + cs->c_clone_res = &clone_res; + cs->c_gs = rp->lxr_gs; + + if (lx_sched_getaffinity(0, sizeof (cs->c_affmask), + (uintptr_t)&cs->c_affmask) == -1) + lx_err_fatal(gettext( + "Unable to get affinity mask for parent thread: %s"), + strerror(errno)); + + /* + * We want the new thread to return directly to the return site for + * the system call. 
+ */ + cs->c_retaddr = (void *)rp->lxr_eip; + clone_res = 0; + + (void) sigfillset(&sigmask); + + /* + * Block all signals because the thread we create won't be able to + * properly handle them until it's fully set up. + */ + if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) { + lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno)); + return (-errno); + } + + rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid); + + /* + * Release any pending signals + */ + (void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL); + + /* + * Wait for the child to be created and have its tid assigned. + */ + if (rval == 0) { + while (clone_res == 0) + ; + + rval = clone_res; + } + + return (rval); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/debug.c b/usr/src/lib/brand/lx/lx_brand/common/debug.c new file mode 100644 index 0000000000..dccdcbb419 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/debug.c @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <thread.h> +#include <unistd.h> + +#include <sys/modctl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <sys/lx_brand.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +/* internal debugging state */ +static char *lx_debug_path = NULL; /* debug output file path */ +static char lx_debug_path_buf[MAXPATHLEN]; + +void +lx_debug_enable(void) +{ + /* send all debugging output to /dev/tty */ + lx_debug_path = "/dev/tty"; + lx_debug("lx_debug: debugging output enabled: %s", lx_debug_path); +} + +void +lx_debug_init(void) +{ + if (getenv("LX_DEBUG") == NULL) + return; + + /* + * It's OK to use this value without any locking, as all callers can + * use the return value to decide whether extra work should be done + * before calling lx_debug(). + * + * If debugging is disabled after a routine calls this function it + * doesn't really matter as lx_debug() will see debugging is disabled + * and will not output anything. + */ + lx_debug_enabled = 1; + + /* check if there's a debug log file specified */ + lx_debug_path = getenv("LX_DEBUG_FILE"); + if (lx_debug_path == NULL) { + /* send all debugging output to /dev/tty */ + lx_debug_path = "/dev/tty"; + } + + (void) strlcpy(lx_debug_path_buf, lx_debug_path, + sizeof (lx_debug_path_buf)); + lx_debug_path = lx_debug_path_buf; + + lx_debug("lx_debug: debugging output ENABLED to path: \"%s\"", + lx_debug_path); +} + +void +lx_debug(const char *msg, ...) 
+{ + va_list ap; + char buf[LX_MSG_MAXLEN + 1]; + int rv, fd, n; + int errno_backup; + + if (lx_debug_enabled == 0) + return; + + errno_backup = errno; + + /* prefix the message with pid/tid */ + if ((n = snprintf(buf, sizeof (buf), "%u/%u: ", + getpid(), thr_self())) == -1) { + errno = errno_backup; + return; + } + + /* format the message */ + va_start(ap, msg); + rv = vsnprintf(&buf[n], sizeof (buf) - n, msg, ap); + va_end(ap); + if (rv == -1) { + errno = errno_backup; + return; + } + + /* add a carrige return if there isn't one already */ + if ((buf[strlen(buf) - 1] != '\n') && + (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf))) { + errno = errno_backup; + return; + } + + /* + * Open the debugging output file. note that we don't protect + * ourselves against exec or fork1 here. if an mt process were + * to exec/fork1 while we're doing this they'd end up with an + * extra open desciptor in their fd space. a'well. shouldn't + * really matter. + */ + if ((fd = open(lx_debug_path, + O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666)) == -1) { + return; + } + (void) fchmod(fd, 0666); + + /* we retry in case of EINTR */ + do { + rv = write(fd, buf, strlen(buf)); + } while ((rv == -1) && (errno == EINTR)); + (void) fsync(fd); + + (void) close(fd); + errno = errno_backup; +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/dir.c b/usr/src/lib/brand/lx/lx_brand/common/dir.c new file mode 100644 index 0000000000..1c0a5aaf8f --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/dir.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <string.h> +#include <stddef.h> +#include <errno.h> +#include <unistd.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/dirent.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> + +#define LX_NAMEMAX 256 + +struct lx_dirent { + long d_ino; /* not l_ino_t */ + long d_off; + ushort_t d_reclen; + char d_name[LX_NAMEMAX]; +}; + +struct lx_dirent64 { + uint64_t d_ino; + int64_t d_off; + ushort_t d_reclen; + uchar_t d_type; + char d_name[LX_NAMEMAX]; +}; + +#define LX_RECLEN(namelen) \ + ((offsetof(struct lx_dirent64, d_name) + 1 + (namelen) + 7) & ~7) + +/* + * Read in one dirent structure from fd into dirp. + * p3 (count) is ignored. + */ +/*ARGSUSED*/ +int +lx_readdir(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + struct lx_dirent *dirp = (struct lx_dirent *)p2; + uint_t count = sizeof (struct lx_dirent); + int rc = 0; + struct lx_dirent _ld; + struct dirent *sd = (struct dirent *)&_ld; + + /* + * The return value from getdents is not applicable, as + * it might have squeezed more than one dirent in the buffer + * we provided. + * + * getdents() will deal with the case of dirp == NULL + */ + if ((rc = getdents(fd, sd, count)) < 0) + return (-errno); + + /* + * Set rc 1 (pass), or 0 (end of directory). + */ + rc = (sd->d_reclen == 0) ? 
0 : 1; + + if (uucopy(sd, dirp, count) != 0) + return (-errno); + + return (rc); +} + +/* + * Read in dirent64 structures from p1 (fd) into p2 (buffer). + * p3 (count) is the size of the memory area. + */ +int +lx_getdents64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (uint_t)p1; + void *buf = (void *)p2; + void *sbuf, *lbuf; + int lbufsz = (uint_t)p3; + int sbufsz; + int namelen; + struct dirent *sd; + struct lx_dirent64 *ld; + int bytes, rc; + + if (lbufsz < sizeof (struct lx_dirent64)) + return (-EINVAL); + + /* + * The Linux dirent64 is bigger than the Solaris dirent64. To + * avoid inadvertently consuming more of the directory than we can + * pass back to the Linux app, we hand the kernel a smaller buffer + * than the app handed us. + */ + sbufsz = (lbufsz / 32) * 24; + + sbuf = SAFE_ALLOCA(sbufsz); + lbuf = SAFE_ALLOCA(lbufsz); + if (sbuf == NULL || lbuf == NULL) + return (-ENOMEM); + + if ((bytes = getdents(fd, sbuf, sbufsz)) < 0) + return (-errno); + + /* munge the Solaris buffer to a linux buffer. 
*/ + sd = (struct dirent *)sbuf; + ld = (struct lx_dirent64 *)lbuf; + rc = 0; + while (bytes > 0) { + namelen = strlen(sd->d_name); + if (namelen >= LX_NAMEMAX) + namelen = LX_NAMEMAX - 1; + ld->d_ino = (uint64_t)sd->d_ino; + ld->d_off = (int64_t)sd->d_off; + ld->d_type = 0; + + (void) strncpy(ld->d_name, sd->d_name, namelen); + ld->d_name[namelen] = 0; + ld->d_reclen = (ushort_t)LX_RECLEN(namelen); + + bytes -= (int)sd->d_reclen; + rc += (int)ld->d_reclen; + + sd = (struct dirent *)(void *)((caddr_t)sd + sd->d_reclen); + ld = (struct lx_dirent64 *)(void *)((caddr_t)ld + ld->d_reclen); + } + + /* now copy the lbuf to the userland buffer */ + assert(rc <= lbufsz); + if (uucopy(lbuf, buf, rc) != 0) + return (-EFAULT); + + return (rc); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/fcntl.c b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c new file mode 100644 index 0000000000..995a3b5e7b --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c @@ -0,0 +1,387 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/filio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stropts.h> +#include <libintl.h> +#include <errno.h> +#include <string.h> + +#include <sys/lx_fcntl.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +static int lx_fcntl_com(int fd, int cmd, ulong_t arg); +static void ltos_flock(struct lx_flock *l, struct flock *s); +static void stol_flock(struct flock *s, struct lx_flock *l); +static void ltos_flock64(struct lx_flock64 *l, struct flock64 *s); +static void stol_flock64(struct flock64 *s, struct lx_flock64 *l); +static short ltos_type(short l_type); +static short stol_type(short l_type); +static int lx_fcntl_getfl(int fd); +static int lx_fcntl_setfl(int fd, ulong_t arg); + +int +lx_dup2(uintptr_t p1, uintptr_t p2) +{ + int oldfd = (int)p1; + int newfd = (int)p2; + int rc; + + rc = fcntl(oldfd, F_DUP2FD, newfd); + return ((rc == -1) ? -errno : rc); +} + +int +lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + ulong_t arg = (ulong_t)p3; + struct lx_flock lxflk; + struct flock fl; + int lk = 0; + int rc; + + /* + * The 64-bit fcntl commands must go through fcntl64(). 
+ */ + if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLK64 || + cmd == LX_F_SETLKW64) + return (-EINVAL); + + if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE || + cmd == LX_F_GETLEASE) { + lx_unsupported(gettext("%s(): unsupported command: %d"), + "fcntl", cmd); + return (-ENOTSUP); + } + + if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || + cmd == LX_F_SETLKW) { + if (uucopy((void *)p3, (void *)&lxflk, + sizeof (struct lx_flock)) != 0) + return (-errno); + lk = 1; + ltos_flock(&lxflk, &fl); + arg = (ulong_t)&fl; + } + + rc = lx_fcntl_com(fd, cmd, arg); + + if (lk) + stol_flock(&fl, (struct lx_flock *)p3); + + return (rc); +} + +int +lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + struct lx_flock lxflk; + struct lx_flock64 lxflk64; + struct flock fl; + struct flock64 fl64; + int rc; + + if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE || + cmd == LX_F_GETLEASE) { + lx_unsupported(gettext("%s(): unsupported command: %d"), + "fcntl64", cmd); + return (-ENOTSUP); + } + + if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || cmd == LX_F_SETLKW) { + if (uucopy((void *)p3, (void *)&lxflk, + sizeof (struct lx_flock)) != 0) + return (-errno); + ltos_flock(&lxflk, &fl); + rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl); + stol_flock(&fl, (struct lx_flock *)p3); + } else if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLKW64 || \ + cmd == LX_F_SETLK64) { + if (uucopy((void *)p3, (void *)&lxflk64, + sizeof (struct lx_flock64)) != 0) + return (-errno); + ltos_flock64(&lxflk64, &fl64); + rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl64); + stol_flock64(&fl64, (struct lx_flock64 *)p3); + } else { + rc = lx_fcntl_com(fd, cmd, (ulong_t)p3); + } + + return (rc); +} + +static int +lx_fcntl_com(int fd, int cmd, ulong_t arg) +{ + int rc = 0; + + switch (cmd) { + case LX_F_DUPFD: + rc = fcntl(fd, F_DUPFD, arg); + break; + + case LX_F_GETFD: + rc = fcntl(fd, F_GETFD, 0); + break; + + case LX_F_SETFD: + rc = fcntl(fd, F_SETFD, arg); + 
break; + + case LX_F_GETFL: + rc = lx_fcntl_getfl(fd); + break; + + case LX_F_SETFL: + rc = lx_fcntl_setfl(fd, arg); + break; + + case LX_F_GETLK: + rc = fcntl(fd, F_GETLK, arg); + break; + + case LX_F_SETLK: + rc = fcntl(fd, F_SETLK, arg); + break; + + case LX_F_SETLKW: + rc = fcntl(fd, F_SETLKW, arg); + break; + + case LX_F_GETLK64: + rc = fcntl(fd, F_GETLK64, arg); + break; + + case LX_F_SETLK64: + rc = fcntl(fd, F_SETLK64, arg); + break; + + case LX_F_SETLKW64: + rc = fcntl(fd, F_SETLKW64, arg); + break; + + case LX_F_SETOWN: + rc = fcntl(fd, F_SETOWN, arg); + break; + + case LX_F_GETOWN: + rc = fcntl(fd, F_GETOWN, arg); + break; + + default: + return (-EINVAL); + } + + return ((rc == -1) ? -errno : rc); +} + + +#define LTOS_FLOCK(l, s) \ +{ \ + s->l_type = ltos_type(l->l_type); \ + s->l_whence = l->l_whence; \ + s->l_start = l->l_start; \ + s->l_len = l->l_len; \ + s->l_sysid = 0; /* not defined in linux */ \ + s->l_pid = (pid_t)l->l_pid; \ +} + +#define STOL_FLOCK(s, l) \ +{ \ + l->l_type = stol_type(s->l_type); \ + l->l_whence = s->l_whence; \ + l->l_start = s->l_start; \ + l->l_len = s->l_len; \ + l->l_pid = (int)s->l_pid; \ +} + +static void +ltos_flock(struct lx_flock *l, struct flock *s) +{ + LTOS_FLOCK(l, s) +} + +static void +stol_flock(struct flock *s, struct lx_flock *l) +{ + STOL_FLOCK(s, l) +} + +static void +ltos_flock64(struct lx_flock64 *l, struct flock64 *s) +{ + LTOS_FLOCK(l, s) +} + +static void +stol_flock64(struct flock64 *s, struct lx_flock64 *l) +{ + STOL_FLOCK(s, l) +} + +static short +ltos_type(short l_type) +{ + switch (l_type) { + case LX_F_RDLCK: + return (F_RDLCK); + case LX_F_WRLCK: + return (F_WRLCK); + case LX_F_UNLCK: + return (F_UNLCK); + default: + return (-1); + } +} + +static short +stol_type(short l_type) +{ + switch (l_type) { + case F_RDLCK: + return (LX_F_RDLCK); + case F_WRLCK: + return (LX_F_WRLCK); + case F_UNLCK: + return (LX_F_UNLCK); + default: + /* can't ever happen */ + return (0); + } +} + +int 
+lx_fcntl_getfl(int fd) +{ + int retval; + int rc; + + retval = fcntl(fd, F_GETFL, 0); + + if ((retval & O_ACCMODE) == O_RDONLY) + rc = LX_O_RDONLY; + else if ((retval & O_ACCMODE) == O_WRONLY) + rc = LX_O_WRONLY; + else + rc = LX_O_RDWR; + /* O_NDELAY != O_NONBLOCK, so we need to check for both */ + if (retval & O_NDELAY) + rc |= LX_O_NDELAY; + if (retval & O_NONBLOCK) + rc |= LX_O_NONBLOCK; + if (retval & O_APPEND) + rc |= LX_O_APPEND; + if (retval & O_SYNC) + rc |= LX_O_SYNC; + if (retval & O_LARGEFILE) + rc |= LX_O_LARGEFILE; + if (retval & FASYNC) + rc |= LX_O_ASYNC; + + return (rc); +} + +int +lx_fcntl_setfl(int fd, ulong_t arg) +{ + int new_arg; + + new_arg = 0; + /* LX_O_NDELAY == LX_O_NONBLOCK, so we only check for one */ + if (arg & LX_O_NDELAY) + new_arg |= O_NONBLOCK; + if (arg & LX_O_APPEND) + new_arg |= O_APPEND; + if (arg & LX_O_SYNC) + new_arg |= O_SYNC; + if (arg & LX_O_LARGEFILE) + new_arg |= O_LARGEFILE; + if (arg & LX_O_ASYNC) + new_arg |= FASYNC; + + return ((fcntl(fd, F_SETFL, new_arg) == 0) ? 0 : -errno); +} + +/* + * flock() applies or removes an advisory lock on the file + * associated with the file descriptor fd. 
+ * + * Stolen verbatim from usr/src/ucblib/libucb/port/sys/flock.c + * + * operation is: LX_LOCK_SH, LX_LOCK_EX, LX_LOCK_UN, LX_LOCK_NB + */ +int +lx_flock(uintptr_t p1, uintptr_t p2) +{ + int fd = (int)p1; + int operation = (int)p2; + struct flock fl; + int cmd; + int ret; + + /* In non-blocking lock, use F_SETLK for cmd, F_SETLKW otherwise */ + if (operation & LX_LOCK_NB) { + cmd = F_SETLK; + operation &= ~LX_LOCK_NB; /* turn off this bit */ + } else + cmd = F_SETLKW; + + switch (operation) { + case LX_LOCK_UN: + fl.l_type = F_UNLCK; + break; + case LX_LOCK_SH: + fl.l_type = F_RDLCK; + break; + case LX_LOCK_EX: + fl.l_type = F_WRLCK; + break; + default: + return (-EINVAL); + } + + fl.l_whence = 0; + fl.l_start = 0; + fl.l_len = 0; + + ret = fcntl(fd, cmd, &fl); + + if (ret == -1 && errno == EACCES) + return (-EWOULDBLOCK); + + return ((ret == -1) ? -errno : ret); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/file.c b/usr/src/lib/brand/lx/lx_brand/common/file.c new file mode 100644 index 0000000000..eaa5349b6e --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/file.c @@ -0,0 +1,747 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/fstyp.h> +#include <sys/fsid.h> + +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/vnode.h> +#include <fcntl.h> +#include <string.h> +#include <utime.h> +#include <atomic.h> + +#include <sys/lx_syscall.h> +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_fcntl.h> + +static int +install_checkpath(uintptr_t p1) +{ + int saved_errno = errno; + char path[MAXPATHLEN]; + + /* + * The "dev" RPM package wants to modify /dev/pts, but /dev/pts is a + * lofs mounted copy of /native/dev/pts, so that won't work. + * + * Instead, if we're trying to modify /dev/pts from install mode, just + * act as if it succeded. + */ + if (uucopystr((void *)p1, path, MAXPATHLEN) == -1) + return (-errno); + + if (strcmp(path, "/dev/pts") == 0) + return (0); + + errno = saved_errno; + return (-errno); +} + +/* + * Convert linux LX_AT_* flags to solaris AT_* flags, while verifying allowed + * flags have been passed. This also allows EACCESS/REMOVEDIR to be translated + * correctly since on linux they have the same value. 
+ */
+int
+ltos_at_flag(int lflag, int allow)
+{
+	int sflag = 0;
+
+	/*
+	 * Each recognized flag is removed from lflag as it is translated, so
+	 * whatever remains at the end was either unknown or not allowed.
+	 */
+	if ((lflag & LX_AT_EACCESS) && (allow & AT_EACCESS)) {
+		lflag &= ~LX_AT_EACCESS;
+		sflag |= AT_EACCESS;
+	}
+
+	if ((lflag & LX_AT_REMOVEDIR) && (allow & AT_REMOVEDIR)) {
+		lflag &= ~LX_AT_REMOVEDIR;
+		sflag |= AT_REMOVEDIR;
+	}
+
+	if ((lflag & LX_AT_SYMLINK_NOFOLLOW) && (allow & AT_SYMLINK_NOFOLLOW)) {
+		lflag &= ~LX_AT_SYMLINK_NOFOLLOW;
+		sflag |= AT_SYMLINK_NOFOLLOW;
+	}
+
+	/* right now solaris doesn't have a _FOLLOW flag, so use a fake one */
+	if ((lflag & LX_AT_SYMLINK_FOLLOW) && (allow & LX_AT_SYMLINK_FOLLOW)) {
+		lflag &= ~LX_AT_SYMLINK_FOLLOW;
+		sflag |= LX_AT_SYMLINK_FOLLOW;
+	}
+
+	/* if flag is not zero then some flags did not hit the above code */
+	if (lflag)
+		return (-EINVAL);
+
+	return (sflag);
+}
+
+
+/*
+ * Miscellaneous file-related system calls.
+ */
+
+/*
+ * Linux creates half-duplex unnamed pipes and Solaris creates full-duplex
+ * pipes.  Thus, to get the correct semantics, our simple pipe() system
+ * call actually needs to create a named pipe, do three opens, a close, and
+ * an unlink.  This is woefully expensive.  If performance becomes a real
+ * issue, we can implement a half-duplex pipe() in the brand module.
+ */
+#define	PIPENAMESZ	32 /* enough room for /tmp/.pipe.<pid>.<num> */
+
+/*
+ * Emulate pipe(2).  p1 is the user address of an array of two ints that
+ * receives the read end and the write end.  Returns 0 or a negated errno
+ * value.
+ */
+int
+lx_pipe(uintptr_t p1)
+{
+	static uint32_t pipecnt = 0;
+	int cnt;
+	char pipename[PIPENAMESZ];
+	int fds[3];
+	int r = 0;
+
+	/* -1 marks a slot as "not open" for the cleanup code below */
+	fds[0] = -1;
+	fds[1] = -1;
+	fds[2] = -1;
+
+	/*
+	 * Construct a name for the named pipe: /tmp/.pipe.<pid>.<++cnt>
+	 */
+	cnt = atomic_inc_32_nv(&pipecnt);
+
+	(void) snprintf(pipename, PIPENAMESZ, "/tmp/.pipe.%d.%d",
+	    getpid(), cnt);
+
+	if (mkfifo(pipename, 0600))
+		return (-errno);
+
+	/*
+	 * To prevent either the read-only or write-only open from
+	 * blocking, we first need to open the pipe for both reading and
+	 * writing.
+	 */
+	if (((fds[2] = open(pipename, O_RDWR)) < 0) ||
+	    ((fds[0] = open(pipename, O_RDONLY)) < 0) ||
+	    ((fds[1] = open(pipename, O_WRONLY)) < 0)) {
+		r = errno;
+	} else {
+		/*
+		 * Copy the two one-way fds back to the app's address
+		 * space.
+		 */
+		if (uucopy(fds, (void *)p1, 2 * sizeof (int)))
+			r = errno;
+	}
+
+	/*
+	 * The read/write descriptor and the name in the filesystem were
+	 * only needed while the two one-way descriptors were being opened.
+	 */
+	if (fds[2] >= 0)
+		(void) close(fds[2]);
+	(void) unlink(pipename);
+
+	/* on any failure, don't leave half of the pipe open */
+	if (r != 0) {
+		if (fds[0] >= 0)
+			(void) close(fds[0]);
+		if (fds[1] >= 0)
+			(void) close(fds[1]);
+	}
+
+	return (-r);
+}
+
+/*
+ * On Linux, even root cannot create a link to a directory, so we have to
+ * add an explicit check.
+ */
+int
+lx_link(uintptr_t p1, uintptr_t p2)
+{
+	char *from = (char *)p1;
+	char *to = (char *)p2;
+	struct stat64 statbuf;
+
+	/* a failed stat64() is ignored here; link() reports the error */
+	if ((stat64(from, &statbuf) == 0) && S_ISDIR(statbuf.st_mode))
+		return (-EPERM);
+
+	return (link(from, to) ? -errno : 0);
+}
+
+/*
+ * On Linux, an unlink of a directory returns EISDIR, not EPERM.
+ */
+int
+lx_unlink(uintptr_t p)
+{
+	char *pathname = (char *)p;
+	struct stat64 statbuf;
+
+	/* lstat64(), so that a symlink to a directory can still be removed */
+	if ((lstat64(pathname, &statbuf) == 0) && S_ISDIR(statbuf.st_mode))
+		return (-EISDIR);
+
+	return (unlink(pathname) ? -errno : 0);
+}
+
+/*
+ * Emulate unlinkat(2): ext1 is the directory fd (or LX_AT_FDCWD), p1 the
+ * path and p2 the Linux flags, of which only LX_AT_REMOVEDIR is accepted.
+ * Returns 0 or a negated errno value.
+ */
+int
+lx_unlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2)
+{
+	int atfd = (int)ext1;
+	char *pathname = (char *)p1;
+	int flag = (int)p2;
+	struct stat64 statbuf;
+
+	if (atfd == LX_AT_FDCWD)
+		atfd = AT_FDCWD;
+
+	flag = ltos_at_flag(flag, AT_REMOVEDIR);
+	if (flag < 0)
+		return (-EINVAL);
+
+	if (!(flag & AT_REMOVEDIR)) {
+		/* Behave like unlink() */
+		if ((fstatat64(atfd, pathname, &statbuf, AT_SYMLINK_NOFOLLOW) ==
+		    0) && S_ISDIR(statbuf.st_mode))
+			return (-EISDIR);
+	}
+
+	return (unlinkat(atfd, pathname, flag) ? -errno : 0);
+}
+
+/*
+ * fsync() and fdatasync() - On Solaris, these calls translate into a common
+ * fsync() syscall with a different parameter, so we layer on top of the librt
+ * functions instead.
+ */ +int +lx_fsync(uintptr_t fd) +{ + int fildes = (int)fd; + struct stat64 statbuf; + + if ((fstat64(fildes, &statbuf) == 0) && + (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode))) + return (-EINVAL); + + return (fsync((int)fd) ? -errno : 0); +} + +int +lx_fdatasync(uintptr_t fd) +{ + int fildes = (int)fd; + struct stat64 statbuf; + + if ((fstat64(fildes, &statbuf) == 0) && + (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode))) + return (-EINVAL); + + return (fdatasync((int)fd) ? -errno : 0); +} + +/* + * Linux, unlike Solaris, ALWAYS resets the setuid and setgid bits on a + * chown/fchown regardless of whether it was done by root or not. Therefore, + * we must do extra work after each chown/fchown call to emulate this behavior. + */ +#define SETUGID (S_ISUID | S_ISGID) + +/* + * [lf]chown16() - Translate the uid/gid and pass onto the real functions. + */ +int +lx_chown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + struct stat64 statbuf; + + if (chown(filename, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3))) + return (-errno); + + if (stat64(filename, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) chmod(filename, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_fchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + struct stat64 statbuf; + + if (fchown(fd, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3))) + return (-errno); + + if (fstat64(fd, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) fchmod(fd, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_lchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lchown((char *)p1, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3)) ? 
-errno : 0); +} + +int +lx_chown(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + struct stat64 statbuf; + int ret; + + ret = chown(filename, (uid_t)p2, (gid_t)p3); + + if (ret < 0) { + /* + * If chown() failed and we're in install mode, return success + * if the the reason we failed was because the source file + * didn't actually exist or if we're trying to modify /dev/pts. + */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + if (stat64(filename, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) chmod(filename, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_fchown(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + struct stat64 statbuf; + + if (fchown(fd, (uid_t)p2, (gid_t)p3)) + return (-errno); + + if (fstat64(fd, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) fchmod(fd, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_chmod(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = chmod((const char *)p1, (mode_t)p2); + + if (ret < 0) { + /* + * If chown() failed and we're in install mode, return success + * if the the reason we failed was because the source file + * didn't actually exist or if we're trying to modify /dev/pts. + */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + return (0); +} + +int +lx_utime(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = utime((const char *)p1, (const struct utimbuf *)p2); + + if (ret < 0) { + /* + * If chown() failed and we're in install mode, return success + * if the the reason we failed was because the source file + * didn't actually exist or if we're trying to modify /dev/pts. 
+ */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + return (0); +} + +/* + * llseek() - The Linux implementation takes an additional parameter, which is + * the resulting position in the file. + */ +int +lx_llseek(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + offset_t ret; + offset_t *res = (offset_t *)p4; + + /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */ + if ((int)p5 > SEEK_END) + return (-EINVAL); + + if ((ret = llseek((int)p1, LX_32TO64(p3, p2), p5)) < 0) + return (-errno); + + *res = ret; + return (0); +} + +/* + * seek() - When the resultant file offset cannot be represented in 32 bits, + * Linux performs the seek but Solaris doesn't, though both set EOVERFLOW. We + * call llseek() and then check to see if we need to return EOVERFLOW. + */ +int +lx_lseek(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + offset_t offset = (offset_t)(off_t)(p2); /* sign extend */ + offset_t ret; + off_t ret32; + + /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */ + if ((int)p3 > SEEK_END) + return (-EINVAL); + + if ((ret = llseek((int)p1, offset, p3)) < 0) + return (-errno); + + ret32 = (off_t)ret; + if ((offset_t)ret32 == ret) + return (ret32); + else + return (-EOVERFLOW); +} + +/* + * Neither Solaris nor Linux actually returns anything to the caller, but glibc + * expects to see SOME value returned, so placate it and return 0. + */ +int +lx_sync(void) +{ + sync(); + return (0); +} + +int +lx_rmdir(uintptr_t p1) +{ + int r; + + r = rmdir((char *)p1); + if (r < 0) + return ((errno == EEXIST) ? -ENOTEMPTY : -errno); + return (0); +} + +/* + * Exactly the same as Solaris' sysfs(2), except Linux numbers their fs indices + * starting at 0, and Solaris starts at 1. 
+ */ +int +lx_sysfs(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int option = (int)p1; + int res; + + /* + * Linux actually doesn't have #defines for these; their sysfs(2) + * man page literally defines the "option" field as being 1, 2 or 3, + * corresponding to Solaris' GETFSIND, GETFSTYP and GETNFSTYP, + * respectively. + */ + switch (option) { + case 1: + if ((res = sysfs(GETFSIND, (const char *)p2)) < 0) + return (-errno); + + return (res - 1); + + case 2: + if ((res = sysfs(GETFSTYP, (int)p2 + 1, + (char *)p3)) < 0) + return (-errno); + + return (0); + + case 3: + if ((res = sysfs(GETNFSTYP)) < 0) + return (-errno); + + return (res); + + default: + break; + } + + return (-EINVAL); +} + +int +lx_faccessat(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + int atfd = (int)p1; + char *path = (char *)p2; + int mode = (mode_t)p3; + int flag = (int)p4; + + if (atfd == LX_AT_FDCWD) + atfd = AT_FDCWD; + + flag = ltos_at_flag(flag, AT_EACCESS); + if (flag < 0) + return (-EINVAL); + + return (faccessat(atfd, path, mode, flag) ? -errno : 0); +} + +int +lx_futimesat(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)p1; + char *path = (char *)p2; + struct timeval *times = (struct timeval *)p3; + + if (atfd == LX_AT_FDCWD) + atfd = AT_FDCWD; + + return (futimesat(atfd, path, times) ? -errno : 0); +} + + +/* + * Constructs an absolute path string in buf from the path of fd and the + * relative path string pointed to by "p1". This is required for emulating + * *at() system calls. + * Example: + * If the path of fd is "/foo/bar" and path is "etc" the string returned is + * "/foo/bar/etc", if the fd is a file fd then it fails with ENOTDIR. + * If path is absolute then no modifcations are made to it when copied. 
+ */ +static int +getpathat(int fd, uintptr_t p1, char *outbuf, size_t outbuf_size) +{ + char pathbuf[MAXPATHLEN]; + char fdpathbuf[MAXPATHLEN]; + char *fdpath; + struct stat64 statbuf; + + /* Copy the path string in from the caller's pointer. */ + if (uucopystr((void *)p1, pathbuf, MAXPATHLEN) == -1) + return (-errno); + + /* Absolute paths, and any path given with LX_AT_FDCWD, are copied through unchanged. */ + if ((pathbuf[0] == '/') || (fd == LX_AT_FDCWD)) { + (void) strlcpy(outbuf, pathbuf, outbuf_size); + return (0); + } + + fdpath = lx_fd_to_path(fd, fdpathbuf, sizeof (fdpathbuf)); + if (fdpath == NULL) + return (-EBADF); + + /* Any fstat64() failure is reported as EBADF. */ + if ((fstat64(fd, &statbuf) < 0)) + return (-EBADF); + + if (!S_ISDIR(statbuf.st_mode)) + return (-ENOTDIR); + + /* A result longer than outbuf_size - 1 means the joined path did not fit. */ + if (snprintf(outbuf, outbuf_size, "%s/%s", fdpath, pathbuf) > + (outbuf_size-1)) + return (-ENAMETOOLONG); + + return (0); +} + +/* mkdirat() emulation: p1 is the directory fd, p2 the path, p3 the mode; the path is resolved with getpathat() and handed to mkdir(). */ +int +lx_mkdirat(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)p1; + mode_t mode = (mode_t)p3; + char pathbuf[MAXPATHLEN]; + int ret; + + ret = getpathat(atfd, p2, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + return (mkdir(pathbuf, mode) ? -errno : 0); +} + +/* mknodat() emulation: resolves p1 against the directory fd ext1, then defers to lx_mknod() with p2 and p3 unchanged. */ +int +lx_mknodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)ext1; + char pathbuf[MAXPATHLEN]; + int ret; + + ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + return (lx_mknod((uintptr_t)pathbuf, p2, p3)); +} + +/* symlinkat() emulation: p1 is the link contents (passed to symlink() as-is), p2 the new link's path resolved against the directory fd ext1. */ +int +lx_symlinkat(uintptr_t p1, uintptr_t ext1, uintptr_t p2) +{ + int atfd = (int)ext1; + char pathbuf[MAXPATHLEN]; + int ret; + + ret = getpathat(atfd, p2, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + return (symlink((char *)p1, pathbuf) ? 
-errno : 0); +} + +/* linkat() emulation: both paths are resolved with getpathat() and the pair is handed to lx_link(). */ +int +lx_linkat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2, + uintptr_t p3) +{ + int atfd1 = (int)ext1; + int atfd2 = (int)ext2; + char pathbuf1[MAXPATHLEN]; + char pathbuf2[MAXPATHLEN]; + int ret; + + /* + * The flag specifies whether the hardlink will point to a symlink or + * not. On Solaris the default behaviour of link() is to dereference a + * symlink and there is no obvious way to trigger the other behaviour, + * so for now we just ignore this flag and act like link(). + */ + /* LINTED [set but not used in function] */ + int flag = p3; + + if (flag != p3) + return (flag); /* workaround; presumably only here so that flag is referenced */ + + ret = getpathat(atfd1, p1, pathbuf1, sizeof (pathbuf1)); + if (ret < 0) + return (ret); + + ret = getpathat(atfd2, p2, pathbuf2, sizeof (pathbuf2)); + if (ret < 0) + return (ret); + + return (lx_link((uintptr_t)pathbuf1, (uintptr_t)pathbuf2)); +} + +/* readlinkat() emulation: returns the byte count from readlink() or -errno. */ +int +lx_readlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)ext1; + char pathbuf[MAXPATHLEN]; + int ret; + + ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + ret = readlink(pathbuf, (char *)p2, (size_t)p3); + if (ret < 0) + return (-errno); + + return (ret); +} + +/* fchownat() emulation: only AT_SYMLINK_NOFOLLOW is accepted in p4; with it the call becomes lchown(), without it lx_chown(). */ +int +lx_fchownat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4) +{ + int flag; + int atfd = (int)ext1; + char pathbuf[MAXPATHLEN]; + int ret; + + flag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW); + if (flag < 0) + return (-EINVAL); + + ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + if (flag & AT_SYMLINK_NOFOLLOW) + return (lchown(pathbuf, (uid_t)p2, (gid_t)p3) ? 
-errno : 0); + else + return (lx_chown((uintptr_t)pathbuf, p2, p3)); +} + +/* fchmodat() emulation: resolves p1 against the directory fd ext1 and defers to lx_chmod() with mode p2. */ +int +lx_fchmodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)ext1; + char pathbuf[MAXPATHLEN]; + int ret; + + /* + * It seems that at least some versions of glibc do not set or clear + * the flags arg, so checking them would result in random behaviour; + * the flags are therefore ignored. + */ + /* LINTED [set but not used in function] */ + int flag = p3; + + if (flag != p3) + return (flag); /* workaround; presumably only here so that flag is referenced */ + + ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf)); + if (ret < 0) + return (ret); + + return (lx_chmod((uintptr_t)pathbuf, p2)); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c new file mode 100644 index 0000000000..7e75efaa39 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/lx_misc.h> + +/* + * fork() and vfork() + * + * These cannot be pass-through system calls because we need libc to do its own + * initialization or else bad things will happen (e.g. ending up with a bad + * schedctl page). On Linux, there is no such thing as forkall(), so we use + * fork1() here. + * + * Returns whatever fork1() returned, or -errno when fork1() returned -1. + */ +int +lx_fork(void) +{ + int ret = fork1(); + + /* In the child only: when lx_is_rpm is set, sleep for lx_rpm_delay seconds. */ + if (ret == 0 && lx_is_rpm) + (void) sleep(lx_rpm_delay); + + return (ret == -1 ? -errno : ret); +} + +/* + * For vfork(), we have a serious problem because the child is not allowed to + * return from the current frame because it will corrupt the parent's stack. + * Since the semantics of vfork() are rather ill-defined (other than "it's + * faster than fork"), we should theoretically be safe by falling back to + * fork1(). + * + * Unlike lx_fork(), no lx_is_rpm delay is applied in the child. + */ +int +lx_vfork(void) +{ + int ret = fork1(); + + return (ret == -1 ? -errno : ret); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/id.c b/usr/src/lib/brand/lx/lx_brand/common/id.c new file mode 100644 index 0000000000..a9987cea52 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/id.c @@ -0,0 +1,269 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/zone.h> +#include <sys/lx_types.h> +#include <sys/lx_syscall.h> +#include <sys/cred_impl.h> +#include <sys/policy.h> +#include <sys/ucred.h> +#include <sys/syscall.h> +#include <alloca.h> +#include <errno.h> +#include <ucred.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/lx_misc.h> + +int +lx_setuid16(uintptr_t uid) +{ + return ((setuid(LX_UID16_TO_UID32((lx_uid16_t)uid))) ? -errno : 0); +} + +int +lx_getuid16(void) +{ + return ((int)LX_UID32_TO_UID16(getuid())); +} + +int +lx_setgid16(uintptr_t gid) +{ + return ((setgid(LX_GID16_TO_GID32((lx_gid16_t)gid))) ? -errno : 0); +} + +int +lx_getgid16(void) +{ + return ((int)LX_GID32_TO_GID16(getgid())); +} + +int +lx_geteuid16(void) +{ + return ((int)LX_UID32_TO_UID16(geteuid())); +} + +int +lx_getegid16(void) +{ + return ((int)LX_GID32_TO_GID16(getegid())); +} + +int +lx_geteuid(void) +{ + return ((int)geteuid()); +} + +int +lx_getegid(void) +{ + return ((int)getegid()); +} + +int +lx_getresuid(uintptr_t ruid, uintptr_t euid, uintptr_t suid) +{ + lx_uid_t lx_ruid, lx_euid, lx_suid; + ucred_t *cr; + size_t sz; + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. 
+ */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + if (cr == NULL) + return (-ENOMEM); + + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0) + return (-errno); + + if (((lx_ruid = (lx_uid_t)ucred_getruid(cr)) == (lx_uid_t)-1) || + ((lx_euid = (lx_uid_t)ucred_geteuid(cr)) == (lx_uid_t)-1) || + ((lx_suid = (lx_uid_t)ucred_getsuid(cr)) == (lx_uid_t)-1)) { + return (-errno); + } + + if (uucopy(&lx_ruid, (void *)ruid, sizeof (lx_uid_t)) != 0) + return (-errno); + + if (uucopy(&lx_euid, (void *)euid, sizeof (lx_uid_t)) != 0) + return (-errno); + + return ((uucopy(&lx_suid, (void *)suid, sizeof (lx_uid_t)) != 0) + ? -errno : 0); +} + +int +lx_getresuid16(uintptr_t ruid16, uintptr_t euid16, uintptr_t suid16) +{ + lx_uid_t lx_ruid, lx_euid, lx_suid; + lx_uid16_t lx_ruid16, lx_euid16, lx_suid16; + int rv; + + if ((rv = lx_getresuid((uintptr_t)&lx_ruid, (uintptr_t)&lx_euid, + (uintptr_t)&lx_suid)) != 0) + return (rv); + + lx_ruid16 = LX_UID32_TO_UID16(lx_ruid); + lx_euid16 = LX_UID32_TO_UID16(lx_euid); + lx_suid16 = LX_UID32_TO_UID16(lx_suid); + + if (uucopy(&lx_ruid16, (void *)ruid16, sizeof (lx_uid16_t)) != 0) + return (-errno); + + if (uucopy(&lx_euid16, (void *)euid16, sizeof (lx_uid16_t)) != 0) + return (-errno); + + return ((uucopy(&lx_suid16, (void *)suid16, sizeof (lx_uid16_t)) != 0) + ? -errno : 0); +} + +int +lx_getresgid(uintptr_t rgid, uintptr_t egid, uintptr_t sgid) +{ + ucred_t *cr; + lx_gid_t lx_rgid, lx_egid, lx_sgid; + size_t sz; + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. 
+ */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + if (cr == NULL) + return (-ENOMEM); + + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0) + return (-errno); + + if (((lx_rgid = (lx_gid_t)ucred_getrgid(cr)) == (lx_gid_t)-1) || + ((lx_egid = (lx_gid_t)ucred_getegid(cr)) == (lx_gid_t)-1) || + ((lx_sgid = (lx_gid_t)ucred_getsgid(cr)) == (lx_gid_t)-1)) { + return (-errno); + } + + if (uucopy(&lx_rgid, (void *)rgid, sizeof (lx_gid_t)) != 0) + return (-errno); + + if (uucopy(&lx_egid, (void *)egid, sizeof (lx_gid_t)) != 0) + return (-errno); + + return ((uucopy(&lx_sgid, (void *)sgid, sizeof (lx_gid_t)) != 0) + ? -errno : 0); +} + +int +lx_getresgid16(uintptr_t rgid16, uintptr_t egid16, uintptr_t sgid16) +{ + lx_gid_t lx_rgid, lx_egid, lx_sgid; + lx_gid16_t lx_rgid16, lx_egid16, lx_sgid16; + int rv; + + if ((rv = lx_getresgid((uintptr_t)&lx_rgid, (uintptr_t)&lx_egid, + (uintptr_t)&lx_sgid)) != 0) + return (rv); + + lx_rgid16 = LX_UID32_TO_UID16(lx_rgid); + lx_egid16 = LX_UID32_TO_UID16(lx_egid); + lx_sgid16 = LX_UID32_TO_UID16(lx_sgid); + + if (uucopy(&lx_rgid16, (void *)rgid16, sizeof (lx_gid16_t)) != 0) + return (-errno); + + if (uucopy(&lx_egid16, (void *)egid16, sizeof (lx_gid16_t)) != 0) + return (-errno); + + return ((uucopy(&lx_sgid16, (void *)sgid16, sizeof (lx_gid16_t)) != 0) + ? -errno : 0); +} + +int +lx_setreuid16(uintptr_t ruid, uintptr_t euid) +{ + return ((setreuid(LX_UID16_TO_UID32((lx_uid16_t)ruid), + LX_UID16_TO_UID32((lx_uid16_t)euid))) ? -errno : 0); +} + +int +lx_setregid16(uintptr_t rgid, uintptr_t egid) +{ + return ((setregid(LX_UID16_TO_UID32((lx_gid16_t)rgid), + LX_UID16_TO_UID32((lx_gid16_t)egid))) ? -errno : 0); +} + +/* + * The lx brand cannot support the setfs[ug]id16/setfs[ug]id calls as that + * would require significant rework of Solaris' privilege mechanisms, so + * instead return the current effective [ug]id. 
+ * + * In Linux, fsids track effective IDs, so returning the effective IDs works + * as a substitute; returning the current value also denotes failure of the + * call if the caller had specified something different. We don't need to + * worry about setting error codes because the Linux calls don't set any. + */ +/*ARGSUSED*/ +int +lx_setfsuid16(uintptr_t fsuid16) +{ + /* The requested id is ignored; report the 16-bit effective uid. */ + return (lx_geteuid16()); +} + +/*ARGSUSED*/ +int +lx_setfsgid16(uintptr_t fsgid16) +{ + /* The requested id is ignored; report the 16-bit effective gid. */ + return (lx_getegid16()); +} + +/*ARGSUSED*/ +int +lx_setfsuid(uintptr_t fsuid) +{ + /* The requested id is ignored; report the effective uid. */ + return (geteuid()); +} + +/*ARGSUSED*/ +int +lx_setfsgid(uintptr_t fsgid) +{ + /* The requested id is ignored; report the effective gid. */ + return (getegid()); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c new file mode 100644 index 0000000000..2f2b022ed5 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c @@ -0,0 +1,2719 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <fcntl.h> +#include <sys/types.h> +#include <signal.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stropts.h> +#include <strings.h> +#include <thread.h> +#include <errno.h> +#include <libintl.h> +#include <sys/bitmap.h> +#include <sys/lx_autofs.h> +#include <sys/modctl.h> +#include <sys/filio.h> +#include <sys/termios.h> +#include <sys/termio.h> +#include <sys/sockio.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <sys/ptms.h> +#include <sys/ldlinux.h> +#include <sys/lx_ptm.h> +#include <sys/lx_socket.h> +#include <sys/syscall.h> +#include <sys/brand.h> +#include <sys/lx_audio.h> +#include <sys/lx_ioctl.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/ptyvar.h> +#include <sys/audio.h> +#include <sys/mixer.h> + +/* Define _KERNEL to get the devt manipulation macros. */ +#define _KERNEL +#include <sys/sysmacros.h> +#undef _KERNEL + +/* Maximum number of modules on a stream that we can handle. */ +#define MAX_STRMODS 10 + +/* Maximum buffer size for debugging messages. */ +#define MSGBUF 1024 + +/* + * Structure used to define an ioctl translator. ict_lx_cmd is the value + * the incoming Linux command is matched against; ict_cmd and ict_cmd_str + * are what gets passed to ict_func. An entry whose ict_func is NULL ends + * a translator array. + */ +typedef struct ioc_cmd_translator { + int ict_lx_cmd; + char *ict_lx_cmd_str; + int ict_cmd; + char *ict_cmd_str; + int (*ict_func)(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg); +} ioc_cmd_translator_t; + +/* + * Structure used to associate a group of ioctl translators with + * a specific device. idt_major is filled in from idt_driver by + * lx_ioctl_init(), or set to (major_t)-1 when that lookup fails. + */ +typedef struct ioc_dev_translator { + char *idt_driver; + major_t idt_major; + + /* Array of command translators. */ + ioc_cmd_translator_t *idt_cmds; +} ioc_dev_translator_t; + +/* + * Structure used to associate a group of ioctl translators with + * a specific filesystem. + */ +typedef struct ioc_fs_translator { + char *ift_filesystem; + + /* Array of command translators. 
*/ + ioc_cmd_translator_t *ift_cmds; +} ioc_fs_translator_t; + +/* Structure used to define the error code for an unsupported ioctl. */ +typedef struct ioc_errno_translator { + int iet_lx_cmd; + char *iet_lx_cmd_str; + int iet_errno; +} ioc_errno_translator_t; + +/* Structure used to convert oss format flags into Solaris options. */ +typedef struct oss_fmt_translator { + int oft_oss_fmt; + int oft_encoding; + int oft_precision; +} oss_fmt_translator_t; + +/* Translator forward declarations. */ +static oss_fmt_translator_t oft_table[]; +static ioc_cmd_translator_t ioc_translators_file[]; +static ioc_cmd_translator_t ioc_translators_fifo[]; +static ioc_cmd_translator_t ioc_translators_sock[]; +static ioc_dev_translator_t ioc_translator_ptm; +static ioc_dev_translator_t *ioc_translators_dev[]; +static ioc_fs_translator_t *ioc_translators_fs[]; +static ioc_errno_translator_t ioc_translators_errno[]; + +/* + * Interface name table: a singly linked list pairing each Solaris + * interface name with the name presented to Linux applications. + */ +typedef struct ifname_map { + char im_linux[IFNAMSIZ]; + char im_solaris[IFNAMSIZ]; + struct ifname_map *im_next; +} ifname_map_t; + +/* Head of the interface name table and the mutex that guards it. */ +static ifname_map_t *ifname_map; +static mutex_t ifname_mtx; + +/* + * Macros and structures to help convert integers to string + * values that they represent (for displaying in debug output). 
+ */ +#define I2S_ENTRY(x) { x, #x }, +#define I2S_END { 0, NULL } + +typedef struct int2str { + int i2s_int; + char *i2s_str; +} int2str_t; + +static int2str_t st_mode_strings[] = { + I2S_ENTRY(S_IFIFO) + I2S_ENTRY(S_IFCHR) + I2S_ENTRY(S_IFDIR) + I2S_ENTRY(S_IFBLK) + I2S_ENTRY(S_IFREG) + I2S_ENTRY(S_IFLNK) + I2S_ENTRY(S_IFSOCK) + I2S_ENTRY(S_IFDOOR) + I2S_ENTRY(S_IFPORT) + I2S_END +}; + +static int2str_t oss_fmt_str[] = { + I2S_ENTRY(LX_OSS_AFMT_QUERY) + I2S_ENTRY(LX_OSS_AFMT_MU_LAW) + I2S_ENTRY(LX_OSS_AFMT_A_LAW) + I2S_ENTRY(LX_OSS_AFMT_IMA_ADPCM) + I2S_ENTRY(LX_OSS_AFMT_U8) + I2S_ENTRY(LX_OSS_AFMT_S16_LE) + I2S_ENTRY(LX_OSS_AFMT_S16_BE) + I2S_ENTRY(LX_OSS_AFMT_S8) + I2S_ENTRY(LX_OSS_AFMT_U16_LE) + I2S_ENTRY(LX_OSS_AFMT_U16_BE) + I2S_ENTRY(LX_OSS_AFMT_MPEG) + I2S_END +}; + +static void +lx_ioctl_msg(int fd, int cmd, char *lx_cmd_str, struct stat *stat, char *msg) +{ + int errno_backup = errno; + char *path, path_buf[MAXPATHLEN]; + + assert(msg != NULL); + + if (lx_debug_enabled == 0) + return; + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + if (lx_cmd_str == NULL) + lx_cmd_str = "?"; + + /* Display the initial error message and extended ioctl information. */ + lx_debug("\t%s", msg); + lx_debug("\tlx_ioctl(): cmd = 0x%x - %s, fd = %d - %s", + cmd, lx_cmd_str, fd, path); + + /* Display information about the target file, if it's available. */ + if (stat != NULL) { + major_t fd_major = getmajor(stat->st_rdev); + minor_t fd_minor = getminor(stat->st_rdev); + int fd_mode = stat->st_mode & S_IFMT; + char *fd_mode_str = "unknown"; + char buf[LX_MSG_MAXLEN]; + int i; + + /* Translate the file type bits into a string. 
*/ + for (i = 0; st_mode_strings[i].i2s_str != NULL; i++) { + if (fd_mode != st_mode_strings[i].i2s_int) + continue; + fd_mode_str = st_mode_strings[i].i2s_str; + break; + } + + (void) snprintf(buf, sizeof (buf), + "\tlx_ioctl(): mode = %s", fd_mode_str); + + if ((fd_mode == S_IFCHR) || (fd_mode == S_IFBLK)) { + char *fd_driver[MODMAXNAMELEN + 1]; + int i; + + /* This is a device so display the devt. */ + i = strlen(buf); + (void) snprintf(buf + i, sizeof (buf) - i, + "; rdev = [%d, %d]", fd_major, fd_minor); + + /* Try to display the drivers name. */ + if (modctl(MODGETNAME, + fd_driver, sizeof (fd_driver), &fd_major) == 0) + i = strlen(buf); + (void) snprintf(buf + i, sizeof (buf) - i, + "; driver = %s", fd_driver); + } + lx_debug(buf); + } + + /* Restore errno. */ + errno = errno_backup; +} + +static int +ldlinux_check(int fd) +{ + struct str_mlist mlist[MAX_STRMODS]; + struct str_list strlist; + int i; + + /* Get the number of modules on the stream. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, I_LIST, "I_LIST"); + if ((i = ioctl(fd, I_LIST, (struct str_list *)NULL)) < 0) { + lx_debug("\tldlinux_check(): unable to count stream modules"); + return (-errno); + } + + /* Sanity check the number of modules on the stream. */ + assert(i <= MAX_STRMODS); + + /* Get the list of modules on the stream. 
*/ + strlist.sl_nmods = i; + strlist.sl_modlist = mlist; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, I_LIST, "I_LIST"); + if (ioctl(fd, I_LIST, &strlist) < 0) { + lx_debug("\tldlinux_check(): unable to list stream modules"); + return (-errno); + } + + for (i = 0; i < strlist.sl_nmods; i++) + if (strcmp(strlist.sl_modlist[i].l_name, LDLINUX_MOD) == 0) + return (1); + + return (0); +} + +static int +ioctl_istr(int fd, int cmd, char *cmd_str, void *arg, int arg_len) +{ + struct strioctl istr; + + istr.ic_cmd = cmd; + istr.ic_len = arg_len; + istr.ic_timout = 0; + istr.ic_dp = arg; + + lx_debug("\tioctl_istr(%d, 0x%x - %s, ...)", fd, cmd, cmd_str); + if (ioctl(fd, I_STR, &istr) < 0) + return (-1); + return (0); +} + +/* + * Add an interface name mapping if it doesn't already exist. + * + * Interfaces with IFF_LOOPBACK flag get renamed to loXXX. + * Interfaces with IFF_BROADCAST flag get renamed to ethXXX. + * + * Caller locks the name table. + */ +static int +ifname_add(char *if_name, int if_flags) +{ + static int eth_index = 0; + static int lo_index = 0; + ifname_map_t **im_pp; + + for (im_pp = &ifname_map; *im_pp; im_pp = &(*im_pp)->im_next) + if (strncmp((*im_pp)->im_solaris, if_name, IFNAMSIZ) == 0) + return (0); + + *im_pp = calloc(1, sizeof (ifname_map_t)); + if (*im_pp == NULL) + return (-1); + + (void) strlcpy((*im_pp)->im_solaris, if_name, IFNAMSIZ); + if (if_flags & IFF_LOOPBACK) { + /* Loopback */ + if (lo_index == 0) + (void) strlcpy((*im_pp)->im_linux, "lo", IFNAMSIZ); + else + (void) snprintf((*im_pp)->im_linux, IFNAMSIZ, + "lo:%d", lo_index); + lo_index++; + } else if (if_flags & IFF_BROADCAST) { + /* Assume ether if it has a broadcast address */ + (void) snprintf((*im_pp)->im_linux, IFNAMSIZ, + "eth%d", eth_index); + eth_index++; + } else { + /* Do not translate unknown interfaces */ + (void) strlcpy((*im_pp)->im_linux, if_name, IFNAMSIZ); + } + + lx_debug("map interface %s -> %s", if_name, (*im_pp)->im_linux); + + return (0); +} + +static int 
+ifname_cmp(const void *p1, const void *p2) +{ + struct ifreq *rp1 = (struct ifreq *)p1; + struct ifreq *rp2 = (struct ifreq *)p2; + + return (strncmp(rp1->ifr_name, rp2->ifr_name, IFNAMSIZ)); +} + +/* + * (Re-)scan all interfaces and add them to the name table. + * Caller locks the name table. + */ +static int +ifname_scan(void) +{ + struct ifconf conf; + int i, fd, ifcount; + + conf.ifc_buf = NULL; + + if ((fd = socket(PF_INET, SOCK_DGRAM, 0)) < 0) + goto fail; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFNUM, "SIOCGIFNUM"); + if (ioctl(fd, SIOCGIFNUM, &ifcount) < 0) { + lx_debug("\tifname_scan(): unable to get number of interfaces"); + goto fail; + } + + conf.ifc_len = ifcount * sizeof (struct ifreq); + if ((conf.ifc_buf = calloc(ifcount, sizeof (struct ifreq))) == NULL) + goto fail; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF"); + if (ioctl(fd, SIOCGIFCONF, &conf) < 0) { + lx_debug("\tifname_scan(): unable to get interfaces"); + goto fail; + } + + /* Get the interface flags */ + for (i = 0; i < ifcount; i++) { + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, SIOCGIFFLAGS, "SIOCGIFFLAGS"); + if (ioctl(fd, SIOCGIFFLAGS, &conf.ifc_req[i]) < 0) { + conf.ifc_req[i].ifr_flags = 0; + lx_debug("\tifname_scan(): unable to get flags for %s", + conf.ifc_req[i].ifr_name); + } + } + + /* + * Sort the interfaces by name to preserve the order + * across reboots of this zone. Note that the order of + * interface names won't be consistent across network + * configuration changes. ie. If network interfaces + * are added or removed from a zone (either dynamically + * or statically) the network interfaces names to physical + * network interface mappings that linux apps see may + * change. 
+ */ + qsort(conf.ifc_req, ifcount, sizeof (struct ifreq), ifname_cmp); + + /* Add to the name table */ + for (i = 0; i < ifcount; i++) + if (ifname_add(conf.ifc_req[i].ifr_name, + conf.ifc_req[i].ifr_flags) != 0) + goto fail; + + (void) close(fd); + free(conf.ifc_buf); + + return (0); + +fail: + if (fd >= 0) + (void) close(fd); + if (conf.ifc_buf != NULL) + free(conf.ifc_buf); + + return (-1); +} + +static int +ifname_from_linux(char *name) +{ + int pass; + ifname_map_t *im_p; + + (void) mutex_lock(&ifname_mtx); + + for (pass = 0; pass < 2; pass++) { + for (im_p = ifname_map; im_p; im_p = im_p->im_next) + if (strncmp(im_p->im_linux, name, IFNAMSIZ) == 0) + break; + if (im_p != NULL || (pass == 0 && ifname_scan() != 0)) + break; + } + + (void) mutex_unlock(&ifname_mtx); + + if (im_p) { + (void) strlcpy(name, im_p->im_solaris, IFNAMSIZ); + return (0); + } + + return (-1); +} + +static int +ifname_from_solaris(char *name) +{ + int pass; + ifname_map_t *im_p; + + (void) mutex_lock(&ifname_mtx); + + for (pass = 0; pass < 2; pass++) { + for (im_p = ifname_map; im_p; im_p = im_p->im_next) + if (strncmp(im_p->im_solaris, name, IFNAMSIZ) == 0) + break; + if (im_p != NULL || (pass == 0 && ifname_scan() != 0)) + break; + } + + (void) mutex_unlock(&ifname_mtx); + + if (im_p) { + (void) strlcpy(name, im_p->im_linux, IFNAMSIZ); + return (0); + } + + return (-1); +} + +/* + * Called to initialize the ioctl translation subsystem. + */ +int +lx_ioctl_init() +{ + int i, ret; + + /* Figure out the major numbers for our devices translators. 
*/ + for (i = 0; ioc_translators_dev[i] != NULL; i++) { + ioc_dev_translator_t *idt = ioc_translators_dev[i]; + + ret = modctl(MODGETMAJBIND, + idt->idt_driver, strlen(idt->idt_driver) + 1, + &idt->idt_major); + + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_ioctl_init(): modctl(MODGETMAJBIND, ", + idt->idt_driver, strerror(errno)); + lx_err(gettext("%s: %s translator disabled for: %s\n"), + "lx_ioctl_init()", "ioctl", idt->idt_driver); + idt->idt_major = (major_t)-1; + } + } + + /* Create the interface name table */ + if (ifname_scan() != 0) + lx_err("lx_ioctl_init(): ifname_scan() failed\n"); + + return (0); +} + +static ioc_cmd_translator_t * +lx_ioctl_find_ict_cmd(ioc_cmd_translator_t *ict, int cmd) +{ + assert(ict != NULL); + while ((ict != NULL) && (ict->ict_func != NULL)) { + if (cmd == ict->ict_lx_cmd) + return (ict); + ict++; + } + return (NULL); +} + +/* + * Main entry point for the ioctl translater. + */ +int +lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + intptr_t arg = (uintptr_t)p3; + struct stat stat; + ioc_cmd_translator_t *ict = NULL; + ioc_errno_translator_t *iet = NULL; + major_t fd_major; + int i, ret; + + if (fstat(fd, &stat) != 0) { + lx_ioctl_msg(fd, cmd, NULL, NULL, + "lx_ioctl(): fstat() failed"); + + /* + * Linux ioctl(2) is only documented to return EBADF, EFAULT, + * EINVAL or ENOTTY. + * + * EINVAL is documented to be "Request or argp is not valid", + * so it's reasonable to force any errno that's not EBADF, + * EFAULT or ENOTTY to be EINVAL. + */ + if ((errno != EBADF) && (errno != EFAULT) && (errno != ENOTTY)) + errno = EINVAL; + + return (-errno); /* errno already set. */ + } + + switch (stat.st_mode & S_IFMT) { + default: + break; + case S_IFREG: + /* Use file translators. */ + ict = ioc_translators_file; + break; + + case S_IFSOCK: + /* Use socket translators. */ + ict = ioc_translators_sock; + break; + + case S_IFIFO: + /* Use fifo translators. 
*/ + ict = ioc_translators_fifo; + break; + + case S_IFCHR: + fd_major = getmajor(stat.st_rdev); + + /* + * Look through all the device translators to see if there + * is one for this device. + */ + for (i = 0; ioc_translators_dev[i] != NULL; i++) { + if (fd_major != ioc_translators_dev[i]->idt_major) + continue; + + /* We found a translator for this device. */ + ict = ioc_translators_dev[i]->idt_cmds; + break; + } + break; + } + + /* + * Search the selected translator group to see if we have a + * translator for this specific command. + */ + if ((ict != NULL) && + ((ict = lx_ioctl_find_ict_cmd(ict, cmd)) != NULL)) { + /* We found a translator for this command, invoke it. */ + lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat, + "lx_ioctl(): emulating ioctl"); + + ret = ict->ict_func(fd, &stat, ict->ict_cmd, ict->ict_cmd_str, + arg); + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + /* + * If we didn't find a file or device translator for this + * command then try to find a filesystem translator for + * this command. + */ + for (i = 0; ioc_translators_fs[i] != NULL; i++) { + if (strcmp(stat.st_fstype, + ioc_translators_fs[i]->ift_filesystem) != 0) + continue; + + /* We found a translator for this filesystem. */ + ict = ioc_translators_fs[i]->ift_cmds; + break; + } + + /* + * Search the selected translator group to see if we have a + * translator for this specific command. + */ + if ((ict != NULL) && + ((ict = lx_ioctl_find_ict_cmd(ict, cmd)) != NULL)) { + /* We found a translator for this command, invoke it. */ + lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat, + "lx_ioctl(): emulating ioctl"); + ret = ict->ict_func(fd, &stat, ict->ict_cmd, ict->ict_cmd_str, + arg); + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + /* + * No translator for this ioctl was found. + * Check if there is an errno translator. 
+ */ + for (iet = ioc_translators_errno; iet->iet_lx_cmd_str != NULL; iet++) { + if (cmd != iet->iet_lx_cmd) + continue; + + /* We found a an errno translator for this ioctl. */ + lx_ioctl_msg(fd, cmd, iet->iet_lx_cmd_str, &stat, + "lx_ioctl(): emulating errno"); + + ret = -iet->iet_errno; + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + lx_ioctl_msg(fd, cmd, NULL, &stat, + "lx_ioctl(): unsupported linux ioctl"); + lx_unsupported(gettext("lx_ioctl(): unsupported linux ioctl (%d)"), + cmd); + return (-EINVAL); +} + + +/* + * Ioctl translator functions. + */ +/* + * Used by translators that want to explicitly return EINVAL for an + * ioctl(2) instead of having the translation framework do it implicitly. + * This allows us to indicate which unsupported ioctl(2)s should not + * trigger a SIGSYS when running in LX_STRICT mode. + */ +/* ARGSUSED */ +static int +ict_einval(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + return (-EINVAL); +} + +static int +/*ARGSUSED*/ +ict_pass(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ret; + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, arg); + return (ret < 0 ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_tcsbrkp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ret, dur = 0; + + assert(cmd == LX_TCSBRKP); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCSBRK, "TCSBRK"); + ret = ioctl(fd, TCSBRK, (intptr_t)&dur); + return (ret < 0 ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_sioifoob(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int req, *reqp = (int *)arg; + int len, val; + + assert(cmd == SIOCATMARK); + + if (uucopy(reqp, &req, sizeof (req)) != 0) + return (-errno); + + len = sizeof (val); + + /* + * Linux expects a SIOCATMARK of a UDP socket to return EINVAL, while + * Solaris allows it. 
+ */ + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &val, &len) < 0) { + lx_debug("ict_siofmark: getsockopt failed, errno %d", errno); + return (-EINVAL); + } + + if ((len != sizeof (val)) || (val != SOCK_STREAM)) + return (-EINVAL); + + if (ioctl(fd, cmd, &req) < 0) + return (-errno); + + if (uucopy(&req, reqp, sizeof (req)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_sioifreq(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct ifreq req, *reqp = (struct ifreq *)arg; + + assert(cmd == SIOCGIFFLAGS || cmd == SIOCSIFFLAGS || + cmd == SIOCGIFADDR || cmd == SIOCSIFADDR || + cmd == SIOCGIFDSTADDR || cmd == SIOCSIFDSTADDR || + cmd == SIOCGIFBRDADDR || cmd == SIOCSIFBRDADDR || + cmd == SIOCGIFNETMASK || cmd == SIOCSIFNETMASK || + cmd == SIOCGIFMETRIC || cmd == SIOCSIFMETRIC || + cmd == SIOCGIFMTU || cmd == SIOCSIFMTU); + + /* Copy in the data */ + if (uucopy(reqp, &req, sizeof (struct ifreq)) != 0) + return (-errno); + + if (ifname_from_linux(req.ifr_name) < 0) + return (-EINVAL); + + lx_debug("\tioctl(%d, 0x%x - %s, %.14s", + fd, cmd, cmd_str, req.ifr_name); + + if (ioctl(fd, cmd, &req) < 0) + return (-errno); + + if (ifname_from_solaris(req.ifr_name) < 0) + return (-EINVAL); + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_siocgifconf(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct ifconf conf, *confp = (struct ifconf *)arg; + int i, ifcount, ret; + + assert(cmd == LX_SIOCGIFCONF); + + /* Copy in the data. */ + if (uucopy(confp, &conf, sizeof (conf)) != 0) + return (-errno); + + if (conf.ifc_len == 0) { + /* They want to know how many interfaces there are. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, SIOCGIFNUM, "SIOCGIFNUM"); + if (ioctl(fd, SIOCGIFNUM, (intptr_t)&ifcount) < 0) + return (-errno); + conf.ifc_len = ifcount * sizeof (struct ifreq); + + /* Check if we're done. 
*/ + if (conf.ifc_buf == NULL) { + /* Copy out the data. */ + if (uucopy(&conf, confp, sizeof (conf)) != 0) + return (-errno); + return (0); + } + } + + /* Get interface configuration list. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF"); + ret = ioctl(fd, SIOCGIFCONF, &conf); + if (ret < 0) + return (-errno); + + /* Rename interfaces to linux */ + for (i = 0; i < conf.ifc_len / sizeof (struct ifreq); i++) + if (ifname_from_solaris(conf.ifc_req[i].ifr_name) < 0) + return (-EINVAL); + + /* Copy out the data */ + if (uucopy(&conf, confp, sizeof (conf)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_siocifhwaddr(int fd, struct stat *stat, int cmd, char *cmd_str, + intptr_t arg) +{ + struct ifreq req, *reqp = (struct ifreq *)arg; + struct arpreq arpreq; + + assert(cmd == LX_SIOCGIFHWADDR || cmd == LX_SIOCSIFHWADDR); + + /* Copy in the data */ + if (uucopy(reqp, &req, sizeof (struct ifreq)) != 0) + return (-errno); + + lx_debug("\tioctl(%d, 0x%x - %s, lx %.14s)", + fd, cmd, + (cmd == LX_SIOCGIFHWADDR) ? "SIOCGIFHWADDR" : "SIOCSIFHWADDR", + req.ifr_name); + + /* + * We're not going to support SIOCSIFHWADDR, but we need to be + * able to check the result of the uucopy first to see if the command + * should have returned EFAULT. 
+ */ + if (cmd == LX_SIOCSIFHWADDR) { + lx_unsupported(gettext( + "lx_ioctl(): unsupported linux ioctl: %s"), + "SIOCSIFHWADDR"); + return (-EINVAL); + } + + if (strcmp(req.ifr_name, "lo") == 0 || + strncmp(req.ifr_name, "lo:", 3) == 0) { + /* Abuse ifr_addr for linux ifr_hwaddr */ + bzero(&req.ifr_addr, sizeof (struct sockaddr)); + req.ifr_addr.sa_family = LX_ARPHRD_LOOPBACK; + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); + } + + if (ifname_from_linux(req.ifr_name) < 0) + return (-EINVAL); + + lx_debug("\tioctl(%d, 0x%x - %s, %.14s)", + fd, SIOCGIFADDR, "SIOCGIFADDR", req.ifr_name); + + if (ioctl(fd, SIOCGIFADDR, &req) < 0) + return (-errno); + + bcopy(&req.ifr_addr, &arpreq.arp_pa, sizeof (struct sockaddr)); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGARP, "SIOCGARP"); + + if (ioctl(fd, SIOCGARP, &arpreq) < 0) + return (-errno); + + if (ifname_from_solaris(req.ifr_name) < 0) + return (-EINVAL); + + /* Abuse ifr_addr for linux ifr_hwaddr */ + bcopy(&arpreq.arp_ha, &req.ifr_addr, sizeof (struct sockaddr)); + if (strncmp(req.ifr_name, "eth", 3) == 0) + req.ifr_addr.sa_family = LX_ARPHRD_ETHER; + else + req.ifr_addr.sa_family = LX_ARPHRD_VOID; + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); +} + +static void +l2s_termios(struct lx_termios *l_tios, struct termios *s_tios) +{ + assert((l_tios != NULL) && (s_tios != NULL)); + + bzero(s_tios, sizeof (*s_tios)); + + s_tios->c_iflag = l_tios->c_iflag; + s_tios->c_oflag = l_tios->c_oflag; + s_tios->c_cflag = l_tios->c_cflag; + + s_tios->c_lflag = l_tios->c_lflag; + if (s_tios->c_lflag & ICANON) { + s_tios->c_cc[VEOF] = l_tios->c_cc[LX_VEOF]; + s_tios->c_cc[VEOL] = l_tios->c_cc[LX_VEOL]; + } else { + s_tios->c_cc[VMIN] = l_tios->c_cc[LX_VMIN]; + s_tios->c_cc[VTIME] = l_tios->c_cc[LX_VTIME]; + } + + s_tios->c_cc[VEOL2] = l_tios->c_cc[LX_VEOL2]; + s_tios->c_cc[VERASE] = 
l_tios->c_cc[LX_VERASE]; + s_tios->c_cc[VKILL] = l_tios->c_cc[LX_VKILL]; + s_tios->c_cc[VREPRINT] = l_tios->c_cc[LX_VREPRINT]; + s_tios->c_cc[VLNEXT] = l_tios->c_cc[LX_VLNEXT]; + s_tios->c_cc[VWERASE] = l_tios->c_cc[LX_VWERASE]; + s_tios->c_cc[VINTR] = l_tios->c_cc[LX_VINTR]; + s_tios->c_cc[VQUIT] = l_tios->c_cc[LX_VQUIT]; + s_tios->c_cc[VSWTCH] = l_tios->c_cc[LX_VSWTC]; + s_tios->c_cc[VSTART] = l_tios->c_cc[LX_VSTART]; + s_tios->c_cc[VSTOP] = l_tios->c_cc[LX_VSTOP]; + s_tios->c_cc[VSUSP] = l_tios->c_cc[LX_VSUSP]; + s_tios->c_cc[VDISCARD] = l_tios->c_cc[LX_VDISCARD]; +} + +static void +l2s_termio(struct lx_termio *l_tio, struct termio *s_tio) +{ + assert((l_tio != NULL) && (s_tio != NULL)); + + bzero(s_tio, sizeof (*s_tio)); + + s_tio->c_iflag = l_tio->c_iflag; + s_tio->c_oflag = l_tio->c_oflag; + s_tio->c_cflag = l_tio->c_cflag; + + s_tio->c_lflag = l_tio->c_lflag; + if (s_tio->c_lflag & ICANON) { + s_tio->c_cc[VEOF] = l_tio->c_cc[LX_VEOF]; + } else { + s_tio->c_cc[VMIN] = l_tio->c_cc[LX_VMIN]; + s_tio->c_cc[VTIME] = l_tio->c_cc[LX_VTIME]; + } + + s_tio->c_cc[VINTR] = l_tio->c_cc[LX_VINTR]; + s_tio->c_cc[VQUIT] = l_tio->c_cc[LX_VQUIT]; + s_tio->c_cc[VERASE] = l_tio->c_cc[LX_VERASE]; + s_tio->c_cc[VKILL] = l_tio->c_cc[LX_VKILL]; + s_tio->c_cc[VSWTCH] = l_tio->c_cc[LX_VSWTC]; +} + +static void +termios2lx_cc(struct lx_termios *l_tios, struct lx_cc *lio) +{ + assert((l_tios != NULL) && (lio != NULL)); + + bzero(lio, sizeof (*lio)); + + lio->veof = l_tios->c_cc[LX_VEOF]; + lio->veol = l_tios->c_cc[LX_VEOL]; + lio->vmin = l_tios->c_cc[LX_VMIN]; + lio->vtime = l_tios->c_cc[LX_VTIME]; +} + +static void +termio2lx_cc(struct lx_termio *l_tio, struct lx_cc *lio) +{ + assert((l_tio != NULL) && (lio != NULL)); + + bzero(lio, sizeof (*lio)); + + lio->veof = l_tio->c_cc[LX_VEOF]; + lio->veol = 0; + lio->vmin = l_tio->c_cc[LX_VMIN]; + lio->vtime = l_tio->c_cc[LX_VTIME]; +} + +static void +s2l_termios(struct termios *s_tios, struct lx_termios *l_tios) +{ + assert((s_tios != NULL) 
&& (l_tios != NULL)); + + bzero(l_tios, sizeof (*l_tios)); + + l_tios->c_iflag = s_tios->c_iflag; + l_tios->c_oflag = s_tios->c_oflag; + l_tios->c_cflag = s_tios->c_cflag; + l_tios->c_lflag = s_tios->c_lflag; + + if (s_tios->c_lflag & ICANON) { + l_tios->c_cc[LX_VEOF] = s_tios->c_cc[VEOF]; + l_tios->c_cc[LX_VEOL] = s_tios->c_cc[VEOL]; + } else { + l_tios->c_cc[LX_VMIN] = s_tios->c_cc[VMIN]; + l_tios->c_cc[LX_VTIME] = s_tios->c_cc[VTIME]; + } + + l_tios->c_cc[LX_VEOL2] = s_tios->c_cc[VEOL2]; + l_tios->c_cc[LX_VERASE] = s_tios->c_cc[VERASE]; + l_tios->c_cc[LX_VKILL] = s_tios->c_cc[VKILL]; + l_tios->c_cc[LX_VREPRINT] = s_tios->c_cc[VREPRINT]; + l_tios->c_cc[LX_VLNEXT] = s_tios->c_cc[VLNEXT]; + l_tios->c_cc[LX_VWERASE] = s_tios->c_cc[VWERASE]; + l_tios->c_cc[LX_VINTR] = s_tios->c_cc[VINTR]; + l_tios->c_cc[LX_VQUIT] = s_tios->c_cc[VQUIT]; + l_tios->c_cc[LX_VSWTC] = s_tios->c_cc[VSWTCH]; + l_tios->c_cc[LX_VSTART] = s_tios->c_cc[VSTART]; + l_tios->c_cc[LX_VSTOP] = s_tios->c_cc[VSTOP]; + l_tios->c_cc[LX_VSUSP] = s_tios->c_cc[VSUSP]; + l_tios->c_cc[LX_VDISCARD] = s_tios->c_cc[VDISCARD]; +} + +static void +s2l_termio(struct termio *s_tio, struct lx_termio *l_tio) +{ + assert((s_tio != NULL) && (l_tio != NULL)); + + bzero(l_tio, sizeof (*l_tio)); + + l_tio->c_iflag = s_tio->c_iflag; + l_tio->c_oflag = s_tio->c_oflag; + l_tio->c_cflag = s_tio->c_cflag; + l_tio->c_lflag = s_tio->c_lflag; + + if (s_tio->c_lflag & ICANON) { + l_tio->c_cc[LX_VEOF] = s_tio->c_cc[VEOF]; + } else { + l_tio->c_cc[LX_VMIN] = s_tio->c_cc[VMIN]; + l_tio->c_cc[LX_VTIME] = s_tio->c_cc[VTIME]; + } + + l_tio->c_cc[LX_VINTR] = s_tio->c_cc[VINTR]; + l_tio->c_cc[LX_VQUIT] = s_tio->c_cc[VQUIT]; + l_tio->c_cc[LX_VERASE] = s_tio->c_cc[VERASE]; + l_tio->c_cc[LX_VKILL] = s_tio->c_cc[VKILL]; + l_tio->c_cc[LX_VSWTC] = s_tio->c_cc[VSWTCH]; +} + +static int +/*ARGSUSED*/ +ict_tcsets(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tios, *l_tiosp = (struct lx_termios *)arg; + 
struct termios s_tios; + struct lx_cc lio; + int ldlinux, ret; + + assert(cmd == TCSETS || cmd == TCSETSW || cmd == TCSETSF); + + /* Copy in the data. */ + if (uucopy(l_tiosp, &l_tios, sizeof (l_tios)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if ((ldlinux = ldlinux_check(fd)) < 0) + return (ldlinux); + + if (ldlinux == 1) { + termios2lx_cc(&l_tios, &lio); + if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + } + + l2s_termios(&l_tios, &s_tios); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, (intptr_t)&s_tios); + return ((ret < 0) ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_tcseta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termio l_tio, *l_tiop = (struct lx_termio *)arg; + struct termio s_tio; + struct lx_cc lio; + int ldlinux, ret; + + assert(cmd == TCSETA || cmd == TCSETAW || cmd == TCSETAF); + + /* Copy in the data. */ + if (uucopy(l_tiop, &l_tio, sizeof (l_tio)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if ((ldlinux = ldlinux_check(fd)) < 0) + return (ldlinux); + + if (ldlinux == 1) { + termio2lx_cc(&l_tio, &lio); + if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + } + + l2s_termio(&l_tio, &s_tio); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, (intptr_t)&s_tio); + return ((ret < 0) ? -errno : ret); +} + +/* + * The Solaris TIOCGPGRP ioctl does not have exactly the same semantics as + * the Linux one. To mimic Linux semantics we have to do some extra work + * normally done by the Solaris version of tcgetpgrp(). 
+ */ +static int +/*ARGSUSED*/ +ict_tiocgpgrp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + pid_t ttysid, mysid; + int ret; + + assert(cmd == LX_TIOCGPGRP); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGSID, "TIOCGSID"); + if (ioctl(fd, TIOCGSID, (intptr_t)&ttysid) < 0) + return (-errno); + if ((mysid = getsid(0)) < 0) + return (-errno); + if (mysid != ttysid) + return (-ENOTTY); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGPGRP, "TIOCGPGRP"); + ret = ioctl(fd, TIOCGPGRP, arg); + return ((ret < 0) ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_sptlock(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + assert(cmd == LX_TIOCSPTLCK); + + /* + * The success/fail return values are different between Linux + * and Solaris. Linux expects 0 or -1. Solaris can return + * positive number on success. + */ + if (ioctl_istr(fd, UNLKPT, "UNLKPT", NULL, 0) < 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_gptn(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ptyno, *ptynop = (int *)arg; + pt_own_t pto; + + assert(cmd == LX_TIOCGPTN); + assert(getmajor(stat->st_rdev) == ioc_translator_ptm.idt_major); + + /* This operation is only valid for the lx_ptm device. */ + ptyno = LX_PTM_DEV_TO_PTS(stat->st_rdev); + + /* + * We'd like to just use grantpt() directly, but we can't since + * it assumes the fd node that's passed to it is a ptm node, + * and in our case it's an lx_ptm node. It also relies on + * naming services to get the current process group name. + * Hence we have to invoke the OWNERPT ioctl directly here. + */ + pto.pto_ruid = getuid(); + pto.pto_rgid = getgid(); + if (ioctl_istr(fd, OWNERPT, "OWNERPT", &pto, sizeof (pto)) != 0) + return (-EACCES); + + /* Copy out the data. 
*/ + if (uucopy(&ptyno, ptynop, sizeof (ptyno)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tiocgwinsz(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct winsize winsize, *winsizep = (struct winsize *)arg; + + assert(cmd == LX_TIOCGWINSZ); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, TIOCGWINSZ, "TIOCGWINSZ"); + if (ioctl(fd, TIOCGWINSZ, arg) >= 0) + return (0); + if (errno != EINVAL) + return (-errno); + + bzero(&winsize, sizeof (winsize)); + if (uucopy(&winsize, winsizep, sizeof (winsize)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tcgets_emulate(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tios, *l_tiosp = (struct lx_termios *)arg; + struct termios s_tios; + + assert(cmd == LX_TCGETS); + + if (syscall(SYS_brand, B_TTYMODES, &s_tios) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termios(&s_tios, &l_tios); + if (uucopy(&l_tios, l_tiosp, sizeof (l_tios)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tcgets_native(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tios, *l_tiosp = (struct lx_termios *)arg; + struct termios s_tios; + struct lx_cc lio; + int ldlinux; + + assert(cmd == LX_TCGETS); + + if ((ldlinux = ldlinux_check(fd)) < 0) + return (ldlinux); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCGETS, "TCGETS"); + if (ioctl(fd, TCGETS, (intptr_t)&s_tios) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termios(&s_tios, &l_tios); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. 
+ */ + if (ldlinux != 0) { + if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + + l_tios.c_cc[LX_VEOF] = lio.veof; + l_tios.c_cc[LX_VEOL] = lio.veol; + l_tios.c_cc[LX_VMIN] = lio.vmin; + l_tios.c_cc[LX_VTIME] = lio.vtime; + } + + /* Copy out the data. */ + if (uucopy(&l_tios, l_tiosp, sizeof (l_tios)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tcgeta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termio l_tio, *l_tiop = (struct lx_termio *)arg; + struct termio s_tio; + struct lx_cc lio; + int ldlinux; + + assert(cmd == LX_TCGETA); + + if ((ldlinux = ldlinux_check(fd)) < 0) + return (ldlinux); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCGETA, "TCGETA"); + if (ioctl(fd, TCGETA, (intptr_t)&s_tio) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termio(&s_tio, &l_tio); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if (ldlinux != 0) { + if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + + l_tio.c_cc[LX_VEOF] = lio.veof; + l_tio.c_cc[LX_VMIN] = lio.vmin; + l_tio.c_cc[LX_VTIME] = lio.vtime; + } + + /* Copy out the data. */ + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tiocsctty(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + pid_t mysid, ttysid; + + if ((mysid = getsid(0)) < 0) + return (-errno); + + /* Check if this fd is already our ctty. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGSID, "TIOCGSID"); + if (ioctl(fd, TIOCGSID, (intptr_t)&ttysid) >= 0) + if (mysid == ttysid) + return (0); + + /* + * Need to make sure we're a session leader, otherwise the + * TIOCSCTTY ioctl will fail. 
+ */ + if (mysid != getpid()) + (void) setpgrp(); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCSCTTY, "TIOCSCTTY"); + if (ioctl(fd, TIOCSCTTY, 0) < 0) + return (-errno); + return (0); +} + +/* + * /dev/dsp ioctl translators and support + */ +static int +i_is_dsp_dev(int fd) +{ + int minor; + + /* + * This is a cloning device so we have to ask the driver + * what kind of minor node this is. + */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM"); + if (ioctl(fd, LXA_IOC_GETMINORNUM, &minor) < 0) + return (-EINVAL); + if (minor != LXA_MINORNUM_DSP) + return (-EINVAL); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_reset(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + int err; + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Nothing to really do on Solaris. */ + return (0); +} + +static void +i_oss_fmt_str(char *buf, int buf_size, uint_t mask) +{ + int i, first = 1; + + assert(buf != NULL); + + buf[0] = '\0'; + for (i = 0; oss_fmt_str[i].i2s_str != NULL; i++) { + if ((oss_fmt_str[i].i2s_int != mask) && + ((oss_fmt_str[i].i2s_int & mask) == 0)) + continue; + if (first) + first = 0; + else + (void) strlcat(buf, " | ", buf_size); + (void) strlcat(buf, oss_fmt_str[i].i2s_str, buf_size); + } +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getfmts(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + char buf[MSGBUF]; + uint_t *maskp = (uint_t *)arg; + uint_t mask = 0; + int i, amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETFMTS); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* We need to know the access mode for the file. 
*/ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Test to see what Linux oss formats the target device supports. */ + for (i = 0; oft_table[i].oft_oss_fmt != 0; i++) { + + /* Initialize the mode request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate a Linux oss format into Solaris settings. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) { + sa_info.record.encoding = oft_table[i].oft_encoding; + sa_info.record.precision = oft_table[i].oft_precision; + } + if ((amode == O_WRONLY) || (amode == O_RDWR)) { + sa_info.play.encoding = oft_table[i].oft_encoding; + sa_info.play.precision = oft_table[i].oft_precision; + } + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + if (ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) + continue; + + /* This Linux oss format is supported. */ + mask |= oft_table[i].oft_oss_fmt; + } + + if (lx_debug_enabled != 0) { + i_oss_fmt_str(buf, sizeof (buf), mask); + lx_debug("\toss formats supported = 0x%x (%s)", mask, buf); + } + if (uucopy(&mask, maskp, sizeof (mask)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_setfmts(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + char buf[MSGBUF]; + uint_t *maskp = (uint_t *)arg; + uint_t mask; + int i, amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETFMTS); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(maskp, &mask, sizeof (mask)) != 0) + return (-errno); + + if (lx_debug_enabled != 0) { + i_oss_fmt_str(buf, sizeof (buf), mask); + lx_debug("\toss formats request = 0x%x (%s)", mask, buf); + } + + if ((mask == (uint_t)-1) || (mask == 0)) { + lx_debug("\tXXX: possible oss formats query?"); + return (-EINVAL); + } + + /* Check if multiple format bits were specified. 
*/ + if (!BIT_ONLYONESET(mask)) + return (-EINVAL); + + /* Decode the oss format request into a native format. */ + for (i = 0; oft_table[i].oft_oss_fmt != 0; i++) { + if (oft_table[i].oft_oss_fmt == mask) + break; + } + if (oft_table[i].oft_oss_fmt == 0) + return (-EINVAL); + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the mode request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) { + sa_info.record.encoding = oft_table[i].oft_encoding; + sa_info.record.precision = oft_table[i].oft_precision; + } + if ((amode == O_WRONLY) || (amode == O_RDWR)) { + sa_info.play.encoding = oft_table[i].oft_encoding; + sa_info.play.precision = oft_table[i].oft_precision; + } + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + return ((ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_channels(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + uint_t *channelsp = (uint_t *)arg; + uint_t channels; + int amode, err; + + assert((cmd == LX_OSS_SNDCTL_DSP_CHANNELS) || + (cmd == LX_OSS_SNDCTL_DSP_STEREO)); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(channelsp, &channels, sizeof (channels)) != 0) + return (-errno); + + lx_debug("\toss %s request = 0x%x (%u)", + (cmd == LX_OSS_SNDCTL_DSP_CHANNELS) ? "channel" : "stereo", + channels, channels); + + if (channels == (uint_t)-1) { + lx_debug("\tXXX: possible channel/stereo query?"); + return (-EINVAL); + } + + if (cmd == LX_OSS_SNDCTL_DSP_STEREO) { + /* + * There doesn't seem to be any documentation for + * SNDCTL_DSP_STEREO. 
Looking at source that uses or + * used this ioctl seems to indicate that the + * functionality provided by this ioctl has been + * subsumed by the SNDCTL_DSP_CHANNELS ioctl. It + * seems that the only arguments ever passed to + * the SNDCTL_DSP_STEREO. Ioctl are boolean values + * of '0' or '1'. Hence we'll start out strict and + * only support those values. + * + * Some online forum discussions about this ioctl + * seemed to indicate that in case of success it + * returns the "stereo" setting (ie, either + * '0' for mono or '1' for stereo). + */ + if ((channels != 0) && (channels != 1)) { + lx_debug("\tinvalid stereo request"); + return (-EINVAL); + } + channels += 1; + } else { + /* Limit the system to one or two channels. */ + if ((channels != 1) && (channels != 2)) { + lx_debug("\tinvalid channel request"); + return (-EINVAL); + } + } + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the channel request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) + sa_info.record.channels = channels; + if ((amode == O_WRONLY) || (amode == O_RDWR)) + sa_info.play.channels = channels; + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + if (ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) + return (-errno); + + if (cmd == LX_OSS_SNDCTL_DSP_STEREO) + return (channels - 1); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_speed(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + uint_t *speedp = (uint_t *)arg; + uint_t speed; + int amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SPEED); + + /* Ioctl is only supported on dsp audio devices. 
*/ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(speedp, &speed, sizeof (speed)) != 0) + return (-errno); + + lx_debug("\toss speed request = 0x%x (%u)", speed, speed); + + if (speed == (uint_t)-1) { + lx_debug("\tXXX: possible oss speed query?"); + return (-EINVAL); + } + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the speed request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) + sa_info.record.sample_rate = speed; + if ((amode == O_WRONLY) || (amode == O_RDWR)) + sa_info.play.sample_rate = speed; + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + return ((ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getblksize(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_frag_info_t fi; + uint_t *blksizep = (uint_t *)arg; + uint_t blksize; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETBLKSIZE); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment count and size. 
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + blksize = fi.lxa_fi_size; + + if (uucopy(&blksize, blksizep, sizeof (blksize)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getspace(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lx_oss_audio_buf_info_t *spacep = (lx_oss_audio_buf_info_t *)arg; + lx_oss_audio_buf_info_t space; + lxa_frag_info_t fi; + int err; + + assert((cmd == LX_OSS_SNDCTL_DSP_GETOSPACE) || + (cmd == LX_OSS_SNDCTL_DSP_GETISPACE)); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment count and size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + /* Return the current fragment count and size. */ + space.fragstotal = fi.lxa_fi_cnt; + space.fragsize = fi.lxa_fi_size; + + /* + * We'll lie and tell applications that they can always write + * out at least one fragment without blocking. 
+ */ + space.fragments = 1; + space.bytes = space.fragsize; + + if (cmd == LX_OSS_SNDCTL_DSP_GETOSPACE) + lx_debug("\toss get output space result = "); + if (cmd == LX_OSS_SNDCTL_DSP_GETISPACE) + lx_debug("\toss get input space result = "); + + lx_debug("\t\tbytes = 0x%x (%u), fragments = 0x%x (%u)", + space.bytes, space.bytes, space.fragments, space.fragments); + lx_debug("\t\tfragtotal = 0x%x (%u), fragsize = 0x%x (%u)", + space.fragstotal, space.fragstotal, + space.fragsize, space.fragsize); + + if (uucopy(&space, spacep, sizeof (space)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_setfragment(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_frag_info_t fi; + uint_t *fraginfop = (uint_t *)arg; + uint_t fraginfo, frag_size, frag_cnt; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETFRAGMENT); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(fraginfop, &fraginfo, sizeof (fraginfo)) != 0) + return (-errno); + + /* + * The argument to this ioctl is a 32-bit integer of the + * format 0x MMMM SSSS where: + * SSSS - requests a fragment size of 2^SSSS + * MMMM - requests a maximum fragment count of 2^MMMM + * if MMMM is 0x7fff then the application is requesting + * no limits on the number of fragments. + */ + + frag_size = fraginfo & 0xffff; + frag_cnt = fraginfo >> 16; + + lx_debug("\toss fragment request: " + "power size = 0x%x (%u), power cnt = 0x%x (%u)", + frag_size, frag_size, frag_cnt, frag_cnt); + + /* Limit the supported fragment size from 2^4 to 2^31. */ + if ((frag_size < 4) || (frag_size > 31)) + return (-EINVAL); + + /* Limit the number of fragments from 2^1 to 2^32. */ + if (((frag_cnt < 1) || (frag_cnt > 32)) && (frag_cnt != 0x7fff)) + return (-EINVAL); + + /* Expand the fragment values. 
*/ + frag_size = 1 << frag_size; + if ((frag_cnt == 32) || (frag_cnt == 0x7fff)) { + frag_cnt = UINT_MAX; + } else { + frag_cnt = 1 << frag_cnt; + } + + lx_debug("\toss fragment request: " + "translated size = 0x%x (%u), translated cnt = 0x%x (%u)", + frag_size, frag_size, frag_cnt, frag_cnt); + + fi.lxa_fi_size = frag_size; + fi.lxa_fi_cnt = frag_cnt; + + /* Set the current fragment count and size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_SET_FRAG_INFO, "LXA_IOC_SET_FRAG_INFO"); + return ((ioctl(fd, LXA_IOC_SET_FRAG_INFO, &fi) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getcaps(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *capsp = (uint_t *)arg; + uint_t caps; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETCAPS); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* + * Report that we support mmap access + * this is where things start to get fun. + */ + caps = LX_OSS_DSP_CAP_MMAP | LX_OSS_DSP_CAP_TRIGGER; + + if (uucopy(&caps, capsp, sizeof (caps)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_settrigger(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *triggerp = (uint_t *)arg; + uint_t trigger; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETTRIGGER); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(triggerp, &trigger, sizeof (trigger)) != 0) + return (-errno); + + lx_debug("\toss set trigger request = 0x%x (%u)", + trigger, trigger); + + /* We only support two types of trigger requests. */ + if ((trigger != LX_OSS_PCM_DISABLE_OUTPUT) && + (trigger != LX_OSS_PCM_ENABLE_OUTPUT)) + return (-EINVAL); + + /* + * We only support triggers on devices open for write access, + * but we don't need to check for that here since the driver will + * verify this for us. 
+ */ + + /* Send the trigger command to the audio device. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MMAP_OUTPUT, "LXA_IOC_MMAP_OUTPUT"); + return ((ioctl(fd, LXA_IOC_MMAP_OUTPUT, &trigger) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getoptr(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + static uint_t bytes = 0; + lx_oss_count_info_t ci; + lxa_frag_info_t fi; + audio_info_t ai; + int ptr, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETOPTR); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + /* Figure out how many samples have been played. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_GETINFO, "AUDIO_GETINFO"); + if (ioctl(fd, AUDIO_GETINFO, &ai) < 0) + return (-errno); + ci.bytes = ai.play.samples + ai.record.samples; + + /* + * Figure out how many fragments of audio have gone out since + * the last call to this ioctl. + */ + ci.blocks = (ci.bytes - bytes) / fi.lxa_fi_size; + bytes = ci.bytes; + + /* Figure out the current fragment offset for mmap audio output. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MMAP_PTR, "LXA_IOC_MMAP_PTR"); + if (ioctl(fd, LXA_IOC_MMAP_PTR, &ptr) < 0) { + /* + * We really should return an error here, but some + * application (*cough* *cough* flash) expect this + * ioctl to work even if they haven't mmaped the + * device. 
+ */ + ci.ptr = 0; + } else { + ci.ptr = ptr; + } + + lx_debug("\toss get output ptr result = "); + lx_debug("\t\t" + "bytes = 0x%x (%u), blocks = 0x%x (%u), ptr = 0x%x (%u)", + ci.bytes, ci.bytes, ci.blocks, ci.blocks, ci.ptr, ci.ptr); + + if (uucopy(&ci, (void *)arg, sizeof (ci)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_sync(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + int amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SYNC); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* + * A sync is basically a noop for record only device. + * We check for this here because on Linux a sync on a record + * only device returns success immediately. But the Solaris + * equivalent to a drain operation is a AUDIO_DRAIN, and if + * it's issued to a record only device it will fail and return + * EINVAL. + */ + if (amode == O_RDONLY) + return (0); + + /* Drain any pending output. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_DRAIN, "AUDIO_DRAIN"); + return ((ioctl(fd, AUDIO_DRAIN, NULL) < 0) ? -errno : 0); +} + +/* + * /dev/mixer ioctl translators and support + * + * There are some interesting things to take note of for supporting + * /dev/mixer ioctls. + * + * 1) We report support for the following mixer resources: + * VOLUME, PCM, MIC + * + * 2) We assume the following number of channels for each mixer resource: + * VOLUME: 2 channels + * PCM: 2 channels + * MIC: 1 channel + * + * 3) OSS sets the gain on each channel independently but on Solaris + * there is only one gain value and a balance value. So we need + * to do some translation back and forth. 
+ * + * 4) OSS assumes direct access to hardware but Solaris provides + * virtualized audio device access (where everyone who opens /dev/audio + * get a virtualized audio channel stream, all of which are merged + * together by a software mixer before reaching the hardware). Hence + * mapping OSS mixer resources to Solaris mixer resources takes some + * work. VOLUME and Mic resources are mapped to the actual underlying + * audio hardware resources. PCM resource are mapped to the virtual + * audio channel output level. This mapping becomes more complicated + * if there are no open audio output channels. In this case the + * lx_audio device caches the PCM channels setting for us and applies + * them to any new audio output channels that get opened. (This + * is the reason that we don't use AUDIO_SETINFO ioctls directly + * but instead the lx_audio driver custom LXA_IOC_MIXER_SET_* + * and LXA_IOC_MIXER_GET_* ioctls.) For more information see + * the comments in lx_audio.c. + */ +static int +i_is_mixer_dev(int fd) +{ + int minor; + + /* + * This is a cloning device so we have to ask the driver + * what kind of minor node this is. + */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM"); + if (ioctl(fd, LXA_IOC_GETMINORNUM, &minor) < 0) + return (-EINVAL); + if (minor != LXA_MINORNUM_MIXER) + return (-EINVAL); + return (0); +} + +static int +i_oss_mixer_ml_to_val(lxa_mixer_levels_t *ml, uint_t *val) +{ + int range, val1, val2; + + /* Deal with the other easy case, both channels have the same level. */ + if (ml->lxa_ml_balance == AUDIO_MID_BALANCE) { + *val = LX_OSS_MIXER_ENC2( + LX_OSS_S2L_GAIN(ml->lxa_ml_gain), + LX_OSS_S2L_GAIN(ml->lxa_ml_gain)); + assert(LX_OSS_MIXER_2CH_OK(*val)); + return (0); + } + + /* Decode the balance/gain into two separate levels. 
*/ + if (ml->lxa_ml_balance > AUDIO_MID_BALANCE) { + val2 = ml->lxa_ml_gain; + + range = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE; + val1 = AUDIO_RIGHT_BALANCE - ml->lxa_ml_balance; + val1 = (val2 * val1) / range; + } else { + assert(ml->lxa_ml_balance < AUDIO_MID_BALANCE); + val1 = ml->lxa_ml_gain; + + range = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE; + val2 = ml->lxa_ml_balance; + val2 = (val1 * val2) / range; + } + + *val = LX_OSS_MIXER_ENC2(LX_OSS_S2L_GAIN(val1), + LX_OSS_S2L_GAIN(val2)); + return (0); +} + +static int +i_oss_mixer_val_to_ml(uint_t val, lxa_mixer_levels_t *ml_old, + lxa_mixer_levels_t *ml) +{ + int range, val1, val2; + + if (!LX_OSS_MIXER_2CH_OK(val)) + return (-EINVAL); + + val1 = LX_OSS_MIXER_DEC1(val); + val2 = LX_OSS_MIXER_DEC2(val); + + /* + * Deal with the easy case. + * Both channels have the same non-zero level. + */ + if ((val1 != 0) && (val1 == val2)) { + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1); + ml->lxa_ml_balance = AUDIO_MID_BALANCE; + return (0); + } + + /* If both levels are zero, preserve the current balance setting. */ + if ((val1 == 0) && (val2 == 0)) { + ml->lxa_ml_gain = 0; + ml->lxa_ml_balance = ml_old->lxa_ml_balance; + return (0); + } + + /* + * First set the gain to match the highest channel value volume. + * Then use the balance to simulate lower volume on the second + * channel. 
+ */ + if (val1 > val2) { + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1); + + range = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE; + ml->lxa_ml_balance = 0; + ml->lxa_ml_balance += ((val2 * range) / val1); + } else { + assert(val1 < val2); + + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val2); + + range = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE; + ml->lxa_ml_balance = AUDIO_RIGHT_BALANCE; + ml->lxa_ml_balance -= ((val1 * range) / val2); + } + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_volume(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + char *cmd_txt; + int err, cmd_new; + + assert((cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) || + (cmd == LX_OSS_SOUND_MIXER_READ_PCM)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) { + cmd_new = LXA_IOC_MIXER_GET_VOL; + cmd_txt = "LXA_IOC_MIXER_GET_VOL"; + } + if (cmd == LX_OSS_SOUND_MIXER_READ_PCM) { + cmd_new = LXA_IOC_MIXER_GET_PCM; + cmd_txt = "LXA_IOC_MIXER_GET_PCM"; + } + + /* Attempt to set the device output gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt); + if (ioctl(fd, cmd_new, &ml) < 0) + return (-errno); + + lx_debug("\tlx_audio mixer results, " + "gain = 0x%x (%u), balance = 0x%x (%u)", + ml.lxa_ml_gain, ml.lxa_ml_gain, + ml.lxa_ml_balance, ml.lxa_ml_balance); + + assert(LXA_MIXER_LEVELS_OK(&ml)); + + /* Translate the mixer levels struct to an OSS mixer value. */ + if ((err = i_oss_mixer_ml_to_val(&ml, &val)) != 0) + return (err); + assert(LX_OSS_MIXER_2CH_OK(val)); + + lx_debug("\toss get mixer %s result = 0x%x (%u)", + (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) ? 
"volume" : "pcm", + val, val); + + if (uucopy(&val, valp, sizeof (val)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_write_volume(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml, ml_old; + uint_t *valp = (uint_t *)arg; + uint_t val; + char *cmd_txt; + int err, cmd_new; + + assert((cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) || + (cmd == LX_OSS_SOUND_MIXER_WRITE_PCM)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(valp, &val, sizeof (val)) != 0) + return (-errno); + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) { + cmd_new = LXA_IOC_MIXER_SET_VOL; + cmd_txt = "LXA_IOC_MIXER_SET_VOL"; + + /* Attempt to get the device output gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, + LXA_IOC_MIXER_GET_VOL, "LXA_IOC_MIXER_GET_VOL"); + if (ioctl(fd, LXA_IOC_MIXER_GET_VOL, &ml_old) < 0) + return (-errno); + } + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_PCM) { + cmd_new = LXA_IOC_MIXER_SET_PCM; + cmd_txt = "LXA_IOC_MIXER_SET_PCM"; + + /* Attempt to get the device output gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, + LXA_IOC_MIXER_GET_PCM, "LXA_IOC_MIXER_GET_PCM"); + if (ioctl(fd, LXA_IOC_MIXER_GET_PCM, &ml_old) < 0) + return (-errno); + } + + lx_debug("\toss set mixer %s request = 0x%x (%u)", + (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) ? "volume" : "pcm", + val, val); + + /* Translate an OSS mixer value to mixer levels. */ + if ((err = i_oss_mixer_val_to_ml(val, &ml_old, &ml)) != 0) + return (err); + assert(LXA_MIXER_LEVELS_OK(&ml)); + + lx_debug("\tlx_audio mixer request, " + "gain = 0x%x (%u), balance = 0x%x (%u)", + ml.lxa_ml_gain, ml.lxa_ml_gain, + ml.lxa_ml_balance, ml.lxa_ml_balance); + + /* Attempt to set the device output gain. 
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt); + if (ioctl(fd, cmd_new, &ml) < 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_mic(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + int err; + + assert((cmd == LX_OSS_SOUND_MIXER_READ_MIC) || + (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + /* Attempt to get the device input gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MIXER_GET_MIC, "LXA_IOC_MIXER_GET_MIC"); + if (ioctl(fd, LXA_IOC_MIXER_GET_MIC, &ml) < 0) + return (-errno); + + /* Report the mixer as having two channels. */ + val = LX_OSS_MIXER_ENC2( + LX_OSS_S2L_GAIN(ml.lxa_ml_gain), + LX_OSS_S2L_GAIN(ml.lxa_ml_gain)); + + if (cmd == LX_OSS_SOUND_MIXER_READ_MIC) + lx_debug("\toss get mixer mic result = 0x%x (%u)", val, val); + if (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN) + lx_debug("\toss get mixer igain result = 0x%x (%u)", val, val); + + if (uucopy(&val, valp, sizeof (val)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_write_mic(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + int err; + + assert((cmd == LX_OSS_SOUND_MIXER_WRITE_MIC) || + (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(valp, &val, sizeof (val)) != 0) + return (-errno); + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_MIC) + lx_debug("\toss set mixer mic request = 0x%x (%u)", val, val); + if (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN) + lx_debug("\toss set mixer igain request = 0x%x (%u)", val, val); + + /* The mic only supports one channel. 
*/ + val = LX_OSS_MIXER_DEC1(val); + + ml.lxa_ml_balance = AUDIO_MID_BALANCE; + ml.lxa_ml_gain = LX_OSS_L2S_GAIN(val); + + /* Attempt to set the device input gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MIXER_SET_MIC, "LXA_IOC_MIXER_SET_MIC"); + if (ioctl(fd, LXA_IOC_MIXER_SET_MIC, &ml) < 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_devs(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *resultp = (uint_t *)arg; + uint_t result = 0; + int err; + + if (cmd == LX_OSS_SOUND_MIXER_READ_DEVMASK) { + /* Bitmap of all the mixer channels we supposedly support. */ + result = ((1 << LX_OSS_SM_PCM) | + (1 << LX_OSS_SM_MIC) | + (1 << LX_OSS_SM_VOLUME)); + } + if (cmd == LX_OSS_SOUND_MIXER_READ_STEREODEVS) { + /* Bitmap of the stereo mixer channels we supposedly support. */ + result = ((1 << LX_OSS_SM_PCM) | + (1 << LX_OSS_SM_VOLUME)); + } + if ((cmd == LX_OSS_SOUND_MIXER_READ_RECMASK) || + (cmd == LX_OSS_SOUND_MIXER_READ_RECSRC)) { + /* Bitmap of the mixer input channels we supposedly support. */ + result = (1 << LX_OSS_SM_MIC); + } + assert(result != 0); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(&result, resultp, sizeof (result)) != 0) + return (-errno); + + return (0); +} + +/* + * Audio ioctl conversion support structures. + */ +static oss_fmt_translator_t oft_table[] = { + { LX_OSS_AFMT_MU_LAW, AUDIO_ENCODING_ULAW, 8 }, + { LX_OSS_AFMT_A_LAW, AUDIO_ENCODING_ALAW, 8 }, + { LX_OSS_AFMT_S8, AUDIO_ENCODING_LINEAR, 8 }, + { LX_OSS_AFMT_U8, AUDIO_ENCODING_LINEAR8, 8 }, + { LX_OSS_AFMT_S16_NE, AUDIO_ENCODING_LINEAR, 16 }, + { 0, 0, 0 } +}; + +/* + * Ioctl translator definitions. + */ + +/* + * Defines to help with creating ioctl translators. + * + * IOC_CMD_TRANSLATOR_NONE - Ioctl has the same semantics and argument + * values on Solaris and Linux but may have different command values. 
 * (Macro assumes the symbolic Linux name assigned to the ioctl command
 * value is the same as the Solaris symbol but pre-pended with an "LX_")
 *
 * IOC_CMD_TRANSLATOR_PASS - Ioctl is a Linux specific ioctl and should
 * be passed through unmodified.
 *
 * IOC_CMD_TRANSLATOR_FILTER - Ioctl has the same command name on
 * Solaris and Linux and needs a translation function that is common to
 * more than one ioctl. (Macro assumes the symbolic Linux name assigned
 * to the ioctl command value is the same as the Solaris symbol but
 * pre-pended with an "LX_")
 *
 * IOC_CMD_TRANSLATOR_CUSTOM - Ioctl needs special handling via a
 * translation function.
 *
 * Each of the four entry macros expands to one brace-enclosed, five-field
 * initializer (command value, command name string, command value, command
 * name string, handler) followed by a comma, so a table lists its entries
 * back to back with no separators of its own.  IOC_CMD_TRANSLATOR_END
 * expands to an all-zero/NULL entry with no trailing comma and must be
 * the last entry of a table.
 */
#define	IOC_CMD_TRANSLATOR_NONE(ioc_cmd_sym)				\
	{ (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym,			\
		ioc_cmd_sym, #ioc_cmd_sym, ict_pass },

#define	IOC_CMD_TRANSLATOR_PASS(ioc_cmd_sym)				\
	{ (int)ioc_cmd_sym, #ioc_cmd_sym,				\
		ioc_cmd_sym, #ioc_cmd_sym, ict_pass },

#define	IOC_CMD_TRANSLATOR_FILTER(ioc_cmd_sym, ioct_handler)		\
	{ (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym,			\
		ioc_cmd_sym, #ioc_cmd_sym, ioct_handler },

#define	IOC_CMD_TRANSLATOR_CUSTOM(ioc_cmd_sym, ioct_handler)		\
	{ (int)ioc_cmd_sym, #ioc_cmd_sym,				\
		(int)ioc_cmd_sym, #ioc_cmd_sym, ioct_handler },

#define	IOC_CMD_TRANSLATOR_END						\
	{ 0, NULL, 0, NULL, NULL }

/* All files will need to support these ioctls. */
#define	IOC_CMD_TRANSLATORS_ALL						\
	IOC_CMD_TRANSLATOR_NONE(FIONREAD)				\
	IOC_CMD_TRANSLATOR_NONE(FIONBIO)

/* Any files supporting streams semantics will need these ioctls.
*/
#define	IOC_CMD_TRANSLATORS_STREAMS					\
	IOC_CMD_TRANSLATOR_NONE(TCXONC)					\
	IOC_CMD_TRANSLATOR_NONE(TCFLSH)					\
	IOC_CMD_TRANSLATOR_NONE(TIOCEXCL)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCNXCL)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCSPGRP)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCSTI)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCSWINSZ)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCMBIS)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCMBIC)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCMSET)				\
	IOC_CMD_TRANSLATOR_NONE(TIOCSETD)				\
	IOC_CMD_TRANSLATOR_NONE(FIOASYNC)				\
	IOC_CMD_TRANSLATOR_NONE(FIOSETOWN)				\
	IOC_CMD_TRANSLATOR_NONE(TCSBRK)					\
									\
	IOC_CMD_TRANSLATOR_FILTER(TCSETS,		ict_tcsets)	\
	IOC_CMD_TRANSLATOR_FILTER(TCSETSW,		ict_tcsets)	\
	IOC_CMD_TRANSLATOR_FILTER(TCSETSF,		ict_tcsets)	\
	IOC_CMD_TRANSLATOR_FILTER(TCSETA,		ict_tcseta)	\
	IOC_CMD_TRANSLATOR_FILTER(TCSETAW,		ict_tcseta)	\
	IOC_CMD_TRANSLATOR_FILTER(TCSETAF,		ict_tcseta)	\
									\
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCSBRKP,		ict_tcsbrkp)


/*
 * Translators for non-device files.
 *
 * Each table below is a list of translator entries closed by
 * IOC_CMD_TRANSLATOR_END.
 */

/* Only the ioctls common to all files. */
static ioc_cmd_translator_t ioc_translators_file[] = {
	IOC_CMD_TRANSLATORS_ALL
	IOC_CMD_TRANSLATOR_END
};

/* The common ioctls plus the streams set. */
static ioc_cmd_translator_t ioc_translators_fifo[] = {
	IOC_CMD_TRANSLATORS_ALL
	IOC_CMD_TRANSLATORS_STREAMS
	IOC_CMD_TRANSLATOR_END
};

/*
 * The common ioctls plus ownership/async ioctls and network interface
 * ioctls.  This table does not pull in IOC_CMD_TRANSLATORS_STREAMS, so
 * FIOASYNC and FIOSETOWN are listed here explicitly.
 */
static ioc_cmd_translator_t ioc_translators_sock[] = {
	IOC_CMD_TRANSLATORS_ALL

	IOC_CMD_TRANSLATOR_NONE(FIOASYNC)
	IOC_CMD_TRANSLATOR_NONE(FIOGETOWN)
	IOC_CMD_TRANSLATOR_NONE(FIOSETOWN)
	IOC_CMD_TRANSLATOR_NONE(SIOCSPGRP)
	IOC_CMD_TRANSLATOR_NONE(SIOCGPGRP)

	IOC_CMD_TRANSLATOR_FILTER(SIOCATMARK,		ict_sioifoob)

	/* Interface requests that share the ict_sioifreq handler. */
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFFLAGS,		ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFFLAGS,		ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFADDR,		ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFADDR,		ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFDSTADDR,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFDSTADDR,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFBRDADDR,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFBRDADDR,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFNETMASK,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFNETMASK,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMETRIC,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMETRIC,	ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMTU,		ict_sioifreq)
	IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMTU,		ict_sioifreq)

	IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFCONF,	ict_siocgifconf)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFHWADDR,	ict_siocifhwaddr)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCSIFHWADDR,	ict_siocifhwaddr)

	IOC_CMD_TRANSLATOR_END
};

/*
 * Translators for devices.
 *
 * Each device gets a command table plus an ioc_dev_translator_t that ties
 * the table to a driver name.  Every idt_major is initialized to 0 here.
 * NOTE(review): presumably the real major number is filled in at runtime;
 * confirm against the code that consumes ioc_translators_dev.
 */
static ioc_cmd_translator_t ioc_cmd_translators_ptm[] = {
	IOC_CMD_TRANSLATORS_ALL
	IOC_CMD_TRANSLATORS_STREAMS

	IOC_CMD_TRANSLATOR_NONE(TIOCPKT)

	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP,		ict_tiocgpgrp)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSPTLCK,	ict_sptlock)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPTN,		ict_gptn)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ,	ict_tiocgwinsz)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS,		ict_tcgets_emulate)

	IOC_CMD_TRANSLATOR_END
};
static ioc_dev_translator_t ioc_translator_ptm = {
	LX_PTM_DRV,	/* idt_driver */
	0,		/* idt_major */
	ioc_cmd_translators_ptm
};

static ioc_cmd_translator_t ioc_cmd_translators_pts[] = {
	IOC_CMD_TRANSLATORS_ALL
	IOC_CMD_TRANSLATORS_STREAMS

	IOC_CMD_TRANSLATOR_NONE(TIOCGETD)
	IOC_CMD_TRANSLATOR_NONE(TIOCGSID)
	IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY)

	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP,		ict_tiocgpgrp)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS,		ict_tcgets_native)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA,		ict_tcgeta)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ,	ict_tiocgwinsz)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY,		ict_tiocsctty)

	IOC_CMD_TRANSLATOR_END
};
static ioc_dev_translator_t ioc_translator_pts = {
	"pts",		/* idt_driver */
	0,		/* idt_major */
	ioc_cmd_translators_pts
};

static ioc_dev_translator_t ioc_translator_sy = {
	"sy",		/* idt_driver */
	0,		/* idt_major */

	/*
	 * /dev/tty (which is implemented via the "sy" driver) is basically
	 * a layered driver that passes on requests to the ctty for the
	 * current process.  Since ctty's are currently always implemented
	 * via the pts driver, we should make sure to support all the
	 * same ioctls on the sy driver as we do on the pts driver.
	 */
	ioc_cmd_translators_pts
};

static ioc_cmd_translator_t ioc_cmd_translators_zcons[] = {
	IOC_CMD_TRANSLATORS_ALL
	IOC_CMD_TRANSLATORS_STREAMS

	IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY)

	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS,		ict_tcgets_native)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA,		ict_tcgeta)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ,	ict_tiocgwinsz)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY,		ict_tiocsctty)

	IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCLINUX,		ict_einval)

	IOC_CMD_TRANSLATOR_END
};
static ioc_dev_translator_t ioc_translator_zcons = {
	"zcons",	/* idt_driver */
	0,		/* idt_major */
	ioc_cmd_translators_zcons
};

/*
 * lx_audio serves both the dsp and the mixer ioctls from one table; the
 * individual handlers check which kind of minor node they were given.
 */
static ioc_cmd_translator_t ioc_cmd_translators_lx_audio[] = {
	IOC_CMD_TRANSLATORS_ALL

	/* /dev/dsp ioctls */
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_RESET,
	    ict_oss_sndctl_dsp_reset)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETFMTS,
	    ict_oss_sndctl_dsp_getfmts)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFMTS,
	    ict_oss_sndctl_dsp_setfmts)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_CHANNELS,
	    ict_oss_sndctl_dsp_channels)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_STEREO,
	    ict_oss_sndctl_dsp_channels)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SPEED,
	    ict_oss_sndctl_dsp_speed)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETBLKSIZE,
	    ict_oss_sndctl_dsp_getblksize)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SYNC,
	    ict_oss_sndctl_dsp_sync)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFRAGMENT,
	    ict_oss_sndctl_dsp_setfragment)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOSPACE,
	    ict_oss_sndctl_dsp_getspace)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETCAPS,
	    ict_oss_sndctl_dsp_getcaps)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETTRIGGER,
	    ict_oss_sndctl_dsp_settrigger)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOPTR,
	    ict_oss_sndctl_dsp_getoptr)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETISPACE,
	    ict_oss_sndctl_dsp_getspace)

	/* /dev/mixer level ioctls */
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_VOLUME,
	    ict_oss_mixer_read_volume)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_PCM,
	    ict_oss_mixer_read_volume)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_MIC,
	    ict_oss_mixer_read_mic)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_IGAIN,
	    ict_oss_mixer_read_mic)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_VOLUME,
	    ict_oss_mixer_write_volume)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_PCM,
	    ict_oss_mixer_write_volume)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_MIC,
	    ict_oss_mixer_write_mic)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_IGAIN,
	    ict_oss_mixer_write_mic)

	/* /dev/mixer capability ioctls */
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_STEREODEVS,
	    ict_oss_mixer_read_devs)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_DEVMASK,
	    ict_oss_mixer_read_devs)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECMASK,
	    ict_oss_mixer_read_devs)
	IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECSRC,
	    ict_oss_mixer_read_devs)

	IOC_CMD_TRANSLATOR_END
};
static ioc_dev_translator_t ioc_translator_lx_audio = {
	"lx_audio",	/* idt_driver */
	0,		/* idt_major */
	ioc_cmd_translators_lx_audio
};

/*
 * An array of all the device translators.
 * The list is terminated by a NULL pointer.
 */
static ioc_dev_translator_t *ioc_translators_dev[] = {
	&ioc_translator_lx_audio,
	&ioc_translator_ptm,
	&ioc_translator_pts,
	&ioc_translator_sy,
	&ioc_translator_zcons,
	NULL
};

/*
 * Translators for filesystems.
 */
/* The autofs ioctls are passed through with their command values unchanged. */
static ioc_cmd_translator_t ioc_cmd_translators_autofs[] = {
	IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_READY)
	IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_FAIL)
	IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_CATATONIC)
	IOC_CMD_TRANSLATOR_END
};

static ioc_fs_translator_t ioc_translator_autofs = {
	LX_AUTOFS_NAME,	/* ift_filesystem */
	ioc_cmd_translators_autofs
};

/*
 * An array of all the filesystem translators.
 * The list is terminated by a NULL pointer.
 */
static ioc_fs_translator_t *ioc_translators_fs[] = {
	&ioc_translator_autofs,
	NULL
};

/*
 * Ioctl error translator definitions.
 *
 * IOC_ERRNO_TRANSLATOR expands to one initializer holding the LX_-prefixed
 * command value, that name as a string, and an errno value, followed by a
 * comma.  IOC_ERRNO_TRANSLATOR_END is the all-zero entry that closes the
 * table.
 */
#define	IOC_ERRNO_TRANSLATOR(iet_cmd_sym, iet_errno)			\
	{ (int)LX_##iet_cmd_sym, "LX_" #iet_cmd_sym, iet_errno },

#define	IOC_ERRNO_TRANSLATOR_END					\
	{ 0, NULL, 0 }

/* Terminal ioctls, each paired with ENOTTY. */
static ioc_errno_translator_t ioc_translators_errno[] = {
	IOC_ERRNO_TRANSLATOR(TCGETS, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TCSETS, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TCSBRK, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TCXONC, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TCFLSH, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TIOCGPGRP, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TIOCSPGRP, ENOTTY)
	IOC_ERRNO_TRANSLATOR(TIOCGWINSZ, ENOTTY)
	IOC_ERRNO_TRANSLATOR_END
};

/*
 * vhangup emulation.  Callers whose effective uid is not 0 get -EPERM;
 * otherwise vhangup() is called and 0 is returned.
 */
int
lx_vhangup(void)
{
	if (geteuid() != 0)
		return (-EPERM);

	vhangup();

	return (0);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/iovec.c b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
new file mode 100644
index 0000000000..49af88d22e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/iovec.c
@@ -0,0 +1,241 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/uio.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <alloca.h> +#include <string.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> +#include <sys/lx_types.h> + +static int +lx_is_directory(int fd) +{ + struct stat64 sbuf; + + if (fstat64(fd, &sbuf) < 0) + sbuf.st_mode = 0; + + return ((sbuf.st_mode & S_IFMT) == S_IFDIR); +} + +int +lx_read(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + void *buf = (void *)p2; + size_t nbyte = (size_t)p3; + ssize_t ret; + + if (lx_is_directory(fd)) + return (-EISDIR); + + if ((ret = read(fd, buf, nbyte)) < 0) + return (-errno); + + return (ret); +} + +int +lx_pread64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) +{ + int fd = (int)p1; + void *buf = (void *)p2; + size_t nbyte = (size_t)p3; + uintptr_t off_lo = p4; + uintptr_t off_hi = p5; + ssize_t ret; + + if (lx_is_directory(fd)) + return (-EISDIR); + + ret = pread64(fd, buf, nbyte, (off64_t)LX_32TO64(off_lo, off_hi)); + + if (ret < 0) + return (-errno); + + return (ret); +} + +/* + * On Linux, the pwrite(2) system call behaves identically to Solaris except + * in the case of the file being opened with O_APPEND. In that case Linux's + * pwrite(2) ignores the offset parameter and instead appends the data to the + * file without modifying the current seek pointer. 
+ */ +int +lx_pwrite64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + int fd = (int)p1; + void *buf = (void *)p2; + size_t nbyte = (size_t)p3; + uintptr_t off_lo = p4; + uintptr_t off_hi = p5; + ssize_t ret; + int rval; + struct stat64 statbuf; + + if ((rval = fcntl(fd, F_GETFL, 0)) < 0) + return (-errno); + + if (!(rval & O_APPEND)) { + ret = pwrite64(fd, buf, nbyte, + (off64_t)LX_32TO64(off_lo, off_hi)); + } else if ((ret = fstat64(fd, &statbuf)) == 0) { + ret = pwrite64(fd, buf, nbyte, statbuf.st_size); + } + + if (ret < 0) + return (-errno); + + return (ret); +} + +/* + * Implementation of Linux readv() and writev() system calls. + * + * The Linux system calls differ from the Solaris system calls in a few key + * areas: + * + * - On Solaris, the maximum number of I/O vectors that can be passed to readv() + * or writev() is IOV_MAX (16). Linux has a much larger restriction (1024). + * + * - Passing 0 as a vector count is an error on Solaris, but on Linux results + * in a return value of 0. Even though the man page says the opposite. + * + * - If the Nth vector results in an error, Solaris will return an error code + * for the entire operation. Linux only returns an error if there has been + * no data transferred yet. Otherwise, it returns the number of bytes + * transferred up until that point. + * + * In order to accomodate these differences, we implement these functions as a + * series of ordinary read() or write() calls. 
+ */ + +#define LX_IOV_MAX 1024 /* Also called MAX_IOVEC */ + +static int +lx_iovec_copy_and_check(const struct iovec *iovp, struct iovec *iov, int count) +{ + int i; + ssize_t cnt = 0; + + if (uucopy(iovp, (void *)iov, count * sizeof (struct iovec)) != 0) + return (-errno); + + for (i = 0; i < count; i++) { + cnt += iov[i].iov_len; + if (iov[i].iov_len < 0 || cnt < 0) + return (-EINVAL); + } + + return (0); +} + +int +lx_readv(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + const struct iovec *iovp = (const struct iovec *)p2; + int count = (int)p3; + struct iovec *iov; + ssize_t total = 0, ret; + int i; + + if (count == 0) + return (0); + + if (count < 0 || count > LX_IOV_MAX) + return (-EINVAL); + + if (lx_is_directory(fd)) + return (-EISDIR); + + iov = SAFE_ALLOCA(count * sizeof (struct iovec)); + if (iov == NULL) + return (-ENOMEM); + if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0) + return (ret); + + for (i = 0; i < count; i++) { + ret = read(fd, iov[i].iov_base, iov[i].iov_len); + + if (ret < 0) { + if (total > 0) + return (total); + return (-errno); + } + + total += ret; + } + + return (total); +} + +int +lx_writev(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + const struct iovec *iovp = (const struct iovec *)p2; + int count = (int)p3; + struct iovec *iov; + ssize_t total = 0, ret; + int i; + + if (count == 0) + return (0); + + if (count < 0 || count > LX_IOV_MAX) + return (-EINVAL); + + iov = SAFE_ALLOCA(count * sizeof (struct iovec)); + if (iov == NULL) + return (-ENOMEM); + if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0) + return (ret); + + for (i = 0; i < count; i++) { + ret = write(fd, iov[i].iov_base, iov[i].iov_len); + + if (ret < 0) { + if (total > 0) + return (total); + return (-errno); + } + + total += ret; + } + + return (total); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c new file mode 100644 index 
0000000000..41d69c9a09 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -0,0 +1,1237 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/types.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#include <sys/inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/fstyp.h> +#include <sys/fsid.h> +#include <sys/systm.h> +#include <sys/auxv.h> +#include <sys/frame.h> +#include <sys/brand.h> + +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <errno.h> +#include <syslog.h> +#include <signal.h> +#include <fcntl.h> +#include <synch.h> +#include <libelf.h> +#include <libgen.h> +#include <pthread.h> +#include <utime.h> +#include <dirent.h> +#include <ucontext.h> +#include <libintl.h> +#include <locale.h> + +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_brand.h> +#include <sys/lx_types.h> +#include <sys/lx_stat.h> +#include <sys/lx_statfs.h> +#include <sys/lx_ioctl.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thread.h> +#include <sys/lx_thunk_server.h> + +/* + * Map solaris errno to the linux equivalent. + */ +static int stol_errno[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 35, 47, 22, 38, 22, /* 49 */ + 52, 53, 54, 55, 56, 57, 58, 59, 22, 22, + 61, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 22, 22, 72, 22, 22, 74, 36, 75, + 76, 77, 78, 79, 80, 81, 82, 83, 84, 38, + 40, 85, 86, 39, 87, 88, 89, 90, 91, 92, /* 99 */ + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, + 103, 104, 105, 106, 107, 22, 22, 22, 22, 22, + 22, 22, 22, 108, 109, 110, 111, 112, 113, 114, /* 149 */ + 115, 116 +}; + +char lx_release[128]; + +/* + * Map a linux locale ending string to the solaris equivalent. 
+ */ +struct lx_locale_ending { + const char *linux_end; /* linux ending string */ + const char *solaris_end; /* to transform with this string */ + int le_size; /* linux ending string length */ + int se_size; /* solaris ending string length */ +}; + +#define l2s_locale(lname, sname) \ + {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1} + +#define MAXLOCALENAMELEN 30 +#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ +#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ +#endif + +/* + * This flag is part of the registration with the in-kernel brand module. It's + * used in lx_handler() to determine if we should go back into the kernel after + * a system call in case the kernel needs to perform some post-syscall work + * like tracing for example. + */ +int lx_traceflag; + +#define NOSYS_NULL 1 +#define NOSYS_NO_EQUIV 2 +#define NOSYS_KERNEL 3 +#define NOSYS_UNDOC 4 +#define NOSYS_OBSOLETE 5 + +/* + * SYS_PASSTHRU denotes a system call we can just call on behalf of the + * branded process without having to translate the arguments. + * + * The restriction on this is that the call in question MUST return -1 to + * denote an error. 
+ */ +#define SYS_PASSTHRU 5 + +static char *nosys_msgs[] = { + "Either not yet done, or we haven't come up with an excuse", + "No such Linux system call", + "No equivalent Solaris functionality", + "Reads/modifies Linux kernel state", + "Undocumented and/or rarely used system call", + "Unsupported, obsolete system call" +}; + +struct lx_sysent { + char *sy_name; + int (*sy_callc)(); + char sy_flags; + char sy_narg; +}; + +static struct lx_sysent sysents[LX_NSYSCALLS + 1]; + +static uintptr_t stack_bottom; + +int lx_install = 0; /* install mode enabled if non-zero */ +boolean_t lx_is_rpm = B_FALSE; +int lx_rpm_delay = 1; +int lx_strict = 0; /* "strict" mode enabled if non-zero */ +int lx_verbose = 0; /* verbose mode enabled if non-zero */ +int lx_debug_enabled = 0; /* debugging output enabled if non-zero */ + +pid_t zoneinit_pid; /* zone init PID */ + +thread_key_t lx_tsd_key; + +int +uucopy_unsafe(const void *src, void *dst, size_t n) +{ + bcopy(src, dst, n); + return (0); +} + +int +uucopystr_unsafe(const void *src, void *dst, size_t n) +{ + (void) strncpy((char *)src, dst, n); + return (0); +} + +static void +i_lx_msg(int fd, char *msg, va_list ap) +{ + int i; + char buf[LX_MSG_MAXLEN]; + + /* LINTED [possible expansion issues] */ + i = vsnprintf(buf, sizeof (buf), msg, ap); + buf[LX_MSG_MAXLEN - 1] = '\0'; + if (i == -1) + return; + + /* if debugging is enabled, send this message to debug output */ + if (lx_debug_enabled != 0) + lx_debug(buf); + + /* + * If we are trying to print to stderr, we also want to send the + * message to syslog. + */ + if (fd == 2) { + syslog(LOG_ERR, "%s", buf); + + /* + * We let the user choose whether or not to see these + * messages on the console. + */ + if (lx_verbose == 0) + return; + } + + /* we retry in case of EINTR */ + do { + i = write(fd, buf, strlen(buf)); + } while ((i == -1) && (errno == EINTR)); +} + +/*PRINTFLIKE1*/ +void +lx_err(char *msg, ...) 
+{ + va_list ap; + + assert(msg != NULL); + + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); +} + +/* + * This is just a non-zero exit value which also isn't one that would allow + * us to easily detect if a branded process exited because of a recursive + * fatal error. + */ +#define LX_ERR_FATAL 42 + +/* + * Our own custom version of abort(), this routine will be used in place + * of the one located in libc. The primary difference is that this version + * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the + * SIGABRT sent causes us to dump core and is not caught by a user program. + */ +void +abort(void) +{ + static int aborting = 0; + + struct sigaction sa; + sigset_t sigmask; + + /* watch out for recursive calls to this function */ + if (aborting != 0) + exit(LX_ERR_FATAL); + + aborting = 1; + + /* + * Block all signals here to avoid taking any signals while exiting + * in an effort to avoid any strange user interaction with our death. + */ + (void) sigfillset(&sigmask); + (void) sigprocmask(SIG_BLOCK, &sigmask, NULL); + + /* + * Our own version of abort(3C) that we know will never call + * a user-installed SIGABRT handler first. We WANT to die. + * + * Do this by resetting the handler to SIG_DFL, and releasing any + * held SIGABRTs. + * + * If no SIGABRTs are pending, send ourselves one. + * + * The while loop is a bit of overkill, but abort(3C) does it to + * assure it never returns so we will as well. + */ + (void) sigemptyset(&sa.sa_mask); + sa.sa_sigaction = SIG_DFL; + sa.sa_flags = 0; + + for (;;) { + (void) sigaction(SIGABRT, &sa, NULL); + (void) sigrelse(SIGABRT); + (void) thr_kill(thr_self(), SIGABRT); + } + + /*NOTREACHED*/ +} + +/*PRINTFLIKE1*/ +void +lx_msg(char *msg, ...) +{ + va_list ap; + + assert(msg != NULL); + va_start(ap, msg); + i_lx_msg(STDOUT_FILENO, msg, ap); + va_end(ap); +} + +/*PRINTFLIKE1*/ +void +lx_err_fatal(char *msg, ...) 
+{ + va_list ap; + + assert(msg != NULL); + + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); + abort(); +} + +/* + * See if it is safe to alloca() sz bytes. Return 1 for yes, 0 for no. + */ +int +lx_check_alloca(size_t sz) +{ + uintptr_t sp = (uintptr_t)&sz; + uintptr_t end = sp - sz; + + return ((end < sp) && (end >= stack_bottom)); +} + +/*PRINTFLIKE1*/ +void +lx_unsupported(char *msg, ...) +{ + va_list ap; + + assert(msg != NULL); + + /* send the msg to the error stream */ + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); + + /* + * If the user doesn't trust the application to responsibly + * handle ENOTSUP, we kill the application. + */ + if (lx_strict) + (void) kill(getpid(), SIGSYS); +} + +extern void lx_runexe(void *argv, int32_t entry); +int lx_init(int argc, char *argv[], char *envp[]); + +static int +lx_emulate_args(lx_regs_t *rp, struct lx_sysent *s, uintptr_t *args) +{ + /* + * If the system call takes 6 args, then libc has stashed them in + * memory at the address contained in %ebx. Except for some syscalls + * which store the 6th argument in %ebp. + */ + if (s->sy_narg == 6 && !(s->sy_flags & EBP_HAS_ARG6)) { + if (uucopy((void *)rp->lxr_ebx, args, + sizeof (args[0]) * 6) != 0) + return (-stol_errno[errno]); + } else { + args[0] = rp->lxr_ebx; + args[1] = rp->lxr_ecx; + args[2] = rp->lxr_edx; + args[3] = rp->lxr_esi; + args[4] = rp->lxr_edi; + args[5] = rp->lxr_ebp; + } + + return (0); +} + +void +lx_emulate(lx_regs_t *rp) +{ + struct lx_sysent *s; + uintptr_t args[6]; + uintptr_t gs = rp->lxr_gs & 0xffff; /* %gs is only 16 bits */ + int syscall_num, ret; + + syscall_num = rp->lxr_eax; + + /* + * lx_brand_int80_callback() ensures that the syscall_num is sane; + * Use it as is. 
+ */ + assert(syscall_num >= 0); + assert(syscall_num < (sizeof (sysents) / sizeof (sysents[0]))); + s = &sysents[syscall_num]; + + if ((ret = lx_emulate_args(rp, s, args)) != 0) + goto out; + + /* + * If the tracing flag is enabled we call into the brand-specific + * kernel module to handle the tracing activity (DTrace or ptrace). + * It would be tempting to perform DTrace activity in the brand + * module's syscall trap callback, rather than having to return + * to the kernel here, but -- since argument encoding can vary + * according to the specific system call -- that would require + * replicating the knowledge of argument decoding in the kernel + * module as well as here in the brand library. + */ + if (lx_traceflag != 0) { + /* + * Part of the ptrace "interface" is that on syscall entry + * %eax should be reported as -ENOSYS while the orig_eax + * field of the user structure needs to contain the actual + * system call number. If we end up stopping here, the + * controlling process will dig the lx_regs_t structure out of + * our stack. + */ + rp->lxr_orig_eax = syscall_num; + rp->lxr_eax = -stol_errno[ENOSYS]; + + (void) syscall(SYS_brand, B_SYSENTRY, syscall_num, args); + + /* + * The external tracer may have modified the arguments to this + * system call. Refresh the argument cache to account for this. 
+ */ + if ((ret = lx_emulate_args(rp, s, args)) != 0) + goto out; + } + + if (s->sy_callc == NULL) { + lx_unsupported(gettext("unimplemented syscall #%d (%s): %s\n"), + syscall_num, s->sy_name, nosys_msgs[(int)s->sy_flags]); + ret = -stol_errno[ENOTSUP]; + goto out; + } + + if (lx_debug_enabled != 0) { + const char *fmt = NULL; + + switch (s->sy_narg) { + case 0: + fmt = "calling %s()"; + break; + case 1: + fmt = "calling %s(0x%p)"; + break; + case 2: + fmt = "calling %s(0x%p, 0x%p)"; + break; + case 3: + fmt = "calling %s(0x%p, 0x%p, 0x%p)"; + break; + case 4: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p)"; + break; + case 5: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p)"; + break; + case 6: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)"; + break; + } + + lx_debug(fmt, s->sy_name, args[0], args[1], args[2], args[3], + args[4], args[5]); + } + + if (gs != LWPGS_SEL) { + lx_tsd_t *lx_tsd; + + /* + * While a %gs of 0 is technically legal (as long as the + * application never dereferences memory using %gs), Solaris + * has its own ideas as to how a zero %gs should be handled in + * _update_sregs(), such that any 32-bit user process with a + * %gs of zero running on a system with a 64-bit kernel will + * have its %gs hidden base register stomped on on return from + * a system call, leaving an incorrect base address in place + * until the next time %gs is actually reloaded (forcing a + * reload of the base address from the appropriate descriptor + * table.) + * + * Of course the kernel will once again stomp on THAT base + * address when returning from a system call, resulting in an + * an application segmentation fault. + * + * To avoid this situation, disallow a save of a zero %gs + * here in order to try and capture any Linux process that + * attempts to make a syscall with a zero %gs installed. 
+ */ + assert(gs != 0); + + if ((ret = thr_getspecific(lx_tsd_key, + (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "lx_emulate", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_gs = gs; + + lx_debug("lx_emulate(): gsp 0x%p, saved gs: 0x%x", lx_tsd, gs); + } + + if (s->sy_flags == SYS_PASSTHRU) + lx_debug("\tCalling Solaris %s()", s->sy_name); + + ret = s->sy_callc(args[0], args[1], args[2], args[3], args[4], args[5]); + + if (ret > -65536 && ret < 65536) + lx_debug("\t= %d", ret); + else + lx_debug("\t= 0x%x", ret); + + if ((s->sy_flags == SYS_PASSTHRU) && (ret == -1)) { + ret = -stol_errno[errno]; + } else { + /* + * If the return value is between -4096 and 0 we assume it's an + * error, so we translate the Solaris error number into the + * Linux equivalent. + */ + if (ret < 0 && ret > -4096) { + if (-ret >= + sizeof (stol_errno) / sizeof (stol_errno[0])) { + lx_debug("Invalid return value from emulated " + "syscall %d (%s): %d\n", + syscall_num, s->sy_name, ret); + assert(0); + } + + ret = -stol_errno[-ret]; + } + } + +out: + /* + * %eax holds the return code from the system call. + */ + rp->lxr_eax = ret; + + /* + * If the trace flag is set, bounce into the kernel to let it do + * any necessary tracing (DTrace or ptrace). 
+ */ + if (lx_traceflag != 0) { + rp->lxr_orig_eax = syscall_num; + (void) syscall(SYS_brand, B_SYSRETURN, syscall_num, ret); + } +} + +static void +lx_close_fh(FILE *file) +{ + int fd, fd_new; + + if (file == NULL) + return; + + if ((fd = fileno(file)) < 0) + return; + + fd_new = dup(fd); + if (fd_new == -1) + return; + + (void) fclose(file); + (void) dup2(fd_new, fd); + (void) close(fd_new); +} + + +extern int set_l10n_alternate_root(char *path); + +/*ARGSUSED*/ +int +lx_init(int argc, char *argv[], char *envp[]) +{ + char *r; + auxv_t *ap; + int *p, err; + lx_elf_data_t edp; + lx_brand_registration_t reg; + static lx_tsd_t lx_tsd; + + + /* Look up the PID that serves as init for this zone */ + if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0) + lx_err_fatal(gettext( + "Unable to find PID for zone init process: %s"), + strerror(err)); + + /* + * Ubuntu init will fail if its TERM environment variable is not set + * so if we are running init, and TERM is not set, we set term and + * reexec so that the new environment variable is propagated to the + * linux application stack. + */ + if ((getpid() == zoneinit_pid) && (getenv("TERM") == NULL)) { + if (setenv("TERM", "vt100", 1) < 0 || execv(argv[0], argv) < 0) + lx_err_fatal(gettext("failed to set TERM")); + } + + stack_bottom = 2 * sysconf(_SC_PAGESIZE); + + /* + * We need to shutdown all libc stdio. libc stdio normally goes to + * file descriptors, but since we're actually part of a linux + * process we don't own these file descriptors and we can't make + * any assumptions about their state. 
+ */ + lx_close_fh(stdin); + lx_close_fh(stdout); + lx_close_fh(stderr); + + lx_debug_init(); + + r = getenv("LX_RELEASE"); + if (r == NULL) { + if (lx_get_kern_version() == LX_KERN_2_6) + (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_6, + sizeof (lx_release)); + else + (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_4, + sizeof (lx_release)); + } else { + (void) strlcpy(lx_release, r, 128); + } + + lx_debug("lx_release: %s\n", lx_release); + + /* + * Should we kill an application that attempts an unimplemented + * system call? + */ + if (getenv("LX_STRICT") != NULL) { + lx_strict = 1; + lx_debug("STRICT mode enabled.\n"); + } + + /* + * Are we in install mode? + */ + if (getenv("LX_INSTALL") != NULL) { + lx_install = 1; + lx_debug("INSTALL mode enabled.\n"); + } + + /* + * Should we attempt to send messages to the screen? + */ + if (getenv("LX_VERBOSE") != NULL) { + lx_verbose = 1; + lx_debug("VERBOSE mode enabled.\n"); + } + + lx_debug("executing linux process: %s", argv[0]); + lx_debug("branding myself and setting handler to 0x%p", + (void *)lx_handler_table); + + /* + * The version of rpm that ships with CentOS/RHEL 3.x has a race + * condition in it. If it creates a child process to run a + * post-install script, and that child process completes too + * quickly, it will disappear before the parent notices. This + * causes the parent to hang forever waiting for the already dead + * child to die. I'm sure there's a Lazarus joke buried in here + * somewhere. + * + * Anyway, as a workaround, we make every child of an 'rpm' process + * sleep for 1 second, giving the parent a chance to enter its + * wait-for-the-child-to-die loop. Thay may be the hackiest trick + * in all of our Linux emulation code - and that's saying + * something. 
+ */ + if (strcmp("rpm", basename(argv[0])) == NULL) + lx_is_rpm = B_TRUE; + + reg.lxbr_version = LX_VERSION; + reg.lxbr_handler = (void *)&lx_handler_table; + reg.lxbr_tracehandler = (void *)&lx_handler_trace_table; + reg.lxbr_traceflag = &lx_traceflag; + + /* + * Register the address of the user-space handler with the lx + * brand module. + */ + if (syscall(SYS_brand, B_REGISTER, ®)) + lx_err_fatal(gettext("failed to brand the process")); + + /* + * Download data about the lx executable from the kernel. + */ + if (syscall(SYS_brand, B_ELFDATA, (void *)&edp)) + lx_err_fatal(gettext( + "failed to get required ELF data from the kernel")); + + if (lx_ioctl_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "ioctl"); + + if (lx_stat_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "stat"); + + if (lx_statfs_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "statfs"); + + /* + * Find the aux vector on the stack. + */ + p = (int *)envp; + while (*p != NULL) + p++; + /* + * p is now pointing at the 0 word after the environ pointers. After + * that is the aux vectors. + */ + p++; + for (ap = (auxv_t *)p; ap->a_type != 0; ap++) { + switch (ap->a_type) { + case AT_BASE: + ap->a_un.a_val = edp.ed_base; + break; + case AT_ENTRY: + ap->a_un.a_val = edp.ed_entry; + break; + case AT_PHDR: + ap->a_un.a_val = edp.ed_phdr; + break; + case AT_PHENT: + ap->a_un.a_val = edp.ed_phent; + break; + case AT_PHNUM: + ap->a_un.a_val = edp.ed_phnum; + break; + default: + break; + } + } + + /* Do any thunk server initalization. */ + lxt_server_init(argc, argv); + + /* Setup signal handler information. */ + if (lx_siginit()) + lx_err_fatal(gettext( + "failed to initialize lx signals for the branded process")); + + /* Setup thread-specific data area for managing linux threads. 
*/ + if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0) + lx_err_fatal( + gettext("%s failed: %s"), "thr_keycreate(lx_tsd_key)", + strerror(err)); + + lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key); + + /* Initialize the thread specific data for this thread. */ + bzero(&lx_tsd, sizeof (lx_tsd)); + lx_tsd.lxtsd_gs = LWPGS_SEL; + + if ((err = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0) + lx_err_fatal(gettext( + "Unable to initialize thread-specific data: %s"), + strerror(err)); + + /* + * Save the current context of this thread. + * We'll restore this context when this thread attempts to exit. + */ + if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) + lx_err_fatal(gettext( + "Unable to initialize thread-specific exit context: %s"), + strerror(errno)); + + if (lx_tsd.lxtsd_exit == 0) { + lx_runexe(argv, edp.ed_ldentry); + /* lx_runexe() never returns. */ + assert(0); + } + + /* + * We are here because the Linux application called the exit() or + * exit_group() system call. In turn the brand library did a + * setcontext() to jump to the thread context state we saved above. + */ + if (lx_tsd.lxtsd_exit == 1) + thr_exit((void *)lx_tsd.lxtsd_exit_status); + else + exit(lx_tsd.lxtsd_exit_status); + + assert(0); + + /*NOTREACHED*/ + return (0); +} + +/* + * Walk back through the stack until we find the lx_emulate() frame. 
+ */ +lx_regs_t * +lx_syscall_regs(void) +{ + /* LINTED - alignment */ + struct frame *fr = (struct frame *)_getfp(); + + while (fr->fr_savpc != (uintptr_t)&lx_emulate_done) { + fr = (struct frame *)fr->fr_savfp; + assert(fr->fr_savpc != NULL); + } + + return ((lx_regs_t *)((uintptr_t *)fr)[2]); +} + +int +lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp) +{ + pid_t pid; + lwpid_t tid; + + if (lpid == 0) { + pid = getpid(); + tid = thr_self(); + } else { + if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &pid, &tid) < 0) + return (-errno); + + /* + * If the returned pid is -1, that indicates we tried to + * look up the PID for init, but that process no longer + * exists. + */ + if (pid == -1) + return (-ESRCH); + } + + if (uucopy(&pid, spid, sizeof (pid_t)) != 0) + return (-errno); + + if (uucopy(&tid, slwp, sizeof (lwpid_t)) != 0) + return (-errno); + + return (0); +} + +int +lx_lpid_to_spid(pid_t lpid, pid_t *spid) +{ + lwpid_t slwp; + + return (lx_lpid_to_spair(lpid, spid, &slwp)); +} + +char * +lx_fd_to_path(int fd, char *buf, int buf_size) +{ + char path_proc[MAXPATHLEN]; + pid_t pid; + int n; + + assert((buf != NULL) && (buf_size >= 0)); + + if (fd < 0) + return (NULL); + + if ((pid = getpid()) == -1) + return (NULL); + + (void) snprintf(path_proc, MAXPATHLEN, + "/native/proc/%d/path/%d", pid, fd); + + if ((n = readlink(path_proc, buf, buf_size - 1)) == -1) + return (NULL); + buf[n] = '\0'; + + return (buf); +} + +/* + * Create a translation routine that jumps to a particular emulation + * module syscall. + */ +#define IN_KERNEL_SYSCALL(name, num) \ +int \ +lx_##name(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, \ + uintptr_t p5, uintptr_t p6) \ +{ \ + int r; \ + lx_debug("\tsyscall %d re-vectoring to lx kernel module " \ + "for " #name "()", num); \ + r = syscall(SYS_brand, B_EMULATE_SYSCALL + num, p1, p2, \ + p3, p4, p5, p6); \ + return ((r == -1) ? 
-errno : r); \ +} + +IN_KERNEL_SYSCALL(kill, 37) +IN_KERNEL_SYSCALL(brk, 45) +IN_KERNEL_SYSCALL(ustat, 62) +IN_KERNEL_SYSCALL(getppid, 64) +IN_KERNEL_SYSCALL(sysinfo, 116) +IN_KERNEL_SYSCALL(modify_ldt, 123) +IN_KERNEL_SYSCALL(adjtimex, 124) +IN_KERNEL_SYSCALL(setresuid16, 164) +IN_KERNEL_SYSCALL(setresgid16, 170) +IN_KERNEL_SYSCALL(setresuid, 208) +IN_KERNEL_SYSCALL(setresgid, 210) +IN_KERNEL_SYSCALL(gettid, 224) +IN_KERNEL_SYSCALL(tkill, 238) +IN_KERNEL_SYSCALL(futex, 240) +IN_KERNEL_SYSCALL(set_thread_area, 243) +IN_KERNEL_SYSCALL(get_thread_area, 244) +IN_KERNEL_SYSCALL(set_tid_address, 258) + +static struct lx_sysent sysents[] = { + {"nosys", NULL, NOSYS_NULL, 0}, /* 0 */ + {"exit", lx_exit, 0, 1}, /* 1 */ + {"fork", lx_fork, 0, 0}, /* 2 */ + {"read", lx_read, 0, 3}, /* 3 */ + {"write", write, SYS_PASSTHRU, 3}, /* 4 */ + {"open", lx_open, 0, 3}, /* 5 */ + {"close", close, SYS_PASSTHRU, 1}, /* 6 */ + {"waitpid", lx_waitpid, 0, 3}, /* 7 */ + {"creat", creat, SYS_PASSTHRU, 2}, /* 8 */ + {"link", lx_link, 0, 2}, /* 9 */ + {"unlink", lx_unlink, 0, 1}, /* 10 */ + {"execve", lx_execve, 0, 3}, /* 11 */ + {"chdir", chdir, SYS_PASSTHRU, 1}, /* 12 */ + {"time", lx_time, 0, 1}, /* 13 */ + {"mknod", lx_mknod, 0, 3}, /* 14 */ + {"chmod", lx_chmod, 0, 2}, /* 15 */ + {"lchown16", lx_lchown16, 0, 3}, /* 16 */ + {"break", NULL, NOSYS_OBSOLETE, 0}, /* 17 */ + {"stat", NULL, NOSYS_OBSOLETE, 0}, /* 18 */ + {"lseek", lx_lseek, 0, 3}, /* 19 */ + {"getpid", lx_getpid, 0, 0}, /* 20 */ + {"mount", lx_mount, 0, 5}, /* 21 */ + {"umount", lx_umount, 0, 1}, /* 22 */ + {"setuid16", lx_setuid16, 0, 1}, /* 23 */ + {"getuid16", lx_getuid16, 0, 0}, /* 24 */ + {"stime", stime, SYS_PASSTHRU, 1}, /* 25 */ + {"ptrace", lx_ptrace, 0, 4}, /* 26 */ + {"alarm", (int (*)())alarm, SYS_PASSTHRU, 1}, /* 27 */ + {"fstat", NULL, NOSYS_OBSOLETE, 0}, /* 28 */ + {"pause", pause, SYS_PASSTHRU, 0}, /* 29 */ + {"utime", lx_utime, 0, 2}, /* 30 */ + {"stty", NULL, NOSYS_OBSOLETE, 0}, /* 31 */ + {"gtty", NULL, 
NOSYS_OBSOLETE, 0}, /* 32 */ + {"access", access, SYS_PASSTHRU, 2}, /* 33 */ + {"nice", nice, SYS_PASSTHRU, 1}, /* 34 */ + {"ftime", NULL, NOSYS_OBSOLETE, 0}, /* 35 */ + {"sync", lx_sync, 0, 0}, /* 36 */ + {"kill", lx_kill, 0, 2}, /* 37 */ + {"rename", lx_rename, 0, 2}, /* 38 */ + {"mkdir", mkdir, SYS_PASSTHRU, 2}, /* 39 */ + {"rmdir", lx_rmdir, 0, 1}, /* 40 */ + {"dup", dup, SYS_PASSTHRU, 1}, /* 41 */ + {"pipe", lx_pipe, 0, 1}, /* 42 */ + {"times", lx_times, 0, 1}, /* 43 */ + {"prof", NULL, NOSYS_OBSOLETE, 0}, /* 44 */ + {"brk", lx_brk, 0, 1}, /* 45 */ + {"setgid16", lx_setgid16, 0, 1}, /* 46 */ + {"getgid16", lx_getgid16, 0, 0}, /* 47 */ + {"signal", lx_signal, 0, 2}, /* 48 */ + {"geteuid16", lx_geteuid16, 0, 0}, /* 49 */ + {"getegid16", lx_getegid16, 0, 0}, /* 50 */ + {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 51 */ + {"umount2", lx_umount2, 0, 2}, /* 52 */ + {"lock", NULL, NOSYS_OBSOLETE, 0}, /* 53 */ + {"ioctl", lx_ioctl, 0, 3}, /* 54 */ + {"fcntl", lx_fcntl, 0, 3}, /* 55 */ + {"mpx", NULL, NOSYS_OBSOLETE, 0}, /* 56 */ + {"setpgid", lx_setpgid, 0, 2}, /* 57 */ + {"ulimit", NULL, NOSYS_OBSOLETE, 0}, /* 58 */ + {"olduname", NULL, NOSYS_OBSOLETE, 0}, /* 59 */ + {"umask", (int (*)())umask, SYS_PASSTHRU, 1}, /* 60 */ + {"chroot", chroot, SYS_PASSTHRU, 1}, /* 61 */ + {"ustat", lx_ustat, 0, 2}, /* 62 */ + {"dup2", lx_dup2, 0, 2}, /* 63 */ + {"getppid", lx_getppid, 0, 0}, /* 64 */ + {"getpgrp", lx_getpgrp, 0, 0}, /* 65 */ + {"setsid", lx_setsid, 0, 0}, /* 66 */ + {"sigaction", lx_sigaction, 0, 3}, /* 67 */ + {"sgetmask", NULL, NOSYS_OBSOLETE, 0}, /* 68 */ + {"ssetmask", NULL, NOSYS_OBSOLETE, 0}, /* 69 */ + {"setreuid16", lx_setreuid16, 0, 2}, /* 70 */ + {"setregid16", lx_setregid16, 0, 2}, /* 71 */ + {"sigsuspend", lx_sigsuspend, 0, 1}, /* 72 */ + {"sigpending", lx_sigpending, 0, 1}, /* 73 */ + {"sethostname", lx_sethostname, 0, 2}, /* 74 */ + {"setrlimit", lx_setrlimit, 0, 2}, /* 75 */ + {"getrlimit", lx_oldgetrlimit, 0, 2}, /* 76 */ + {"getrusage", lx_getrusage, 0, 2}, 
/* 77 */ + {"gettimeofday", lx_gettimeofday, 0, 2}, /* 78 */ + {"settimeofday", lx_settimeofday, 0, 2}, /* 79 */ + {"getgroups16", lx_getgroups16, 0, 2}, /* 80 */ + {"setgroups16", lx_setgroups16, 0, 2}, /* 81 */ + {"select", NULL, NOSYS_OBSOLETE, 0}, /* 82 */ + {"symlink", symlink, SYS_PASSTHRU, 2}, /* 83 */ + {"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */ + {"readlink", readlink, SYS_PASSTHRU, 3}, /* 85 */ + {"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */ + {"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */ + {"reboot", lx_reboot, 0, 4}, /* 88 */ + {"readdir", lx_readdir, 0, 3}, /* 89 */ + {"mmap", lx_mmap, 0, 6}, /* 90 */ + {"munmap", munmap, SYS_PASSTHRU, 2}, /* 91 */ + {"truncate", lx_truncate, 0, 2}, /* 92 */ + {"ftruncate", lx_ftruncate, 0, 2}, /* 93 */ + {"fchmod", fchmod, SYS_PASSTHRU, 2}, /* 94 */ + {"fchown16", lx_fchown16, 0, 3}, /* 95 */ + {"getpriority", lx_getpriority, 0, 2}, /* 96 */ + {"setpriority", lx_setpriority, 0, 3}, /* 97 */ + {"profil", NULL, NOSYS_NO_EQUIV, 0}, /* 98 */ + {"statfs", lx_statfs, 0, 2}, /* 99 */ + {"fstatfs", lx_fstatfs, 0, 2}, /* 100 */ + {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 101 */ + {"socketcall", lx_socketcall, 0, 2}, /* 102 */ + {"syslog", NULL, NOSYS_KERNEL, 0}, /* 103 */ + {"setitimer", lx_setitimer, 0, 3}, /* 104 */ + {"getitimer", getitimer, SYS_PASSTHRU, 2}, /* 105 */ + {"stat", lx_stat, 0, 2}, /* 106 */ + {"lstat", lx_lstat, 0, 2}, /* 107 */ + {"fstat", lx_fstat, 0, 2}, /* 108 */ + {"uname", NULL, NOSYS_OBSOLETE, 0}, /* 109 */ + {"oldiopl", NULL, NOSYS_NO_EQUIV, 0}, /* 110 */ + {"vhangup", lx_vhangup, 0, 0}, /* 111 */ + {"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */ + {"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */ + {"wait4", lx_wait4, 0, 4}, /* 114 */ + {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */ + {"sysinfo", lx_sysinfo, 0, 1}, /* 116 */ + {"ipc", lx_ipc, 0, 5}, /* 117 */ + {"fsync", lx_fsync, 0, 1}, /* 118 */ + {"sigreturn", lx_sigreturn, 0, 1}, /* 119 */ + {"clone", lx_clone, 0, 5}, /* 120 */ + {"setdomainname", 
lx_setdomainname, 0, 2}, /* 121 */ + {"uname", lx_uname, 0, 1}, /* 122 */ + {"modify_ldt", lx_modify_ldt, 0, 3}, /* 123 */ + {"adjtimex", lx_adjtimex, 0, 1}, /* 124 */ + {"mprotect", lx_mprotect, 0, 3}, /* 125 */ + {"sigprocmask", lx_sigprocmask, 0, 3}, /* 126 */ + {"create_module", NULL, NOSYS_KERNEL, 0}, /* 127 */ + {"init_module", NULL, NOSYS_KERNEL, 0}, /* 128 */ + {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 129 */ + {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 130 */ + {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 131 */ + {"getpgid", lx_getpgid, 0, 1}, /* 132 */ + {"fchdir", fchdir, SYS_PASSTHRU, 1}, /* 133 */ + {"bdflush", NULL, NOSYS_KERNEL, 0}, /* 134 */ + {"sysfs", lx_sysfs, 0, 3}, /* 135 */ + {"personality", lx_personality, 0, 1}, /* 136 */ + {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 137 */ + {"setfsuid16", lx_setfsuid16, 0, 1}, /* 138 */ + {"setfsgid16", lx_setfsgid16, 0, 1}, /* 139 */ + {"llseek", lx_llseek, 0, 5}, /* 140 */ + {"getdents", getdents, SYS_PASSTHRU, 3}, /* 141 */ + {"select", lx_select, 0, 5}, /* 142 */ + {"flock", lx_flock, 0, 2}, /* 143 */ + {"msync", lx_msync, 0, 3}, /* 144 */ + {"readv", lx_readv, 0, 3}, /* 145 */ + {"writev", lx_writev, 0, 3}, /* 146 */ + {"getsid", lx_getsid, 0, 1}, /* 147 */ + {"fdatasync", lx_fdatasync, 0, 1}, /* 148 */ + {"sysctl", lx_sysctl, 0, 1}, /* 149 */ + {"mlock", lx_mlock, 0, 2}, /* 150 */ + {"munlock", lx_munlock, 0, 2}, /* 151 */ + {"mlockall", lx_mlockall, 0, 1}, /* 152 */ + {"munlockall", lx_munlockall, 0, 0}, /* 153 */ + {"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */ + {"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */ + {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */ + {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */ + {"sched_yield", (int (*)())yield, SYS_PASSTHRU, 0}, /* 158 */ + {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */ + {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */ + {"sched_rr_get_interval", 
lx_sched_rr_get_interval, 0, 2}, /* 161 */ + {"nanosleep", nanosleep, SYS_PASSTHRU, 2}, /* 162 */ + {"mremap", NULL, NOSYS_NO_EQUIV, 0}, /* 163 */ + {"setresuid16", lx_setresuid16, 0, 3}, /* 164 */ + {"getresuid16", lx_getresuid16, 0, 3}, /* 165 */ + {"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */ + {"query_module", lx_query_module, NOSYS_KERNEL, 5}, /* 167 */ + {"poll", lx_poll, 0, 3}, /* 168 */ + {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */ + {"setresgid16", lx_setresgid16, 0, 3}, /* 170 */ + {"getresgid16", lx_getresgid16, 0, 3}, /* 171 */ + {"prctl", NULL, NOSYS_UNDOC, 0}, /* 172 */ + {"rt_sigreturn", lx_rt_sigreturn, 0, 0}, /* 173 */ + {"rt_sigaction", lx_rt_sigaction, 0, 4}, /* 174 */ + {"rt_sigprocmask", lx_rt_sigprocmask, 0, 4}, /* 175 */ + {"rt_sigpending", lx_rt_sigpending, 0, 2}, /* 176 */ + {"rt_sigtimedwait", lx_rt_sigtimedwait, 0, 4}, /* 177 */ + {"sigqueueinfo", NULL, NOSYS_UNDOC, 0}, /* 178 */ + {"rt_sigsuspend", lx_rt_sigsuspend, 0, 2}, /* 179 */ + {"pread64", lx_pread64, 0, 5}, /* 180 */ + {"pwrite64", lx_pwrite64, 0, 5}, /* 181 */ + {"chown16", lx_chown16, 0, 3}, /* 182 */ + {"getcwd", lx_getcwd, 0, 2}, /* 183 */ + {"capget", NULL, NOSYS_NO_EQUIV, 0}, /* 184 */ + {"capset", NULL, NOSYS_NO_EQUIV, 0}, /* 185 */ + {"sigaltstack", lx_sigaltstack, 0, 2}, /* 186 */ + {"sendfile", lx_sendfile, 0, 4}, /* 187 */ + {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 188 */ + {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 189 */ + {"vfork", lx_vfork, 0, 0}, /* 190 */ + {"getrlimit", lx_getrlimit, 0, 2}, /* 191 */ + {"mmap2", lx_mmap2, EBP_HAS_ARG6, 6}, /* 192 */ + {"truncate64", lx_truncate64, 0, 3}, /* 193 */ + {"ftruncate64", lx_ftruncate64, 0, 3}, /* 194 */ + {"stat64", lx_stat64, 0, 2}, /* 195 */ + {"lstat64", lx_lstat64, 0, 2}, /* 196 */ + {"fstat64", lx_fstat64, 0, 2}, /* 197 */ + {"lchown", lchown, SYS_PASSTHRU, 3}, /* 198 */ + {"getuid", (int (*)())getuid, SYS_PASSTHRU, 0}, /* 199 */ + {"getgid", (int (*)())getgid, SYS_PASSTHRU, 0}, /* 200 */ + {"geteuid", 
lx_geteuid, 0, 0}, /* 201 */ + {"getegid", lx_getegid, 0, 0}, /* 202 */ + {"setreuid", setreuid, SYS_PASSTHRU, 0}, /* 203 */ + {"setregid", setregid, SYS_PASSTHRU, 0}, /* 204 */ + {"getgroups", getgroups, SYS_PASSTHRU, 2}, /* 205 */ + {"setgroups", lx_setgroups, 0, 2}, /* 206 */ + {"fchown", lx_fchown, 0, 3}, /* 207 */ + {"setresuid", lx_setresuid, 0, 3}, /* 208 */ + {"getresuid", lx_getresuid, 0, 3}, /* 209 */ + {"setresgid", lx_setresgid, 0, 3}, /* 210 */ + {"getresgid", lx_getresgid, 0, 3}, /* 211 */ + {"chown", lx_chown, 0, 3}, /* 212 */ + {"setuid", setuid, SYS_PASSTHRU, 1}, /* 213 */ + {"setgid", setgid, SYS_PASSTHRU, 1}, /* 214 */ + {"setfsuid", lx_setfsuid, 0, 1}, /* 215 */ + {"setfsgid", lx_setfsgid, 0, 1}, /* 216 */ + {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 217 */ + {"mincore", mincore, SYS_PASSTHRU, 3}, /* 218 */ + {"madvise", lx_madvise, 0, 3}, /* 219 */ + {"getdents64", lx_getdents64, 0, 3}, /* 220 */ + {"fcntl64", lx_fcntl64, 0, 3}, /* 221 */ + {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 222 */ + {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 223 */ + {"gettid", lx_gettid, 0, 0}, /* 224 */ + {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 225 */ + {"setxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 226 */ + {"lsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 227 */ + {"fsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 228 */ + {"getxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 229 */ + {"lgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 230 */ + {"fgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 231 */ + {"listxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 232 */ + {"llistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 233 */ + {"flistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 234 */ + {"removexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 235 */ + {"lremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 236 */ + {"fremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 237 */ + {"tkill", lx_tkill, 0, 2}, /* 238 */ + {"sendfile64", lx_sendfile64, 0, 4}, /* 239 */ + {"futex", lx_futex, EBP_HAS_ARG6, 6}, /* 240 */ + {"sched_setaffinity", lx_sched_setaffinity, 0, 3}, /* 241 */ + 
{"sched_getaffinity", lx_sched_getaffinity, 0, 3}, /* 242 */ + {"set_thread_area", lx_set_thread_area, 0, 1}, /* 243 */ + {"get_thread_area", lx_get_thread_area, 0, 1}, /* 244 */ + {"io_setup", NULL, NOSYS_NO_EQUIV, 0}, /* 245 */ + {"io_destroy", NULL, NOSYS_NO_EQUIV, 0}, /* 246 */ + {"io_getevents", NULL, NOSYS_NO_EQUIV, 0}, /* 247 */ + {"io_submit", NULL, NOSYS_NO_EQUIV, 0}, /* 248 */ + {"io_cancel", NULL, NOSYS_NO_EQUIV, 0}, /* 249 */ + {"fadvise64", NULL, NOSYS_UNDOC, 0}, /* 250 */ + {"nosys", NULL, 0, 0}, /* 251 */ + {"group_exit", lx_group_exit, 0, 1}, /* 252 */ + {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 253 */ + {"epoll_create", NULL, NOSYS_NO_EQUIV, 0}, /* 254 */ + {"epoll_ctl", NULL, NOSYS_NO_EQUIV, 0}, /* 255 */ + {"epoll_wait", NULL, NOSYS_NO_EQUIV, 0}, /* 256 */ + {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 257 */ + {"set_tid_address", lx_set_tid_address, 0, 1}, /* 258 */ + {"timer_create", NULL, NOSYS_UNDOC, 0}, /* 259 */ + {"timer_settime", NULL, NOSYS_UNDOC, 0}, /* 260 */ + {"timer_gettime", NULL, NOSYS_UNDOC, 0}, /* 261 */ + {"timer_getoverrun", NULL, NOSYS_UNDOC, 0}, /* 262 */ + {"timer_delete", NULL, NOSYS_UNDOC, 0}, /* 263 */ + {"clock_settime", lx_clock_settime, 0, 2}, /* 264 */ + {"clock_gettime", lx_clock_gettime, 0, 2}, /* 265 */ + {"clock_getres", lx_clock_getres, 0, 2}, /* 266 */ + {"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */ + {"statfs64", lx_statfs64, 0, 2}, /* 268 */ + {"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */ + {"tgkill", lx_tgkill, 0, 3}, /* 270 */ + + /* The following system calls only exist in kernel 2.6 and greater */ + {"utimes", utimes, SYS_PASSTHRU, 2}, /* 271 */ + {"fadvise64_64", NULL, NOSYS_NULL, 0}, /* 272 */ + {"vserver", NULL, NOSYS_NULL, 0}, /* 273 */ + {"mbind", NULL, NOSYS_NULL, 0}, /* 274 */ + {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */ + {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */ + {"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */ + {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */ + 
{"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */ + {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */ + {"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */ + {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */ + {"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */ + {"waitid", lx_waitid, 0, 4}, /* 284 */ + {"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */ + {"add_key", NULL, NOSYS_NULL, 0}, /* 286 */ + {"request_key", NULL, NOSYS_NULL, 0}, /* 287 */ + {"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */ + {"ioprio_set", NULL, NOSYS_NULL, 0}, /* 289 */ + {"ioprio_get", NULL, NOSYS_NULL, 0}, /* 290 */ + {"inotify_init", NULL, NOSYS_NULL, 0}, /* 291 */ + {"inotify_add_watch", NULL, NOSYS_NULL, 0}, /* 292 */ + {"inotify_rm_watch", NULL, NOSYS_NULL, 0}, /* 293 */ + {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */ + {"openat", lx_openat, 0, 4}, /* 295 */ + {"mkdirat", lx_mkdirat, 0, 3}, /* 296 */ + {"mknodat", lx_mknodat, 0, 4}, /* 297 */ + {"fchownat", lx_fchownat, 0, 5}, /* 298 */ + {"futimesat", lx_futimesat, 0, 3}, /* 299 */ + {"fstatat64", lx_fstatat64, 0, 4}, /* 300 */ + {"unlinkat", lx_unlinkat, 0, 3}, /* 301 */ + {"renameat", lx_renameat, 0, 4}, /* 302 */ + {"linkat", lx_linkat, 0, 5}, /* 303 */ + {"symlinkat", lx_symlinkat, 0, 3}, /* 304 */ + {"readlinkat", lx_readlinkat, 0, 4}, /* 305 */ + {"fchmodat", lx_fchmodat, 0, 4}, /* 306 */ + {"faccessat", lx_faccessat, 0, 4}, /* 307 */ + {"pselect6", NULL, NOSYS_NULL, 0}, /* 308 */ + {"ppoll", NULL, NOSYS_NULL, 0}, /* 309 */ + {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */ + {"set_robust_list", NULL, NOSYS_NULL, 0}, /* 311 */ + {"get_robust_list", NULL, NOSYS_NULL, 0}, /* 312 */ + {"splice", NULL, NOSYS_NULL, 0}, /* 313 */ + {"sync_file_range", NULL, NOSYS_NULL, 0}, /* 314 */ + {"tee", NULL, NOSYS_NULL, 0}, /* 315 */ + {"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */ + {"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */ +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c new 
file mode 100644 index 0000000000..e547762378 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c @@ -0,0 +1,1026 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The BrandZ Linux thunking server. + * + * The interfaces defined in this file form the server side of a bridge + * to allow native solaris processes to access Linux services. Currently + * the Linux services that are made accessible by these interfaces + * are: + * - Linux host <-> address naming services + * - Linux service <-> port naming services + * - Linux syslog + * + * Access to all these services is provided through a doors server. + * Currently the only client of these interfaces and the process that + * initially starts up the doors server is lx_thunk.so. + * + * lx_thunk.so is a native solaris library that is loaded into native + * solaris processes that need to run inside a Linux zone and have access + * to Linux services. 
When lx_thunk.so receives a request that requires + * accessing Linux services it creates a "thunk server" process by + * forking and executing the following shell script (which runs as + * a native /bin/sh Linux process): + * /native/usr/lib/brand/lx/lx_thunk + * + * The first and only thing this shell script attempts to do is re-exec + * itself. The brand library will detect when this script attempts to + * re-exec itself and take control of the process. The exec() system + * call made by the Linux shell will never return. + * + * At this point the process becomes a "thunk server" process. + * The first thing it does is a bunch of initialization: + * + * - Sanity check that a file descriptor based communication mechanism + * needed to talk to the parent process is correctly initialized. + * + * - Verify that two predetermined file descriptors are FIFOs. + * These FIFOs will be used to establish communications with + * the client program that spawned us and which will be sending + * us requests. + * + * - Use existing debugging libraries (libproc.so, librtld_db.so, + * and the BrandZ lx plug-in to librtld_db.so) and /native/proc to + * walk the Linux link maps in our own address space to determine + * the address of the Linux dlsym() function. + * + * - Use the native Linux dlsym() function to look up other symbols + * (for both functions and variables) that we will need access + * to service thunking requests. + * + * - Create a doors server and notify the parent process that we + * are ready to service requests. + * + * - Enter a service loop and wait for requests. + * + * At this point the lx_thunk process is ready to service door + * based requests. When a door service request is received the + * following happens inside the lx_thunk process: + * + * - The doors server function is invoked on a new solaris thread + * that the kernel injects into the lx_thunk process. 
We sanity + * check the incoming request, place it on a service queue, and + * wait for notification that the request has been completed. + * + * - A Linux thread takes this request off the service queue + * and dispatches it to a service function that will: + * - Decode the request. + * - Handle the request by invoking native Linux interfaces. + * - Encode the results for the request. + * + * - The Linux thread then notifies the requesting doors server + * thread that the request has been completed and goes to sleep + * until it receives another request. + * + * - The solaris door server thread returns the results of the + * operation to the caller. + * + * Notes: + * + * - The service request hand off operation from the solaris doors thread to + * the "Linux thread" is required because only "Linux threads" can call + * into Linux code. In this context a "Linux thread" is a thread that + * is either the initial thread of a Linux process or a thread that was + * created by calling the Linux version of thread_create(). The reason + * for this restriction is that any thread that invokes Linux code needs + * to have been initialized in the Linux threading libraries and have + * things like Linux thread local storage properly set up. + * + * But under solaris all door server threads are created and destroyed + * dynamically. This means that when a doors server function is invoked, + * it is invoked via a thread that hasn't been initialized in the Linux + * environment and therefore can't call directly into Linux code. + * + * - Currently when a thunk server process is starting up, it communicates + * with its parent via two FIFOs. These FIFOs are set up by the + * lx_thunk.so library. After creating the FIFOs and starting the lx_thunk + * server, lx_thunk.so writes the name of the file that the door should + * be attached to to the first pipe. 
The lx_thunk server reads in this + * value, initializes the server, fattach()s it to the file requested by + * lx_thunk.so and does a write to the second FIFO to let lx_thunk.so + * know that the server is ready to take requests. + * + * This negotiation could be simplified to use only one FIFO. + * lx_thunk.so would attempt to read from the FIFO and the lx_thunk + * server process could send the new door server file descriptor + * to this process via an I_SENDFD ioctl (see streamio.7I). + * + * - The lx_thunk server process will exit when the client process + * that it's handling requests for exits. (i.e., when there are no + * more open file handles to the doors server.) + */ + +#include <assert.h> +#include <door.h> +#include <errno.h> +#include <libproc.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_thread.h> +#include <sys/lx_thunk_server.h> +#include <sys/varargs.h> +#include <thread.h> +#include <unistd.h> + +/* + * Generic interfaces used for looking up and calling Linux functions. 
+ */ +typedef struct __lx_handle_dlsym *lx_handle_dlsym_t; +typedef struct __lx_handle_sym *lx_handle_sym_t; + +uintptr_t lx_call0(lx_handle_sym_t); +uintptr_t lx_call1(lx_handle_sym_t, uintptr_t); +uintptr_t lx_call2(lx_handle_sym_t, uintptr_t, uintptr_t); +uintptr_t lx_call3(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call4(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +uintptr_t lx_call5(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t); +uintptr_t lx_call6(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call7(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call8(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +/* + * Flag indicating if this process is destined to become a thunking + * server process. + */ +static int lxt_server_processes = 0; + +/* + * Linux function call defines and handles. + */ +static lx_handle_dlsym_t lxh_init = NULL; + +#define LXTH_GETHOSTBYNAME_R 0 +#define LXTH_GETHOSTBYADDR_R 1 +#define LXTH_GETSERVBYNAME_R 2 +#define LXTH_GETSERVBYPORT_R 3 +#define LXTH_OPENLOG 4 +#define LXTH_SYSLOG 5 +#define LXTH_CLOSELOG 6 +#define LXTH_PROGNAME 7 + +static struct lxt_handles { + int lxth_index; + char *lxth_name; + lx_handle_sym_t lxth_handle; +} lxt_handles[] = { + { LXTH_GETHOSTBYNAME_R, "gethostbyname_r", NULL }, + { LXTH_GETHOSTBYADDR_R, "gethostbyaddr_r", NULL }, + { LXTH_GETSERVBYNAME_R, "getservbyname_r", NULL }, + { LXTH_GETSERVBYPORT_R, "getservbyport_r", NULL }, + { LXTH_OPENLOG, "openlog", NULL }, + { LXTH_SYSLOG, "syslog", NULL }, + { LXTH_CLOSELOG, "closelog", NULL }, + { LXTH_PROGNAME, "__progname", NULL }, + { -1, NULL, NULL }, +}; + +/* + * Door server operations dispatch functions and table. 
+ * + * When the doors server get's a request for a particlar operation + * this dispatch table controls what function will be invoked to + * service the request. The function is invoked via Linux thread + * so that it can call into native Linux code if necessary. + */ +static void lxt_server_gethost(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_getserv(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_openlog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_syslog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_closelog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); + +typedef void (*lxt_op_func_t)(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); + +static struct lxt_operations { + int lxto_index; + lxt_op_func_t lxto_fp; +} lxt_operations[] = { + { LXT_SERVER_OP_PING, NULL }, + { LXT_SERVER_OP_NAME2HOST, lxt_server_gethost }, + { LXT_SERVER_OP_ADDR2HOST, lxt_server_gethost }, + { LXT_SERVER_OP_NAME2SERV, lxt_server_getserv }, + { LXT_SERVER_OP_PORT2SERV, lxt_server_getserv }, + { LXT_SERVER_OP_OPENLOG, lxt_server_openlog }, + { LXT_SERVER_OP_SYSLOG, lxt_server_syslog }, + { LXT_SERVER_OP_CLOSELOG, lxt_server_closelog }, +}; + +/* + * Structures for passing off requests from doors threads (which are + * solaris threads) to a Linux thread that that can handle them. 
+ */ +typedef struct lxt_req { + lxt_server_arg_t *lxtr_request; + size_t lxtr_request_size; + char *lxtr_result; + size_t lxtr_result_size; + int lxtr_complete; + cond_t lxtr_complete_cv; +} lxt_req_t; + +static mutex_t lxt_req_lock = DEFAULTMUTEX; +static cond_t lxt_req_cv = DEFAULTCV; +static lxt_req_t *lxt_req_ptr = NULL; + +static mutex_t lxt_pid_lock = DEFAULTMUTEX; +static pid_t lxt_pid = NULL; + +/* + * Interfaces used to call from lx_brand.so into Linux code. + */ +typedef struct lookup_cb_arg { + struct ps_prochandle *lca_ph; + caddr_t lca_ptr; +} lookup_cb_arg_t; + +static int +/*ARGSUSED*/ +lookup_cb(void *data, const prmap_t *pmp, const char *object) +{ + lookup_cb_arg_t *lcap = (lookup_cb_arg_t *)data; + prsyminfo_t si; + GElf_Sym sym; + + if (Pxlookup_by_name(lcap->lca_ph, + LM_ID_BASE, object, "dlsym", &sym, &si) != 0) + return (0); + + if (sym.st_shndx == SHN_UNDEF) + return (0); + + /* + * XXX: we should be more paranoid and verify that the symbol + * we just looked up is libdl.so.2`dlsym + */ + lcap->lca_ptr = (caddr_t)(uintptr_t)sym.st_value; + return (1); +} + +lx_handle_dlsym_t +lx_call_init(void) +{ + struct ps_prochandle *ph; + lookup_cb_arg_t lca; + extern int __libc_threaded; + int err; + + lx_debug("lx_call_init(): looking up Linux dlsym"); + + /* + * The handle is really the address of the Linux "dlsym" function. + * Once we have this address we can call into the Linux "dlsym" + * function to lookup other functions. It's the initial lookup + * of "dlsym" that's difficult. To do this we'll leverage the + * brand support that we added to librtld_db. We're going + * to fire up a seperate native solaris process that will + * attach to us via libproc/librtld_db and lookup the symbol + * for us. + */ + + /* Make sure we're single threaded. */ + if (__libc_threaded) { + lx_debug("lx_call_init() fail: " + "process must be single threaded"); + return (NULL); + } + + /* Tell libproc.so where the real procfs is mounted. 
*/ + Pset_procfs_path("/native/proc"); + + /* Tell librtld_db.so where the real /native is */ + (void) rd_ctl(RD_CTL_SET_HELPPATH, "/native"); + + /* Grab ourselves but don't stop ourselves. */ + if ((ph = Pgrab(getpid(), + PGRAB_FORCE | PGRAB_RDONLY | PGRAB_NOSTOP, &err)) == NULL) { + lx_debug("lx_call_init() fail: Pgrab failed: %s", + Pgrab_error(err)); + return (NULL); + } + + lca.lca_ph = ph; + if (Pobject_iter(ph, lookup_cb, &lca) == -1) { + lx_debug("lx_call_init() fail: couldn't find Linux dlsym"); + return (NULL); + } + + lx_debug("lx_call_init(): Linux dlsym = 0x%p", lca.lca_ptr); + return ((lx_handle_dlsym_t)lca.lca_ptr); +} + +#define LX_RTLD_DEFAULT ((void *)0) +#define LX_RTLD_NEXT ((void *) -1l) + +lx_handle_sym_t +lx_call_dlsym(lx_handle_dlsym_t lxh_dlsym, const char *str) +{ + lx_handle_sym_t result; + lx_debug("lx_call_dlsym: calling Linux dlsym for: %s", str); + result = (lx_handle_sym_t)lx_call2((lx_handle_sym_t)lxh_dlsym, + (uintptr_t)LX_RTLD_DEFAULT, (uintptr_t)str); + lx_debug("lx_call_dlsym: Linux sym: \"%s\" = 0x%p", str, result); + return (result); +} + +static uintptr_t +/*ARGSUSED*/ +lx_call(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, + uintptr_t p3, uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, + uintptr_t p8) +{ + typedef uintptr_t (*fp8_t)(uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + lx_regs_t *rp; + uintptr_t ret; + fp8_t lx_funcp = (fp8_t)lx_ch; + long cur_gs; + + rp = lx_syscall_regs(); + + lx_debug("lx_call: calling to Linux code at 0x%p", lx_ch); + lx_debug("lx_call: loading Linux gs, rp = 0x%p, gs = 0x%p", + rp, rp->lxr_gs); + + lx_swap_gs(rp->lxr_gs, &cur_gs); + ret = lx_funcp(p1, p2, p3, p4, p5, p6, p7, p8); + lx_swap_gs(cur_gs, &rp->lxr_gs); + + lx_debug("lx_call: returned from Linux code at 0x%p (%p)", lx_ch, ret); + lx_debug("lx_call: restored solaris gs 0x%p", cur_gs); + return (ret); +} + +uintptr_t +lx_call0(lx_handle_sym_t lx_ch) +{ + return (lx_call(lx_ch, 
0, 0, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call1(lx_handle_sym_t lx_ch, uintptr_t p1) +{ + return (lx_call(lx_ch, p1, 0, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call2(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2) +{ + return (lx_call(lx_ch, p1, p2, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call3(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lx_call(lx_ch, p1, p2, p3, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call4(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, 0, 0, 0, 0)); +} + +uintptr_t +lx_call5(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, 0, 0, 0)); +} + +uintptr_t +lx_call6(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, 0, 0)); +} + +uintptr_t +lx_call7(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, 0)); +} + +uintptr_t +lx_call8(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, uintptr_t p8) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, p8)); +} + +/* + * Linux Thunking Interfaces - Server Side + */ +static int +lxt_gethost_arg_check(lxt_gethost_arg_t *x, int x_size) +{ + if (x_size != sizeof (*x) + x->lxt_gh_buf_len - 1) + return (-1); + + if ((x->lxt_gh_token_len < 0) || (x->lxt_gh_buf_len < 0)) + return (-1); + + /* Token and buf should use up all the storage. 
*/ + if ((x->lxt_gh_token_len + x->lxt_gh_buf_len) != x->lxt_gh_storage_len) + return (-1); + + return (0); +} + +static void +lxt_server_gethost(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_gethost_arg_t *data; + struct hostent *result, *rv; + int token_len, buf_len, type, data_size, i; + char *token, *buf; + int h_errnop; + + assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) || + (request->lxt_sa_op == LXT_SERVER_OP_ADDR2HOST)); + + /*LINTED*/ + data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request) - 1; + + if (!lxt_gethost_arg_check(data, data_size)) { + lx_debug("lxt_server_gethost: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Unpack the arguments. */ + type = data->lxt_gh_type; + token = &data->lxt_gh_storage[0]; + token_len = data->lxt_gh_token_len; + result = &data->lxt_gh_result; + buf = &data->lxt_gh_storage[data->lxt_gh_token_len]; + buf_len = data->lxt_gh_buf_len - data->lxt_gh_token_len; + + if (request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) { + (void) lx_call6(lxt_handles[LXTH_GETHOSTBYNAME_R].lxth_handle, + (uintptr_t)token, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv, + (uintptr_t)&h_errnop); + } else { + (void) lx_call8(lxt_handles[LXTH_GETHOSTBYADDR_R].lxth_handle, + (uintptr_t)token, token_len, type, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv, + (uintptr_t)&h_errnop); + } + + if (rv == NULL) { + /* the lookup failed */ + request->lxt_sa_success = 0; + request->lxt_sa_errno = errno; + data->lxt_gh_h_errno = h_errnop; + *door_result = (char *)request; + *door_result_size = request_size; + return; + } + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + data->lxt_gh_h_errno = 0; + + /* + * The result structure that we would normally return contains a + * bunch of pointers, but those pointers are useless to our caller + * since they are in a different 
address space. So before returning + * we'll convert all the result pointers into offsets. The caller + * can then map the offsets back into pointers. + */ + for (i = 0; result->h_aliases[i] != NULL; i++) { + result->h_aliases[i] = + LXT_PTR_TO_OFFSET(result->h_aliases[i], buf); + } + for (i = 0; result->h_addr_list[i] != NULL; i++) { + result->h_addr_list[i] = + LXT_PTR_TO_OFFSET(result->h_addr_list[i], buf); + } + result->h_name = LXT_PTR_TO_OFFSET(result->h_name, buf); + result->h_aliases = LXT_PTR_TO_OFFSET(result->h_aliases, buf); + result->h_addr_list = LXT_PTR_TO_OFFSET(result->h_addr_list, buf); + + *door_result = (char *)request; + *door_result_size = request_size; +} + +static int +lxt_getserv_arg_check(lxt_getserv_arg_t *x, int x_size) +{ + if (x_size != sizeof (*x) + x->lxt_gs_buf_len - 1) + return (-1); + + if ((x->lxt_gs_token_len < 0) || (x->lxt_gs_buf_len < 0)) + return (-1); + + /* Token and buf should use up all the storage. */ + if ((x->lxt_gs_token_len + x->lxt_gs_buf_len) != x->lxt_gs_storage_len) + return (-1); + + return (0); +} + +static void +lxt_server_getserv(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_getserv_arg_t *data; + struct servent *result, *rv; + int token_len, buf_len, data_size, i, port; + char *token, *buf, *proto = NULL; + + assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) || + (request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV)); + + /*LINTED*/ + data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request) - 1; + + if (!lxt_getserv_arg_check(data, data_size)) { + lx_debug("lxt_server_getserv: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Unpack the arguments. 
*/ + token = &data->lxt_gs_storage[0]; + token_len = data->lxt_gs_token_len; + result = &data->lxt_gs_result; + buf = &data->lxt_gs_storage[data->lxt_gs_token_len]; + buf_len = data->lxt_gs_buf_len - data->lxt_gs_token_len; + if (strlen(data->lxt_gs_proto) > 0) + proto = data->lxt_gs_proto; + + /* Do more sanity checks */ + if ((request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV) && + (token_len != sizeof (int))) { + lx_debug("lxt_server_getserv: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + if (request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) { + (void) lx_call6(lxt_handles[LXTH_GETSERVBYNAME_R].lxth_handle, + (uintptr_t)token, (uintptr_t)proto, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv); + } else { + bcopy(token, &port, sizeof (int)); + (void) lx_call6(lxt_handles[LXTH_GETSERVBYPORT_R].lxth_handle, + port, (uintptr_t)proto, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv); + } + + if (rv == NULL) { + /* the lookup failed */ + request->lxt_sa_success = 0; + request->lxt_sa_errno = errno; + *door_result = (char *)request; + *door_result_size = request_size; + return; + } + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + + /* + * The result structure that we would normally return contains a + * bunch of pointers, but those pointers are useless to our caller + * since they are in a different address space. So before returning + * we'll convert all the result pointers into offsets. The caller + * can then map the offsets back into pointers. 
+ */ + for (i = 0; result->s_aliases[i] != NULL; i++) { + result->s_aliases[i] = + LXT_PTR_TO_OFFSET(result->s_aliases[i], buf); + } + result->s_proto = LXT_PTR_TO_OFFSET(result->s_proto, buf); + result->s_aliases = LXT_PTR_TO_OFFSET(result->s_aliases, buf); + result->s_name = LXT_PTR_TO_OFFSET(result->s_name, buf); + + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_openlog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_openlog_arg_t *data; + int data_size; + static char ident[128]; + + assert(request->lxt_sa_op == LXT_SERVER_OP_OPENLOG); + + /*LINTED*/ + data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request); + + if (data_size != sizeof (*data)) { + lx_debug("lxt_server_openlog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* + * Linux expects that the ident pointer passed to openlog() + * points to a static string that won't go away. Linux + * saves the pointer and references with syslog() is called. + * Hence we'll make a local copy of the ident string here. + */ + (void) mutex_lock(&lxt_pid_lock); + (void) strlcpy(ident, data->lxt_ol_ident, sizeof (ident)); + (void) mutex_unlock(&lxt_pid_lock); + + /* Call Linx openlog(). 
*/ + (void) lx_call3(lxt_handles[LXTH_OPENLOG].lxth_handle, + (uintptr_t)ident, data->lxt_ol_logopt, data->lxt_ol_facility); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_syslog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_syslog_arg_t *data; + int data_size; + char *progname_ptr_new; + char *progname_ptr_old; + + assert(request->lxt_sa_op == LXT_SERVER_OP_SYSLOG); + + /*LINTED*/ + data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request); + + if (data_size != sizeof (*data)) { + lx_debug("lxt_server_openlog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + progname_ptr_new = data->lxt_sl_progname; + + (void) mutex_lock(&lxt_pid_lock); + + /* + * Ensure the message has the correct pid. + * We do this by telling our getpid() system call to return a + * different value. + */ + lxt_pid = data->lxt_sl_pid; + + /* + * Ensure the message has the correct program name. + * Normally instead of a program name an "ident" string is + * used, this is the string passed to openlog(). But if + * openlog() wasn't called before syslog() then Linux + * syslog() will attempt to use the program name as + * the ident string, and the program name is determined + * by looking at the __progname variable. So we'll just + * update the Linux __progname variable while we do the + * call. + */ + (void) uucopy(lxt_handles[LXTH_PROGNAME].lxth_handle, + &progname_ptr_old, sizeof (char *)); + (void) uucopy(&progname_ptr_new, + lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *)); + + /* Call Linux syslog(). */ + (void) lx_call2(lxt_handles[LXTH_SYSLOG].lxth_handle, + data->lxt_sl_priority, (uintptr_t)data->lxt_sl_message); + + /* Restore pid and program name. 
*/ + (void) uucopy(&progname_ptr_old, + lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *)); + lxt_pid = NULL; + + (void) mutex_unlock(&lxt_pid_lock); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_closelog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + int data_size; + + assert(request->lxt_sa_op == LXT_SERVER_OP_CLOSELOG); + + data_size = request_size - sizeof (*request); + if (data_size != 0) { + lx_debug("lxt_server_closelog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Call Linux closelog(). */ + (void) lx_call0(lxt_handles[LXTH_CLOSELOG].lxth_handle); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server(void *cookie, char *argp, size_t request_size, + door_desc_t *dp, uint_t n_desc) +{ + /*LINTED*/ + lxt_server_arg_t *request = (lxt_server_arg_t *)argp; + lxt_req_t lxt_req; + char *door_path = cookie; + + /* Check if there's no callers left */ + if (argp == DOOR_UNREF_DATA) { + (void) fdetach(door_path); + (void) unlink(door_path); + lx_debug("lxt_thunk_server: no clients, exiting"); + exit(0); + } + + /* Sanity check the incomming request. */ + if (request_size < sizeof (*request)) { + /* the lookup failed */ + lx_debug("lxt_thunk_server: invalid request size"); + (void) door_return(NULL, 0, NULL, 0); + return; + } + + if ((request->lxt_sa_op < LXT_SERVER_OP_MIN) || + (request->lxt_sa_op > LXT_SERVER_OP_MAX)) { + lx_debug("lxt_thunk_server: invalid request op"); + (void) door_return(NULL, 0, NULL, 0); + return; + } + + /* Handle ping requests immediatly, return here. 
*/ + if (request->lxt_sa_op == LXT_SERVER_OP_PING) { + lx_debug("lxt_thunk_server: handling ping request"); + request->lxt_sa_success = 1; + (void) door_return((char *)request, request_size, NULL, 0); + return; + } + + lx_debug("lxt_thunk_server: hand off request to Linux thread, " + "request = 0x%p", request); + + /* Pack the request up so we can pass it to a Linux thread. */ + lxt_req.lxtr_request = request; + lxt_req.lxtr_request_size = request_size; + lxt_req.lxtr_result = NULL; + lxt_req.lxtr_result_size = 0; + lxt_req.lxtr_complete = 0; + (void) cond_init(&lxt_req.lxtr_complete_cv, USYNC_THREAD, NULL); + + /* Pass the request onto a Linux thread. */ + (void) mutex_lock(&lxt_req_lock); + while (lxt_req_ptr != NULL) + (void) cond_wait(&lxt_req_cv, &lxt_req_lock); + lxt_req_ptr = &lxt_req; + (void) cond_broadcast(&lxt_req_cv); + + /* Wait for the request to be completed. */ + while (lxt_req.lxtr_complete == 0) + (void) cond_wait(&lxt_req.lxtr_complete_cv, &lxt_req_lock); + assert(lxt_req_ptr != &lxt_req); + (void) mutex_unlock(&lxt_req_lock); + + lx_debug("lxt_thunk_server: hand off request completed, " + "request = 0x%p", request); + + /* + * If door_return() is successfull it never returns, so if we made + * it here there was some kind of error, but there's nothing we can + * really do about it. + */ + (void) door_return( + lxt_req.lxtr_result, lxt_req.lxtr_result_size, NULL, 0); +} + +static void +lxt_server_loop(void) +{ + lxt_req_t *lxt_req; + lxt_server_arg_t *request; + size_t request_size; + char *door_result; + size_t door_result_size; + + for (;;) { + /* Wait for a request from a doors server thread. */ + (void) mutex_lock(&lxt_req_lock); + while (lxt_req_ptr == NULL) + (void) cond_wait(&lxt_req_cv, &lxt_req_lock); + + /* We got a request, get a local pointer to it. */ + lxt_req = lxt_req_ptr; + lxt_req_ptr = NULL; + (void) cond_broadcast(&lxt_req_cv); + (void) mutex_unlock(&lxt_req_lock); + + /* Get a pointer to the request. 
*/ + request = lxt_req->lxtr_request; + request_size = lxt_req->lxtr_request_size; + + lx_debug("lxt_server_loop: Linux thread request recieved, " + "request = %p", request); + + /* Dispatch the request. */ + assert((request->lxt_sa_op > LXT_SERVER_OP_PING) || + (request->lxt_sa_op < LXT_SERVER_OP_MAX)); + lxt_operations[request->lxt_sa_op].lxto_fp( + request, request_size, &door_result, &door_result_size); + + lx_debug("lxt_server_loop: Linux thread request completed, " + "request = %p", request); + + (void) mutex_lock(&lxt_req_lock); + + /* Set the result pointers for the calling door thread. */ + lxt_req->lxtr_result = door_result; + lxt_req->lxtr_result_size = door_result_size; + + /* Let the door thread know we're done. */ + lxt_req->lxtr_complete = 1; + (void) cond_signal(&lxt_req->lxtr_complete_cv); + + (void) mutex_unlock(&lxt_req_lock); + } + /*NOTREACHED*/ +} + +static void +lxt_server_enter(int fifo1_wr, int fifo2_rd) +{ + struct stat stat; + char door_path[MAXPATHLEN]; + int i, dfd, junk = 0; + + /* + * Do some sanity checks. Make sure we've got the fifos + * we need passed to us on the correct file descriptors. + */ + if ((fstat(fifo1_wr, &stat) != 0) || + ((stat.st_mode & S_IFMT) != S_IFIFO) || + (fstat(fifo2_rd, &stat) != 0) || + ((stat.st_mode & S_IFMT) != S_IFIFO)) { + lx_err("lx_thunk server aborting, can't contact parent"); + exit(-1); + } + + /* + * Get the initial Linux call handle so we can invoke other + * Linux calls. + */ + lxh_init = lx_call_init(); + if (lxh_init == NULL) { + lx_err("lx_thunk server aborting, failed Linux call init"); + exit(-1); + } + + /* Now lookup other Linux symbols we'll need access to. 
*/ + for (i = 0; lxt_handles[i].lxth_name != NULL; i++) { + assert(lxt_handles[i].lxth_index == i); + if ((lxt_handles[i].lxth_handle = lx_call_dlsym(lxh_init, + lxt_handles[i].lxth_name)) == NULL) { + lx_err("lx_thunk server aborting, " + "failed Linux symbol lookup: %s", + lxt_handles[i].lxth_name); + exit(-1); + } + } + + /* get the path to the door server */ + if (read(fifo2_rd, door_path, sizeof (door_path)) < 0) { + lx_err("lxt_server_enter: failed to get door path"); + exit(-1); + } + (void) close(fifo2_rd); + + /* Create the door server. */ + if ((dfd = door_create(lxt_server, door_path, + DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { + lx_err("lxt_server_enter: door_create() failed"); + exit(-1); + } + + /* Attach the door to a file system path. */ + (void) fdetach(door_path); + if (fattach(dfd, door_path) < 0) { + lx_err("lxt_server_enter: fattach() failed"); + exit(-1); + } + + /* The door server is ready, signal this via a fifo write */ + (void) write(fifo1_wr, &junk, 1); + (void) close(fifo1_wr); + + lx_debug("lxt_server_enter: doors server initialized"); + lxt_server_loop(); + /*NOTREACHED*/ +} + +void +lxt_server_exec_check(void) +{ + if (lxt_server_processes == 0) + return; + + /* + * We're a thunk server process, so we take over control of + * the current Linux process here. + */ + lx_debug("lx_thunk server initalization starting"); + lxt_server_enter(LXT_SERVER_FIFO_WR_FD, LXT_SERVER_FIFO_RD_FD); + /*NOTREACHED*/ +} + +void +lxt_server_init(int argc, char *argv[]) +{ + /* + * The thunk server process is a shell script named LXT_SERVER_BINARY. + * It is executed without any parameters. Since it's a shell script + * the arguments passed to the shell's main entry point are: + * 1) the name of the shell + * 2) the name of the script to execute + * + * So to check if we're the thunk server process we first check + * for the expected number of arduments and then we'll look at + * the second parameter to see if it's LXT_SERVER_BINARY. 
+ */ + if ((argc != 2) || + (strcmp(argv[1], LXT_SERVER_BINARY) != 0)) + return; + + lxt_server_processes = 1; + lx_debug("lx_thunk server detected, delaying initalization"); +} + +int +lxt_server_pid(int *pid) +{ + if (lxt_server_processes == 0) + return (0); + *pid = lxt_pid; + return (1); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile b/usr/src/lib/brand/lx/lx_brand/common/mapfile new file mode 100644 index 0000000000..0663f4bc19 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +# +# Scope everything local -- our .init section is our only public interface. 
+# +{ + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers new file mode 100644 index 0000000000..0663f4bc19 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +# +# Scope everything local -- our .init section is our only public interface. 
+# +{ + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/mem.c b/usr/src/lib/brand/lx/lx_brand/common/mem.c new file mode 100644 index 0000000000..15b077bd33 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mem.c @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +/* + * There are two forms of mmap, mmap() and mmap2(). The only difference is that + * the final argument to mmap2() specifies the number of pages, not bytes. + * Linux has a number of additional flags, but they are all deprecated. We also + * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris. + * + * The Linux mmap() returns ENOMEM in some cases where Solaris returns + * EOVERFLOW, so we translate the errno as necessary. 
+ */ + +int pagesize; /* needed for mmap2() */ + +#define LX_MAP_ANONYMOUS 0x00020 +#define LX_MAP_NORESERVE 0x04000 + +static int +ltos_mmap_flags(int flags) +{ + int new_flags; + + new_flags = flags & (MAP_TYPE | MAP_FIXED); + if (flags & LX_MAP_ANONYMOUS) + new_flags |= MAP_ANONYMOUS; + if (flags & LX_MAP_NORESERVE) + new_flags |= MAP_NORESERVE; + + return (new_flags); +} + +static int +mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, off64_t p6) +{ + void *addr = (void *)p1; + size_t len = p2; + int prot = p3; + int flags = p4; + int fd = p5; + off64_t off = p6; + void *ret; + + if (lx_debug_enabled != 0) { + char *path, path_buf[MAXPATHLEN]; + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tmmap_common(): fd = %d - %s", fd, path); + } + + /* + * Under Linux, the file descriptor is ignored when mapping zfod + * anonymous memory, On Solaris, we want the fd set to -1 for the + * same functionality. + */ + if (flags & LX_MAP_ANONYMOUS) + fd = -1; + + /* + * This is totally insane. The NOTES section in the linux mmap(2) man + * page claims that on some architectures, read protection may + * automatically include exec protection. It has been observed on a + * native linux system that the /proc/<pid>/maps file does indeed + * show that segments mmap'd from userland (such as libraries mapped in + * by the dynamic linker) all have exec the permission set, even for + * data segments. + */ + if (prot & PROT_READ) + prot |= PROT_EXEC; + + ret = mmap64(addr, len, prot, ltos_mmap_flags(flags), fd, off); + + if (ret == MAP_FAILED) + return (errno == EOVERFLOW ? 
-ENOMEM : -errno); + else + return ((int)ret); +} + +int +lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, uintptr_t p6) +{ + return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6)); +} + +int +lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, uintptr_t p6) +{ + if (pagesize == 0) + pagesize = sysconf(_SC_PAGESIZE); + + return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6 * pagesize)); +} + + +/* + * The locking family of system calls, as well as msync(), are identical. On + * Solaris, they are layered on top of the memcntl syscall, so they cannot be + * pass-thru. + */ +int +lx_mlock(uintptr_t addr, uintptr_t len) +{ + uintptr_t addr1 = addr & PAGEMASK; + uintptr_t len1 = len + (addr & PAGEOFFSET); + + return (mlock((void *)addr1, (size_t)len1) ? -errno : 0); +} + +int +lx_mlockall(uintptr_t flags) +{ + return (mlockall(flags) ? -errno : 0); +} + +int +lx_munlock(uintptr_t addr, uintptr_t len) +{ + uintptr_t addr1 = addr & PAGEMASK; + uintptr_t len1 = len + (addr & PAGEOFFSET); + + return (munlock((void *)addr1, (size_t)len1) ? -errno : 0); +} + +int +lx_munlockall(void) +{ + return (munlockall() ? -errno : 0); +} + +int +lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags) +{ + return (msync((void *)addr, (size_t)len, flags) ? -errno : 0); +} + +/* + * Solaris recognizes more flags than Linux, so we don't want to inadvertently + * use what would be an invalid flag on Linux. Linux also allows the length to + * be zero, while Solaris does not. + */ +int +lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice) +{ + if (len == 0) + return (0); + + switch (advice) { + case MADV_NORMAL: + case MADV_RANDOM: + case MADV_SEQUENTIAL: + case MADV_WILLNEED: + case MADV_DONTNEED: + return (madvise((void *)start, len, advice) ? 
-errno : 0); + + default: + return (-EINVAL); + } +} + +/* + * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN + * and PROT_GROWSUP, which have no equivalent on Solaris. + */ +#define LX_PROT_GROWSDOWN 0x01000000 +#define LX_PROT_GROWSUP 0x02000000 + +int +lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot) +{ + prot &= ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN); + + return (mprotect((void *)start, len, prot) ? -errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c new file mode 100644 index 0000000000..1cc37f1fbb --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c @@ -0,0 +1,546 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <assert.h> +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <strings.h> +#include <macros.h> +#include <sys/brand.h> +#include <sys/reboot.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/systeminfo.h> +#include <sys/types.h> +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_stat.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thunk_server.h> +#include <sys/lx_fcntl.h> +#include <unistd.h> +#include <libintl.h> +#include <zone.h> + +extern int sethostname(char *, int); + +/* ARGUSED */ +int +lx_rename(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = rename((const char *)p1, (const char *)p2); + + if (ret < 0) { + /* + * If rename(2) failed and we're in install mode, return + * success if the the reason we failed was either because the + * source file didn't actually exist or if it was because we + * tried to rename it to be the name of a device currently in + * use (resulting in an EBUSY.) + * + * To help install along further, if the failure was due + * to an EBUSY, delete the original file so we don't leave + * extra files lying around. 
+ */ + if (lx_install != 0) { + if (errno == ENOENT) + return (0); + + if (errno == EBUSY) { + (void) unlink((const char *)p1); + return (0); + } + } + + return (-errno); + } + + return (0); +} + +int +lx_renameat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2) +{ + int ret; + int atfd1 = (int)ext1; + int atfd2 = (int)ext2; + + if (atfd1 == LX_AT_FDCWD) + atfd1 = AT_FDCWD; + + if (atfd2 == LX_AT_FDCWD) + atfd2 = AT_FDCWD; + + ret = renameat(atfd1, (const char *)p1, atfd2, (const char *)p2); + + if (ret < 0) { + /* see lx_rename() for why we check lx_install */ + if (lx_install != 0) { + if (errno == ENOENT) + return (0); + + if (errno == EBUSY) { + (void) unlinkat(ext1, (const char *)p1, 0); + return (0); + } + } + + return (-errno); + } + + return (0); +} + +/*ARGSUSED*/ +int +lx_reboot(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + int magic = (int)p1; + int magic2 = (int)p2; + uint_t flag = (int)p3; + int rc; + + if (magic != LINUX_REBOOT_MAGIC1) + return (-EINVAL); + if (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A && + magic2 != LINUX_REBOOT_MAGIC2B && magic2 != LINUX_REBOOT_MAGIC2C && + magic2 != LINUX_REBOOT_MAGIC2D) + return (-EINVAL); + + if (geteuid() != 0) + return (-EPERM); + + switch (flag) { + case LINUX_REBOOT_CMD_CAD_ON: + case LINUX_REBOOT_CMD_CAD_OFF: + /* ignored */ + rc = 0; + break; + case LINUX_REBOOT_CMD_POWER_OFF: + case LINUX_REBOOT_CMD_HALT: + rc = reboot(RB_HALT, NULL); + break; + case LINUX_REBOOT_CMD_RESTART: + case LINUX_REBOOT_CMD_RESTART2: + /* RESTART2 may need more work */ + lx_msg(gettext("Restarting system.\n")); + rc = reboot(RB_AUTOBOOT, NULL); + break; + default: + return (-EINVAL); + } + + return ((rc == -1) ? -errno : rc); +} + +/* + * getcwd() - Linux syscall semantics are slightly different; we need to return + * the length of the pathname copied (+ 1 for the terminating NULL byte.) 
+ */ +int +lx_getcwd(uintptr_t p1, uintptr_t p2) +{ + char *buf; + size_t buflen = (size_t)p2; + size_t copylen, local_len; + size_t len = 0; + + if ((getcwd((char *)p1, (size_t)p2)) == NULL) + return (-errno); + + /* + * We need the length of the pathname getcwd() copied but we never want + * to dereference a Linux pointer for any reason. + * + * Thus, to get the string length we will uucopy() up to copylen bytes + * at a time into a local buffer and will walk each chunk looking for + * the string-terminating NULL byte. + * + * We can use strlen() to find the length of the string in the + * local buffer by delimiting the buffer with a NULL byte in the + * last element that will never be overwritten. + */ + copylen = min(buflen, MAXPATHLEN + 1); + buf = SAFE_ALLOCA(copylen + 1); + if (buf == NULL) + return (-ENOMEM); + buf[copylen] = '\0'; + + for (;;) { + if (uucopy((char *)p1 + len, buf, copylen) != 0) + return (-errno); + + local_len = strlen(buf); + len += local_len; + + /* + * If the strlen() is less than copylen, we found the + * real end of the string -- not the NULL byte used to + * delimit the end of our buffer. + */ + if (local_len != copylen) + break; + + /* prepare to check the next chunk of the string */ + buflen -= copylen; + copylen = min(buflen, copylen); + } + + return (len + 1); +} + +int +lx_get_kern_version(void) +{ + /* + * Since this function is called quite often, and zone_getattr is slow, + * we cache the kernel version in kvers_cache. -1 signifies that no + * value has yet been cached. 
+ */ + static int kvers_cache = -1; + /* dummy variable for use in zone_getattr */ + int kvers; + + if (kvers_cache != -1) + return (kvers_cache); + if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, &kvers, sizeof (int)) + != sizeof (int)) + return (kvers_cache = LX_KERN_2_4); + else + return (kvers_cache = kvers); +} + +int +lx_uname(uintptr_t p1) +{ + struct lx_utsname *un = (struct lx_utsname *)p1; + char buf[LX_SYS_UTS_LN + 1]; + + if (gethostname(un->nodename, sizeof (un->nodename)) == -1) + return (-errno); + + (void) strlcpy(un->sysname, LX_UNAME_SYSNAME, LX_SYS_UTS_LN); + (void) strlcpy(un->release, lx_release, LX_SYS_UTS_LN); + (void) strlcpy(un->version, LX_UNAME_VERSION, LX_SYS_UTS_LN); + (void) strlcpy(un->machine, LX_UNAME_MACHINE, LX_SYS_UTS_LN); + if ((sysinfo(SI_SRPC_DOMAIN, buf, LX_SYS_UTS_LN) < 0)) + un->domainname[0] = '\0'; + else + (void) strlcpy(un->domainname, buf, LX_SYS_UTS_LN); + + return (0); +} + +/* + * {get,set}groups16() - Handle the conversion between 16-bit Linux gids and + * 32-bit Solaris gids. + */ +int +lx_getgroups16(uintptr_t p1, uintptr_t p2) +{ + int count = (int)p1; + lx_gid16_t *grouplist = (lx_gid16_t *)p2; + gid_t *grouplist32; + int ret; + int i; + + grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t)); + if (grouplist32 == NULL) + return (-ENOMEM); + if ((ret = getgroups(count, grouplist32)) < 0) + return (-errno); + + for (i = 0; i < ret; i++) + grouplist[i] = LX_GID32_TO_GID16(grouplist32[i]); + + return (ret); +} + +int +lx_setgroups16(uintptr_t p1, uintptr_t p2) +{ + int count = (int)p1; + lx_gid16_t *grouplist = (lx_gid16_t *)p2; + gid_t *grouplist32; + int i; + + grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t)); + if (grouplist32 == NULL) + return (-ENOMEM); + for (i = 0; i < count; i++) + grouplist32[i] = LX_GID16_TO_GID32(grouplist[i]); + + return (setgroups(count, grouplist32) ? 
-errno : 0); +} + +/* + * personality() - Solaris doesn't support Linux personalities, but we have to + * emulate enough to show that we support the basic personality. + */ +#define LX_PER_LINUX 0x0 + +int +lx_personality(uintptr_t p1) +{ + int per = (int)p1; + + switch (per) { + case -1: + /* Request current personality */ + return (LX_PER_LINUX); + case LX_PER_LINUX: + return (0); + default: + return (-EINVAL); + } +} + +/* + * mknod() - Since we don't have the SYS_CONFIG privilege within a zone, the + * only mode we have to support is S_IFIFO. We also have to distinguish between + * an invalid type and insufficient privileges. + */ +#define LX_S_IFMT 0170000 +#define LX_S_IFDIR 0040000 +#define LX_S_IFCHR 0020000 +#define LX_S_IFBLK 0060000 +#define LX_S_IFREG 0100000 +#define LX_S_IFIFO 0010000 +#define LX_S_IFLNK 0120000 +#define LX_S_IFSOCK 0140000 + +/*ARGSUSED*/ +int +lx_mknod(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *path = (char *)p1; + lx_dev_t lx_dev = (lx_dev_t)p3; + struct sockaddr_un sockaddr; + struct stat statbuf; + mode_t mode, type; + dev_t dev; + int fd; + + type = ((mode_t)p2 & LX_S_IFMT); + mode = ((mode_t)p2 & 07777); + + switch (type) { + case 0: + case LX_S_IFREG: + /* create a regular file */ + if (stat(path, &statbuf) == 0) + return (-EEXIST); + + if (errno != ENOENT) + return (-errno); + + if ((fd = creat(path, mode)) < 0) + return (-errno); + + (void) close(fd); + return (0); + + case LX_S_IFSOCK: + /* + * Create a UNIX domain socket. + * + * Most programmers aren't even aware you can do this. + * + * Note you can also do this via Solaris' mknod(2), but + * Linux allows anyone who can create a UNIX domain + * socket via bind(2) to create one via mknod(2); + * Solaris requires the caller to be privileged. 
+ */ + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) + return (-errno); + + if (stat(path, &statbuf) == 0) + return (-EEXIST); + + if (errno != ENOENT) + return (-errno); + + if (uucopy(path, &sockaddr.sun_path, + sizeof (sockaddr.sun_path)) < 0) + return (-errno); + + /* assure NULL termination of sockaddr.sun_path */ + sockaddr.sun_path[sizeof (sockaddr.sun_path) - 1] = '\0'; + sockaddr.sun_family = AF_UNIX; + + if (bind(fd, (struct sockaddr *)&sockaddr, + strlen(sockaddr.sun_path) + + sizeof (sockaddr.sun_family)) < 0) + return (-errno); + + (void) close(fd); + return (0); + + case LX_S_IFIFO: + dev = 0; + break; + + case LX_S_IFCHR: + case LX_S_IFBLK: + /* + * The "dev" RPM package wants to create all possible Linux + * device nodes, so just report its mknod()s as having + * succeeded if we're in install mode. + */ + if (lx_install != 0) { + lx_debug("lx_mknod: install mode spoofed creation of " + "Linux device [%lld, %lld]\n", + LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev)); + + return (0); + } + + dev = makedevice(LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev)); + break; + + default: + return (-EINVAL); + } + + return (mknod(path, mode | type, dev) ? -errno : 0); +} + +int +lx_sethostname(uintptr_t p1, uintptr_t p2) +{ + char *name = (char *)p1; + int len = (size_t)p2; + + return (sethostname(name, len) ? -errno : 0); +} + +int +lx_setdomainname(uintptr_t p1, uintptr_t p2) +{ + char *name = (char *)p1; + int len = (size_t)p2; + long rval; + + if (len < 0 || len >= LX_SYS_UTS_LN) + return (-EINVAL); + + rval = sysinfo(SI_SET_SRPC_DOMAIN, name, len); + + return ((rval < 0) ? -errno : 0); +} + +int +lx_getpid(void) +{ + int pid; + + /* First call the thunk server hook. */ + if (lxt_server_pid(&pid) != 0) + return (pid); + + pid = syscall(SYS_brand, B_EMULATE_SYSCALL + 20); + return ((pid == -1) ? 
-errno : pid); +} + +int +lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + char **argv = (char **)p2; + char **envp = (char **)p3; + char *nullist[] = { NULL }; + char path[64]; + + /* First call the thunk server hook. */ + lxt_server_exec_check(); + + /* Get a copy of the executable we're trying to run */ + path[0] = '\0'; + (void) uucopystr(filename, path, sizeof (path)); + + /* Check if we're trying to run a native binary */ + if (strncmp(path, "/native/usr/lib/brand/lx/lx_native", + sizeof (path)) == 0) { + /* Skip the first element in the argv array */ + argv++; + + /* + * The name of the new program to execute was the first + * parameter passed to lx_native. + */ + if (uucopy(argv, &filename, sizeof (char *)) != 0) + return (-errno); + + (void) syscall(SYS_brand, B_EXEC_NATIVE, filename, argv, envp, + NULL, NULL, NULL); + return (-errno); + } + + if (argv == NULL) + argv = nullist; + + /* This is a normal exec call. */ + (void) execve(filename, argv, envp); + + return (-errno); +} + +int +lx_setgroups(uintptr_t p1, uintptr_t p2) +{ + int ng = (int)p1; + gid_t *glist = NULL; + int i, r; + + lx_debug("\tlx_setgroups(%d, 0x%p", ng, p2); + + if (ng > 0) { + if ((glist = (gid_t *)SAFE_ALLOCA(ng * sizeof (gid_t))) == NULL) + return (-ENOMEM); + + if (uucopy((void *)p2, glist, ng * sizeof (gid_t)) != 0) + return (-errno); + + /* + * Linux doesn't check the validity of the group IDs, but + * Solaris does. Change any invalid group IDs to a known, valid + * value (yuck). + */ + for (i = 0; i < ng; i++) { + if (glist[i] > MAXUID) + glist[i] = MAXUID; + } + } + + r = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_setgroups32, + ng, glist); + + return ((r == -1) ? 
-errno : r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/module.c b/usr/src/lib/brand/lx/lx_brand/common/module.c new file mode 100644 index 0000000000..3ec4164f71 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/module.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * We don't support Linux modules, but we have to emulate enough of the system + * calls to show that we don't have any modules installed. + */ + +#include <errno.h> +#include <sys/types.h> +#include <sys/lx_misc.h> + +/* + * For query_module(), we provide an empty list of modules, and return ENOENT + * on any request for a specific module. + */ +#define LX_QM_MODULES 1 +#define LX_QM_DEPS 2 +#define LX_QM_REFS 3 +#define LX_QM_SYMBOLS 4 +#define LX_QM_INFO 5 + +/*ARGSUSED*/ +int +lx_query_module(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + /* + * parameter p1 is the 'name' argument. 
+ */ + int which = (int)p2; + char *buf = (char *)p3; + size_t bufsize = (size_t)p4; + size_t *ret = (size_t *)p5; + + switch (which) { + case 0: + /* + * Special case: always return 0 + */ + return (0); + + case LX_QM_MODULES: + /* + * Generate an empty list of modules. + */ + if (bufsize && buf) + buf[0] = '\0'; + if (ret) + *ret = 0; + return (0); + + case LX_QM_DEPS: + case LX_QM_REFS: + case LX_QM_SYMBOLS: + case LX_QM_INFO: + /* + * Any requests for specific module information return ENOENT. + */ + return (-ENOENT); + + default: + return (-EINVAL); + } +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount.c b/usr/src/lib/brand/lx/lx_brand/common/mount.c new file mode 100644 index 0000000000..3db9652eca --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mount.c @@ -0,0 +1,719 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <alloca.h> +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <strings.h> +#include <nfs/mount.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <sys/lx_autofs.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_mount.h> + +/* + * support definitions + */ +union fh_buffer { + struct nfs_fid fh2; + struct nfs_fh3 fh3; + char fh_data[NFS3_FHSIZE + 2]; +}; + +typedef enum mount_opt_type { + MOUNT_OPT_INVALID = 0, + MOUNT_OPT_NORMAL = 1, /* option value: none */ + MOUNT_OPT_UINT = 2 /* option value: unsigned int */ +} mount_opt_type_t; + +typedef struct mount_opt { + char *mo_name; + mount_opt_type_t mo_type; +} mount_opt_t; + + +/* + * Globals + */ +mount_opt_t lofs_options[] = { + { NULL, MOUNT_OPT_INVALID } +}; + +mount_opt_t lx_proc_options[] = { + { NULL, MOUNT_OPT_INVALID } +}; + +mount_opt_t lx_autofs_options[] = { + { LX_MNTOPT_FD, MOUNT_OPT_UINT }, + { LX_MNTOPT_PGRP, MOUNT_OPT_UINT }, + { LX_MNTOPT_MINPROTO, MOUNT_OPT_UINT }, + { LX_MNTOPT_MAXPROTO, MOUNT_OPT_UINT }, +}; + + +/* + * i_lx_opt_verify() - Check the mount options. + * + * You might wonder why we're being so strict about the mount options + * we allow. The reason is that normally all mount option verification + * is done by the Solaris userland mount command. Once mount options + * are passed to the kernel, invalid options are simply ignored. So + * if we actually want to catch requests for functionality that we + * don't support, or if we want to make sure that we don't randomly + * enable options that we haven't check to make sure they have the + * same syntax on Linux and Solaris, we need to reject any options + * we don't know to be ok here. 
+ */ +static int +i_lx_opt_verify(char *opts, mount_opt_t *mop) +{ + int opts_len = strlen(opts); + char *opts_tmp, *opt; + int opt_len, i; + + assert((opts != NULL) && (mop != NULL)); + + /* If no options were specified, there's no problem. */ + if (opts_len == 0) + return (1); + + /* If no options are allowed, fail. */ + if (mop[0].mo_name == NULL) + return (0); + + /* Don't accept leading or trailing ','. */ + if ((opts[0] == ',') || (opts[opts_len] == ',')) + return (0); + + /* Don't accept sequential ','. */ + for (i = 1; i < opts_len; i++) + if ((opts[i - 1] == ',') && (opts[i] == ',')) + return (0); + + /* + * We're going to use strtok() which modifies the target + * string so make a temporary copy. + */ + opts_tmp = SAFE_ALLOCA(opts_len); + if (opts_tmp == NULL) + return (-1); + bcopy(opts, opts_tmp, opts_len + 1); + + /* Verify each prop one at a time. */ + opt = strtok(opts_tmp, ","); + opt_len = strlen(opt); + for (;;) { + + /* Check for matching option/value pair. */ + for (i = 0; mop[i].mo_name != NULL; i++) { + char *ovalue; + int ovalue_len, mo_len; + + /* If the options is too short don't bother comparing */ + mo_len = strlen(mop[i].mo_name); + if (opt_len < mo_len) { + /* Keep trying to find a match. */ + continue; + } + + /* Compare the option to an allowed option. */ + if (strncmp(mop[i].mo_name, opt, mo_len) != 0) { + /* Keep trying to find a match. */ + continue; + } + + if (mop[i].mo_type == MOUNT_OPT_NORMAL) { + /* The option doesn't take a value. */ + if (opt_len == mo_len) { + /* This option is ok. */ + break; + } else { + /* Keep trying to find a match. */ + continue; + } + } + + /* This options takes a value. */ + if ((opt_len == mo_len) || (opt[mo_len] != '=')) { + /* Keep trying to find a match. */ + continue; + } + + /* We have an option match. Verify option value. */ + ovalue = &opt[mo_len] + 1; + ovalue_len = strlen(ovalue); + + /* Value can't be zero length string. 
*/ + if (ovalue_len == 0) + return (0); + + if (mop[i].mo_type == MOUNT_OPT_UINT) { + int j; + /* Verify that value is an unsigned int. */ + for (j = 0; j < ovalue_len; j++) + if (!isdigit(ovalue[j])) + return (0); + } else { + /* Unknown option type specified. */ + assert(0); + } + + /* The option is ok. */ + break; + } + + /* If there were no matches this is an unsupported option. */ + if (mop[i].mo_name == NULL) + return (0); + + /* This option is ok, move onto the next option. */ + if ((opt = strtok(NULL, ",")) == NULL) + break; + opt_len = strlen(opt); + }; + + /* We verified all the options. */ + return (1); +} + +static int +i_add_option(char *option, char *buf, size_t buf_size) +{ + char *fmt_str = NULL; + + assert((option != NULL) && (strlen(option) > 0)); + assert((buf != NULL) && (buf_size > 0)); + + if (buf[0] == '\0') { + fmt_str = "%s"; + } else { + fmt_str = ",%s"; + } + + buf_size -= strlen(buf); + buf += strlen(buf); + + /*LINTED*/ + if (snprintf(buf, buf_size, fmt_str, option) > (buf_size - 1)) + return (-EOVERFLOW); + return (0); +} + +static int +i_add_option_int(char *option, int val, char *buf, size_t buf_size) +{ + char *fmt_str = NULL; + + assert((option != NULL) && (strlen(option) > 0)); + assert((buf != NULL) && (buf_size > 0)); + + if (buf[0] == '\0') { + fmt_str = "%s=%d"; + } else { + fmt_str = ",%s=%d"; + } + + buf_size -= strlen(buf); + buf += strlen(buf); + + /*LINTED*/ + if (snprintf(buf, buf_size, fmt_str, option, val) > (buf_size - 1)) + return (-EOVERFLOW); + return (0); +} + +static int +i_make_nfs_args(lx_nfs_mount_data_t *lx_nmd, struct nfs_args *nfs_args, + struct netbuf *nfs_args_addr, struct knetconfig *nfs_args_knconf, + union fh_buffer *nfs_args_fh, struct sec_data *nfs_args_secdata, + char *fstype, char *options, int options_size) +{ + struct stat statbuf; + int i, rv, use_tcp; + + /* Sanity check the incomming Linux request. 
*/ + if ((lx_nmd->nmd_rsize < 0) || (lx_nmd->nmd_wsize < 0) || + (lx_nmd->nmd_timeo < 0) || (lx_nmd->nmd_retrans < 0) || + (lx_nmd->nmd_acregmin < 0) || (lx_nmd->nmd_acregmax < 0) || + (lx_nmd->nmd_acdirmax < 0)) { + return (-EINVAL); + } + + /* + * Additional sanity checks of incomming request. + * + * Some of the sanity checks below should probably return + * EINVAL (or some other error code) instead or ENOTSUP, + * but without experiminting on Linux to see how it + * deals with certain strange values there is no way + * to really know what we should return, hence we return + * ENOTSUP to tell us that eventually if we see some + * application hitting the problem we can go to a real + * Linux system, figure out how it deals with the situation + * and update our code to handle it in the same fashion. + */ + if (lx_nmd->nmd_version != 4) { + lx_unsupported("unsupported nfs mount request, " + "unrecognized NFS mount structure: %d\n", + lx_nmd->nmd_version); + return (-ENOTSUP); + } + if ((lx_nmd->nmd_flags & ~LX_NFS_MOUNT_SUPPORTED) != 0) { + lx_unsupported("unsupported nfs mount request, " + "flags: 0x%x\n", lx_nmd->nmd_flags); + return (-ENOTSUP); + } + if (lx_nmd->nmd_addr.sin_family != AF_INET) { + lx_unsupported("unsupported nfs mount request, " + "transport address family: 0x%x\n", + lx_nmd->nmd_addr.sin_family); + return (-ENOTSUP); + } + for (i = 0; i < LX_NMD_MAXHOSTNAMELEN; i++) { + if (lx_nmd->nmd_hostname[i] == '\0') + break; + } + if (i == 0) { + lx_unsupported("unsupported nfs mount request, " + "no hostname specified\n"); + return (-ENOTSUP); + } + if (i == LX_NMD_MAXHOSTNAMELEN) { + lx_unsupported("unsupported nfs mount request, " + "hostname not terminated\n"); + return (-ENOTSUP); + } + if (lx_nmd->nmd_namlen < i) { + lx_unsupported("unsupported nfs mount request, " + "invalid namlen value: 0x%x\n", lx_nmd->nmd_namlen); + return (-ENOTSUP); + } + if (lx_nmd->nmd_bsize != 0) { + lx_unsupported("unsupported nfs mount request, " + "bsize value: 
0x%x\n", lx_nmd->nmd_bsize); + return (-ENOTSUP); + } + + /* Initialize and clear the output structure pointers passed in. */ + bzero(nfs_args, sizeof (*nfs_args)); + bzero(nfs_args_addr, sizeof (*nfs_args_addr)); + bzero(nfs_args_knconf, sizeof (*nfs_args_knconf)); + bzero(nfs_args_fh, sizeof (*nfs_args_fh)); + bzero(nfs_args_secdata, sizeof (*nfs_args_secdata)); + nfs_args->addr = nfs_args_addr; + nfs_args->knconf = nfs_args_knconf; + nfs_args->fh = (caddr_t)nfs_args_fh; + nfs_args->nfs_ext_u.nfs_extB.secdata = nfs_args_secdata; + + /* Check if we're using tcp. */ + use_tcp = (lx_nmd->nmd_flags & LX_NFS_MOUNT_TCP) ? 1 : 0; + + /* + * These seem to be the default flags used by Solaris for v2 and v3 + * nfs mounts. + * + * Don't bother with NFSMNT_TRYRDMA since we always specify a + * transport (either udp or tcp). + */ + nfs_args->flags = NFSMNT_NEWARGS | NFSMNT_KNCONF | NFSMNT_INT | + NFSMNT_HOSTNAME; + + /* Translate some Linux mount flags into Solaris mount flags. */ + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_SOFT) + nfs_args->flags |= NFSMNT_SOFT; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_INTR) + nfs_args->flags |= NFSMNT_INT; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_POSIX) + nfs_args->flags |= NFSMNT_POSIX; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOCTO) + nfs_args->flags |= NFSMNT_NOCTO; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOAC) + nfs_args->flags |= NFSMNT_NOAC; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NONLM) + nfs_args->flags |= NFSMNT_LLOCK; + + if ((lx_nmd->nmd_flags & LX_NFS_MOUNT_VER3) != 0) { + (void) strcpy(fstype, "nfs3"); + if ((rv = i_add_option_int("vers", 3, + options, options_size)) != 0) + return (rv); + + if (lx_nmd->nmd_root.lx_fh3_length > + sizeof (nfs_args_fh->fh3.fh3_u.data)) { + lx_unsupported("unsupported nfs mount request, " + "nfs file handle length: 0x%x\n", + lx_nmd->nmd_root.lx_fh3_length); + return (-ENOTSUP); + } + + /* Set the v3 file handle info. 
*/ + nfs_args_fh->fh3.fh3_length = lx_nmd->nmd_root.lx_fh3_length; + bcopy(&lx_nmd->nmd_root.lx_fh3_data, + nfs_args_fh->fh3.fh3_u.data, + lx_nmd->nmd_root.lx_fh3_length); + } else { + /* + * Assume nfs v2. Note that this could also be a v1 + * mount request but there doesn't seem to be any difference + * in the parameters passed to the Linux mount system + * call for v1 or v2 mounts so there is no way of really + * knowing. + */ + (void) strcpy(fstype, "nfs"); + if ((rv = i_add_option_int("vers", 2, + options, options_size)) != 0) + return (rv); + + /* Solaris seems to add this flag when using v2. */ + nfs_args->flags |= NFSMNT_SECDEFAULT; + + /* Set the v2 file handle info. */ + bcopy(&lx_nmd->nmd_old_root, + nfs_args_fh, sizeof (nfs_args_fh->fh2)); + } + + /* + * We can't use getnetconfig() here because there is no netconfig + * database in linux. + */ + nfs_args_knconf->knc_protofmly = "inet"; + if (use_tcp) { + /* + * TCP uses NC_TPI_COTS_ORD semantics. + * See /etc/netconfig. + */ + nfs_args_knconf->knc_semantics = NC_TPI_COTS_ORD; + nfs_args_knconf->knc_proto = "tcp"; + if ((rv = i_add_option("proto=tcp", + options, options_size)) != 0) + return (rv); + if (stat("/dev/tcp", &statbuf) != 0) + return (-errno); + nfs_args_knconf->knc_rdev = statbuf.st_rdev; + } else { + /* + * Assume UDP. UDP uses NC_TPI_CLTS semantics. + * See /etc/netconfig. + */ + nfs_args_knconf->knc_semantics = NC_TPI_CLTS; + nfs_args_knconf->knc_proto = "udp"; + if ((rv = i_add_option("proto=udp", + options, options_size)) != 0) + return (rv); + if (stat("/dev/udp", &statbuf) != 0) + return (-errno); + nfs_args_knconf->knc_rdev = statbuf.st_rdev; + } + + /* Set the server address. */ + nfs_args_addr->maxlen = nfs_args_addr->len = + sizeof (struct sockaddr_in); + nfs_args_addr->buf = (char *)&lx_nmd->nmd_addr; + + /* Set the server hostname string. */ + nfs_args->hostname = lx_nmd->nmd_hostname; + + /* Translate Linux nfs mount parameters into Solaris mount options. 
*/ + if (lx_nmd->nmd_rsize != LX_NMD_DEFAULT_RSIZE) { + if ((rv = i_add_option_int("rsize", lx_nmd->nmd_rsize, + options, options_size)) != 0) + return (rv); + nfs_args->rsize = lx_nmd->nmd_rsize; + nfs_args->flags |= NFSMNT_RSIZE; + } + if (lx_nmd->nmd_wsize != LX_NMD_DEFAULT_WSIZE) { + if ((rv = i_add_option_int("wsize", lx_nmd->nmd_wsize, + options, options_size)) != 0) + return (rv); + nfs_args->wsize = lx_nmd->nmd_wsize; + nfs_args->flags |= NFSMNT_WSIZE; + } + if ((rv = i_add_option_int("timeo", lx_nmd->nmd_timeo, + options, options_size)) != 0) + return (rv); + nfs_args->timeo = lx_nmd->nmd_timeo; + nfs_args->flags |= NFSMNT_TIMEO; + if ((rv = i_add_option_int("retrans", lx_nmd->nmd_retrans, + options, options_size)) != 0) + return (rv); + nfs_args->retrans = lx_nmd->nmd_retrans; + nfs_args->flags |= NFSMNT_RETRANS; + if ((rv = i_add_option_int("acregmin", lx_nmd->nmd_acregmin, + options, options_size)) != 0) + return (rv); + nfs_args->acregmin = lx_nmd->nmd_acregmin; + nfs_args->flags |= NFSMNT_ACREGMIN; + if ((rv = i_add_option_int("acregmax", lx_nmd->nmd_acregmax, + options, options_size)) != 0) + return (rv); + nfs_args->acregmax = lx_nmd->nmd_acregmax; + nfs_args->flags |= NFSMNT_ACREGMAX; + if ((rv = i_add_option_int("acdirmin", lx_nmd->nmd_acdirmin, + options, options_size)) != 0) + return (rv); + nfs_args->acdirmin = lx_nmd->nmd_acdirmin; + nfs_args->flags |= NFSMNT_ACDIRMIN; + if ((rv = i_add_option_int("acdirmax", lx_nmd->nmd_acdirmax, + options, options_size)) != 0) + return (rv); + nfs_args->acdirmax = lx_nmd->nmd_acdirmax; + nfs_args->flags |= NFSMNT_ACDIRMAX; + + /* We only support nfs with a security type of AUTH_SYS. 
*/ + nfs_args->nfs_args_ext = NFS_ARGS_EXTB; + nfs_args_secdata->secmod = AUTH_SYS; + nfs_args_secdata->rpcflavor = AUTH_SYS; + nfs_args_secdata->flags = 0; + nfs_args_secdata->uid = 0; + nfs_args_secdata->data = NULL; + nfs_args->nfs_ext_u.nfs_extB.next = NULL; + + /* + * The Linux nfs mount command seems to pass an open socket fd + * to the kernel during the mount system call. We don't need + * this fd on Solaris so just close it. + */ + (void) close(lx_nmd->nmd_fd); + + return (0); +} + +int +lx_mount(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + /* Linux input arguments. */ + const char *sourcep = (const char *)p1; + const char *targetp = (const char *)p2; + const char *fstypep = (const char *)p3; + unsigned int flags = (unsigned int)p4; + const void *datap = (const void *)p5; + + /* Variables needed for all mounts. */ + char source[MAXPATHLEN], target[MAXPATHLEN]; + char fstype[MAXPATHLEN], options[MAXPATHLEN]; + int sflags, rv; + + /* Variables needed for nfs mounts. */ + lx_nfs_mount_data_t lx_nmd; + struct nfs_args nfs_args; + struct netbuf nfs_args_addr; + struct knetconfig nfs_args_knconf; + union fh_buffer nfs_args_fh; + struct sec_data nfs_args_secdata; + char *sdataptr = NULL; + int sdatalen = 0; + + /* Initialize Solaris mount arguments. */ + sflags = MS_OPTIONSTR; + options[0] = '\0'; + sdatalen = 0; + + /* Copy in parameters that are always present. 
*/ + rv = uucopystr((void *)sourcep, &source, sizeof (source)); + if ((rv == -1) || (rv == sizeof (source))) + return (-EFAULT); + + rv = uucopystr((void *)targetp, &target, sizeof (target)); + if ((rv == -1) || (rv == sizeof (target))) + return (-EFAULT); + + rv = uucopystr((void *)fstypep, &fstype, sizeof (fstype)); + if ((rv == -1) || (rv == sizeof (fstype))) + return (-EFAULT); + + lx_debug("\tlinux mount source: %s", source); + lx_debug("\tlinux mount target: %s", target); + lx_debug("\tlinux mount fstype: %s", fstype); + + /* Make sure we support the requested mount flags. */ + if ((flags & ~LX_MS_SUPPORTED) != 0) { + lx_unsupported( + "unsupported mount flags: 0x%x", flags); + return (-ENOTSUP); + } + + /* Do filesystem specific mount work. */ + if (flags & LX_MS_BIND) { + + /* If MS_BIND is set, we turn this into a lofs mount. */ + (void) strcpy(fstype, "lofs"); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. */ + if (i_lx_opt_verify(options, lofs_options) == 0) { + lx_unsupported("unsupported lofs mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "proc") == 0) { + + /* Translate proc mount requests to lx_proc requests. */ + (void) strcpy(fstype, "lx_proc"); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. */ + if (i_lx_opt_verify(options, lx_proc_options) == 0) { + lx_unsupported("unsupported lx_proc mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "autofs") == 0) { + + /* Translate proc mount requests to lx_afs requests. 
*/ + (void) strcpy(fstype, LX_AUTOFS_NAME); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. */ + if (i_lx_opt_verify(options, lx_autofs_options) == 0) { + lx_unsupported("unsupported lx_autofs mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "nfs") == 0) { + + /* + * Copy in Linux mount options. Note that for Linux + * nfs mounts the mount options pointer (which normally + * points to a string) points to a structure. + */ + if (uucopy((void *)datap, &lx_nmd, sizeof (lx_nmd)) < 0) + return (-errno); + + /* + * For Solaris nfs mounts, the kernel expects a special + * strucutre, but a pointer to this structure is passed + * in via an extra parameter (sdataptr below.) + */ + if ((rv = i_make_nfs_args(&lx_nmd, &nfs_args, + &nfs_args_addr, &nfs_args_knconf, &nfs_args_fh, + &nfs_args_secdata, fstype, + options, sizeof (options))) != 0) + return (rv); + + /* + * For nfs mounts we need to tell the mount system call + * to expect extra parameters. + */ + sflags |= MS_DATA; + sdataptr = (char *)&nfs_args; + sdatalen = sizeof (nfs_args); + } else { + lx_unsupported( + "unsupported mount filesystem type: %s", fstype); + return (-ENOTSUP); + } + + /* Convert some Linux flags to Solaris flags. */ + if (flags & LX_MS_RDONLY) + sflags |= MS_RDONLY; + if (flags & LX_MS_NOSUID) + sflags |= MS_NOSUID; + if (flags & LX_MS_REMOUNT) + sflags |= MS_REMOUNT; + + /* Convert some Linux flags to Solaris option strings. 
*/ + if ((flags & LX_MS_NODEV) && + ((rv = i_add_option("nodev", options, sizeof (options))) != 0)) + return (rv); + if ((flags & LX_MS_NOEXEC) && + ((rv = i_add_option("noexec", options, sizeof (options))) != 0)) + return (rv); + if ((flags & LX_MS_NOATIME) && + ((rv = i_add_option("noatime", options, sizeof (options))) != 0)) + return (rv); + + lx_debug("\tsolaris mount fstype: %s", fstype); + lx_debug("\tsolaris mount options: \"%s\"", options); + + return (mount(source, target, sflags, fstype, sdataptr, sdatalen, + options, sizeof (options)) ? -errno : 0); +} + +/* + * umount() is identical, though it is implemented on top of umount2() in + * Solaris so it cannot be a pass-thru system call. + */ +int +lx_umount(uintptr_t p1) +{ + return (umount((char *)p1) ? -errno : 0); +} + +/* + * The Linux umount2() system call is identical but has a different value for + * MNT_FORCE (the logical equivalent to MS_FORCE). + */ +#define LX_MNT_FORCE 0x1 + +int +lx_umount2(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + int flags = 0; + + if (p2 & ~LX_MNT_FORCE) + return (-EINVAL); + + if (p2 & LX_MNT_FORCE) + flags |= MS_FORCE; + + return (umount2(path, flags) ? -errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/open.c b/usr/src/lib/brand/lx/lx_brand/common/open.c new file mode 100644 index 0000000000..e55d8fabe3 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/open.c @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/inttypes.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <libintl.h> +#include <stdio.h> + +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_fcntl.h> +#include <sys/lx_misc.h> + +static int +ltos_open_flags(uintptr_t p2) +{ + int flags; + + if ((p2 & O_ACCMODE) == LX_O_RDONLY) + flags = O_RDONLY; + else if ((p2 & O_ACCMODE) == LX_O_WRONLY) + flags = O_WRONLY; + else + flags = O_RDWR; + + if (p2 & LX_O_CREAT) { + flags |= O_CREAT; + } + + if (p2 & LX_O_EXCL) + flags |= O_EXCL; + if (p2 & LX_O_NOCTTY) + flags |= O_NOCTTY; + if (p2 & LX_O_TRUNC) + flags |= O_TRUNC; + if (p2 & LX_O_APPEND) + flags |= O_APPEND; + if (p2 & LX_O_NONBLOCK) + flags |= O_NONBLOCK; + if (p2 & LX_O_SYNC) + flags |= O_SYNC; + if (p2 & LX_O_LARGEFILE) + flags |= O_LARGEFILE; + if (p2 & LX_O_NOFOLLOW) + flags |= O_NOFOLLOW; + + /* + * Linux uses the LX_O_DIRECT flag to do raw, synchronous I/O to the + * device backing the fd in question. Solaris doesn't have similar + * functionality, but we can attempt to simulate it using the flags + * (O_RSYNC|O_SYNC) and directio(3C). 
+ * + * The LX_O_DIRECT flag also requires that the transfer size and + * alignment of I/O buffers be a multiple of the logical block size for + * the underlying file system, but frankly there isn't an easy way to + * support that functionality without doing something like adding an + * fcntl(2) flag to denote LX_O_DIRECT mode. + * + * Since LX_O_DIRECT is merely a performance advisory, we'll just + * emulate what we can and trust that the only applications expecting + * an error when performing I/O from a misaligned buffer or when + * passing a transfer size is not a multiple of the underlying file + * system block size will be test suites. + */ + if (p2 & LX_O_DIRECT) + flags |= (O_RSYNC|O_SYNC); + + return (flags); +} + +static int +lx_open_postprocess(int fd, uintptr_t p2) +{ + struct stat64 statbuf; + + /* + * Check the file type AFTER opening the file to avoid a race condition + * where the file we want to open could change types between a stat64() + * and an open(). + */ + if (p2 & LX_O_DIRECTORY) { + if (fstat64(fd, &statbuf) < 0) { + int ret = -errno; + + (void) close(fd); + return (ret); + } else if (!S_ISDIR(statbuf.st_mode)) { + (void) close(fd); + return (-ENOTDIR); + } + } + + if (p2 & LX_O_DIRECT) + (void) directio(fd, DIRECTIO_ON); + + /* + * Set the ASYNC flag if passsed. 
+ */ + if (p2 & LX_O_ASYNC) { + if (fcntl(fd, F_SETFL, FASYNC) < 0) { + int ret = -errno; + + (void) close(fd); + return (ret); + } + } + + return (fd); +} + +int +lx_openat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int atfd = (int)ext1; + int flags, fd; + mode_t mode = 0; + char *path = (char *)p1; + + if (atfd == LX_AT_FDCWD) + atfd = AT_FDCWD; + + flags = ltos_open_flags(p2); + + if (flags & O_CREAT) { + mode = (mode_t)p3; + } + + lx_debug("\topenat(%d, %s, 0%o, 0%o)", atfd, path, flags, mode); + + if ((fd = openat(atfd, path, flags, mode)) < 0) + return (-errno); + + return (lx_open_postprocess(fd, p2)); +} + +int +lx_open(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int flags, fd; + mode_t mode = 0; + char *path = (char *)p1; + + flags = ltos_open_flags(p2); + + if (flags & O_CREAT) { + mode = (mode_t)p3; + } + + lx_debug("\topen(%s, 0%o, 0%o)", path, flags, mode); + + if ((fd = open(path, flags, mode)) < 0) + return (-errno); + + return (lx_open_postprocess(fd, p2)); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/pgrp.c b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c new file mode 100644 index 0000000000..1eada7c185 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c @@ -0,0 +1,157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <unistd.h> +#include <errno.h> +#include <sys/lx_misc.h> + +int +lx_getpgrp(void) +{ + int ret; + + ret = getpgrp(); + + /* + * If the pgrp is that of the init process, return the value Linux + * expects. + */ + if (ret == zoneinit_pid) + return (LX_INIT_PGID); + + return ((ret == -1) ? -errno : ret); +} + +int +lx_getpgid(uintptr_t p1) +{ + pid_t spid; + int pid = (int)p1; + int ret; + + if (pid < 0) + return (-ESRCH); + + /* + * If the supplied pid matches that of the init process, return + * the pgid Linux expects. + */ + if (pid == zoneinit_pid) + return (LX_INIT_PGID); + + if ((ret = lx_lpid_to_spid(pid, &spid)) < 0) + return (ret); + + ret = getpgid(spid); + + /* + * If the pgid is that of the init process, return the value Linux + * expects. + */ + if (ret == zoneinit_pid) + return (LX_INIT_PGID); + + return ((ret == -1) ? -errno : ret); +} + +int +lx_setpgid(uintptr_t p1, uintptr_t p2) +{ + pid_t pid = (pid_t)p1; + pid_t pgid = (pid_t)p2; + pid_t spid, spgid; + int ret; + + if (pid < 0) + return (-ESRCH); + + if (pgid < 0) + return (-EINVAL); + + if ((ret = lx_lpid_to_spid(pid, &spid)) < 0) + return (ret); + + if (pgid == 0) + spgid = spid; + else if ((ret = lx_lpid_to_spid(pgid, &spgid)) < 0) + return (ret); + + ret = setpgid(spid, spgid); + + return ((ret == 0) ? 0 : -errno); +} + +int +lx_getsid(uintptr_t p1) +{ + pid_t spid; + int pid = (int)p1; + int ret; + + if (pid < 0) + return (-ESRCH); + + /* + * If the supplied matches that of the init process, return the value + * Linux expects. 
+ */ + if (pid == zoneinit_pid) + return (LX_INIT_SID); + + if ((ret = lx_lpid_to_spid(pid, &spid)) < 0) + return (ret); + + ret = getsid(spid); + + /* + * If the sid is that of the init process, return the value Linux + * expects. + */ + if (ret == zoneinit_pid) + return (LX_INIT_SID); + + return ((ret == -1) ? -errno : ret); +} + +int +lx_setsid(void) +{ + int ret; + + ret = setsid(); + + /* + * If the pgid is that of the init process, return the value Linux + * expects. + */ + if (ret == zoneinit_pid) + return (LX_INIT_SID); + + return ((ret == -1) ? -errno : ret); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c new file mode 100644 index 0000000000..5eba21c652 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c @@ -0,0 +1,215 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <alloca.h> +#include <signal.h> +#include <strings.h> +#include <sys/param.h> +#include <sys/brand.h> +#include <sys/poll.h> +#include <sys/syscall.h> +#include <sys/lx_debug.h> +#include <sys/lx_poll.h> +#include <sys/lx_syscall.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> + +extern int select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, + struct timeval *tv); + +int +lx_select(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + int nfds = (int)p1; + fd_set *rfdsp = NULL; + fd_set *wfdsp = NULL; + fd_set *efdsp = NULL; + struct timeval tv, *tvp = NULL; + int fd_set_len = howmany(nfds, 8); + int r; + hrtime_t start = NULL, end; + + lx_debug("\tselect(%d, 0x%p, x%p, 0x%p. 0x%p, 0x%p)", + nfds, rfdsp, wfdsp, efdsp, tvp); + + if (nfds > 0) { + if (p2 != NULL) { + rfdsp = SAFE_ALLOCA(fd_set_len); + if (rfdsp == NULL) + return (-ENOMEM); + if (uucopy((void *)p2, rfdsp, fd_set_len) != 0) + return (-errno); + } + if (p3 != NULL) { + wfdsp = SAFE_ALLOCA(fd_set_len); + if (wfdsp == NULL) + return (-ENOMEM); + if (uucopy((void *)p3, wfdsp, fd_set_len) != 0) + return (-errno); + } + if (p4 != NULL) { + efdsp = SAFE_ALLOCA(fd_set_len); + if (efdsp == NULL) + return (-ENOMEM); + if (uucopy((void *)p4, efdsp, fd_set_len) != 0) + return (-errno); + } + } + if (p5 != NULL) { + tvp = &tv; + if (uucopy((void *)p5, &tv, sizeof (tv)) != 0) + return (-errno); + start = gethrtime(); + } + + if (nfds >= FD_SETSIZE) + r = select_large_fdset(nfds, rfdsp, wfdsp, efdsp, tvp); + else + r = select(nfds, rfdsp, wfdsp, efdsp, tvp); + if (r < 0) + return (-errno); + + if (tvp != NULL) { + long long tv_total; + + /* + * Linux updates the timeval parameter for select() calls + * with the amount of time that left before the select + * would have timed out. 
+ */ + end = gethrtime(); + tv_total = (tv.tv_sec * MICROSEC) + tv.tv_usec; + tv_total -= ((end - start) / (NANOSEC / MICROSEC)); + if (tv_total < 0) { + tv.tv_sec = 0; + tv.tv_usec = 0; + } else { + tv.tv_sec = tv_total / MICROSEC; + tv.tv_usec = tv_total % MICROSEC; + } + + if (uucopy(&tv, (void *)p5, sizeof (tv)) != 0) + return (-errno); + } + + if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0)) + return (-errno); + if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0)) + return (-errno); + if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0)) + return (-errno); + + return (r); +} + +int +lx_poll(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + struct pollfd *lfds, *sfds; + nfds_t nfds = (nfds_t)p2; + int fds_size, i, rval, revents; + + /* + * Note: we are assuming that the Linux and Solaris pollfd + * structures are identical. Copy in the linux poll structure. + */ + fds_size = sizeof (struct pollfd) * nfds; + lfds = (struct pollfd *)SAFE_ALLOCA(fds_size); + if (lfds == NULL) + return (-ENOMEM); + if (uucopy((void *)p1, lfds, fds_size) != 0) + return (-errno); + + /* + * The poll system call modifies the poll structures passed in + * so we'll need to make an exra copy of them. + */ + sfds = (struct pollfd *)SAFE_ALLOCA(fds_size); + if (sfds == NULL) + return (-ENOMEM); + + /* Convert the Linux events bitmask into the Solaris equivalent. */ + for (i = 0; i < nfds; i++) { + /* + * If the caller is polling for an unsupported event, we + * have to bail out. 
+ */ + if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) { + lx_unsupported("unsupported poll events requested: " + "events=0x%x", lfds[i].events); + return (-ENOTSUP); + } + + sfds[i].fd = lfds[i].fd; + sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS; + if (lfds[i].events & LX_POLLWRNORM) + sfds[i].events |= POLLWRNORM; + if (lfds[i].events & LX_POLLWRBAND) + sfds[i].events |= POLLWRBAND; + sfds[i].revents = 0; + } + + lx_debug("\tpoll(0x%p, %u, %d)", sfds, nfds, (int)p3); + + if ((rval = poll(sfds, nfds, (int)p3)) < 0) + return (-errno); + + /* Convert the Solaris revents bitmask into the Linux equivalent */ + for (i = 0; i < nfds; i++) { + revents = sfds[i].revents & LX_POLL_COMMON_EVENTS; + if (sfds[i].revents & POLLWRBAND) + revents |= LX_POLLWRBAND; + + /* + * Be carefull because on solaris POLLOUT and POLLWRNORM + * are defined to the same values but on linux they + * are not. + */ + if (sfds[i].revents & POLLOUT) { + if ((lfds[i].events & LX_POLLOUT) == 0) + revents &= ~LX_POLLOUT; + if (lfds[i].events & LX_POLLWRNORM) + revents |= LX_POLLWRNORM; + } + + lfds[i].revents = revents; + } + + /* Copy out the results */ + if (uucopy(lfds, (void *)p1, fds_size) != 0) + return (-errno); + + return (rval); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/priority.c b/usr/src/lib/brand/lx/lx_brand/common/priority.c new file mode 100644 index 0000000000..1519c18a71 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/priority.c @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <sys/types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> +#include <sys/lx_types.h> +#include <sys/resource.h> +#include <sys/lx_misc.h> + +int +lx_getpriority(uintptr_t p1, uintptr_t p2) +{ + uint_t which = (int)p1; + id_t who = (id_t)p2; + int ret; + + /* + * The only valid values for 'which' are positive integers, and unlike + * Solaris, linux doesn't support anything past PRIO_USER. + */ + if (which > PRIO_USER) + return (-EINVAL); + + lx_debug("\tgetpriority(%d, %d)", which, who); + + errno = 0; + + if ((which == PRIO_PROCESS) && (who == 1)) + who = zoneinit_pid; + + ret = getpriority(which, who); + if (ret == -1 && errno != 0) + return (-errno); + + /* + * The return value of the getpriority syscall is biased by 20 to avoid + * returning negative values when successful. + */ + return (20 - ret); +} + +int +lx_setpriority(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int which = (int)p1; + id_t who = (id_t)p2; + int prio = (int)p3; + int rval; + + if (which > PRIO_USER) + return (-EINVAL); + + lx_debug("\tsetpriority(%d, %d, %d)", which, who, prio); + + if ((which == PRIO_PROCESS) && (who == 1)) + who = zoneinit_pid; + + rval = setpriority(which, who, prio); + + return ((rval == -1) ? 
-errno : rval); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c new file mode 100644 index 0000000000..0c9dd5e461 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c @@ -0,0 +1,2114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_signal.h> +#include <sys/lx_thread.h> +#include <sys/lwp.h> +#include <unistd.h> +#include <fcntl.h> +#include <procfs.h> +#include <sys/frame.h> +#include <strings.h> +#include <signal.h> +#include <stddef.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <sys/auxv.h> +#include <thread.h> +#include <pthread.h> +#include <synch.h> +#include <elf.h> +#include <ieeefp.h> +#include <assert.h> +#include <libintl.h> + +/* + * Linux ptrace compatibility. + * + * The brand support for ptrace(2) is built on top of the Solaris /proc + * interfaces, mounted at /native/proc in the zone. 
This gets quite + * complicated due to the way ptrace works and the Solaris realization of the + * Linux threading model. + * + * ptrace can only interact with a process if we are tracing it, and it is + * currently stopped. There are two ways a process can begin tracing another + * process: + * + * PTRACE_TRACEME + * + * A child process can use PTRACE_TRACEME to indicate that it wants to be + * traced by the parent. This sets the ptrace compatibility flag in /proc + * which causes the ptrace consumer to be notified through the wait(2) + * system call of events of interest. PTRACE_TRACEME is typically used by + * the debugger by forking a process, using PTRACE_TRACEME, and finally + * doing an exec of the specified program. + * + * + * PTRACE_ATTACH + * + * We can attach to a process using PTRACE_ATTACH. This is considerably + * more complicated than the previous case. On Linux, the traced process is + * effectively reparented to the ptrace consumer so that event notification + * can go through the normal wait(2) system call. Solaris has no such + * ability to reparent a process (nor should it) so some trickery was + * required. + * + * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child + * process. The monitor enables the /proc ptrace flag for itself and uses + * the native /proc mechanisms to observe the traced process and wait for + * events of interest. When the traced process stops, the monitor process + * sends itself a SIGTRAP thus rousting its parent process (the ptrace + * consumer) out of wait(2). We then translate the process id and status + * code from wait(2) to those of the traced process. + * + * To detach from the process we just have to clean up tracing flags and + * clean up the monitor. + * + * ptrace can only interact with a process if we have traced it, and it is + * currently stopped (see is_traced()). For threads, there's no way to + * distinguish whether ptrace() has been called for all threads or some + * subset. 
Since most clients will be tracing all threads, and erroneously + * allowing ptrace to access a non-traced thread is non-fatal (or at least + * would be fatal on linux), we ignore this aspect of the problem. + */ + +#define LX_PTRACE_TRACEME 0 +#define LX_PTRACE_PEEKTEXT 1 +#define LX_PTRACE_PEEKDATA 2 +#define LX_PTRACE_PEEKUSER 3 +#define LX_PTRACE_POKETEXT 4 +#define LX_PTRACE_POKEDATA 5 +#define LX_PTRACE_POKEUSER 6 +#define LX_PTRACE_CONT 7 +#define LX_PTRACE_KILL 8 +#define LX_PTRACE_SINGLESTEP 9 +#define LX_PTRACE_GETREGS 12 +#define LX_PTRACE_SETREGS 13 +#define LX_PTRACE_GETFPREGS 14 +#define LX_PTRACE_SETFPREGS 15 +#define LX_PTRACE_ATTACH 16 +#define LX_PTRACE_DETACH 17 +#define LX_PTRACE_GETFPXREGS 18 +#define LX_PTRACE_SETFPXREGS 19 +#define LX_PTRACE_SYSCALL 24 + +/* + * This corresponds to the user_i387_struct Linux structure. + */ +typedef struct lx_user_fpregs { + long lxuf_cwd; + long lxuf_swd; + long lxuf_twd; + long lxuf_fip; + long lxuf_fcs; + long lxuf_foo; + long lxuf_fos; + long lxuf_st_space[20]; +} lx_user_fpregs_t; + +/* + * This corresponds to the user_fxsr_struct Linux structure. + */ +typedef struct lx_user_fpxregs { + uint16_t lxux_cwd; + uint16_t lxux_swd; + uint16_t lxux_twd; + uint16_t lxux_fop; + long lxux_fip; + long lxux_fcs; + long lxux_foo; + long lxux_fos; + long lxux_mxcsr; + long lxux_reserved; + long lxux_st_space[32]; + long lxux_xmm_space[32]; + long lxux_padding[56]; +} lx_user_fpxregs_t; + +/* + * This corresponds to the user_regs_struct Linux structure. 
+ */ +typedef struct lx_user_regs { + long lxur_ebx; + long lxur_ecx; + long lxur_edx; + long lxur_esi; + long lxur_edi; + long lxur_ebp; + long lxur_eax; + long lxur_xds; + long lxur_xes; + long lxur_xfs; + long lxur_xgs; + long lxur_orig_eax; + long lxur_eip; + long lxur_xcs; + long lxur_eflags; + long lxur_esp; + long lxur_xss; +} lx_user_regs_t; + +typedef struct lx_user { + lx_user_regs_t lxu_regs; + int lxu_fpvalid; + lx_user_fpregs_t lxu_i387; + ulong_t lxu_tsize; + ulong_t lxu_dsize; + ulong_t lxu_ssize; + ulong_t lxu_start_code; + ulong_t lxu_start_stack; + long lxu_signal; + int lxu_reserved; + lx_user_regs_t *lxu_ar0; + lx_user_fpregs_t *lxu_fpstate; + ulong_t lxu_magic; + char lxu_comm[32]; + int lxu_debugreg[8]; +} lx_user_t; + +typedef struct ptrace_monitor_map { + struct ptrace_monitor_map *pmm_next; /* next pointer */ + pid_t pmm_monitor; /* monitor child process */ + pid_t pmm_target; /* traced Linux pid */ + pid_t pmm_pid; /* Solaris pid */ + lwpid_t pmm_lwpid; /* Solaris lwpid */ + uint_t pmm_exiting; /* detached */ +} ptrace_monitor_map_t; + +typedef struct ptrace_state_map { + struct ptrace_state_map *psm_next; /* next pointer */ + pid_t psm_pid; /* Solaris pid */ + uintptr_t psm_debugreg[8]; /* debug registers */ +} ptrace_state_map_t; + +static ptrace_monitor_map_t *ptrace_monitor_map = NULL; +static ptrace_state_map_t *ptrace_state_map = NULL; +static mutex_t ptrace_map_mtx = DEFAULTMUTEX; + +extern void *_START_; + +static sigset_t blockable_sigs; + +#pragma init(ptrace_init) +void +ptrace_init(void) +{ + (void) sigfillset(&blockable_sigs); + (void) sigdelset(&blockable_sigs, SIGKILL); + (void) sigdelset(&blockable_sigs, SIGSTOP); +} + +/* + * Given a pid, open the named file under /native/proc/<pid>/name using the + * given mode. 
+ */ +static int +open_procfile(pid_t pid, int mode, const char *name) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "/native/proc/%d/%s", pid, name); + + return (open(path, mode)); +} + +/* + * Given a pid and lwpid, open the named file under + * /native/proc/<pid>/<lwpid>/name using the given mode. + */ +static int +open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "/native/proc/%d/lwp/%d/%s", + pid, lwpid, name); + + return (open(path, mode)); +} + +static int +get_status(pid_t pid, pstatus_t *psp) +{ + int fd; + + if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0) + return (-ESRCH); + + if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp) +{ + int fd; + + if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) + return (-ESRCH); + + if (read(fd, lsp, sizeof (lwpstatus_t)) != sizeof (lwpstatus_t)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static uintptr_t +syscall_regs(int fd, uintptr_t fp, pid_t pid) +{ + uintptr_t addr, done; + struct frame fr; + auxv_t auxv; + int afd; + Elf32_Phdr phdr; + + /* + * Try to walk the stack looking for a return address that corresponds + * to the traced process's lx_emulate_done symbol. This relies on the + * fact that the brand library in the traced process is the same as the + * brand library in this process (indeed, this is true of all processes + * in a given branded zone). + */ + + /* + * Find the base address for the brand library in the traced process + * by grabbing the AT_PHDR auxv entry, reading in the program header + * at that location and subtracting off the p_vaddr member. We use + * this to compute the location of lx_emulate done in the traced + * process. 
+ */ + if ((afd = open_procfile(pid, O_RDONLY, "auxv")) < 0) + return (0); + + do { + if (read(afd, &auxv, sizeof (auxv)) != sizeof (auxv)) { + (void) close(afd); + return (0); + } + } while (auxv.a_type != AT_PHDR); + + (void) close(afd); + + if (pread(fd, &phdr, sizeof (phdr), auxv.a_un.a_val) != sizeof (phdr)) { + lx_debug("failed to read brand library's phdr"); + return (0); + } + + addr = auxv.a_un.a_val - phdr.p_vaddr; + done = (uintptr_t)&lx_emulate_done - (uintptr_t)&_START_ + addr; + + fr.fr_savfp = fp; + + do { + addr = fr.fr_savfp; + if (pread(fd, &fr, sizeof (fr), addr) != sizeof (fr)) { + lx_debug("ptrace read failed for stack walk"); + return (0); + } + + if (addr >= fr.fr_savfp) { + lx_debug("ptrace stack not monotonically increasing " + "%p %p (%p)", addr, fr.fr_savfp, done); + return (0); + } + } while (fr.fr_savpc != done); + + /* + * The first argument to lx_emulate is known to be an lx_regs_t + * structure and the ABI specifies that it will be placed on the stack + * immediately preceeding the return address. + */ + addr += sizeof (fr); + if (pread(fd, &addr, sizeof (addr), addr) != sizeof (addr)) { + lx_debug("ptrace stack failed to read register set address"); + return (0); + } + + return (addr); +} + +static int +getregs(pid_t pid, lwpid_t lwpid, lx_user_regs_t *rp) +{ + lwpstatus_t status; + uintptr_t addr; + int fd, ret; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) + return (-ESRCH); + + /* + * If we find the syscall regs (and are therefore in an emulated + * syscall, use the register set at given address. Otherwise, use the + * registers as reported by /proc. 
+ */ + if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) { + lx_regs_t regs; + + if (pread(fd, ®s, sizeof (regs), addr) != sizeof (regs)) { + (void) close(fd); + lx_debug("ptrace failed to read register set"); + return (-EIO); + } + + (void) close(fd); + + rp->lxur_ebx = regs.lxr_ebx; + rp->lxur_ecx = regs.lxr_ecx; + rp->lxur_edx = regs.lxr_edx; + rp->lxur_esi = regs.lxr_esi; + rp->lxur_edi = regs.lxr_edi; + rp->lxur_ebp = regs.lxr_ebp; + rp->lxur_eax = regs.lxr_eax; + rp->lxur_xds = status.pr_reg[DS]; + rp->lxur_xes = status.pr_reg[ES]; + rp->lxur_xfs = status.pr_reg[FS]; + rp->lxur_xgs = regs.lxr_gs; + rp->lxur_orig_eax = regs.lxr_orig_eax; + rp->lxur_eip = regs.lxr_eip; + rp->lxur_xcs = status.pr_reg[CS]; + rp->lxur_eflags = status.pr_reg[EFL]; + rp->lxur_esp = regs.lxr_esp; + rp->lxur_xss = status.pr_reg[SS]; + + } else { + (void) close(fd); + + rp->lxur_ebx = status.pr_reg[EBX]; + rp->lxur_ecx = status.pr_reg[ECX]; + rp->lxur_edx = status.pr_reg[EDX]; + rp->lxur_esi = status.pr_reg[ESI]; + rp->lxur_edi = status.pr_reg[EDI]; + rp->lxur_ebp = status.pr_reg[EBP]; + rp->lxur_eax = status.pr_reg[EAX]; + rp->lxur_xds = status.pr_reg[DS]; + rp->lxur_xes = status.pr_reg[ES]; + rp->lxur_xfs = status.pr_reg[FS]; + rp->lxur_xgs = status.pr_reg[GS]; + rp->lxur_orig_eax = 0; + rp->lxur_eip = status.pr_reg[EIP]; + rp->lxur_xcs = status.pr_reg[CS]; + rp->lxur_eflags = status.pr_reg[EFL]; + rp->lxur_esp = status.pr_reg[UESP]; + rp->lxur_xss = status.pr_reg[SS]; + + /* + * If the target process has just returned from exec, it's not + * going to be sitting in the emulation function. In that case + * we need to manually fake up the values for %eax and orig_eax + * to indicate a successful return and that the traced process + * had called execve (respectively). 
+ */ + if (status.pr_why == PR_SYSEXIT && + status.pr_what == SYS_execve) { + rp->lxur_eax = 0; + rp->lxur_orig_eax = LX_SYS_execve; + } + } + + return (0); +} + +static int +setregs(pid_t pid, lwpid_t lwpid, const lx_user_regs_t *rp) +{ + long ctl[1 + sizeof (prgregset_t) / sizeof (long)]; + lwpstatus_t status; + uintptr_t addr; + int fd, ret; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + if ((fd = open_procfile(pid, O_RDWR, "as")) < 0) + return (-ESRCH); + + /* + * If we find the syscall regs (and are therefore in an emulated + * syscall, modify the register set at given address and set the + * remaining registers through the /proc interface. Otherwise just use + * the /proc interface to set register values; + */ + if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) { + lx_regs_t regs; + + regs.lxr_ebx = rp->lxur_ebx; + regs.lxr_ecx = rp->lxur_ecx; + regs.lxr_edx = rp->lxur_edx; + regs.lxr_esi = rp->lxur_esi; + regs.lxr_edi = rp->lxur_edi; + regs.lxr_ebp = rp->lxur_ebp; + regs.lxr_eax = rp->lxur_eax; + regs.lxr_gs = rp->lxur_xgs; + regs.lxr_orig_eax = rp->lxur_orig_eax; + regs.lxr_eip = rp->lxur_eip; + regs.lxr_esp = rp->lxur_esp; + + if (pwrite(fd, ®s, sizeof (regs), addr) != sizeof (regs)) { + (void) close(fd); + lx_debug("ptrace failed to write register set"); + return (-EIO); + } + + (void) close(fd); + + status.pr_reg[DS] = rp->lxur_xds; + status.pr_reg[ES] = rp->lxur_xes; + status.pr_reg[FS] = rp->lxur_xfs; + status.pr_reg[CS] = rp->lxur_xcs; + status.pr_reg[EFL] = rp->lxur_eflags; + status.pr_reg[SS] = rp->lxur_xss; + + } else { + (void) close(fd); + + status.pr_reg[EBX] = rp->lxur_ebx; + status.pr_reg[ECX] = rp->lxur_ecx; + status.pr_reg[EDX] = rp->lxur_edx; + status.pr_reg[ESI] = rp->lxur_esi; + status.pr_reg[EDI] = rp->lxur_edi; + status.pr_reg[EBP] = rp->lxur_ebp; + status.pr_reg[EAX] = rp->lxur_eax; + status.pr_reg[DS] = rp->lxur_xds; + status.pr_reg[ES] = rp->lxur_xes; + status.pr_reg[FS] = rp->lxur_xfs; + 
status.pr_reg[GS] = rp->lxur_xgs; + status.pr_reg[EIP] = rp->lxur_eip; + status.pr_reg[CS] = rp->lxur_xcs; + status.pr_reg[EFL] = rp->lxur_eflags; + status.pr_reg[UESP] = rp->lxur_esp; + status.pr_reg[SS] = rp->lxur_xss; + status.pr_reg[SS] = rp->lxur_xss; + } + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl[0] = PCSREG; + bcopy(status.pr_reg, &ctl[1], sizeof (prgregset_t)); + + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +getfpregs(pid_t pid, lwpid_t lwpid, lx_user_fpregs_t *rp) +{ + lwpstatus_t status; + struct _fpstate *fp; + char *data; + int ret, i; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state; + + rp->lxuf_cwd = fp->cw; + rp->lxuf_swd = fp->sw; + rp->lxuf_twd = fp->tag; + rp->lxuf_fip = fp->ipoff; + rp->lxuf_fcs = fp->cssel; + rp->lxuf_foo = fp->dataoff; + rp->lxuf_fos = fp->datasel; + + /* + * The Linux structure uses 10 bytes per floating-point register. + */ + data = (char *)&rp->lxuf_st_space[0]; + for (i = 0; i < 8; i++) { + bcopy(&fp->_st[i], data, 10); + data += 10; + } + + return (0); +} + +static int +setfpregs(pid_t pid, lwpid_t lwpid, const lx_user_fpregs_t *rp) +{ + lwpstatus_t status; + struct { + long cmd; + prfpregset_t regs; + } ctl; + struct _fpstate *fp = (struct _fpstate *)&ctl.regs; + char *data; + int ret, i, fd; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs)); + + fp->cw = rp->lxuf_cwd; + fp->sw = rp->lxuf_swd; + fp->tag = rp->lxuf_twd; + fp->ipoff = rp->lxuf_fip; + fp->cssel = rp->lxuf_fcs; + fp->dataoff = rp->lxuf_foo; + fp->datasel = rp->lxuf_fos; + + /* + * The Linux structure uses 10 bytes per floating-point register. 
+ */ + data = (char *)&rp->lxuf_st_space[0]; + for (i = 0; i < 8; i++) { + bcopy(data, &fp->_st[i], 10); + data += 10; + } + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl.cmd = PCSFPREG; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + + +static int +getfpxregs(pid_t pid, lwpid_t lwpid, lx_user_fpxregs_t *rp) +{ + lwpstatus_t status; + struct _fpstate *fp; + int ret, i; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state; + + rp->lxux_cwd = (uint16_t)fp->cw; + rp->lxux_swd = (uint16_t)fp->sw; + rp->lxux_twd = (uint16_t)fp->tag; + rp->lxux_fop = (uint16_t)(fp->cssel >> 16); + rp->lxux_fip = fp->ipoff; + rp->lxux_fcs = (uint16_t)fp->cssel; + rp->lxux_foo = fp->dataoff; + rp->lxux_fos = fp->datasel; + rp->lxux_mxcsr = status.pr_fpreg.fp_reg_set.fpchip_state.mxcsr; + + bcopy(fp->xmm, rp->lxux_xmm_space, sizeof (rp->lxux_xmm_space)); + bzero(rp->lxux_st_space, sizeof (rp->lxux_st_space)); + for (i = 0; i < 8; i++) { + bcopy(&fp->_st[i], &rp->lxux_st_space[i * 4], + sizeof (fp->_st[i])); + } + + return (0); +} + +static int +setfpxregs(pid_t pid, lwpid_t lwpid, const lx_user_fpxregs_t *rp) +{ + lwpstatus_t status; + struct { + long cmd; + prfpregset_t regs; + } ctl; + struct _fpstate *fp = (struct _fpstate *)&ctl.regs; + int ret, i, fd; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs)); + + fp->cw = rp->lxux_cwd; + fp->sw = rp->lxux_swd; + fp->tag = rp->lxux_twd; + fp->ipoff = rp->lxux_fip; + fp->cssel = rp->lxux_fcs | (rp->lxux_fop << 16); + fp->dataoff = rp->lxux_foo; + fp->datasel = rp->lxux_fos; + + bcopy(rp->lxux_xmm_space, fp->xmm, sizeof (rp->lxux_xmm_space)); + for (i = 0; i < 8; i++) { + bcopy(&rp->lxux_st_space[i * 4], &fp->_st[i], + sizeof (fp->_st[i])); + } + 
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl.cmd = PCSFPREG; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +/* + * Solaris does not allow a process to manipulate its own or some + * other process's debug registers. Linux ptrace(2) allows this + * and gdb manipulates them for its watchpoint implementation. + * + * We keep a pseudo set of debug registers for each traced process + * and map their contents into the appropriate PCWATCH /proc + * operations when they are activated by gdb. + * + * To understand how the debug registers work on x86 machines, + * see section 13.1 of the AMD x86-64 Architecture Programmer's + * Manual, Volume 2, System Programming. + */ +static uintptr_t * +debug_registers(pid_t pid) +{ + ptrace_state_map_t *p; + + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_state_map; p != NULL; p = p->psm_next) { + if (p->psm_pid == pid) + break; + } + if (p == NULL && (p = malloc(sizeof (*p))) != NULL) { + bzero(p, sizeof (*p)); + p->psm_pid = pid; + p->psm_next = ptrace_state_map; + p->psm_debugreg[6] = 0xffff0ff0; /* read as ones */ + ptrace_state_map = p; + } + (void) mutex_unlock(&ptrace_map_mtx); + return (p != NULL? 
p->psm_debugreg : NULL); +} + +static void +free_debug_registers(pid_t pid) +{ + ptrace_state_map_t **pp; + ptrace_state_map_t *p; + + /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */ + for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) { + if (p->psm_pid == pid) { + *pp = p->psm_next; + free(p); + break; + } + } +} + +static int +setup_watchpoints(pid_t pid, uintptr_t *debugreg) +{ + int dr7 = debugreg[7]; + int lrw; + int fd; + size_t size = NULL; + prwatch_t prwatch[4]; + int nwatch; + int i; + int wflags = NULL; + int error; + struct { + long req; + prwatch_t prwatch; + } ctl; + + /* find all watched areas */ + if ((fd = open_procfile(pid, O_RDONLY, "watch")) < 0) + return (-ESRCH); + nwatch = read(fd, prwatch, sizeof (prwatch)) / sizeof (prwatch_t); + (void) close(fd); + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) + return (-ESRCH); + /* clear all watched areas */ + for (i = 0; i < nwatch; i++) { + ctl.req = PCWATCH; + ctl.prwatch = prwatch[i]; + ctl.prwatch.pr_wflags = 0; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + error = -errno; + (void) close(fd); + return (error); + } + } + /* establish all new watched areas */ + for (i = 0; i < 4; i++) { + if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */ + continue; + lrw = (dr7 >> (16 + (4 * i))) & 0xf; + switch (lrw >> 2) { /* length */ + case 0: size = 1; break; + case 1: size = 2; break; + case 2: size = 8; break; + case 3: size = 4; break; + } + switch (lrw & 0x3) { /* mode */ + case 0: wflags = WA_EXEC; break; + case 1: wflags = WA_WRITE; break; + case 2: continue; + case 3: wflags = WA_READ | WA_WRITE; break; + } + ctl.req = PCWATCH; + ctl.prwatch.pr_vaddr = debugreg[i]; + ctl.prwatch.pr_size = size; + ctl.prwatch.pr_wflags = wflags | WA_TRAPAFTER; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + error = -errno; + (void) close(fd); + return (error); + } + } + (void) close(fd); + return (0); +} + +/* + * Returns TRUE if the process is traced, FALSE otherwise. 
This is only true + * if the process is currently stopped, and has been traced using PTRACE_TRACEME + * or PTRACE_ATTACH. + */ +static int +is_traced(pid_t pid) +{ + ptrace_monitor_map_t *p; + pstatus_t status; + + if (get_status(pid, &status) != 0) + return (0); + + if ((status.pr_flags & PR_PTRACE) && + (status.pr_ppid == getpid()) && + (status.pr_lwp.pr_flags & PR_ISTOP)) + return (1); + + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { + if (p->pmm_target == pid) { + (void) mutex_unlock(&ptrace_map_mtx); + return (1); + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +static int +ptrace_trace_common(int fd) +{ + struct { + long cmd; + union { + long flags; + sigset_t signals; + fltset_t faults; + } arg; + } ctl; + size_t size; + + ctl.cmd = PCSTRACE; + prfillset(&ctl.arg.signals); + size = sizeof (long) + sizeof (sigset_t); + if (write(fd, &ctl, size) != size) + return (-1); + + ctl.cmd = PCSFAULT; + premptyset(&ctl.arg.faults); + size = sizeof (long) + sizeof (fltset_t); + if (write(fd, &ctl, size) != size) + return (-1); + + ctl.cmd = PCUNSET; + ctl.arg.flags = PR_FORK; + size = sizeof (long) + sizeof (long); + if (write(fd, &ctl, size) != size) + return (-1); + + return (0); +} + +/* + * Notify that parent that we wish to be traced. This is the equivalent of: + * + * 1. Stop on all signals, and nothing else + * 2. Turn off inherit-on-fork flag + * 3. Set ptrace compatible flag + * + * If we are not the main thread, then the client is trying to request behavior + * by which one of its own thread is to be traced. We don't support this mode + * of operation. 
+ */ +static int +ptrace_traceme(void) +{ + int fd, ret; + int error; + long ctl[2]; + pstatus_t status; + pid_t pid = getpid(); + + if (_lwp_self() != 1) { + lx_unsupported(gettext( + "thread %d calling PTRACE_TRACEME is unsupported"), + _lwp_self()); + return (-ENOTSUP); + } + + if ((ret = get_status(pid, &status)) != 0) + return (ret); + + /* + * Why would a process try to do this twice? I'm not sure, but there's + * a conformance test which wants this to fail just so. + */ + if (status.pr_flags & PR_PTRACE) + return (-EPERM); + + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) + return (-errno); + + ctl[0] = PCSET; + ctl[1] = PR_PTRACE; + error = 0; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) || + ptrace_trace_common(fd) != 0) + error = -errno; + + (void) close(fd); + return (error); +} + +/* + * Read a word of data from the given address. Because this is a process-wide + * action, we don't need the lwpid. + */ +static int +ptrace_peek(pid_t pid, uintptr_t addr, int *ret) +{ + int fd, data; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) + return (-ESRCH); + + if (pread(fd, &data, sizeof (data), addr) != sizeof (data)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + if (uucopy(&data, ret, sizeof (data)) != 0) + return (-errno); + + return (0); +} + +#define LX_USER_BOUND(m) \ +(offsetof(lx_user_t, m) + sizeof (((lx_user_t *)NULL)->m)) + +static int +ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret) +{ + int err, data; + uintptr_t *debugreg; + int dreg; + + if (!is_traced(pid)) + return (-ESRCH); + + /* + * The offset specified by the user is an offset into the Linux + * user structure (seriously). Rather than constructing a full + * user structure, we figure out which part of the user structure + * the offset is in, and fill in just that component. 
+ */ + if (off < LX_USER_BOUND(lxu_regs)) { + lx_user_regs_t regs; + + if ((err = getregs(pid, lwpid, ®s)) != 0) + return (err); + + data = *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_regs)); + + } else if (off < LX_USER_BOUND(lxu_fpvalid)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_i387)) { + lx_user_fpregs_t regs; + + if ((err = getfpregs(pid, lwpid, ®s)) != 0) + return (err); + + data = *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_i387)); + + } else if (off < LX_USER_BOUND(lxu_tsize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_dsize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_ssize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_start_code)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_start_stack)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_signal)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_reserved)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_ar0)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_fpstate)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_magic)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_comm)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_debugreg)) { + dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int); + if (dreg == 4) /* aliased */ + dreg = 6; + else if (dreg == 5) /* aliased */ + dreg = 7; + if ((debugreg = debug_registers(pid)) != NULL) + data = debugreg[dreg]; + else + data = 0; + } else { + lx_unsupported(gettext( + 
"unsupported ptrace %s user offset: 0x%x\n"), "peek", off); + assert(0); + return (-ENOTSUP); + } + + if (uucopy(&data, ret, sizeof (data)) != 0) + return (-errno); + + return (0); +} + +/* + * Write a word of data to the given address. Because this is a process-wide + * action, we don't need the lwpid. Returns EINVAL if the address is not + * word-aligned. + */ +static int +ptrace_poke(pid_t pid, uintptr_t addr, int data) +{ + int fd; + + if (!is_traced(pid)) + return (-ESRCH); + + if (addr & 0x3) + return (-EINVAL); + + if ((fd = open_procfile(pid, O_WRONLY, "as")) < 0) + return (-ESRCH); + + if (pwrite(fd, &data, sizeof (data), addr) != sizeof (data)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + return (0); +} + +static int +ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) +{ + lx_user_regs_t regs; + int err = 0; + uintptr_t *debugreg; + int dreg; + + if (!is_traced(pid)) + return (-ESRCH); + + if (off & 0x3) + return (-EINVAL); + + if (off < offsetof(lx_user_t, lxu_regs) + sizeof (lx_user_regs_t)) { + if ((err = getregs(pid, lwpid, ®s)) != 0) + return (err); + *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_regs)) = data; + return (setregs(pid, lwpid, ®s)); + } + + if (off >= offsetof(lx_user_t, lxu_debugreg) && + off < offsetof(lx_user_t, lxu_debugreg) + 8 * sizeof (int)) { + dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int); + if (dreg == 4) /* aliased */ + dreg = 6; + else if (dreg == 5) /* aliased */ + dreg = 7; + if ((debugreg = debug_registers(pid)) != NULL) { + debugreg[dreg] = data; + if (dreg == 7) + err = setup_watchpoints(pid, debugreg); + } + return (err); + } + + lx_unsupported(gettext("unsupported ptrace %s user offset: 0x%x\n"), + "poke", off); + assert(0); + return (-ENOTSUP); +} + +static int +ptrace_cont_common(int fd, int sig, int run, int step) +{ + long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2]; + long *ctlp = ctl; + size_t size; + + assert(0 <= sig && sig < LX_NSIG); 
+ assert(!step || run); + + /* + * Clear the current signal. + */ + *ctlp++ = PCCSIG; + + /* + * Send a signal if one was specified. + */ + if (sig != 0 && sig != LX_SIGSTOP) { + siginfo_t *infop; + + *ctlp++ = PCSSIG; + infop = (siginfo_t *)ctlp; + bzero(infop, sizeof (siginfo_t)); + infop->si_signo = ltos_signo[sig]; + + ctlp += sizeof (siginfo_t) / sizeof (long); + } + + /* + * If run is true, set the lwp running. + */ + if (run) { + *ctlp++ = PCRUN; + *ctlp++ = step ? PRSTEP : 0; + } + + size = (char *)ctlp - (char *)&ctl[0]; + assert(size <= sizeof (ctl)); + + if (write(fd, ctl, size) != size) { + lx_debug("failed to continue %s", strerror(errno)); + return (-EIO); + } + + return (0); +} + +static int +ptrace_cont_monitor(ptrace_monitor_map_t *p) +{ + long ctl[2]; + int fd; + + fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl"); + if (fd < 0) { + lx_debug("failed to open monitor ctl %d", + errno); + return (-EIO); + } + + ctl[0] = PCRUN; + ctl[1] = PRCSIG; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step) +{ + ptrace_monitor_map_t *p; + uintptr_t *debugreg; + int fd, ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if (sig < 0 || sig >= LX_NSIG) + return (-EINVAL); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) { + (void) close(fd); + return (ret); + } + + (void) close(fd); + + /* kludge: use debugreg[4] to remember the single-step flag */ + if ((debugreg = debug_registers(pid)) != NULL) + debugreg[4] = step; + + /* + * Check for a monitor and get it moving if we find it. If any of the + * /proc operations fail, we're kind of sunk so just return an error. 
+ */ + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { + if (p->pmm_target == lxpid) { + if ((ret = ptrace_cont_monitor(p)) != 0) + return (ret); + break; + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +/* + * If a monitor exists for this traced process, dispose of it. + * First turn off its ptrace flag so we won't be notified of its + * impending demise. We ignore errors for this step since they + * indicate only that the monitor has been damaged due to pilot + * error. Then kill the monitor, and wait for it. If the wait + * succeeds we can dispose of the corpse, otherwise another thread's + * wait call has collected it and we need to set a flag in the + * structure so that it can be picked up in wait. + */ +static void +monitor_kill(pid_t lxpid, pid_t pid) +{ + ptrace_monitor_map_t *p, **pp; + pid_t mpid; + int fd; + long ctl[2]; + + (void) mutex_lock(&ptrace_map_mtx); + free_debug_registers(pid); + for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { + if (p->pmm_target == lxpid) { + mpid = p->pmm_monitor; + if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) { + ctl[0] = PCUNSET; + ctl[1] = PR_PTRACE; + (void) write(fd, ctl, sizeof (ctl)); + (void) close(fd); + } + + (void) kill(mpid, SIGKILL); + + if (waitpid(mpid, NULL, 0) == mpid) { + *pp = p->pmm_next; + free(p); + } else { + p->pmm_exiting = 1; + } + + break; + } + } + (void) mutex_unlock(&ptrace_map_mtx); +} + +static int +ptrace_kill(pid_t lxpid, pid_t pid) +{ + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + ret = kill(pid, SIGKILL); + + /* kill off the monitor process, if any */ + monitor_kill(lxpid, pid); + + return (ret); +} + +static int +ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + return (ptrace_cont(lxpid, pid, lwpid, sig, 1)); +} + +static int +ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_regs_t regs; + int ret; + + if (!is_traced(pid)) + return
(-ESRCH); + + if ((ret = getregs(pid, lwpid, &regs)) != 0) + return (ret); + + if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_regs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) + return (-errno); + + return (setregs(pid, lwpid, &regs)); +} + +static int +ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpregs_t regs; + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = getfpregs(pid, lwpid, &regs)) != 0) + return (ret); + + if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpregs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) + return (-errno); + + return (setfpregs(pid, lwpid, &regs)); +} + +static int +ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpxregs_t regs; + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = getfpxregs(pid, lwpid, &regs)) != 0) + return (ret); + + if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpxregs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, &regs, sizeof (regs)) != 0) + return (-errno); + + return (setfpxregs(pid, lwpid, &regs)); +} + +static void __NORETURN +ptrace_monitor(int fd) +{ + struct { + long cmd; + union { + long flags; + sigset_t signals; + fltset_t faults; + } arg; + } ctl; + size_t size; + int monfd; + int rv; + + monfd = open_procfile(getpid(), O_WRONLY, "ctl"); + + ctl.cmd = PCSTRACE; /* trace only SIGTRAP */ + premptyset(&ctl.arg.signals); + praddset(&ctl.arg.signals, SIGTRAP); + size = sizeof (long) + sizeof (sigset_t); + (void)
+ write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCSFAULT; + premptyset(&ctl.arg.faults); + size = sizeof (long) + sizeof (fltset_t); + (void) write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCUNSET; + ctl.arg.flags = PR_FORK; + size = sizeof (long) + sizeof (long); + (void) write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCSET; /* wait()able by the parent */ + ctl.arg.flags = PR_PTRACE; + size = sizeof (long) + sizeof (long); + (void) write(monfd, &ctl, size); /* can't fail */ + + (void) close(monfd); + + ctl.cmd = PCWSTOP; + size = sizeof (long); + + for (;;) { + /* + * Wait for the traced process to stop. + */ + if (write(fd, &ctl, size) != size) { + rv = (errno == ENOENT)? 0 : 1; + lx_debug("monitor failed to wait for LWP to stop: %s", + strerror(errno)); + _exit(rv); + } + + lx_debug("monitor caught traced LWP"); + + /* + * Pull the ptrace trigger by sending ourself a SIGTRAP. This + * will cause this, the monitor process, to stop which will + * cause the parent's waitid(2) call to return this process + * id. In lx_wait(), we remap the monitor process's pid and + * status to those of the traced LWP. When the parent process + * uses ptrace to resume the traced LWP, it will additionally + * restart this process. + */ + (void) _lwp_kill(_lwp_self(), SIGTRAP); + + lx_debug("monitor was resumed"); + } +} + +static int +ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run) +{ + pid_t child; + ptrace_monitor_map_t *p; + sigset_t unblock; + pstatus_t status; + long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2]; + long *ctlp = ctl; + size_t size; + sysset_t *sysp; + int ret; + + /* + * We're going to need this structure so better to fail now before it's + * too late to turn back. + */ + if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL) + return (-EIO); + + if ((ret = get_status(pid, &status)) != 0) { + free(p); + return (ret); + } + + /* + * If this process is already traced, bail.
+ */ + if (status.pr_flags & PR_PTRACE) { + free(p); + return (-EPERM); + } + + /* + * Turn on the appropriate tracing flags. It's exceedingly unlikely + * that this operation will fail; any failure would probably be due + * to another /proc consumer mucking around. + */ + if (ptrace_trace_common(fd) != 0) { + free(p); + return (-EIO); + } + + /* + * Native ptrace automatically catches processes when they exec so we + * have to do that explicitly here. + */ + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + praddset(sysp, SYS_execve); + if (run) { + *ctlp++ = PCRUN; + *ctlp++ = 0; + } + + size = (char *)ctlp - (char *)&ctl[0]; + + if (write(fd, ctl, size) != size) { + free(p); + return (-EIO); + } + + /* + * Spawn the monitor processes to notify this process of events of + * interest in the traced process. We block signals here both so + * we're not interrupted during this operation and so that the + * monitor process doesn't accept signals. + */ + (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock); + if ((child = fork1()) == 0) + ptrace_monitor(fd); + (void) sigprocmask(SIG_SETMASK, &unblock, NULL); + + if (child == -1) { + lx_debug("failed to fork monitor process\n"); + free(p); + return (-EIO); + } + + p->pmm_monitor = child; + p->pmm_target = lxpid; + p->pmm_pid = pid; + p->pmm_lwpid = lwpid; + p->pmm_exiting = 0; + + (void) mutex_lock(&ptrace_map_mtx); + p->pmm_next = ptrace_monitor_map; + ptrace_monitor_map = p; + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +static int +ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid) +{ + int fd, ret; + long ctl; + + /* + * Linux doesn't let you trace process 1 -- go figure. + */ + if (lxpid == 1) + return (-EPERM); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0) + return (errno == EBUSY ?
-EPERM : -ESRCH); + + ctl = PCSTOP; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + lx_err(gettext("failed to stop %d/%d\n"), (int)pid, (int)lwpid); + assert(0); + } + + ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0); + + (void) close(fd); + + return (ret); +} + +static int +ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + long ctl[2]; + int fd, ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if (sig < 0 || sig >= LX_NSIG) + return (-EINVAL); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + /* + * The /proc ptrace flag may not be set, but we clear it + * unconditionally since doing so doesn't hurt anything. + */ + ctl[0] = PCUNSET; + ctl[1] = PR_PTRACE; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + /* + * Clear the brand-specific system call tracing flag to ensure that + * the target doesn't stop unexpectedly some time in the future. + */ + if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) { + (void) close(fd); + return (-ret); + } + + /* kill off the monitor process, if any */ + monitor_kill(lxpid, pid); + + /* + * Turn on the run-on-last-close flag so that all tracing flags will be + * cleared when we close the control file descriptor. + */ + ctl[0] = PCSET; + ctl[1] = PR_RLC; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + /* + * Clear the current signal (if any) and possibly send the traced + * process a new signal. 
+ */ + ret = ptrace_cont_common(fd, sig, 0, 0); + + (void) close(fd); + + return (ret); +} + +static int +ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0) + return (-ret); + + return (ptrace_cont(lxpid, pid, lwpid, sig, 0)); +} + +int +lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + pid_t pid, lxpid = (pid_t)p2; + lwpid_t lwpid; + + if ((p1 != LX_PTRACE_TRACEME) && + (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0)) + return (-ESRCH); + + switch (p1) { + case LX_PTRACE_TRACEME: + return (ptrace_traceme()); + + case LX_PTRACE_PEEKTEXT: + case LX_PTRACE_PEEKDATA: + return (ptrace_peek(pid, p3, (int *)p4)); + + case LX_PTRACE_PEEKUSER: + return (ptrace_peek_user(pid, lwpid, p3, (int *)p4)); + + case LX_PTRACE_POKETEXT: + case LX_PTRACE_POKEDATA: + return (ptrace_poke(pid, p3, (int)p4)); + + case LX_PTRACE_POKEUSER: + return (ptrace_poke_user(pid, lwpid, p3, (int)p4)); + + case LX_PTRACE_CONT: + return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0)); + + case LX_PTRACE_KILL: + return (ptrace_kill(lxpid, pid)); + + case LX_PTRACE_SINGLESTEP: + return (ptrace_step(lxpid, pid, lwpid, (int)p4)); + + case LX_PTRACE_GETREGS: + return (ptrace_getregs(pid, lwpid, p4)); + + case LX_PTRACE_SETREGS: + return (ptrace_setregs(pid, lwpid, p4)); + + case LX_PTRACE_GETFPREGS: + return (ptrace_getfpregs(pid, lwpid, p4)); + + case LX_PTRACE_SETFPREGS: + return (ptrace_setfpregs(pid, lwpid, p4)); + + case LX_PTRACE_ATTACH: + return (ptrace_attach(lxpid, pid, lwpid)); + + case LX_PTRACE_DETACH: + return (ptrace_detach(lxpid, pid, lwpid, (int)p4)); + + case LX_PTRACE_GETFPXREGS: + return (ptrace_getfpxregs(pid, lwpid, p4)); + + case LX_PTRACE_SETFPXREGS: + return (ptrace_setfpxregs(pid, lwpid, p4)); + + case LX_PTRACE_SYSCALL: + return (ptrace_syscall(lxpid, pid, lwpid, (int)p4)); + + default: + return (-EINVAL); + } +} + +void 
+lx_ptrace_fork(void) +{ + /* + * Send a special signal (that has no Linux equivalent) to indicate + * that we're in this particularly special case. The signal will be + * ignored by this process, but noticed by /proc consumers tracing + * this process. + */ + (void) _lwp_kill(_lwp_self(), SIGWAITING); +} + +static void +ptrace_catch_fork(pid_t pid, int monitor) +{ + long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)]; + long *ctlp; + sysset_t *sysp; + size_t size; + pstatus_t ps; + pid_t child; + int fd, err; + + /* + * If any of this fails, we're really sunk since the child + * will be stuck in the middle of lx_ptrace_fork(). + * Fortunately it's practically assured to succeed unless + * something is seriously wrong on the system. + */ + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) { + lx_debug("lx_catch_fork: failed to control %d", + (int)pid); + return; + } + + /* + * Turn off the /proc PR_PTRACE flag so the parent doesn't get + * spurious wake ups while we're working our dark magic. Arrange to + * catch the process when it exits from fork, and turn on the /proc + * inherit-on-fork flag so we catch the child as well. We then run + * the process, wait for it to stop on the fork1(2) call and reset + * the tracing flags to their original state.
+ */ + ctlp = ctl; + *ctlp++ = PCCSIG; + if (!monitor) { + *ctlp++ = PCUNSET; + *ctlp++ = PR_PTRACE; + } + *ctlp++ = PCSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + praddset(sysp, SYS_forksys); /* fork1() is forksys(0, 0) */ + *ctlp++ = PCRUN; + *ctlp++ = 0; + *ctlp++ = PCWSTOP; + if (!monitor) { + *ctlp++ = PCSET; + *ctlp++ = PR_PTRACE; + } + *ctlp++ = PCUNSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + if (monitor) + praddset(sysp, SYS_execve); + + size = (char *)ctlp - (char *)&ctl[0]; + assert(size <= sizeof (ctl)); + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to set %d running", + (int)pid); + return; + } + + /* + * Get the status so we can find the value returned from fork1() -- + * the child process's pid. + */ + if (get_status(pid, &ps) != 0) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to get status for %d", + (int)pid); + return; + } + + child = (pid_t)ps.pr_lwp.pr_reg[R_R0]; + + /* + * We're done with the parent -- off you go. + */ + ctl[0] = PCRUN; + ctl[1] = 0; + size = 2 * sizeof (long); + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to set %d running", + (int)pid); + return; + } + + (void) close(fd); + + /* + * If fork1(2) failed, we're done. + */ + if (child < 0) { + lx_debug("lx_catch_fork: fork1 failed"); + return; + } + + /* + * Now we need to screw with the child process. 
+ */ + if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) { + lx_debug("lx_catch_fork: failed to control %d", + (int)child); + return; + } + + ctlp = ctl; + *ctlp++ = PCUNSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + size = (char *)ctlp - (char *)&ctl[0]; + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to clear trace flags for %d", + (int)child); + return; + } + + /* + * Now treat the child as though we had attached to it explicitly. + */ + err = ptrace_attach_common(fd, child, child, 1, 1); + assert(err == 0); + + (void) close(fd); +} + +static void +set_dr6(pid_t pid, siginfo_t *infop) +{ + uintptr_t *debugreg; + uintptr_t addr; + uintptr_t base; + size_t size = NULL; + int dr7; + int lrw; + int i; + + if ((debugreg = debug_registers(pid)) == NULL) + return; + + debugreg[6] = 0xffff0ff0; /* read as ones */ + switch (infop->si_code) { + case TRAP_TRACE: + debugreg[6] |= 0x4000; /* single-step */ + break; + case TRAP_RWATCH: + case TRAP_WWATCH: + case TRAP_XWATCH: + dr7 = debugreg[7]; + addr = (uintptr_t)infop->si_addr; + for (i = 0; i < 4; i++) { + if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */ + continue; + lrw = (dr7 >> (16 + (4 * i))) & 0xf; + switch (lrw >> 2) { /* length */ + case 0: size = 1; break; + case 1: size = 2; break; + case 2: size = 8; break; + case 3: size = 4; break; + } + base = debugreg[i]; + if (addr >= base && addr < base + size) + debugreg[6] |= (1 << i); + } + /* + * Were we also attempting a single-step? + * (kludge: we use debugreg[4] for this flag.) + */ + if (debugreg[4]) + debugreg[6] |= 0x4000; + break; + default: + break; + } +} + +/* + * This is called from the emulation of the wait4 and waitpid system call to + * take into account the monitor processes which we spawn to observe other + * processes from ptrace_attach(). 
+ */ +int +lx_ptrace_wait(siginfo_t *infop) +{ + ptrace_monitor_map_t *p, **pp; + pid_t lxpid, pid = infop->si_pid; + lwpid_t lwpid; + int fd; + pstatus_t status; + + /* + * If the process observed by waitid(2) corresponds to the monitor + * process for a traced thread, we need to rewhack the siginfo_t to + * look like it came from the traced thread with the flags set + * according to the current state. + */ + (void) mutex_lock(&ptrace_map_mtx); + for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { + if (p->pmm_monitor == pid) { + assert(infop->si_code == CLD_EXITED || + infop->si_code == CLD_KILLED || + infop->si_code == CLD_DUMPED || + infop->si_code == CLD_TRAPPED); + goto found; + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + /* + * If the traced process got a SIGWAITING, we must be in the middle + * of a clone(2) with CLONE_PTRACE set. + */ + if (infop->si_code == CLD_TRAPPED && infop->si_status == SIGWAITING) { + ptrace_catch_fork(pid, 0); + return (-1); + } + + if (get_status(pid, &status) == 0 && + (status.pr_lwp.pr_flags & PR_STOPPED) && + status.pr_lwp.pr_why == PR_SIGNALLED && + status.pr_lwp.pr_info.si_signo == SIGTRAP) + set_dr6(pid, &status.pr_lwp.pr_info); + + return (0); + +found: + /* + * If the monitor is in the exiting state, ignore the event and free + * the monitor structure if the monitor has exited. By returning -1 we + * indicate to the caller that this was a spurious return from + * waitid(2) and that it should ignore the result and try again. + */ + if (p->pmm_exiting) { + if (infop->si_code == CLD_EXITED || + infop->si_code == CLD_KILLED || + infop->si_code == CLD_DUMPED) { + *pp = p->pmm_next; + (void) mutex_unlock(&ptrace_map_mtx); + free(p); + } + return (-1); + } + + lxpid = p->pmm_target; + pid = p->pmm_pid; + lwpid = p->pmm_lwpid; + (void) mutex_unlock(&ptrace_map_mtx); + + /* + * If we can't find the traced process, kill off its monitor. 
+ */ + if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) { + assert(errno == ENOENT); + monitor_kill(lxpid, pid); + infop->si_code = CLD_EXITED; + infop->si_status = 0; + infop->si_pid = lxpid; + return (0); + } + + if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) != + sizeof (status.pr_lwp)) { + lx_err(gettext("read lwpstatus failed %d %s"), + fd, strerror(errno)); + assert(0); + } + + (void) close(fd); + + /* + * If the traced process isn't stopped, this is a truly spurious + * event probably caused by another /proc consumer tracing the + * monitor. + */ + if (!(status.pr_lwp.pr_flags & PR_STOPPED)) { + (void) ptrace_cont_monitor(p); + return (-1); + } + + switch (status.pr_lwp.pr_why) { + case PR_SIGNALLED: + /* + * If the traced process got a SIGWAITING, we must be in the + * middle of a clone(2) with CLONE_PTRACE set. + */ + if (status.pr_lwp.pr_what == SIGWAITING) { + ptrace_catch_fork(lxpid, 1); + (void) ptrace_cont_monitor(p); + return (-1); + } + infop->si_code = CLD_TRAPPED; + infop->si_status = status.pr_lwp.pr_what; + if (status.pr_lwp.pr_info.si_signo == SIGTRAP) + set_dr6(pid, &status.pr_lwp.pr_info); + break; + + case PR_REQUESTED: + /* + * Make it look like the traced process stopped on an + * event of interest. + */ + infop->si_code = CLD_TRAPPED; + infop->si_status = SIGTRAP; + break; + + case PR_JOBCONTROL: + /* + * Ignore this as it was probably caused by another /proc + * consumer tracing the monitor. + */ + (void) ptrace_cont_monitor(p); + return (-1); + + case PR_SYSEXIT: + /* + * Processes traced via a monitor (rather than using the + * native Solaris ptrace support) explicitly trace returns + * from exec system calls since it's an implicit ptrace + * trace point. Accordingly we need to present a process + * in that state as though it had reached the ptrace trace + * point. 
+ */ + if (status.pr_lwp.pr_what == SYS_execve) { + infop->si_code = CLD_TRAPPED; + infop->si_status = SIGTRAP; + break; + } + + /*FALLTHROUGH*/ + + case PR_SYSENTRY: + case PR_FAULTED: + case PR_SUSPENDED: + default: + lx_err(gettext("didn't expect %d (%d %d)"), + status.pr_lwp.pr_why, + status.pr_lwp.pr_what, status.pr_lwp.pr_flags); + assert(0); + } + + infop->si_pid = lxpid; + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/rlimit.c b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c new file mode 100644 index 0000000000..97498c6d4a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c @@ -0,0 +1,233 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/resource.h> +#include <sys/sysconfig.h> +#include <sys/lx_types.h> +#include <sys/lx_misc.h> + +#define LX_RLIMIT_RSS 5 +#define LX_RLIMIT_NPROC 6 +#define LX_RLIMIT_MEMLOCK 8 +#define LX_RLIMIT_LOCKS 10 +#define LX_RLIMIT_NLIMITS 11 + +/* + * Linux supports many of the same resources that we do, but the numbering + * is slightly different. This table is used to translate Linux resource + * limit keys into their Solaris equivalents. + */ +static int ltos_resource[LX_RLIMIT_NLIMITS] = { + RLIMIT_CPU, + RLIMIT_FSIZE, + RLIMIT_DATA, + RLIMIT_STACK, + RLIMIT_CORE, + -1, /* RSS */ + -1, /* NPROC */ + RLIMIT_NOFILE, + -1, /* MEMLOCK */ + RLIMIT_AS, + -1 /* LOCKS */ +}; + +#define NLIMITS (sizeof (ltos_resource) / sizeof (int)) + +/* + * Magic values Linux uses to indicate infinity + */ +#define LX_RLIM_INFINITY_O (0x7fffffffUL) +#define LX_RLIM_INFINITY_N (0xffffffffUL) + +/* + * Array to store the rlimits that we track but do not enforce. 
+ */ +static struct rlimit fake_limits[NLIMITS] = { + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_RSS */ + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_NPROC */ + 0, 0, + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_MEMLOCK */ + 0, 0, + RLIM_INFINITY, RLIM_INFINITY /* LX_RLIM_LOCKS */ +}; + +static int +lx_getrlimit_common(int resource, struct rlimit *rlp, int inf) +{ + int rv; + int sresource; + struct rlimit rl; + + if (resource < 0 || resource >= LX_RLIMIT_NLIMITS) + return (-EINVAL); + + sresource = ltos_resource[resource]; + + if (sresource == -1) { + switch (resource) { + case LX_RLIMIT_MEMLOCK: + case LX_RLIMIT_RSS: + case LX_RLIMIT_LOCKS: + case LX_RLIMIT_NPROC: + rl.rlim_max = fake_limits[resource].rlim_max; + rl.rlim_cur = fake_limits[resource].rlim_cur; + if (rl.rlim_cur == RLIM_INFINITY) + rl.rlim_cur = inf; + if (rl.rlim_max == RLIM_INFINITY) + rl.rlim_max = inf; + if ((uucopy(&rl, rlp, sizeof (rl))) != 0) + return (-errno); + return (0); + default: + lx_unsupported("Unsupported resource type %d\n", + resource); + return (-ENOTSUP); + } + } else { + rv = getrlimit(sresource, rlp); + } + + if (rv < 0) + return (-errno); + + if (rlp->rlim_cur == RLIM_INFINITY) + rlp->rlim_cur = inf; + + if (rlp->rlim_max == RLIM_INFINITY) + rlp->rlim_max = inf; + + return (0); +} + +/* + * This is the 'new' getrlimit, variously called getrlimit or ugetrlimit + * in Linux headers and code. The only difference between this and the old + * getrlimit (variously called getrlimit or old_getrlimit) is the value of + * RLIM_INFINITY, which is smaller for the older version. Modern code will + * use this version by default. + */ +int +lx_getrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + + return (lx_getrlimit_common(resource, rlp, LX_RLIM_INFINITY_N)); +} + +/* + * This is the 'old' getrlimit, variously called getrlimit or old_getrlimit + * in Linux headers and code. 
The only difference between this and the new + * getrlimit (variously called getrlimit or ugetrlimit) is the value of + * RLIM_INFINITY, which is smaller for the older version. + */ +int +lx_oldgetrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + + return (lx_getrlimit_common(resource, rlp, LX_RLIM_INFINITY_O)); +} + +int +lx_setrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + struct rlimit rl; + int rv, sresource; + + if (resource < 0 || resource >= LX_RLIMIT_NLIMITS) + return (-EINVAL); + + sresource = ltos_resource[resource]; + + if (sresource == -1) { + if (uucopy((void *)p2, &rl, sizeof (rl)) != 0) + return (-errno); + + switch (resource) { + case LX_RLIMIT_MEMLOCK: + case LX_RLIMIT_RSS: + case LX_RLIMIT_LOCKS: + case LX_RLIMIT_NPROC: + if (rl.rlim_max != LX_RLIM_INFINITY_N && + (rl.rlim_cur == LX_RLIM_INFINITY_N || + rl.rlim_cur > rl.rlim_max)) + return (-EINVAL); + if (rl.rlim_max == LX_RLIM_INFINITY_N) + fake_limits[resource].rlim_max = RLIM_INFINITY; + else + fake_limits[resource].rlim_max = rl.rlim_max; + if (rl.rlim_cur == LX_RLIM_INFINITY_N) + fake_limits[resource].rlim_cur = RLIM_INFINITY; + else + fake_limits[resource].rlim_cur = rl.rlim_cur; + return (0); + } + + lx_unsupported("Unsupported resource type %d\n", resource); + return (-ENOTSUP); + } + + rv = setrlimit(sresource, rlp); + + return (rv < 0 ? -errno : 0); +} + +/* + * We lucked out here. Linux and Solaris have exactly the same + * rusage structures. + */ +int +lx_getrusage(uintptr_t p1, uintptr_t p2) +{ + int who = (int)p1; + struct rusage *rup = (struct rusage *)p2; + int rv, swho; + + if (who == LX_RUSAGE_SELF) + swho = _RUSAGESYS_GETRUSAGE; + else if (who == LX_RUSAGE_CHILDREN) + swho = _RUSAGESYS_GETRUSAGE_CHLD; + else + return (-EINVAL); + + rv = getrusage(swho, rup); + + return (rv < 0 ? 
-errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c new file mode 100644 index 0000000000..f37ab83aee --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c @@ -0,0 +1,610 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/cred_impl.h> +#include <sys/ucred.h> +#include <ucred.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <sched.h> +#include <strings.h> +#include <pthread.h> +#include <time.h> +#include <thread.h> +#include <alloca.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/lx_syscall.h> +#include <sys/lx_debug.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/lx_sched.h> + +/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */ +static int +validate_policy(int policy) +{ + switch (policy) { + case LX_SCHED_FIFO: + return (SCHED_FIFO); + + case LX_SCHED_RR: + return (SCHED_RR); + + case LX_SCHED_OTHER: + return (SCHED_OTHER); + + default: + lx_debug("validate_policy: illegal policy: %d", policy); + return (-EINVAL); + } +} + +/* + * Check to see if we have the permissions to set scheduler parameters and + * policy, based on Linux' demand that such commands fail with errno set to + * EPERM if the current euid is not the euid or ruid of the process in + * question. + */ +static int +check_schedperms(pid_t pid) +{ + size_t sz; + ucred_t *cr; + uid_t euid; + + euid = geteuid(); + + if (pid == getpid()) { + /* + * If we're the process to be checked, simply check the euid + * against our ruid. + */ + if (euid != getuid()) + return (-EPERM); + + return (0); + } + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. + */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + + if (cr == NULL) + return (-ENOMEM); + + /* + * If we can't access the process' credentials, fail with errno EPERM + * as the call would not have succeeded anyway. 
+ */ + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0) + return ((errno == EACCES) ? -EPERM : -errno); + + if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr))) + return (-EPERM); + + return (0); +} + +static int +ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp) +{ + struct lx_sched_param ls; + int smin = sched_get_priority_min(policy); + int smax = sched_get_priority_max(policy); + + if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0) + return (-errno); + + bzero(sp, sizeof (struct sched_param)); + + /* + * Linux has a fixed priority range, 0 - 99, which we need to convert to + * Solaris's dynamic range. Linux considers lower numbers to be + * higher priority, so we'll invert the priority within Solaris's range. + * + * The formula to convert between ranges is: + * + * L * (smax - smin) + * S = ----------------- + smin + * (lmax - lmin) + * + * where S is the Solaris equivalent of the linux priority L. + * + * To invert the priority, we use: + * S' = smax - S + smin + * + * Together, these two formulas become: + * + * L * (smax - smin) + * S = smax - ----------------- + 2smin + * 99 + */ + sp->sched_priority = smax - + ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin; + + lx_debug("ltos_sparam: linux prio %d = Solaris prio %d " + "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority, + smin, smax); + + return (0); +} + +static int +stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp) +{ + struct lx_sched_param ls; + int smin = sched_get_priority_min(policy); + int smax = sched_get_priority_max(policy); + + if (policy == SCHED_OTHER) { + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + ls.lx_sched_prio = 0; + } else { + /* + * Convert Solaris's dynamic, inverted priority range to the + * fixed Linux range of 1 - 99. 
+ * + * The formula is (see above): + * + * (smax - s + 2smin) * 99 + * l = ----------------------- + * smax - smin + */ + ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) * + LX_PRI_MAX) / (smax - smin); + } + + lx_debug("stol_sparam: Solaris prio %d = linux prio %d " + "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio, + smin, smax); + + return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0) + ? -errno : 0); +} + +#define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8)) +#define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8))) + +/* ARGSUSED */ +int +lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) +{ + int sz; + ulong_t *lmask, *zmask; + int i; + + sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp); + if (sz == -1) + return (-errno); + + /* + * If the target LWP hasn't ever had an affinity mask set, the kernel + * will return a mask of all 0's. If that is the case we must build a + * default mask that has all valid bits turned on. + */ + lmask = SAFE_ALLOCA(sz); + zmask = SAFE_ALLOCA(sz); + if (lmask == NULL || zmask == NULL) + return (-ENOMEM); + + bzero(zmask, sz); + + if (uucopy((void *)maskp, lmask, sz) != 0) + return (-EFAULT); + + if (bcmp(lmask, zmask, sz) != 0) + return (sz); + + for (i = 0; i < sz * 8; i++) { + if (p_online(i, P_STATUS) != -1) { + lmask[BITINDEX(i)] |= BITSHIFT(i); + } + } + + if (uucopy(lmask, (void *)maskp, sz) != 0) + return (-EFAULT); + + return (sz); +} + +/* ARGSUSED */ +int +lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) +{ + int ret; + int sz; + int i; + int found; + ulong_t *lmask; + pid_t s_pid; + lwpid_t s_tid; + processorid_t cpuid = NULL; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * We only support setting affinity masks for threads in + * the calling process. 
+ */ + if (s_pid != getpid()) + return (-EPERM); + + /* + * First, get the minimum bitmask size from the kernel. + */ + sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0); + if (sz == -1) + return (-errno); + + lmask = SAFE_ALLOCA(sz); + if (lmask == NULL) + return (-ENOMEM); + + if (uucopy((void *)maskp, lmask, sz) != 0) + return (-EFAULT); + + /* + * Make sure the mask contains at least one processor that is + * physically on the system. Reduce the user's mask to the set of + * physically present CPUs. Keep track of how many valid + * bits are set in the user's mask. + */ + + for (found = 0, i = 0; i < sz * 8; i++) { + if (p_online(i, P_STATUS) == -1) { + /* + * This CPU doesn't exist, so clear this bit from + * the user's mask. + */ + lmask[BITINDEX(i)] &= ~BITSHIFT(i); + continue; + } + + if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) { + found++; + cpuid = i; + } + } + + if (found == 0) { + lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n"); + return (-EINVAL); + } + + /* + * If only one bit is set, bind the thread to that procesor; + * otherwise, clear the binding. + */ + if (found == 1) { + lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n", + s_tid, cpuid); + if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0) + /* + * It could be that the requested processor is offline, + * so we'll just abandon our good-natured attempt to + * bind to it. + */ + lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid, + cpuid, strerror(errno)); + } else { + lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n", + s_tid); + if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) { + lx_debug("couldn't clear CPU binding for LWP %d: %s\n", + s_tid, strerror(errno)); + } + } + + /* + * Finally, ask the kernel to make a note of our current (though fairly + * meaningless) affinity mask. + */ + ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask); + + return ((ret == 0) ? 
0 : -errno); +} + +int +lx_sched_getparam(uintptr_t pid, uintptr_t param) +{ + int policy, ret; + pid_t s_pid; + lwpid_t s_tid; + + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * If we're attempting to get information on our own process, we can + * get data on a per-thread basis; if not, punt and use the specified + * pid. + */ + if (s_pid == getpid()) { + if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0) + return (-ret); + } else { + if (sched_getparam(s_pid, &sp) == -1) + return (-errno); + + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + } + + return (stol_sparam(policy, &sp, (struct lx_sched_param *)param)); +} + +int +lx_sched_setparam(uintptr_t pid, uintptr_t param) +{ + int err, policy; + pid_t s_pid; + lwpid_t s_tid; + struct lx_sched_param lp; + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + if (s_pid == getpid()) { + struct sched_param dummy; + + if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) + return (-err); + } else + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + + lx_debug("sched_setparam(): current policy %d", policy); + + if (uucopy((void *)param, &lp, sizeof (lp)) != 0) + return (-errno); + + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0)) + return (-EINVAL); + + if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp, + &sp)) != 0) + return (err); + + /* + * Check if we're allowed to change the scheduler for the process. 
+ * + * If we're operating on a thread, we can't just call + * pthread_setschedparam() because as all threads reside within a + * single Solaris process, Solaris will allow the modification + * + * If we're operating on a process, we can't just call sched_setparam() + * because Solaris will allow the call to succeed if the scheduler + * parameters do not differ from those being installed, but Linux wants + * the call to fail. + */ + if ((err = check_schedperms(s_pid)) != 0) + return (err); + + if (s_pid == getpid()) + return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0) + ? -err : 0); + + return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0); +} + +int +lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec) +{ + struct timespec ts; + pid_t s_pid; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0) + return (-ESRCH); + + if (uucopy((struct timespec *)timespec, &ts, + sizeof (struct timespec)) != 0) + return (-errno); + + return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0); +} + +int +lx_sched_getscheduler(uintptr_t pid) +{ + int policy, rv; + pid_t s_pid; + lwpid_t s_tid; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + if (s_pid == getpid()) { + struct sched_param dummy; + + if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) + return (-rv); + } else + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + + /* + * Linux only supports certain policies; avoid confusing apps with + * alien policies. 
+ */ + switch (policy) { + case SCHED_FIFO: + return (LX_SCHED_FIFO); + case SCHED_OTHER: + return (LX_SCHED_OTHER); + case SCHED_RR: + return (LX_SCHED_RR); + default: + break; + } + + return (LX_SCHED_OTHER); +} + +int +lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param) +{ + int rt_pol; + int rv; + pid_t s_pid; + lwpid_t s_tid; + struct lx_sched_param lp; + + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if ((rt_pol = validate_policy((int)policy)) < 0) + return (rt_pol); + + if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param, + &sp)) != 0) + return (rv); + + if (uucopy((void *)param, &lp, sizeof (lp)) != 0) + return (-errno); + + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * Check if we're allowed to change the scheduler for the process. + * + * If we're operating on a thread, we can't just call + * pthread_setschedparam() because as all threads reside within a + * single Solaris process, Solaris will allow the modification. + * + * If we're operating on a process, we can't just call + * sched_setscheduler() because Solaris will allow the call to succeed + * if the scheduler and scheduler parameters do not differ from those + * being installed, but Linux wants the call to fail. + */ + if ((rv = check_schedperms(s_pid)) != 0) + return (rv); + + if (s_pid == getpid()) { + struct sched_param param; + int pol; + + if ((pol = sched_getscheduler(s_pid)) != 0) + return (-errno); + + /* + * sched_setscheduler() returns the previous scheduling policy + * on success, so call pthread_getschedparam() to get the + * current thread's scheduling policy and return that if the + * call to pthread_setschedparam() succeeds. 
+ */ + if ((rv = pthread_getschedparam(s_tid, &pol, ¶m)) != 0) + return (-rv); + + return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0) + ? -rv : pol); + } + + return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1) + ? -errno : rv); +} + +int +lx_sched_get_priority_min(uintptr_t policy) +{ + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0. + * Linux scheduling priorities are not alterable, so there is no + * Solaris translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MIN_RRFIFO); + case LX_SCHED_OTHER: + return (LX_SCHED_PRIORITY_MIN_OTHER); + default: + break; + } + return (-EINVAL); +} + +int +lx_sched_get_priority_max(uintptr_t policy) +{ + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + * Linux scheduling priorities are not alterable, so there is no + * Solaris translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MAX_RRFIFO); + case LX_SCHED_OTHER: + return (LX_SCHED_PRIORITY_MAX_OTHER); + default: + break; + } + return (-EINVAL); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sendfile.c b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c new file mode 100644 index 0000000000..1c4af9bf74 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lx_sendfile() and lx_sendfile64() are just branded versions of the + * library calls available in the Solaris libsendfile (see sendfile(3EXT)). + */ + +#include <sys/types.h> +#include <sys/syscall.h> +#include <sys/sendfile.h> +#include <string.h> +#include <errno.h> +#include <sys/lx_misc.h> + +int +lx_sendfile(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + sysret_t rval; + off_t off = 0; + off_t *offp = (off_t *)p3; + int error; + struct sendfilevec sfv; + size_t xferred; + size_t sz = (size_t)p4; + + if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0) + return (-errno); + + sfv.sfv_fd = p2; + sfv.sfv_flag = 0; + sfv.sfv_off = off; + sfv.sfv_len = sz; + error = __systemcall(&rval, SYS_sendfilev, SENDFILEV, p1, &sfv, + 1, &xferred); + + if (error == 0 && xferred > 0) { + off += xferred; + error = uucopy(&off, offp, sizeof (off)); + } + + return (error ? -error : (int)rval.sys_rval1); +} + +int +lx_sendfile64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + sysret_t rval; + off64_t off = 0; + off64_t *offp = (off64_t *)p3; + size_t sz = (size_t)p4; + int error; + struct sendfilevec64 sfv; + size_t xferred; + + if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0) + return (-errno); + + sfv.sfv_fd = p2; + sfv.sfv_flag = 0; + sfv.sfv_off = off; + sfv.sfv_len = sz; + error = __systemcall(&rval, SYS_sendfilev, SENDFILEV64, p1, &sfv, + 1, &xferred); + + if (error == 0 && xferred > 0) { + off += xferred; + error = uucopy(&off, offp, sizeof (off)); + } + + return (error ? 
-error : (int)rval.sys_rval1); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c new file mode 100644 index 0000000000..b6fea626b7 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -0,0 +1,1714 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/segments.h> +#include <sys/lx_types.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thread.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <thread.h> +#include <ucontext.h> +#include <unistd.h> +#include <stdio.h> +#include <libintl.h> +#include <ieeefp.h> + +/* + * Delivering signals to a Linux process is complicated by differences in + * signal numbering, stack structure and contents, and the action taken when a + * signal handler exits. 
In addition, many signal-related structures, such as + * sigset_ts, vary between Solaris and Linux. + * + * To support user-level signal handlers, the brand uses a double layer of + * indirection to process and deliver signals to branded threads. + * + * When a Linux process sends a signal using the kill(2) system call, we must + * translate the signal into the Solaris equivalent before handing control off + * to the standard signalling mechanism. When a signal is delivered to a Linux + * process, we translate the signal number from Solaris to back to Linux. + * Translating signals both at generation and delivery time ensures both that + * Solaris signals are sent properly to Linux applications and that signals' + * default behavior works as expected. + * + * In a normal Solaris process, signal delivery is interposed on for any thread + * registering a signal handler by libc. Libc needs to do various bits of magic + * to provide thread-safe critical regions, so it registers its own handler, + * named sigacthandler(), using the sigaction(2) system call. When a signal is + * received, sigacthandler() is called, and after some processing, libc turns + * around and calls the user's signal handler via a routine named + * call_user_handler(). + * + * Adding a Linux branded thread to the mix complicates things somewhat. + * + * First, when a thread receives a signal, it may be running with a Linux value + * in the x86 %gs segment register as opposed to the value Solaris threads + * expect; if control were passed directly to Solaris code, such as libc's + * sigacthandler(), that code would experience a segmentation fault the first + * time it tried to dereference a memory location using %gs. + * + * Second, the signal number translation referenced above must take place. + * Further, as was the case with Solaris libc, before the Linux signal handler + * is called, the value of the %gs segment register MUST be restored to the + * value Linux code expects. 
+ * + * This need to translate signal numbers and manipulate the %gs register means + * that while with standard Solaris libc, following a signal from generation to + * delivery looks something like: + * + * kernel -> + * sigacthandler() -> + * call_user_handler() -> + * user signal handler + * + * while for the brand's Linux threads, this would look like: + * + * kernel -> + * lx_sigacthandler() -> + * sigacthandler() -> + * call_user_handler() -> + * lx_call_user_handler() -> + * Linux user signal handler + * + * The new additions are: + * + * lx_sigacthandler + * ================ + * This routine is responsible for setting the %gs segment register to the + * value Solaris code expects, and jumping to Solaris' libc signal + * interposition handler, sigacthandler(). + * + * lx_call_user_handler + * ==================== + * This routine is responsible for translating Solaris signal numbers to + * their Linux equivalents, building a Linux signal stack based on the + * information Solaris has provided, and passing the stack to the + * registered Linux signal handler. It is, in effect, the Linux thread + * equivalent to libc's call_user_handler(). + * + * Installing lx_sigacthandler() is a bit tricky, as normally libc's + * sigacthandler() routine is hidden from user programs. To facilitate this, a + * new private function was added to libc, setsigacthandler(): + * + * void setsigacthandler(void (*new_handler)(int, siginfo_t *, void *), + * void (**old_handler)(int, siginfo_t *, void *)) + * + * The routine works by modifying the per-thread data structure libc already + * keeps to track the address of its own interposition handler, replacing it with + * the address passed in; the old handler's address is set in the pointer + * pointed to by the second argument, if it is non-NULL, mimicking the behavior + * of sigaction() itself. 
Once setsigacthandler() has been executed, all + * future branded threads this thread may create will automatically have the + * proper interposition handler installed as the result of a normal + * sigaction() call. + * + * Note that none of this interposition is necessary unless a Linux thread + * registers a user signal handler, as the default action for all signals is the + * same between Solaris and Linux save for one signal, SIGPWR. For this reason, + * the brand ALWAYS installs its own internal signal handler for SIGPWR that + * translates the action to the Linux default, to terminate the process. + * (Solaris' default action is to ignore SIGPWR.) + * + * It is also important to note that when signals are not translated, the brand + * relies upon code interposing upon the wait(2) system call to translate + * signals to their proper values for any Linux threads retrieving the status + * of others. So while the Solaris signal number for a particular signal is set + * in a process' data structures (and would be returned as the result of say, + * WTERMSIG()), the brand's interposiiton upon wait(2) is responsible for + * translating the value WTERMSIG() would return from a Solaris signal number + * to the appropriate Linux value. + * + * The process of returning to an interrupted thread of execution from a user + * signal handler is entirely different between Solaris and Linux. While + * Solaris generally expects to set the context to the interrupted one on a + * normal return from a signal handler, in the normal case Linux instead calls + * code that calls a specific Linux system call, sigreturn(2). Thus when a + * Linux signal handler completes execution, instead of returning through what + * would in libc be a call to setcontext(2), the sigreturn(2) Linux system call + * is responsible for accomplishing much the same thing. 
+ * + * This trampoline code looks something like this: + * + * pop %eax + * mov LX_SYS_rt_sigreturn, %eax + * int $0x80 + * + * so when the Linux user signal handler is eventually called, the stack looks + * like this (in the case of an "lx_sigstack" stack: + * + * ========================================================= + * | Pointer to actual trampoline code (in code segment) | + * ========================================================= + * | Linux signal number | + * ========================================================= + * | Pointer to Linux siginfo_t (or NULL) | + * ========================================================= + * | Pointer to Linux ucontext_t (or NULL) | + * ========================================================= + * | Linux siginfo_t | + * ========================================================= + * | Linux ucontext_t | + * ========================================================= + * | Linux struct _fpstate | + * ========================================================= + * | Trampoline code (marker for gdb, not really executed) | + * ========================================================= + * + * The brand takes the approach of intercepting the Linux sigreturn(2) system + * call in order to turn it into the return through the libc call stack that + * Solaris expects. This is done by the lx_sigreturn() and lx_rt_sigreturn() + * routines, which remove the Linux signal frame from the stack and pass the + * resulting stack pointer to another routine, lx_sigreturn_tolibc(), which + * makes libc believe the user signal handler it had called returned. + * + * (Note that the trampoline code actually lives in a proper executable segment + * and not on the stack, but gdb checks for the exact code sequence of the + * trampoline code on the stack to determine whether it is in a signal stack + * frame or not. Really.) 
+ * + * When control then returns to libc's call_user_handler() routine, a + * setcontext(2) will be done that (in most cases) returns the thread executing + * the code back to the location originally interrupted by receipt of the + * signal. + */ + +/* + * Two flavors of Linux signal stacks: + * + * lx_sigstack - used for "modern" signal handlers, in practice those + * that have the sigaction(2) flag SA_SIGINFO set + * + * lx_oldsigstack - used for legacy signal handlers, those that do not have + * the sigaction(2) flag SA_SIGINFO set or that were setup via + * the signal(2) call. + * + * NOTE: Since these structures will be placed on the stack and stack math will + * be done with their sizes, they must be word aligned in size (32 bits) + * so the stack remains word aligned per the i386 ABI. + */ +struct lx_sigstack { + void (*retaddr)(); /* address of real lx_rt_sigreturn code */ + int sig; /* signal number */ + lx_siginfo_t *sip; /* points to "si" if valid, NULL if not */ + lx_ucontext_t *ucp; /* points to "uc" if valid, NULL if not */ + lx_siginfo_t si; /* saved signal information */ + lx_ucontext_t uc; /* saved user context */ + lx_fpstate_t fpstate; /* saved FP state */ + char trampoline[8]; /* code for trampoline to lx_rt_sigreturn() */ +}; + +struct lx_oldsigstack { + void (*retaddr)(); /* address of real lx_sigreturn code */ + int sig; /* signal number */ + lx_sigcontext_t sigc; /* saved user context */ + lx_fpstate_t fpstate; /* saved FP state */ + int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */ + char trampoline[8]; /* code for trampoline to lx_sigreturn() */ +}; + +/* + * libc_sigacthandler is set to the address of the libc signal interposition + * routine, sigacthandler(). + */ +void (*libc_sigacthandler)(int, siginfo_t *, void*); + +/* + * The lx_sighandlers structure needs to be a global due to the semantics of + * clone(). 
+ * + * If CLONE_SIGHAND is set, the calling process and child share signal + * handlers, and if either calls sigaction(2) it should change the behavior + * in the other thread. Each thread does, however, have its own signal mask + * and set of pending signals. + * + * If CLONE_SIGHAND is not set, the child process should inherit a copy of + * the signal handlers at the time of the clone() but later calls to + * sigaction(2) should only affect the individual thread calling it. + * + * This maps perfectly to a thr_create(3C) thread semantic in the first + * case and a fork(2)-type semantic in the second case. By making + * lx_sighandlers global, we automatically get the correct behavior. + */ +static lx_sighandlers_t lx_sighandlers; + +/* + * stol_stack() and ltos_stack() convert between Solaris and Linux stack_t + * structures. + * + * These routines are needed because although the two structures have the same + * contents, their contents are declared in a different order, so the content + * of the structures cannot be copied with a simple bcopy(). 
+ */ +static void +stol_stack(stack_t *fr, lx_stack_t *to) +{ + to->ss_sp = fr->ss_sp; + to->ss_flags = fr->ss_flags; + to->ss_size = fr->ss_size; +} + +static void +ltos_stack(lx_stack_t *fr, stack_t *to) +{ + to->ss_sp = fr->ss_sp; + to->ss_flags = fr->ss_flags; + to->ss_size = fr->ss_size; +} + +static int +ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp) +{ + lx_sigset_t l; + int lx_sig, sig; + + if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0) + return (-errno); + + (void) sigemptyset(s_sigsetp); + + for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) { + if (lx_sigismember(&l, lx_sig) && + ((sig = ltos_signo[lx_sig]) > 0)) + (void) sigaddset(s_sigsetp, sig); + } + + return (0); +} + +static int +stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp) +{ + lx_sigset_t l; + int sig, lx_sig; + + bzero(&l, sizeof (lx_sigset_t)); + + for (sig = 1; sig < NSIG; sig++) { + if (sigismember(s_sigsetp, sig) && + ((lx_sig = stol_signo[sig]) > 0)) + lx_sigaddset(&l, lx_sig); + } + + return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0) + ? -errno : 0); +} + +static int +ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp) +{ + lx_osigset_t lo; + int lx_sig, sig; + + if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0) + return (-errno); + + (void) sigemptyset(s_sigsetp); + + for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++) + if ((lo & OSIGSET_BITSET(lx_sig)) && + ((sig = ltos_signo[lx_sig]) > 0)) + (void) sigaddset(s_sigsetp, sig); + + return (0); +} + +static int +stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp) +{ + lx_osigset_t lo = 0; + int lx_sig, sig; + + /* + * Note that an lx_osigset_t can only represent the signals from + * [1 .. OSIGSET_NBITS], so even though a signal may be present in the + * Solaris sigset_t, it may not be representable as a bit in the + * lx_osigset_t. 
+ */ + for (sig = 1; sig < NSIG; sig++) + if (sigismember(s_sigsetp, sig) && + ((lx_sig = stol_signo[sig]) > 0) && + (lx_sig <= OSIGSET_NBITS)) + lo |= OSIGSET_BITSET(lx_sig); + + return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0) + ? -errno : 0); +} + +static int +stol_sigcode(int si_code) +{ + switch (si_code) { + case SI_USER: + return (LX_SI_USER); + case SI_LWP: + return (LX_SI_TKILL); + case SI_QUEUE: + return (LX_SI_QUEUE); + case SI_TIMER: + return (LX_SI_TIMER); + case SI_ASYNCIO: + return (LX_SI_ASYNCIO); + case SI_MESGQ: + return (LX_SI_MESGQ); + default: + return (si_code); + } +} + +int +stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) +{ + lx_siginfo_t lx_siginfo; + + bzero(&lx_siginfo, sizeof (*lx_siginfop)); + + if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) { + errno = EINVAL; + return (-1); + } + + lx_siginfo.lsi_code = stol_sigcode(siginfop->si_code); + lx_siginfo.lsi_errno = siginfop->si_errno; + + switch (lx_siginfo.lsi_signo) { + /* + * Semantics ARE defined for SIGKILL, but since + * we can't catch it, we can't translate it. :-( + */ + case LX_SIGPOLL: + lx_siginfo.lsi_band = siginfop->si_band; + lx_siginfo.lsi_fd = siginfop->si_fd; + break; + + case LX_SIGCHLD: + lx_siginfo.lsi_pid = siginfop->si_pid; + lx_siginfo.lsi_status = siginfop->si_status; + lx_siginfo.lsi_utime = siginfop->si_utime; + lx_siginfo.lsi_stime = siginfop->si_stime; + + break; + + case LX_SIGILL: + case LX_SIGBUS: + case LX_SIGFPE: + lx_siginfo.lsi_addr = siginfop->si_addr; + break; + + default: + lx_siginfo.lsi_pid = siginfop->si_pid; + lx_siginfo.lsi_uid = + LX_UID32_TO_UID16(siginfop->si_uid); + break; + } + + return ((uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0) + ? 
-errno : 0); +} + +static void +stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr) +{ + struct _fpstate *fpsp = (struct _fpstate *)fpr; + size_t copy_len; + + /* + * The Solaris struct _fpstate and lx_fpstate_t are identical from the + * beginning of the structure to the lx_fpstate_t "magic" field, so + * just bcopy() those entries. + */ + copy_len = (size_t)&(((lx_fpstate_t *)0)->magic); + bcopy(fpsp, lfpr, copy_len); + + /* + * These fields are all only significant for the first 16 bits. + */ + lfpr->cw &= 0xffff; /* x87 control word */ + lfpr->tag &= 0xffff; /* x87 tag word */ + lfpr->cssel &= 0xffff; /* cs selector */ + lfpr->datasel &= 0xffff; /* ds selector */ + + /* + * Linux wants the x87 status word field to contain the value of the + * x87 saved exception status word. + */ + lfpr->sw = lfpr->status & 0xffff; /* x87 status word */ + + lfpr->mxcsr = fpsp->mxcsr; + + if (fpsp->mxcsr != 0) { + /* + * Linux uses the "magic" field to denote whether the XMM + * registers contain legal data or not. Since we can't get to + * %cr4 from userland to check the status of the OSFXSR bit, + * check the mxcsr field to see if it's 0, which it should + * never be on a system with the OXFXSR bit enabled. + */ + lfpr->magic = LX_X86_FXSR_MAGIC; + bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm)); + } else { + lfpr->magic = LX_X86_FXSR_NONE; + } +} + +static void +ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr) +{ + struct _fpstate *fpsp = (struct _fpstate *)fpr; + size_t copy_len; + + /* + * The lx_fpstate_t and Solaris struct _fpstate are identical from the + * beginning of the structure to the struct _fpstate "mxcsr" field, so + * just bcopy() those entries. + * + * Note that we do NOT have to propogate changes the user may have made + * to the "status" word back to the "sw" word, unlike the way we have + * to deal with processing the ESP and UESP register values on return + * from a signal handler. 
+ */ + copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr); + bcopy(lfpr, fpsp, copy_len); + + /* + * These fields are all only significant for the first 16 bits. + */ + fpsp->cw &= 0xffff; /* x87 control word */ + fpsp->sw &= 0xffff; /* x87 status word */ + fpsp->tag &= 0xffff; /* x87 tag word */ + fpsp->cssel &= 0xffff; /* cs selector */ + fpsp->datasel &= 0xffff; /* ds selector */ + fpsp->status &= 0xffff; /* saved status */ + + fpsp->mxcsr = lfpr->mxcsr; + + if (lfpr->magic == LX_X86_FXSR_MAGIC) + bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm)); +} + +/* + * The brand needs a lx version of this because the format of the lx stack_t + * differs from the Solaris stack_t not really in content but in ORDER, + * so we can't simply pass pointers and expect things to work (sigh...) + */ +int +lx_sigaltstack(uintptr_t nsp, uintptr_t osp) +{ + lx_stack_t ls; + stack_t newsstack, oldsstack; + stack_t *nssp = (nsp ? &newsstack : NULL); + stack_t *ossp = (osp ? &oldsstack : NULL); + + if (nsp) { + if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0) + return (-errno); + + if ((ls.ss_flags & LX_SS_DISABLE) == 0 && + ls.ss_size < LX_MINSIGSTKSZ) + return (-ENOMEM); + + newsstack.ss_sp = (int *)ls.ss_sp; + newsstack.ss_size = (long)ls.ss_size; + newsstack.ss_flags = ls.ss_flags; + } + + if (sigaltstack(nssp, ossp) != 0) + return (-errno); + + if (osp) { + ls.ss_sp = (void *)oldsstack.ss_sp; + ls.ss_size = (size_t)oldsstack.ss_size; + ls.ss_flags = oldsstack.ss_flags; + + if (uucopy(&ls, (void *)osp, sizeof (lx_stack_t)) != 0) + return (-errno); + } + + return (0); +} + +/* + * The following routines are needed because sigset_ts and siginfo_ts are + * different in format between Linux and Solaris. + * + * Note that there are two different lx_sigset structures, lx_sigset_ts and + * lx_osigset_ts: + * + * + An lx_sigset_t is the equivalent of a Solaris sigset_t and supports + * more than 32 signals. 
+ * + * + An lx_osigset_t is simply a uint32_t, so it by definition only supports + * 32 signals. + * + * When there are two versions of a routine, one prefixed with lx_rt_ and + * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with + * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately, + * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn()) + */ +int +lx_sigpending(uintptr_t sigpend) +{ + sigset_t sigpendset; + + if (sigpending(&sigpendset) != 0) + return (-errno); + + return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend)); +} + +int +lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize) +{ + sigset_t sigpendset; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (sigpending(&sigpendset) != 0) + return (-errno); + + return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend)); +} + +/* + * Create a common routine to encapsulate all of the sigprocmask code, + * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask() + * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by + * the setting of the "sigset_type" flag. + */ +static int +lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp, + uintptr_t sigset_type) +{ + int err; + sigset_t set, oset; + sigset_t *s_setp = NULL; + sigset_t *s_osetp; + + if (l_setp) { + switch (how) { + case LX_SIG_BLOCK: + how = SIG_BLOCK; + break; + + case LX_SIG_UNBLOCK: + how = SIG_UNBLOCK; + break; + + case LX_SIG_SETMASK: + how = SIG_SETMASK; + break; + + default: + return (-EINVAL); + } + + s_setp = &set; + + if (sigset_type == USE_SIGSET) + err = ltos_sigset((lx_sigset_t *)l_setp, s_setp); + else + err = ltos_osigset((lx_osigset_t *)l_setp, s_setp); + + if (err != 0) + return (err); + } + + s_osetp = (l_osetp ? 
&oset : NULL); + + /* + * In a multithreaded environment, a call to sigprocmask(2) should + * only affect the current thread's signal mask so we don't need to + * explicitly call thr_sigsetmask(3C) here. + */ + if (sigprocmask(how, s_setp, s_osetp) != 0) + return (-errno); + + if (l_osetp) { + if (sigset_type == USE_SIGSET) + err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp); + else + err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp); + + if (err != 0) { + /* + * Encountered a fault while writing to the old signal + * mask buffer, so unwind the signal mask change made + * above. + */ + (void) sigprocmask(how, s_osetp, (sigset_t *)NULL); + return (err); + } + } + + return (0); +} + +int +lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp) +{ + return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET)); +} + +int +lx_sgetmask(void) +{ + lx_osigset_t oldmask; + + return ((lx_sigprocmask_common(SIG_SETMASK, NULL, (uintptr_t)&oldmask, + USE_OSIGSET) != 0) ? -errno : (int)oldmask); +} + +int +lx_ssetmask(uintptr_t sigmask) +{ + lx_osigset_t newmask, oldmask; + + newmask = (lx_osigset_t)sigmask; + + return ((lx_sigprocmask_common(SIG_SETMASK, (uintptr_t)&newmask, + (uintptr_t)&oldmask, USE_OSIGSET) != 0) ? -errno : (int)oldmask); +} + +int +lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp, + uintptr_t setsize) +{ + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET)); +} + +int +lx_sigsuspend(uintptr_t set) +{ + sigset_t s_set; + + if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0) + return (-errno); + + return ((sigsuspend(&s_set) == -1) ? -errno : 0); +} + +int +lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize) +{ + sigset_t s_set; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0) + return (-errno); + + return ((sigsuspend(&s_set) == -1) ? 
-errno : 0); +} + +int +lx_sigwaitinfo(uintptr_t set, uintptr_t sinfo) +{ + lx_osigset_t *setp = (lx_osigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + if (ltos_osigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +int +lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_sigset_t *setp = (lx_sigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +int +lx_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_osigset_t *setp = (lx_osigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if (ltos_osigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigtimedwait(&s_set, s_sinfop, + (struct timespec *)toutp)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? 
-errno : rc); +} + +int +lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp, + uintptr_t setsize) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_sigset_t *setp = (lx_sigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigtimedwait(&s_set, s_sinfop, + (struct timespec *)toutp)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +/* + * Intercept the Linux sigreturn() syscall to turn it into the return through + * the libc call stack that Solaris expects. + * + * When control returns to libc's call_user_handler() routine, a setcontext(2) + * will be done that returns thread execution to the point originally + * interrupted by receipt of the signal. + */ +int +lx_sigreturn(void) +{ + struct lx_oldsigstack *lx_ossp; + lx_sigset_t lx_sigset; + lx_regs_t *rp; + ucontext_t *ucp; + uintptr_t sp; + + rp = lx_syscall_regs(); + + /* + * NOTE: The sp saved in the context is eight bytes off of where we + * need it to be. + */ + sp = (uintptr_t)rp->lxr_esp - 8; + + /* + * At this point, the stack pointer should point to the struct + * lx_oldsigstack that lx_build_old_signal_frame() constructed and + * placed on the stack. We need to reference it a bit later, so + * save a pointer to it before incrementing our copy of the sp. + */ + lx_ossp = (struct lx_oldsigstack *)sp; + sp += sizeof (struct lx_oldsigstack); + + /* + * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it + * creates the struct lx_oldsigstack. + * + * If we don't find it here, the stack's been corrupted and we need to + * kill ourselves. 
+ */ + if (*(uint32_t *)sp != LX_SIGRT_MAGIC) + lx_err_fatal(gettext( + "sp @ 0x%p, expected 0x%x, found 0x%x!"), + sp, LX_SIGRT_MAGIC, *(uint32_t *)sp); + + sp += sizeof (uint32_t); + + /* + * For signal mask handling to be done properly, this call needs to + * return to the libc routine that originally called the signal handler + * rather than directly set the context back to the place the signal + * interrupted execution as the original Linux code would do. + * + * Here *sp points to the Solaris ucontext_t, so we need to copy + * machine registers the Linux signal handler may have modified + * back to the Solaris version. + */ + ucp = (ucontext_t *)(*(uint32_t *)sp); + + /* + * General registers copy across as-is, except Linux expects that + * changes made to uc_mcontext.gregs[ESP] will be reflected when the + * interrupted thread resumes execution after the signal handler. To + * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to + * match uc_mcontext.gregs[ESP] as Solaris will restore the UESP + * value to ESP. + */ + lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp; + bcopy(&lx_ossp->sigc, &ucp->uc_mcontext, sizeof (gregset_t)); + + /* copy back FP regs if present */ + if (lx_ossp->sigc.sc_fpstate != NULL) + ltos_fpstate(&lx_ossp->fpstate, &ucp->uc_mcontext.fpregs); + + /* convert Linux signal mask back to its Solaris equivalent */ + bzero(&lx_sigset, sizeof (lx_sigset_t)); + lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask; + lx_sigset.__bits[1] = lx_ossp->sig_extra; + (void) ltos_sigset(&lx_sigset, &ucp->uc_sigmask); + + /* + * At this point sp contains the value of the stack pointer when + * lx_call_user_handler() was called. + * + * Pop one more value off the stack and pass the new sp to + * lx_sigreturn_tolibc(), which will in turn manipulate the x86 + * registers to make it appear to libc's call_user_handler() as if the + * handler it had called returned. 
+ */ + sp += sizeof (uint32_t); + lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp); + lx_sigreturn_tolibc(sp); + + /*NOTREACHED*/ + return (0); +} + +int +lx_rt_sigreturn(void) +{ + struct lx_sigstack *lx_ssp; + lx_regs_t *rp; + lx_ucontext_t *lx_ucp; + ucontext_t *ucp; + uintptr_t sp; + + rp = lx_syscall_regs(); + + /* + * NOTE: Because of some silly compatibility measures done in the + * signal trampoline code to make sure it uses the _exact same_ + * instruction sequence Linux does, we have to manually "pop" + * one extra four byte instruction off the stack here before + * passing the stack address to the syscall because the + * trampoline code isn't allowed to do it. + * + * No, I'm not kidding. + * + * The sp saved in the context is eight bytes off of where we + * need it to be, so the need to pop the extra four byte + * instruction means we need to subtract a net four bytes from + * the sp before "popping" the struct lx_sigstack off the stack. + * This will yield the value the stack pointer had before + * lx_sigdeliver() created the stack frame for the Linux signal + * handler. + */ + sp = (uintptr_t)rp->lxr_esp - 4; + + /* + * At this point, the stack pointer should point to the struct + * lx_sigstack that lx_build_signal_frame() constructed and + * placed on the stack. We need to reference it a bit later, so + * save a pointer to it before incrementing our copy of the sp. + */ + lx_ssp = (struct lx_sigstack *)sp; + sp += sizeof (struct lx_sigstack); + + /* + * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it + * creates the struct lx_sigstack (and possibly struct lx_fpstate_t). + * + * If we don't find it here, the stack's been corrupted and we need to + * kill ourselves. 
+ */ + if (*(uint32_t *)sp != LX_SIGRT_MAGIC) + lx_err_fatal(gettext("sp @ 0x%p, expected 0x%x, found 0x%x!"), + sp, LX_SIGRT_MAGIC, *(uint32_t *)sp); + + sp += sizeof (uint32_t); + + /* + * For signal mask handling to be done properly, this call needs to + * return to the libc routine that originally called the signal handler + * rather than directly set the context back to the place the signal + * interrupted execution as the original Linux code would do. + * + * Here *sp points to the Solaris ucontext_t, so we need to copy + * machine registers the Linux signal handler may have modified + * back to the Solaris version. + */ + ucp = (ucontext_t *)(*(uint32_t *)sp); + + lx_ucp = lx_ssp->ucp; + + if (lx_ucp != NULL) { + /* + * General registers copy across as-is, except Linux expects + * that changes made to uc_mcontext.gregs[ESP] will be reflected + * when the interrupted thread resumes execution after the + * signal handler. To emulate this behavior, we must modify + * uc_mcontext.gregs[UESP] to match uc_mcontext.gregs[ESP] as + * Solaris will restore the UESP value to ESP. + */ + lx_ucp->uc_sigcontext.sc_esp_at_signal = + lx_ucp->uc_sigcontext.sc_esp; + bcopy(&lx_ucp->uc_sigcontext, &ucp->uc_mcontext.gregs, + sizeof (gregset_t)); + + if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) + ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate, + &ucp->uc_mcontext.fpregs); + + /* + * Convert the Linux signal mask and stack back to their + * Solaris equivalents. + */ + (void) ltos_sigset(&lx_ucp->uc_sigmask, &ucp->uc_sigmask); + ltos_stack(&lx_ucp->uc_stack, &ucp->uc_stack); + } + + /* + * At this point sp contains the value of the stack pointer when + * lx_call_user_handler() was called. + * + * Pop one more value off the stack and pass the new sp to + * lx_sigreturn_tolibc(), which will in turn manipulate the x86 + * registers to make it appear to libc's call_user_handler() as if the + * handler it had called returned. 
+ */ + sp += sizeof (uint32_t); + lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp); + lx_sigreturn_tolibc(sp); + + /*NOTREACHED*/ + return (0); +} + +/* + * Build signal frame for processing for "old" (legacy) Linux signals + */ +static void +lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp) +{ + extern void lx_sigreturn_tramp(); + + lx_sigset_t lx_sigset; + ucontext_t *ucp = (ucontext_t *)p; + struct lx_sigaction *lxsap; + struct lx_oldsigstack *lx_ossp = sp; + + lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp); + + lx_ossp->sig = lx_sig; + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && + lxsap->lxsa_restorer) { + lx_ossp->retaddr = lxsap->lxsa_restorer; + lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr); + } else { + lx_ossp->retaddr = lx_sigreturn_tramp; + lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp); + } + + lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr); + + /* convert Solaris signal mask and stack to their Linux equivalents */ + (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset); + lx_ossp->sigc.sc_mask = lx_sigset.__bits[0]; + lx_ossp->sig_extra = lx_sigset.__bits[1]; + + /* + * General registers copy across as-is, except Linux expects that + * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a + * signal. + */ + bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t)); + lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal; + + /* + * cr2 contains the faulting address, and Linux only sets cr2 for a + * a segmentation fault. + */ + lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ? 
+ (uintptr_t)sip->si_addr : 0); + + /* convert FP regs if present */ + if (ucp->uc_flags & UC_FPU) { + stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate); + lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate; + } else { + lx_ossp->sigc.sc_fpstate = NULL; + } + + /* + * Believe it or not, gdb wants to SEE the trampoline code on the + * bottom of the stack to determine whether the stack frame belongs to + * a signal handler, even though this code is no longer actually + * called. + * + * You can't make this stuff up. + */ + bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline, + sizeof (lx_ossp->trampoline)); +} + +/* + * Build signal frame for processing for modern Linux signals + */ +static void +lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp) +{ + extern void lx_rt_sigreturn_tramp(); + + lx_ucontext_t *lx_ucp; + ucontext_t *ucp = (ucontext_t *)p; + struct lx_sigstack *lx_ssp = sp; + struct lx_sigaction *lxsap; + + lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp); + + lx_ucp = &lx_ssp->uc; + lx_ssp->ucp = lx_ucp; + lx_ssp->sig = lx_sig; + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && + lxsap->lxsa_restorer) { + lx_ssp->retaddr = lxsap->lxsa_restorer; + lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr); + } else { + lx_ssp->retaddr = lx_rt_sigreturn_tramp; + lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp); + } + + /* Linux has these fields but always clears them to 0 */ + lx_ucp->uc_flags = 0; + lx_ucp->uc_link = NULL; + + /* convert Solaris signal mask and stack to their Linux equivalents */ + (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask); + stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack); + + /* + * General registers copy across as-is, except Linux expects that + * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a + * signal. 
+ */ + bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t)); + lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal; + + /* + * cr2 contains the faulting address, which Linux only sets for a + * a segmentation fault. + */ + lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ? + (uintptr_t)sip->si_addr : 0; + + /* + * Point the lx_siginfo_t pointer to the signal stack's lx_siginfo_t + * if there was a Solaris siginfo_t to convert, otherwise set it to + * NULL. + */ + if ((sip) && (stol_siginfo(sip, &lx_ssp->si) == 0)) + lx_ssp->sip = &lx_ssp->si; + else + lx_ssp->sip = NULL; + + /* convert FP regs if present */ + if (ucp->uc_flags & UC_FPU) { + /* + * Copy FP regs to the appropriate place in the the lx_sigstack + * structure. + */ + stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate); + lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate; + } else + lx_ucp->uc_sigcontext.sc_fpstate = NULL; + + /* + * Believe it or not, gdb wants to SEE the trampoline code on the + * bottom of the stack to determine whether the stack frame belongs to + * a signal handler, even though this code is no longer actually + * called. + * + * You can't make this stuff up. + */ + bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline, + sizeof (lx_ssp->trampoline)); +} + +/* + * This is the second level interposition handler for Linux signals. + */ +static void +lx_call_user_handler(int sig, siginfo_t *sip, void *p) +{ + void (*user_handler)(); + void (*stk_builder)(); + + lx_tsd_t *lx_tsd; + struct lx_sigaction *lxsap; + ucontext_t *ucp = (ucontext_t *)p; + uintptr_t gs; + size_t stksize; + int err, lx_sig; + + /* + * If Solaris signal has no Linux equivalent, effectively + * ignore it. 
+ */ + if ((lx_sig = stol_signo[sig]) == -1) { + lx_debug("caught solaris signal %d, no Linux equivalent", sig); + return; + } + + lx_debug("interpose caught solaris signal %d, translating to Linux " + "signal %d", sig, lx_sig); + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) { + /* Linux SIG_DFL for SIGPWR is to terminate */ + exit(LX_SIGPWR | 0x80); + } + + if ((lxsap->lxsa_handler == SIG_DFL) || + (lxsap->lxsa_handler == SIG_IGN)) + lx_err_fatal(gettext("%s set to %s? How?!?!?"), + "lxsa_handler", + ((lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"), + lxsap->lxsa_handler); + + if ((err = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "lx_call_user_handler", strerror(err)); + + assert(lx_tsd != 0); + + gs = lx_tsd->lxtsd_gs & 0xffff; /* gs is only 16 bits */ + + /* + * Any zero %gs value should be caught when a save is attempted in + * lx_emulate(), but this extra check will catch any zero values due to + * bugs in the library. + */ + assert(gs != 0); + + if (lxsap->lxsa_flags & LX_SA_SIGINFO) { + stksize = sizeof (struct lx_sigstack); + stk_builder = lx_build_signal_frame; + } else { + stksize = sizeof (struct lx_oldsigstack); + stk_builder = lx_build_old_signal_frame; + } + + user_handler = lxsap->lxsa_handler; + + lx_debug("delivering %d (lx %d) to handler at 0x%p with gs 0x%x", sig, + lx_sig, lxsap->lxsa_handler, gs); + + if (lxsap->lxsa_flags & LX_SA_RESETHAND) + lxsap->lxsa_handler = SIG_DFL; + + /* + * lx_sigdeliver() doesn't return, so it relies on the Linux + * signal handlers to clean up the stack, reset the current + * signal mask and return to the code interrupted by the signal. + */ + lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, gs); +} + +/* + * Common routine to modify sigaction characteristics of a thread. 
+ * + * We shouldn't need any special locking code here as we actually use + * libc's sigaction() to do all the real work, so its thread locking should + * take care of any issues for us. + */ +static int +lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, + struct lx_sigaction *olxsp) +{ + struct lx_sigaction *lxsap; + struct sigaction sa; + + if (lx_sig <= 0 || lx_sig >= LX_NSIG) + return (-EINVAL); + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap); + + if ((olxsp != NULL) && + ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0)) + return (-errno); + + if (lxsp != NULL) { + int err, sig; + struct lx_sigaction lxsa; + sigset_t new_set, oset; + + if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0) + return (-errno); + + if ((sig = ltos_signo[lx_sig]) != -1) { + /* + * Block this signal while messing with its dispostion + */ + (void) sigemptyset(&new_set); + (void) sigaddset(&new_set, sig); + + if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) { + err = errno; + lx_debug("unable to block signal %d: %s", sig, + strerror(err)); + return (-err); + } + + /* + * We don't really need the old signal disposition at + * this point, but this weeds out signals that would + * cause sigaction() to return an error before we change + * anything other than the current signal mask. + */ + if (sigaction(sig, NULL, &sa) < 0) { + err = errno; + lx_debug("sigaction() to get old " + "disposition for signal %d failed: " + "%s", sig, strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + return (-err); + } + + if ((lxsa.lxsa_handler != SIG_DFL) && + (lxsa.lxsa_handler != SIG_IGN)) { + sa.sa_handler = lx_call_user_handler; + + /* + * The interposition signal handler needs the + * information provided via the SA_SIGINFO flag. 
+ */ + sa.sa_flags = SA_SIGINFO; + + if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP) + sa.sa_flags |= SA_NOCLDSTOP; + if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT) + sa.sa_flags |= SA_NOCLDWAIT; + if (lxsa.lxsa_flags & LX_SA_ONSTACK) + sa.sa_flags |= SA_ONSTACK; + if (lxsa.lxsa_flags & LX_SA_RESTART) + sa.sa_flags |= SA_RESTART; + if (lxsa.lxsa_flags & LX_SA_NODEFER) + sa.sa_flags |= SA_NODEFER; + + /* + * Can't use RESETHAND with SIGPWR due to + * different default actions between Linux + * and Solaris. + */ + if ((sig != SIGPWR) && + (lxsa.lxsa_flags & LX_SA_RESETHAND)) + sa.sa_flags |= SA_RESETHAND; + + if (ltos_sigset(&lxsa.lxsa_mask, + &sa.sa_mask) != 0) { + err = errno; + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + + lx_debug("interposing handler @ 0x%p for " + "signal %d (lx %d), flags 0x%x", + lxsa.lxsa_handler, sig, lx_sig, + lxsa.lxsa_flags); + + if (sigaction(sig, &sa, NULL) < 0) { + err = errno; + lx_debug("sigaction() to set new " + "disposition for signal %d failed: " + "%s", sig, strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + } else if ((sig != SIGPWR) || + ((sig == SIGPWR) && + (lxsa.lxsa_handler == SIG_IGN))) { + /* + * There's no need to interpose for SIG_DFL or + * SIG_IGN so just call libc's sigaction(), but + * don't allow SIG_DFL for SIGPWR due to + * differing default actions between Linux and + * Solaris. + * + * Get the previous disposition first so things + * like sa_mask and sa_flags are preserved over + * a transition to SIG_DFL or SIG_IGN, which is + * what Linux expects. + */ + + sa.sa_handler = lxsa.lxsa_handler; + + if (sigaction(sig, &sa, NULL) < 0) { + err = errno; + lx_debug("sigaction(%d, %s) failed: %s", + sig, ((sa.sa_handler == SIG_DFL) ? 
+ "SIG_DFL" : "SIG_IGN"), + strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + } + } else { + lx_debug("Linux signal with no kill support " + "specified: %d", lx_sig); + } + + /* + * Save the new disposition for the signal in the global + * lx_sighandlers structure. + */ + bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction)); + + /* + * Reset the signal mask to what we came in with if + * we were modifying a kill-supported signal. + */ + if (sig != -1) + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + } + + return (0); +} + +int +lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp) +{ + int val; + struct lx_sigaction sa, osa; + struct lx_sigaction *sap, *osap; + struct lx_osigaction *osp; + + sap = (actp ? &sa : NULL); + osap = (oactp ? &osa : NULL); + + /* + * If we have a source pointer, convert source lxsa_mask from + * lx_osigset_t to lx_sigset_t format. + */ + if (sap) { + osp = (struct lx_osigaction *)actp; + sap->lxsa_handler = osp->lxsa_handler; + + bzero(&sap->lxsa_mask, sizeof (lx_sigset_t)); + + for (val = 1; val <= OSIGSET_NBITS; val++) + if (osp->lxsa_mask & OSIGSET_BITSET(val)) + (void) lx_sigaddset(&sap->lxsa_mask, val); + + sap->lxsa_flags = osp->lxsa_flags; + sap->lxsa_restorer = osp->lxsa_restorer; + } + + if ((val = lx_sigaction_common(lx_sig, sap, osap))) + return (val); + + /* + * If we have a save pointer, convert the old lxsa_mask from + * lx_sigset_t to lx_osigset_t format. 
+ */ + if (osap) { + osp = (struct lx_osigaction *)oactp; + + osp->lxsa_handler = osap->lxsa_handler; + + bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask)); + for (val = 1; val <= OSIGSET_NBITS; val++) + if (lx_sigismember(&osap->lxsa_mask, val)) + osp->lxsa_mask |= OSIGSET_BITSET(val); + + osp->lxsa_flags = osap->lxsa_flags; + osp->lxsa_restorer = osap->lxsa_restorer; + } + + return (0); +} + +int +lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp, + uintptr_t setsize) +{ + /* + * The "new" rt_sigaction call checks the setsize + * parameter. + */ + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp, + (struct lx_sigaction *)oactp)); +} + +/* + * Convert signal syscall to a call to the lx_sigaction() syscall + */ +int +lx_signal(uintptr_t lx_sig, uintptr_t handler) +{ + struct sigaction act; + struct sigaction oact; + int rc; + + /* + * Use sigaction to mimic SYSV signal() behavior; glibc will + * actually call sigaction(2) itself, so we're really reaching + * back for signal(2) semantics here. + */ + bzero(&act, sizeof (act)); + act.sa_handler = (void (*)())handler; + act.sa_flags = SA_RESETHAND | SA_NODEFER; + + rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact); + return ((rc == 0) ? ((int)oact.sa_handler) : rc); +} + +int +lx_tgkill(uintptr_t tgid, uintptr_t pid, uintptr_t sig) +{ + if (((pid_t)tgid <= 0) || ((pid_t)pid <= 0)) + return (-EINVAL); + + if (tgid != pid) { + lx_unsupported(gettext( + "BrandZ tgkill(2) does not support gid != pid\n")); + return (-ENOTSUP); + } + + /* + * Pad the lx_tkill() call with NULLs to match the IN_KERNEL_SYSCALL + * prototype generated for it by IN_KERNEL_SYSCALL in lx_brand.c. + */ + return (lx_tkill(pid, sig, NULL, NULL, NULL, NULL)); +} + +/* + * This C routine to save the passed %gs value into the thread-specific save + * area is called by the assembly routine lx_sigacthandler. 
+ */ +void +lx_sigsavegs(uintptr_t signalled_gs) +{ + lx_tsd_t *lx_tsd; + int err; + + signalled_gs &= 0xffff; /* gs is only 16 bits */ + + /* + * While a %gs of 0 is technically legal (as long as the application + * never dereferences memory using %gs), Solaris has its own ideas as + * to how a zero %gs should be handled in _update_sregs(), such that + * any 32-bit user process with a %gs of zero running on a system with + * a 64-bit kernel will have its %gs hidden base register stomped on on + * return from a system call, leaving an incorrect base address in + * place until the next time %gs is actually reloaded (forcing a reload + * of the base address from the appropriate descriptor table.) + * + * Of course the kernel will once again stomp on THAT base address when + * returning from a system call, resulting in an application + * segmentation fault. + * + * To avoid this situation, disallow a save of a zero %gs here in order + * to try and capture any Linux process that takes a signal with a zero + * %gs installed. + */ + assert(signalled_gs != 0); + + if (signalled_gs != LWPGS_SEL) { + if ((err = thr_getspecific(lx_tsd_key, + (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "sigsavegs", strerror(err)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_gs = signalled_gs; + + lx_debug("lx_sigsavegs(): gsp 0x%p, saved gs: 0x%x\n", + lx_tsd, signalled_gs); + } +} + +int +lx_siginit(void) +{ + extern void set_setcontext_enforcement(int); + extern void lx_sigacthandler(int, siginfo_t *, void *); + + struct sigaction sa; + sigset_t new_set, oset; + int lx_sig, sig; + + /* + * Block all signals possible while setting up the signal imposition + * mechanism. 
+ */ + (void) sigfillset(&new_set); + + if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) + lx_err_fatal(gettext("unable to block signals while setting up " + "imposition mechanism: %s"), strerror(errno)); + + /* + * Ignore any signals that have no Linux analog so that those + * signals cannot be sent to Linux processes from the global zone + */ + for (sig = 1; sig < NSIG; sig++) + if (stol_signo[sig] < 0) + (void) sigignore(sig); + + /* + * As mentioned previously, when a user signal handler is installed + * via sigaction(), libc interposes on the mechanism by actually + * installing an internal routine sigacthandler() as the signal + * handler. On receipt of the signal, libc does some thread-related + * processing via sigacthandler(), then calls the registered user + * signal handler on behalf of the user. + * + * We need to interpose on that mechanism to make sure the correct + * %gs segment register value is installed before the libc routine + * is called, otherwise the libc code will die with a segmentation + * fault. + * + * The private libc routine setsigacthandler() will set our + * interposition routine, lx_sigacthandler(), as the default + * "sigacthandler" routine for all new signal handlers for this + * thread. 
+ */ + setsigacthandler(lx_sigacthandler, &libc_sigacthandler); + lx_debug("lx_sigacthandler installed, libc_sigacthandler = 0x%p", + libc_sigacthandler); + + /* + * Mark any signals that are ignored as ignored in our interposition + * handler array + */ + for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) { + if (((sig = ltos_signo[lx_sig]) != -1) && + (sigaction(sig, NULL, &sa) < 0)) + lx_err_fatal(gettext("unable to determine previous " + "disposition for signal %d: %s"), + sig, strerror(errno)); + + if (sa.sa_handler == SIG_IGN) { + lx_debug("marking signal %d (lx %d) as SIG_IGN", + sig, lx_sig); + lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN; + } + } + + /* + * Have our interposition handler handle SIGPWR to start with, + * as it has a default action of terminating the process in Linux + * but its default is to be ignored in Solaris. + */ + (void) sigemptyset(&sa.sa_mask); + sa.sa_sigaction = lx_call_user_handler; + sa.sa_flags = SA_SIGINFO; + + if (sigaction(SIGPWR, &sa, NULL) < 0) + lx_err_fatal(gettext("%s failed: %s"), "sigaction(SIGPWR)", + strerror(errno)); + + /* + * Solaris' libc forces certain register values in the ucontext_t + * used to restore a post-signal user context to be those Solaris + * expects; however that is not what we want to happen if the signal + * was taken while branded code was executing, so we must disable + * that behavior. 
+ */ + set_setcontext_enforcement(0); + + /* + * Reset the signal mask to what we came in with + */ + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + + lx_debug("interposition handler setup for SIGPWR"); + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c new file mode 100644 index 0000000000..c5a402e8f1 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c @@ -0,0 +1,1487 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <libintl.h> +#include <strings.h> +#include <alloca.h> +#include <ucred.h> + +#include <sys/param.h> +#include <sys/brand.h> +#include <sys/syscall.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/un.h> +#include <netinet/tcp.h> +#include <netinet/igmp.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_socket.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> + +/* + * This string is used to prefix all abstract namespace unix sockets, ie all + * abstract namespace sockets are converted to regular sockets in the /tmp + * directory with .ABSK_ prefixed to their names. + */ +#define ABST_PRFX "/tmp/.ABSK_" +#define ABST_PRFX_LEN 11 + +static int lx_socket(ulong_t *); +static int lx_bind(ulong_t *); +static int lx_connect(ulong_t *); +static int lx_listen(ulong_t *); +static int lx_accept(ulong_t *); +static int lx_getsockname(ulong_t *); +static int lx_getpeername(ulong_t *); +static int lx_socketpair(ulong_t *); +static int lx_send(ulong_t *); +static int lx_recv(ulong_t *); +static int lx_sendto(ulong_t *); +static int lx_recvfrom(ulong_t *); +static int lx_shutdown(ulong_t *); +static int lx_setsockopt(ulong_t *); +static int lx_getsockopt(ulong_t *); +static int lx_sendmsg(ulong_t *); +static int lx_recvmsg(ulong_t *); + +typedef int (*sockfn_t)(ulong_t *); + +static struct { + sockfn_t s_fn; /* Function implementing the subcommand */ + int s_nargs; /* Number of arguments the function takes */ +} sockfns[] = { + lx_socket, 3, + lx_bind, 3, + lx_connect, 3, + lx_listen, 2, + lx_accept, 3, + lx_getsockname, 3, + lx_getpeername, 3, + lx_socketpair, 4, + lx_send, 4, + lx_recv, 4, + lx_sendto, 6, + lx_recvfrom, 6, + lx_shutdown, 2, + lx_setsockopt, 5, + lx_getsockopt, 5, + lx_sendmsg, 3, + lx_recvmsg, 3 +}; + +/* + * What follows are 
/*
 * Address family translation table.  LTOS_FAMILY() indexes it with a Linux
 * address family number and gets back the Solaris AF_* value to use instead.
 * AF_NOTSUPPORTED marks Linux families that have no Solaris equivalent here;
 * the converters in this file turn that value into an error return.
 */
static const int ltos_family[LX_AF_MAX + 1] = {
	AF_UNSPEC, AF_UNIX, AF_INET, AF_CCITT, AF_IPX,
	AF_APPLETALK, AF_NOTSUPPORTED, AF_OSI, AF_NOTSUPPORTED,
	AF_X25, AF_INET6, AF_CCITT, AF_DECnet,
	AF_802, AF_POLICY, AF_KEY, AF_ROUTE,
	AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
	AF_NOTSUPPORTED, AF_SNA, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
	AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
	AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED
};

/*
 * Translate a Linux address family.  Values above LX_AF_MAX yield AF_INVAL.
 * There is no lower-bound check, so the argument must not be negative.
 */
#define	LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL)
/*
 * TCP-level socket option translation table, indexed by the Linux option
 * number.  Each slot holds the Solaris option to use in its place, or
 * OPTNOTSUP when the table provides no translation for that option.
 */
static const int ltos_tcp_sockopts[LX_TCP_QUICKACK + 1] = {
	OPTNOTSUP, TCP_NODELAY, TCP_MAXSEG, OPTNOTSUP,
	OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
	TCP_KEEPALIVE, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP,
	OPTNOTSUP
};
/*
 * The main Linux to Solaris protocol-to-options mapping table, indexed by
 * the Linux protocol level.  Each entry names the option translation table
 * for that level together with its number of entries; a level with no
 * translation table has a NULL table and zero entries.
 *
 * IPPROTO_TAB_SIZE can be set up to IPPROTO_MAX.  All levels at or above
 * IPPROTO_TAB_SIZE are in effect not implemented.
 */

#define	IPPROTO_TAB_SIZE	8

static const lx_proto_opts_t ltos_proto_opts[IPPROTO_TAB_SIZE] = {
	/* IPPROTO_IP		0 */
	PROTO_SOCKOPTS(ltos_ip_sockopts),
	/* SOL_SOCKET		1 */
	PROTO_SOCKOPTS(ltos_socket_sockopts),
	/* IPPROTO_IGMP		2 */
	PROTO_SOCKOPTS(ltos_igmp_sockopts),
	/* NOT IMPLEMENTED	3 */
	{ NULL, 0 },
	/* NOT IMPLEMENTED	4 */
	{ NULL, 0 },
	/* NOT IMPLEMENTED	5 */
	{ NULL, 0 },
	/* IPPROTO_TCP		6 */
	PROTO_SOCKOPTS(ltos_tcp_sockopts),
	/* NOT IMPLEMENTED	7 */
	{ NULL, 0 }
};
\ + ((struct cmsghdr *)0) : \ + ((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \ + ((struct cmsghdr *)(c))->cmsg_len)))) + +#define LX_TO_SOL 1 +#define SOL_TO_LX 2 + +static int +convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller) +{ + struct cmsghdr *cmsg, *last; + int err = 0; + + cmsg = CMSG_FIRSTHDR(msg); + while (cmsg != NULL && err == 0) { + if (direction == LX_TO_SOL) { + if (cmsg->cmsg_level == LX_SOL_SOCKET) { + cmsg->cmsg_level = SOL_SOCKET; + if (cmsg->cmsg_type == LX_SCM_RIGHTS) + cmsg->cmsg_type = SCM_RIGHTS; + else if (cmsg->cmsg_type == LX_SCM_CRED) + cmsg->cmsg_type = SCM_UCRED; + else + err = ENOTSUP; + } else { + err = ENOTSUP; + } + } else { + if (cmsg->cmsg_level == SOL_SOCKET) { + cmsg->cmsg_level = LX_SOL_SOCKET; + if (cmsg->cmsg_type == SCM_RIGHTS) + cmsg->cmsg_type = LX_SCM_RIGHTS; + else if (cmsg->cmsg_type == SCM_UCRED) + cmsg->cmsg_type = LX_SCM_CRED; + else + err = ENOTSUP; + } else { + err = ENOTSUP; + } + } + + last = cmsg; + cmsg = CMSG_NXTHDR(msg, last); + } + if (err) + lx_unsupported("Unsupported socket control message in %s\n.", + caller); + + return (err); +} + +/* + * If inaddr is an abstract namespace unix socket, this function expects addr + * to have enough memory to hold the expanded socket name, ie it must be of + * size *len + ABST_PRFX_LEN. + */ +static int +convert_sockaddr(struct sockaddr *addr, socklen_t *len, + struct sockaddr *inaddr, socklen_t inlen) +{ + sa_family_t family; + int lx_in6_len; + int size; + int i, orig_len; + + /* + * Note that if the buffer at inaddr is ever smaller than inlen bytes, + * we may erroneously return EFAULT rather than a possible EINVAL + * as the copy comes before the various checks as to whether inlen + * is of the proper length for the socket type. + * + * This isn't an issue at present because all callers to this routine + * do meet that constraint. 
+ */ + if ((ssize_t)inlen < 0) + return (-EINVAL); + if (uucopy(inaddr, addr, inlen) != 0) + return (-errno); + + family = LTOS_FAMILY(addr->sa_family); + + switch (family) { + case (sa_family_t)AF_NOTSUPPORTED: + return (-EPROTONOSUPPORT); + case (sa_family_t)AF_INVAL: + return (-EAFNOSUPPORT); + case AF_INET: + size = sizeof (struct sockaddr); + + if (inlen < size) + return (-EINVAL); + + *len = size; + break; + + case AF_INET6: + /* + * The Solaris sockaddr_in6 has one more 32-bit + * field than the Linux version. + */ + size = sizeof (struct sockaddr_in6); + lx_in6_len = size - sizeof (uint32_t); + + if (inlen != lx_in6_len) + return (-EINVAL); + + *len = (sizeof (struct sockaddr_in6)); + bzero((char *)addr + lx_in6_len, sizeof (uint32_t)); + break; + + case AF_UNIX: + if (inlen > sizeof (struct sockaddr_un)) + return (-EINVAL); + + *len = inlen; + + /* + * Linux supports abstract unix sockets, which are + * simply sockets that do not exist on the file system. + * These sockets are denoted by beginning the path with + * a NULL character. To support these, we strip out the + * leading NULL character and change the path to point + * to a real place in /tmp directory, by prepending + * ABST_PRFX and replacing all illegal characters with + * '_'. + */ + if (addr->sa_data[0] == '\0') { + + /* + * inlen is the entire size of the sockaddr_un + * data structure, including the sun_family, so + * we need to subtract this out. We subtract + * 1 since we want to overwrite the leadin NULL + * character, and thus do not include it in the + * length. + */ + orig_len = inlen - sizeof (addr->sa_family) - 1; + + /* + * Since abstract paths can contain illegal + * filename characters, we simply replace these + * with '_' + */ + for (i = 1; i < orig_len + 1; i++) { + if (addr->sa_data[i] == '\0' || + addr->sa_data[i] == '/') + addr->sa_data[i] = '_'; + } + + /* + * prepend ABST_PRFX to file name, minus the + * leading NULL character. 
This places the + * socket as a hidden file in the /tmp + * directory. + */ + (void) memmove(addr->sa_data + ABST_PRFX_LEN, + addr->sa_data + 1, orig_len); + bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN); + + /* + * Since abstract socket paths may not be NULL + * terminated, we must explicitly NULL terminate + * our string. + */ + addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0'; + + /* + * Make len reflect the new len of our string. + * Although we removed the NULL character at the + * beginning of the string, we added a NULL + * character to the end, so the net gain in + * length is simply ABST_PRFX_LEN. + */ + *len = inlen + ABST_PRFX_LEN; + } + break; + + default: + *len = inlen; + } + + addr->sa_family = family; + return (0); +} + +static int +convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom, + int *out_type) +{ + int domain, type; + + if (in_dom < 0 || in_type < 0 || in_protocol < 0) + return (-EINVAL); + + domain = LTOS_FAMILY(in_dom); + if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC) + return (-EAFNOSUPPORT); + if (domain == AF_INVAL) + return (-EINVAL); + + type = LTOS_SOCKTYPE(in_type); + if (type == SOCK_NOTSUPPORTED) + return (-ESOCKTNOSUPPORT); + if (type == SOCK_INVAL) + return (-EINVAL); + + /* + * Linux does not allow the app to specify IP Protocol for raw + * sockets. Solaris does, so bail out here. 
+ */ + if (type == SOCK_RAW && in_protocol == IPPROTO_IP) + return (-ESOCKTNOSUPPORT); + + *out_dom = domain; + *out_type = type; + return (0); +} + +static int +convert_sockflags(int lx_flags) +{ + int solaris_flags = 0; + + if (lx_flags & LX_MSG_OOB) + solaris_flags |= MSG_OOB; + + if (lx_flags & LX_MSG_PEEK) + solaris_flags |= MSG_PEEK; + + if (lx_flags & LX_MSG_DONTROUTE) + solaris_flags |= MSG_DONTROUTE; + + if (lx_flags & LX_MSG_CTRUNC) + solaris_flags |= MSG_CTRUNC; + + if (lx_flags & LX_MSG_TRUNC) + solaris_flags |= MSG_TRUNC; + + if (lx_flags & LX_MSG_WAITALL) + solaris_flags |= MSG_WAITALL; + + if (lx_flags & LX_MSG_DONTWAIT) + solaris_flags |= MSG_DONTWAIT; + + if (lx_flags & LX_MSG_EOR) + solaris_flags |= MSG_EOR; + + if (lx_flags & LX_MSG_PROXY) + lx_unsupported("socket operation with MSG_PROXY flag set"); + + if (lx_flags & LX_MSG_FIN) + lx_unsupported("socket operation with MSG_FIN flag set"); + + if (lx_flags & LX_MSG_SYN) + lx_unsupported("socket operation with MSG_SYN flag set"); + + if (lx_flags & LX_MSG_CONFIRM) + lx_unsupported("socket operation with MSG_CONFIRM set"); + + if (lx_flags & LX_MSG_RST) + lx_unsupported("socket operation with MSG_RST flag set"); + + if (lx_flags & LX_MSG_MORE) + lx_unsupported("socket operation with MSG_MORE flag set"); + + return (solaris_flags); +} + +static int +lx_socket(ulong_t *args) +{ + int domain; + int type; + int protocol = (int)args[2]; + int fd; + int err; + + err = convert_sock_args((int)args[0], (int)args[1], protocol, + &domain, &type); + if (err != 0) + return (err); + + lx_debug("\tsocket(%d, %d, %d)", domain, type, protocol); + + /* Right now IPv6 sockets don't work */ + if (domain == AF_INET6) + return (-EAFNOSUPPORT); + + /* + * Clients of the auditing subsystem used by CentOS 4 and 5 expect to + * be able to create AF_ROUTE SOCK_RAW sockets to communicate with the + * auditing daemons. Failure to create these sockets will cause login, + * ssh and useradd, amoung other programs to fail. 
To trick these + * programs into working, we convert the socket domain and type to + * something that we do support. Then when sendto is called on these + * sockets, we return an error code. See lx_sendto. + */ + if (domain == AF_ROUTE && type == SOCK_RAW) { + domain = AF_INET; + type = SOCK_STREAM; + protocol = 0; + } + + fd = socket(domain, type, protocol); + if (fd >= 0) + return (fd); + + if (errno == EPROTONOSUPPORT) + return (-ESOCKTNOSUPPORT); + + return (-errno); +} + +static int +lx_bind(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct stat64 statbuf; + struct sockaddr *name, oldname; + socklen_t len; + int r, r2, ret, tmperrno; + int abst_sock; + struct stat sb; + + if (uucopy((struct sockaddr *)args[1], &oldname, + sizeof (struct sockaddr)) != 0) + return (-errno); + + /* + * Handle Linux abstract sockets, which are UNIX sockets whose path + * begins with a NULL character. + */ + abst_sock = (oldname.sa_family == AF_UNIX) && + (oldname.sa_data[0] == '\0'); + + /* + * convert_sockaddr will expand the socket path if it is abstract, so + * we need to allocate extra memory for it now. + */ + if ((name = SAFE_ALLOCA((socklen_t)args[2] + + abst_sock * ABST_PRFX_LEN)) == NULL) + return (-EINVAL); + + if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], + (socklen_t)args[2])) < 0) + return (r); + + /* + * Linux abstract namespace unix sockets are simply socket that do not + * exist on the filesystem. We emulate them by changing their paths + * in covert_sockaddr so that they point real files names on the + * filesystem. Because in Linux they do not exist on the filesystem + * applications do not have to worry about deleting files, however in + * our filesystem based emulation we do. To solve this problem, we first + * check to see if the socket already exists before we create one. If it + * does we attempt to connect to it to see if it is in use, or just + * left over from a previous lx_bind call. 
If we are unable to connect, + * we assume it is not in use and remove the file, then continue on + * as if the file never existed. + */ + if (abst_sock && stat(name->sa_data, &sb) == 0 && + S_ISSOCK(sb.st_mode)) { + if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) + return (-ENOSR); + ret = connect(r2, name, len); + tmperrno = errno; + if (close(r2) < 0) + return (-EINVAL); + + /* + * if we can't connect to the socket, assume no one is using it + * and remove it, otherwise assume it is in use and return + * EADDRINUSE. + */ + if ((ret < 0) && (tmperrno == ECONNREFUSED)) { + if (unlink(name->sa_data) < 0) { + return (-EADDRINUSE); + } + } else { + return (-EADDRINUSE); + } + } + + lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len); + + if (name->sa_family == AF_UNIX) + lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); + + r = bind(sockfd, name, len); + + /* + * Linux returns EADDRINUSE for attempts to bind to UNIX domain + * sockets that aren't sockets. + */ + if ((r < 0) && (errno == EINVAL) && (name->sa_family == AF_UNIX) && + ((stat64(name->sa_data, &statbuf) == 0) && + (!S_ISSOCK(statbuf.st_mode)))) + return (-EADDRINUSE); + + return ((r < 0) ? -errno : r); +} + +static int +lx_connect(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name, oldname; + socklen_t len; + int r; + int abst_sock; + + if (uucopy((struct sockaddr *)args[1], &oldname, + sizeof (struct sockaddr)) != 0) + return (-errno); + + + /* Handle Linux abstract sockets */ + abst_sock = (oldname.sa_family == AF_UNIX) && + (oldname.sa_data[0] == '\0'); + + /* + * convert_sockaddr will expand the socket path, if it is abstract, so + * we need to allocate extra memory for it now. 
+ */ + if ((name = SAFE_ALLOCA((socklen_t)args[2] + + abst_sock * ABST_PRFX_LEN)) == NULL) + return (-EINVAL); + + if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], + (socklen_t)args[2])) < 0) + return (r); + + lx_debug("\tconnect(%d, 0x%p, %d)", sockfd, name, len); + + if (name->sa_family == AF_UNIX) + lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); + + r = connect(sockfd, name, len); + + return ((r < 0) ? -errno : r); +} + +static int +lx_listen(ulong_t *args) +{ + int sockfd = (int)args[0]; + int backlog = (int)args[1]; + int r; + + lx_debug("\tlisten(%d, %d)", sockfd, backlog); + r = listen(sockfd, backlog); + + return ((r < 0) ? -errno : r); +} + +static int +lx_accept(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name = (struct sockaddr *)args[1]; + socklen_t namelen = 0; + int r; + + lx_debug("\taccept(%d, 0x%p, 0x%p", sockfd, args[1], args[2]); + + /* + * The Linux man page says that -1 is returned and errno is set to + * EFAULT if the "name" address is bad, but it is silent on what to + * set errno to if the "namelen" address is bad. Experimentation + * shows that Linux (at least the 2.4.21 kernel in CentOS) actually + * sets errno to EINVAL in both cases. + * + * Note that we must first check the name pointer, as the Linux + * docs state nothing is copied out if the "name" pointer is NULL. + * If it is NULL, we don't care about the namelen pointer's value + * or about dereferencing it. + * + * Happily, Solaris' accept(3SOCKET) treats NULL name pointers and + * zero namelens the same way. + */ + if ((name != NULL) && + (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0)) + return ((errno == EFAULT) ? -EINVAL : -errno); + + lx_debug("\taccept namelen = %d", namelen); + + if ((r = accept(sockfd, name, &namelen)) < 0) + return ((errno == EFAULT) ? 
-EINVAL : -errno); + + lx_debug("\taccept namelen returned %d bytes", namelen); + + /* + * In Linux, accept()ed sockets do not inherit anything set by + * fcntl(), so filter those out. + */ + if (fcntl(r, F_SETFL, 0) < 0) + return (-errno); + + /* + * Once again, a bad "namelen" address sets errno to EINVAL, not + * EFAULT. If namelen was zero, there's no need to copy a zero back + * out. + * + * Logic might dictate that we should check if we can write to + * the namelen pointer earlier so we don't accept a pending connection + * only to fail the call because we can't write the namelen value back + * out. However, testing shows Linux does indeed fail the call after + * accepting the connection so we must behave in a compatible manner. + */ + if ((name != NULL) && (namelen != 0) && + (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0)) + return ((errno == EFAULT) ? -EINVAL : -errno); + + return (r); +} + +static int +lx_getsockname(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name = NULL; + socklen_t namelen, namelen_orig; + + if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) + return (-errno); + namelen_orig = namelen; + + lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))", + sockfd, args[1], args[2], namelen); + + if (namelen > 0) { + if ((name = SAFE_ALLOCA(namelen)) == NULL) + return (-EINVAL); + bzero(name, namelen); + } + + if ((getsockname(sockfd, name, &namelen)) < 0) + return (-errno); + + /* + * If the name that getsockname() want's to return is larger + * than namelen, getsockname() will copy out the maximum amount + * of data possible and then update namelen to indicate the + * actually size of all the data that it wanted to copy out. 
+ */ + if (uucopy(name, (void *)args[1], namelen_orig) != 0) + return (-errno); + if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) + return (-errno); + + return (0); +} + +static int +lx_getpeername(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name; + socklen_t namelen; + + if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) + return (-errno); + + lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))", + sockfd, args[1], args[2], namelen); + + /* + * Linux returns EFAULT in this case, even if the namelen parameter + * is 0. This check will not catch other illegal addresses, but + * the benefit catching a non-null illegal address here is not + * worth the cost of another system call. + */ + if ((void *)args[1] == NULL) + return (-EFAULT); + + if ((name = SAFE_ALLOCA(namelen)) == NULL) + return (-EINVAL); + if ((getpeername(sockfd, name, &namelen)) < 0) + return (-errno); + + if (uucopy(name, (void *)args[1], namelen) != 0) + return (-errno); + + if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) + return (-errno); + + return (0); +} + +static int +lx_socketpair(ulong_t *args) +{ + int domain; + int type; + int protocol = (int)args[2]; + int *sv = (int *)args[3]; + int fds[2]; + int r; + + r = convert_sock_args((int)args[0], (int)args[1], protocol, + &domain, &type); + if (r != 0) + return (r); + + lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv); + + r = socketpair(domain, type, protocol, fds); + + if (r == 0) { + if (uucopy(fds, sv, sizeof (fds)) != 0) { + r = errno; + (void) close(fds[0]); + (void) close(fds[1]); + return (-r); + } + return (0); + } + + if (errno == EPROTONOSUPPORT) + return (-ESOCKTNOSUPPORT); + + return (-errno); +} + +static ssize_t +lx_send(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + 
lx_debug("\tsend(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "send()"); + } + + r = send(sockfd, buf, len, flags); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "send()"); + + return ((r < 0) ? -errno : r); +} + +static ssize_t +lx_recv(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecv(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. 
+ */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recv()"); + } + + r = recv(sockfd, buf, len, flags); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recv()"); + + return ((r < 0) ? -errno : r); +} + +static ssize_t +lx_sendto(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + struct sockaddr *to = NULL, oldto; + socklen_t tolen = 0; + ssize_t r; + int abst_sock; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + if ((args[4] != NULL) && (args[5] > 0)) { + if (uucopy((struct sockaddr *)args[4], &oldto, + sizeof (struct sockaddr)) != 0) + return (-errno); + + /* Handle Linux abstract sockets */ + abst_sock = (oldto.sa_family == AF_UNIX) && + (oldto.sa_data[0] == '\0'); + + /* + * convert_sockaddr will expand the socket path, if it is + * abstract, so we need to allocate extra memory for it now. + */ + if ((to = SAFE_ALLOCA(args[5] + abst_sock * ABST_PRFX_LEN)) + == NULL) + return (-EINVAL); + + if ((r = convert_sockaddr(to, &tolen, + (struct sockaddr *)args[4], (socklen_t)args[5])) < 0) + return (r); + } + + + lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len, + flags, to, tolen); + + flags = convert_sockflags(flags); + + /* return this error to make auditing subsystem happy */ + if (to && to->sa_family == AF_ROUTE) { + return (-ECONNREFUSED); + } + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. 
+ * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "sendto()"); + } + + r = sendto(sockfd, buf, len, flags, to, tolen); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "sendto()"); + + if (r < 0) { + /* + * according to the man page and LTP, the expected error in + * this case is EPIPE. + */ + if (errno == ENOTCONN) + return (-EPIPE); + else + return (-errno); + } + return (r); +} + +static ssize_t +lx_recvfrom(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + struct sockaddr *from = (struct sockaddr *)args[4]; + socklen_t *from_lenp = (socklen_t *)args[5]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecvfrom(%d, 0x%p, 0x%d, 0x%x, 0x%x, 0x%p)", sockfd, buf, + len, flags, from, from_lenp); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. 
+ */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recvfrom()"); + } + + r = recvfrom(sockfd, buf, len, flags, from, from_lenp); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recvfrom()"); + + return ((r < 0) ? -errno : r); +} + +static int +lx_shutdown(ulong_t *args) +{ + int sockfd = (int)args[0]; + int how = (int)args[1]; + int r; + + lx_debug("\tshutdown(%d, %d)", sockfd, how); + r = shutdown(sockfd, how); + + return ((r < 0) ? -errno : r); +} + +static int +lx_setsockopt(ulong_t *args) +{ + int sockfd = (int)args[0]; + int level = (int)args[1]; + int optname = (int)args[2]; + void *optval = (void *)args[3]; + int optlen = (int)args[4]; + int internal_opt; + int r; + + lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname, + optval, optlen); + + /* + * The kernel returns EFAULT for all invalid addresses except NULL, + * for which it returns EINVAL. Linux wants EFAULT for NULL too. + */ + if (optval == NULL) + return (-EFAULT); + + /* + * Do a table lookup of the Solaris equivalent of the given option + */ + if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) + return (-ENOPROTOOPT); + + if (ltos_proto_opts[level].maxentries == 0 || + optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) + return (-ENOPROTOOPT); + + /* + * Linux sets this option when it wants to send credentials over a + * socket. Currently we just ignore it to make Linux programs happy. + */ + if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) + return (0); + + + if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) { + /* + * TCP_CORK is a Linux-only option that instructs the TCP + * stack not to send out partial frames. 
Solaris doesn't + * include this option but some apps require it. So, we do + * our best to emulate the option by disabling TCP_NODELAY. + * If the app requests that we disable TCP_CORK, we just + * ignore it since enabling TCP_NODELAY may be + * overcompensating. + */ + optname = TCP_NODELAY; + if (optlen != sizeof (int)) + return (-EINVAL); + if (uucopy(optval, &internal_opt, sizeof (int)) != 0) + return (-errno); + if (internal_opt == 0) + return (0); + internal_opt = 1; + optval = &internal_opt; + } else { + optname = ltos_proto_opts[level].proto[optname]; + + if (optname == OPTNOTSUP) + return (-ENOPROTOOPT); + } + + if (level == LX_SOL_SOCKET) + level = SOL_SOCKET; + + r = setsockopt(sockfd, level, optname, optval, optlen); + + return ((r < 0) ? -errno : r); +} + +static int +lx_getsockopt(ulong_t *args) +{ + int sockfd = (int)args[0]; + int level = (int)args[1]; + int optname = (int)args[2]; + void *optval = (void *)args[3]; + int *optlenp = (int *)args[4]; + int r; + + lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname, + optval, optlenp); + + /* + * According to the Linux man page, a NULL optval should indicate + * (as in Solaris) that no return value is expected. Instead, it + * actually triggers an EFAULT error. + */ + if (optval == NULL) + return (-EFAULT); + + /* + * Do a table lookup of the Solaris equivalent of the given option + */ + if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) + return (-EOPNOTSUPP); + + if (ltos_proto_opts[level].maxentries == 0 || + optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) + return (-ENOPROTOOPT); + + if (((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) || + ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK))) { + /* + * Linux sets LX_SO_PASSCRED when it wants to send credentials + * over a socket. Since we do not support it, it is never set + * and we return 0. + * + * We don't support TCP_CORK but some apps rely on it. 
So, + * rather than return an error we just return 0. This + * isn't exactly a lie, since this option really isn't set, + * but it's not the whole truth either. Fortunately, we + * aren't under oath. + */ + r = 0; + if (uucopy(&r, optval, sizeof (int)) != 0) + return (-errno); + r = sizeof (int); + if (uucopy(&r, optlenp, sizeof (int)) != 0) + return (-errno); + return (0); + } + if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) { + struct lx_ucred lx_ucred; + ucred_t *ucp; + + /* + * We don't support SO_PEERCRED, but we do have equivalent + * functionality in getpeerucred() so invoke that here. + */ + + /* Verify there's going to be enough room for the results. */ + if (uucopy(optlenp, &r, sizeof (int)) != 0) + return (-errno); + if (r < sizeof (struct lx_ucred)) + return (-EOVERFLOW); + + /* + * We allocate a ucred_t ourselves rather than allow + * getpeerucred() to do it for us because getpeerucred() + * uses malloc(3C) and we'd rather use SAFE_ALLOCA(). + */ + if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL) + return (-ENOMEM); + + /* Get the credential for the remote end of this socket. */ + if (getpeerucred(sockfd, &ucp) != 0) + return (-errno); + if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) || + ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) || + ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) { + return (-errno); + } + + /* Copy out the results. */ + if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0) + return (-errno); + r = sizeof (lx_ucred); + if ((uucopy(&r, optlenp, sizeof (int))) != 0) + return (-errno); + return (0); + } + + optname = ltos_proto_opts[level].proto[optname]; + + if (optname == OPTNOTSUP) + return (-ENOPROTOOPT); + + if (level == LX_SOL_SOCKET) + level = SOL_SOCKET; + + r = getsockopt(sockfd, level, optname, optval, optlenp); + + return ((r < 0) ? -errno : r); +} + +/* + * libc routines that issue these system calls. 
We bypass the libsocket + * wrappers since they explicitly turn off the MSG_XPG_2 flag we need for + * Linux compatibility. + */ +extern int _so_sendmsg(); +extern int _so_recvmsg(); + +static int +lx_sendmsg(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct lx_msghdr msg; + struct cmsghdr *cmsg; + int flags = (int)args[2]; + int r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\tsendmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); + + flags = convert_sockflags(flags); + + if ((uucopy((void *)args[1], &msg, sizeof (msg))) != 0) + return (-errno); + + /* + * If there are control messages bundled in this message, we need + * to convert them from Linux to Solaris. + */ + if (msg.msg_control != NULL) { + if (msg.msg_controllen == 0) { + cmsg = NULL; + } else { + cmsg = SAFE_ALLOCA(msg.msg_controllen); + if (cmsg == NULL) + return (-EINVAL); + } + if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) + return (-errno); + msg.msg_control = cmsg; + if ((r = convert_cmsgs(LX_TO_SOL, &msg, "sendmsg()")) != 0) + return (-r); + } + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. 
+ */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "sendmsg()"); + } + + r = _so_sendmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "sendmsg()"); + + if (r < 0) { + /* + * according to the man page and LTP, the expected error in + * this case is EPIPE. + */ + if (errno == ENOTCONN) + return (-EPIPE); + else + return (-errno); + } + + return (r); +} + +static int +lx_recvmsg(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct lx_msghdr msg; + struct lx_msghdr *msgp = (struct lx_msghdr *)args[1]; + struct cmsghdr *cmsg = NULL; + int flags = (int)args[2]; + int r, err; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecvmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); + + flags = convert_sockflags(flags); + + if ((uucopy(msgp, &msg, sizeof (msg))) != 0) + return (-errno); + + /* + * If we are expecting to have to convert any control messages, + * then we should receive them into our address space instead of + * the app's. + */ + if (msg.msg_control != NULL) { + cmsg = msg.msg_control; + if (msg.msg_controllen == 0) { + msg.msg_control = NULL; + } else { + msg.msg_control = SAFE_ALLOCA(msg.msg_controllen); + if (msg.msg_control == NULL) + return (-EINVAL); + } + } + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. 
+ * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recvmsg()"); + } + + r = _so_recvmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recvmsg()"); + + if (r >= 0 && msg.msg_control != NULL) { + /* + * If there are control messages bundled in this message, + * we need to convert them from Linux to Solaris. + */ + if ((err = convert_cmsgs(SOL_TO_LX, &msg, "recvmsg()")) != 0) + return (-err); + + if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) + return (-errno); + } + + /* + * A handful of the values in the msghdr are set by the recvmsg() + * call, so copy their values back to the caller. Rather than iterate, + * just copy the whole structure back. + */ + if (uucopy(&msg, msgp, sizeof (msg)) != 0) + return (-errno); + + return ((r < 0) ? -errno : r); +} + +int +lx_socketcall(uintptr_t p1, uintptr_t p2) +{ + int subcmd = (int)p1 - 1; /* subcommands start at 1 - not 0 */ + ulong_t args[6]; + int r; + + if (subcmd < 0 || subcmd >= LX_RECVMSG) + return (-EINVAL); + + /* + * Copy the arguments to the subcommand in from the app's address + * space, returning EFAULT if we get a bogus pointer. 
+ */ + if (uucopy((void *)p2, args, + sockfns[subcmd].s_nargs * sizeof (ulong_t))) + return (-errno); + + r = (sockfns[subcmd].s_fn)(args); + + return (r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/stat.c b/usr/src/lib/brand/lx/lx_brand/common/stat.c new file mode 100644 index 0000000000..7ba2312565 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/stat.c @@ -0,0 +1,551 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * when a stat() is done for a non-device file, the devt returned + * via the stat is the devt of the device backing the filesystem which + * contains the file the stat was performed on. these devts are currently + * untranslated. if this turns out to cause problems in the future then + * we might want to add more devt translators to convert sd and cmdk + * devts into linux devts that normally represent disks. + * + * XXX this may not be the best place to have the devt translation code. + * devt translation will also be needed for /proc fs support, which will + * probably be done in the kernel. 
we may need to move this code into + * the kernel and add a brand syscall to do the translation for us. this + * will need to be worked out before putback. + */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <strings.h> +#include <unistd.h> +#include <libintl.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/lx_types.h> +#include <sys/lx_stat.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_ptm.h> +#include <sys/lx_audio.h> +#include <sys/lx_fcntl.h> +#include <sys/modctl.h> + +/* define _KERNEL to get the devt manipulation macros */ +#define _KERNEL +#include <sys/sysmacros.h> +#undef _KERNEL + + +#define LX_PTS_MAJOR_MIN 136 +#define LX_PTS_MAJOR_MAX 143 +#define LX_PTS_MAX \ + ((LX_PTS_MAJOR_MAX - LX_PTS_MAJOR_MIN + 1) * LX_MINORMASK) + +#define LX_PTM_MAJOR 5 +#define LX_PTM_MINOR 2 + +/* values for dt_type */ +#define DTT_INVALID 0 +#define DTT_LIST 1 +#define DTT_CUSTOM 2 + +/* convience macros for access the dt_minor union */ +#define dt_list dt_minor.dtm_list +#define dt_custom dt_minor.dtm_custom + +/* + * structure used to define devt translators + */ +typedef struct minor_translator { + char *mt_path; /* solaris minor node path */ + minor_t mt_minor; /* solaris minor node number */ + int mt_lx_major; /* linux major node number */ + int mt_lx_minor; /* linux minor node number */ +} minor_translator_t; + +typedef struct devt_translator { + char *dt_driver; /* solaris driver name */ + major_t dt_major; /* solaris driver number */ + + /* dt_type dictates how we intrepret dt_minor */ + int dt_type; + union { + uintptr_t dtm_foo; /* required to compile */ + minor_translator_t *dtm_list; + int (*dtm_custom)(dev_t, lx_dev_t *, int); + } dt_minor; +} devt_translator_t; + + +/* + * forward declerations + */ +static devt_translator_t devt_translators[]; + +/* + * called to initialize the devt translation subsystem + */ +int +lx_stat_init() +{ + minor_translator_t *mt; + struct 
stat st; + major_t major; + char *driver; + int i, j, ret; + + for (i = 0; devt_translators[i].dt_driver != NULL; i++) { + + assert(devt_translators[i].dt_type != DTT_INVALID); + + /* figure out the major numbers for our devt translators */ + driver = devt_translators[i].dt_driver; + ret = modctl(MODGETMAJBIND, + driver, strlen(driver) + 1, &major); + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_stat_init(): modctl(MODGETMAJBIND, ", + driver, strerror(errno)); + lx_err(gettext("%s: %s translator disabled for: %s\n"), + "lx_stat_init()", "devt", driver); + devt_translators[i].dt_major = (major_t)-1; + continue; + } + + /* save the major node value */ + devt_translators[i].dt_major = major; + + /* if this translator doesn't use a list mapping we're done. */ + if (devt_translators[i].dt_type != DTT_LIST) + continue; + + /* for each device listed, lookup the minor node number */ + mt = devt_translators[i].dt_list; + for (j = 0; mt[j].mt_path != NULL; j++) { + + /* stat the device */ + ret = stat(mt[j].mt_path, &st); + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_stat_init(): stat(", + mt[j].mt_path, strerror(errno)); + lx_err(gettext( + "%s: %s translator disabled for: %s\n"), + "lx_stat_init()", "devt", + mt[j].mt_path); + st.st_rdev = NODEV; + } else { + /* make sure the major node matches */ + assert(getmajor(st.st_rdev) == major); + assert(mt[j].mt_minor < LX_MINORMASK); + } + + /* save the minor node value */ + mt[j].mt_minor = getminor(st.st_rdev); + } + } + return (0); +} + +static int +/*ARGSUSED*/ +pts_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + minor_t min = getminor(dev); + int lx_maj; + int lx_min; + + /* + * linux has a really small minor number name space (8 bits). + * so if pts devices are limited to one major number you could + * only have 256 of them. linux addresses this issue by using + * multiple major numbers for pts devices. 
+ */ + if (min >= LX_PTS_MAX) + return (EOVERFLOW); + + lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MINORMASK); + lx_min = min % LX_MINORMASK; + + *jdev = LX_MAKEDEVICE(lx_maj, lx_min); + return (0); +} + + +static int +/*ARGSUSED*/ +ptm_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + *jdev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR); + return (0); +} + +static int +audio_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + int s_minor, l_minor; + + if (fd == -1) { + s_minor = getminor(dev); + } else { + /* + * this is a cloning device so we have to ask the driver + * what kind of minor node this is + */ + if (ioctl(fd, LXA_IOC_GETMINORNUM, &s_minor) < 0) + return (-EINVAL); + } + + switch (s_minor) { + case LXA_MINORNUM_DSP: + l_minor = 3; + break; + case LXA_MINORNUM_MIXER: + l_minor = 0; + break; + default: + return (-EINVAL); + } + + *jdev = LX_MAKEDEVICE(14, l_minor); + return (0); +} + +static void +s2l_dev_report(dev_t dev, lx_dev_t jdev) +{ + major_t maj; + minor_t min; + int lx_maj, lx_min; + + if (lx_debug_enabled == 0) + return; + + maj = getmajor(dev); + min = getminor(dev); + + lx_maj = LX_GETMAJOR(jdev); + lx_min = LX_GETMINOR(jdev); + + lx_debug("\ttranslated devt [%d, %d] -> [%d, %d]", + maj, min, lx_maj, lx_min); +} + +static int +s2l_devt(dev_t dev, lx_dev_t *jdev, int fd) +{ + minor_translator_t *mt; + int i, j, err; + major_t maj = getmajor(dev); + minor_t min = getminor(dev); + + /* look for a devt translator for this major number */ + for (i = 0; devt_translators[i].dt_driver != NULL; i++) { + if (devt_translators[i].dt_major == maj) + break; + } + if (devt_translators[i].dt_driver != NULL) { + + /* try to translate the solaris devt to a linux devt */ + switch (devt_translators[i].dt_type) { + case DTT_LIST: + mt = devt_translators[i].dt_list; + for (j = 0; mt[j].mt_path != NULL; j++) { + if (mt[j].mt_minor == min) { + assert(mt[j].mt_minor < LX_MINORMASK); + + /* found a translation */ + *jdev = LX_MAKEDEVICE( + mt[j].mt_lx_major, + 
mt[j].mt_lx_minor); + s2l_dev_report(dev, *jdev); + return (0); + } + } + break; + + case DTT_CUSTOM: + err = devt_translators[i].dt_custom(dev, jdev, fd); + if (err == 0) + s2l_dev_report(dev, *jdev); + return (err); + break; + } + } + + /* we don't have a translator for this device */ + *jdev = LX_MAKEDEVICE(maj, min); + return (0); +} + +static int +stat_convert(uintptr_t lx_statp, struct stat *s, int fd) +{ + struct lx_stat buf; + lx_dev_t st_dev, st_rdev; + int err; + + if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0) + return (err); + if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0) + return (err); + + if ((st_dev > USHRT_MAX) || (st_rdev > USHRT_MAX) || + (s->st_nlink > USHRT_MAX) || (s->st_size > ULONG_MAX)) + return (-EOVERFLOW); + + /* Linux seems to report a 0 st_size for all block devices */ + if ((s->st_mode & S_IFMT) == S_IFBLK) + s->st_size = 0; + + bzero(&buf, sizeof (buf)); + buf.st_dev = st_dev; + buf.st_rdev = st_rdev; + buf.st_ino = s->st_ino; + buf.st_mode = s->st_mode; + buf.st_nlink = s->st_nlink; + buf.st_uid = LX_UID32_TO_UID16(s->st_uid); + buf.st_gid = LX_GID32_TO_GID16(s->st_gid); + buf.st_size = s->st_size; + buf.st_blksize = s->st_blksize; + buf.st_blocks = s->st_blocks; + buf.st_atime.ts_sec = s->st_atim.tv_sec; + buf.st_atime.ts_nsec = s->st_atim.tv_nsec; + buf.st_ctime.ts_sec = s->st_ctim.tv_sec; + buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec; + buf.st_mtime.ts_sec = s->st_mtim.tv_sec; + buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec; + + if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0) + return (-errno); + + return (0); +} + +static int +stat64_convert(uintptr_t lx_statp, struct stat64 *s, int fd) +{ + struct lx_stat64 buf; + lx_dev_t st_dev, st_rdev; + int err; + + if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0) + return (err); + if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0) + return (err); + + /* Linux seems to report a 0 st_size for all block devices */ + if ((s->st_mode & S_IFMT) == S_IFBLK) + s->st_size = 0; + + 
bzero(&buf, sizeof (buf)); + buf.st_dev = st_dev; + buf.st_rdev = st_rdev; + buf.st_small_ino = (lx_ino_t)(s->st_ino & UINT_MAX); + buf.st_ino = (lx_ino64_t)s->st_ino; + buf.st_mode = s->st_mode; + buf.st_nlink = s->st_nlink; + buf.st_uid = s->st_uid; + buf.st_gid = s->st_gid; + buf.st_size = s->st_size; + buf.st_blksize = s->st_blksize; + buf.st_blocks = s->st_blocks; + buf.st_atime.ts_sec = s->st_atim.tv_sec; + buf.st_atime.ts_nsec = s->st_atim.tv_nsec; + buf.st_ctime.ts_sec = s->st_ctim.tv_sec; + buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec; + buf.st_mtime.ts_sec = s->st_mtim.tv_sec; + buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec; + + if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0) + return (-errno); + + return (0); +} + +int +lx_stat(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat sbuf; + + lx_debug("\tstat(%s, ...)", path); + if (stat(path, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, -1)); +} + + +int +lx_fstat(uintptr_t p1, uintptr_t p2) +{ + int fd = (int)p1; + struct stat sbuf; + char *path, path_buf[MAXPATHLEN]; + + if (lx_debug_enabled != 0) { + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tfstat(%d - %s, ...)", fd, path); + } + if (fstat(fd, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, fd)); +} + + +int +lx_lstat(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat sbuf; + + lx_debug("\tlstat(%s, ...)", path); + if (lstat(path, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, -1)); +} + +int +lx_stat64(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat64 sbuf; + + lx_debug("\tstat64(%s, ...)", path); + if (stat64(path, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, -1)); +} + + +int +lx_fstat64(uintptr_t p1, uintptr_t p2) +{ + int fd = (int)p1; + struct stat64 sbuf; + char *path, path_buf[MAXPATHLEN]; + + if (lx_debug_enabled != 0) { + path = lx_fd_to_path(fd, 
path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tfstat64(%d - %s, ...)", fd, path); + } + if (fstat64(fd, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, fd)); +} + +int +lx_fstatat64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + int atfd = (int)p1; + const char *path = (const char *)p2; + int flag; + struct stat64 sbuf; + + if (atfd == LX_AT_FDCWD) + atfd = AT_FDCWD; + + flag = ltos_at_flag(p4, AT_SYMLINK_NOFOLLOW); + if (flag < 0) + return (-EINVAL); + + if (fstatat64(atfd, path, &sbuf, flag)) + return (-errno); + + return (stat64_convert(p3, &sbuf, -1)); +} + + +int +lx_lstat64(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat64 sbuf; + + lx_debug("\tlstat64(%s, ...)", path); + if (lstat64(path, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, -1)); +} + +/* + * devt translator definitions + */ +#define MINOR_TRANSLATOR(path, lx_major, lx_minor) \ + { path, 0, lx_major, lx_minor } + +#define MINOR_TRANSLATOR_END \ + { NULL, 0, 0, 0 } + +#define DEVT_TRANSLATOR(drv, flags, i) \ + { drv, 0, flags, (uintptr_t)i } + +/* + * translators for devts + */ +static minor_translator_t mtranslator_mm[] = { + MINOR_TRANSLATOR("/dev/null", 1, 3), + MINOR_TRANSLATOR("/dev/zero", 1, 5), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_random[] = { + MINOR_TRANSLATOR("/dev/random", 1, 8), + MINOR_TRANSLATOR("/dev/urandom", 1, 9), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_sy[] = { + MINOR_TRANSLATOR("/dev/tty", 5, 0), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_zcons[] = { + MINOR_TRANSLATOR("/dev/console", 5, 1), + MINOR_TRANSLATOR_END +}; +static devt_translator_t devt_translators[] = { + DEVT_TRANSLATOR("mm", DTT_LIST, &mtranslator_mm), + DEVT_TRANSLATOR("random", DTT_LIST, &mtranslator_random), + DEVT_TRANSLATOR("sy", DTT_LIST, &mtranslator_sy), + DEVT_TRANSLATOR("zcons", DTT_LIST, &mtranslator_zcons), + 
DEVT_TRANSLATOR(LX_AUDIO_DRV, DTT_CUSTOM, audio_devt_translator), + DEVT_TRANSLATOR(LX_PTM_DRV, DTT_CUSTOM, ptm_devt_translator), + DEVT_TRANSLATOR("pts", DTT_CUSTOM, pts_devt_translator), + DEVT_TRANSLATOR(NULL, 0, 0) +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/statfs.c b/usr/src/lib/brand/lx/lx_brand/common/statfs.c new file mode 100644 index 0000000000..03e2563d70 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/statfs.c @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <string.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/statvfs.h> +#include <sys/param.h> + +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_statfs.h> + +/* + * these defines must exist before we include regexp.h, see regexp(5) + */ +#define RE_SIZE 1024 +#define INIT char *sp = instring; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) return (NULL); +#define ERROR(c) return ((char *)c); + +/* + * for regular expressions we're using regexp(5). + * + * we'd really prefer to use some other nicer regular expressions + * interfaces (like regcmp(3c), regcomp(3c), or re_comp(3c)) but we + * can't because all these other interfaces rely on the ability + * to allocate memory via libc malloc()/calloc() calls, which + * we can't really do here. + * + * we could optionally use regexpr(3gen) but we don't since the + * interfaces there are incredibly similar to the regexp(5) + * interfaces we're already using and we'd have the added + * requirement of linking against libgen. + * + * another option that was considered is fnmatch(3c) but the + * limited pattern expansion capability of this interface would + * force us to include more patterns to check against. + */ +#include <regexp.h> + +static struct lx_ftype_path { + char *lfp_path; + char lfp_re[RE_SIZE]; + int lfp_magic; + char *lfp_magic_str; +} ftype_path_list[] = { + { "^/dev/pts$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { "^/dev/pts/$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { "^/dev/pts/[0-9][0-9]*$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { NULL, "", + 0, NULL } +}; + +/* + * For lack of linux equivalents, we present lofs and zfs as being ufs. 
+ */ +static struct lx_ftype_name { + const char *lfn_name; + int lfn_magic; + char *lfn_magic_str; +} ftype_name_list[] = { + { "hsfs", LX_ISOFS_SUPER_MAGIC, "LX_ISOFS_SUPER_MAGIC" }, + { "nfs", LX_NFS_SUPER_MAGIC, "LX_NFS_SUPER_MAGIC" }, + { "pcfs", LX_MSDOS_SUPER_MAGIC, "LX_MSDOS_SUPER_MAGIC" }, + { "lx_proc", LX_PROC_SUPER_MAGIC, "LX_PROC_SUPER_MAGIC" }, + { "ufs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { "lofs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { "zfs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { NULL, 0, NULL } +}; + +int +lx_statfs_init() +{ + int i; + char *rv; + + for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) { + rv = compile( + ftype_path_list[i].lfp_path, + ftype_path_list[i].lfp_re, + ftype_path_list[i].lfp_re + RE_SIZE, '\0'); + if (rv == NULL) + continue; + + lx_debug("lx_statfs_init compile(\"%s\") failed", + ftype_path_list[i].lfp_path); + return (1); + } + return (0); +} + +static int +stol_type(const char *path, const char *name) +{ + int i; + lx_debug("\tstol_type(\"%s\", \"%s\")\n", path == NULL ? "NULL" : path, + name == NULL ? 
"NULL" : name); + + if (path != NULL) { + char userpath[MAXPATHLEN]; + + if (uucopystr(path, userpath, MAXPATHLEN) == -1) + return (-errno); + + for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) { + if (step(userpath, ftype_path_list[i].lfp_re) == 0) + continue; + + /* got a match on the fs path */ + lx_debug("\ttranslated f_type to 0x%x - %s", + ftype_path_list[i].lfp_magic, + ftype_path_list[i].lfp_magic_str); + return (ftype_path_list[i].lfp_magic); + } + } + + assert(name != NULL); + for (i = 0; ftype_name_list[i].lfn_name != NULL; i++) { + if (strcmp(name, ftype_name_list[i].lfn_name) == 0) { + + /* got a match on the fs name */ + lx_debug("\ttranslated f_type to 0x%x - %s", + ftype_name_list[i].lfn_magic, + ftype_name_list[i].lfn_magic_str); + return (ftype_name_list[i].lfn_magic); + } + } + + /* we don't know what the fs type is so just set it to 0 */ + return (0); +} + +/* + * The Linux statfs() is similar to the Solaris statvfs() call, the main + * difference being the use of a numeric 'f_type' identifier instead of the + * 'f_basetype' string. 
+ */ +static int +stol_statfs(const char *path, struct lx_statfs *l, struct statvfs *s) +{ + int type; + + if ((type = stol_type(path, s->f_basetype)) < 0) + return (type); + + l->f_type = type; + l->f_bsize = s->f_bsize; + l->f_blocks = s->f_blocks; + l->f_bfree = s->f_bfree; + l->f_bavail = s->f_bavail; + l->f_files = s->f_files; + l->f_ffree = s->f_ffree; + l->f_fsid = s->f_fsid; + l->f_namelen = s->f_namemax; + l->f_frsize = s->f_frsize; + bzero(&(l->f_spare), sizeof (l->f_spare)); + + return (0); +} + +static int +stol_statfs64(const char *path, struct lx_statfs64 *l, struct statvfs64 *s) +{ + int type; + + if ((type = stol_type(path, s->f_basetype)) < 0) + return (type); + + l->f_type = type; + l->f_bsize = s->f_bsize; + l->f_blocks = s->f_blocks; + l->f_bfree = s->f_bfree; + l->f_bavail = s->f_bavail; + l->f_files = s->f_files; + l->f_ffree = s->f_ffree; + l->f_fsid = s->f_fsid; + l->f_namelen = s->f_namemax; + l->f_frsize = s->f_frsize; + bzero(&(l->f_spare), sizeof (l->f_spare)); + + return (0); +} + +int +lx_statfs(uintptr_t p1, uintptr_t p2) +{ + const char *path = (const char *)p1; + struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2; + struct statvfs vfs; + int err; + + lx_debug("\tfstatvfs(%s, 0x%p)", path, fs); + if (statvfs(path, &vfs) != 0) + return (-errno); + + if ((err = stol_statfs(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0) + return (-errno); + + return (0); +} + +int +lx_fstatfs(uintptr_t p1, uintptr_t p2) +{ + struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2; + struct statvfs vfs; + char *path, path_buf[MAXPATHLEN]; + int fd = (int)p1; + int err; + + lx_debug("\tfstatvfs(%d, 0x%p)", fd, fs); + if (fstatvfs(fd, &vfs) != 0) + return (-errno); + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + + if ((err = stol_statfs(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0) + return (-errno); + + return (0); +} + +/* ARGSUSED */ 
+int +lx_statfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + const char *path = (const char *)p1; + struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3; + struct statvfs64 vfs; + int err; + + lx_debug("\tstatvfs64(%s, %d, 0x%p)", path, p2, fs); + if (statvfs64(path, &vfs) != 0) + return (-errno); + + if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0) + return (-errno); + + return (0); +} + +/* ARGSUSED */ +int +lx_fstatfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3; + struct statvfs64 vfs; + char *path, path_buf[MAXPATHLEN]; + int fd = (int)p1; + int err; + + lx_debug("\tfstatvfs64(%d, %d, 0x%p)", fd, p2, fs); + if (fstatvfs64(fd, &vfs) != 0) + return (-errno); + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + + if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0) + return (-errno); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c new file mode 100644 index 0000000000..1cf4ca3ac1 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <alloca.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> + +/* + * sysctl() implementation. The full set of possible values is incredibly + * large; we only implement the bare minimum here, namely basic kernel + * information. + * + * For the moment, we also print out debugging messages if the application + * attempts to write or access any other values, so we can tell if we are not + * supporting something we should be. + */ + +struct lx_sysctl_args { + int *name; + int nlen; + void *oldval; + size_t *oldlenp; + void *newval; + size_t newlen; +}; + +#define LX_CTL_KERN 1 + +#define LX_KERN_OSTYPE 1 +#define LX_KERN_OSRELEASE 2 +#define LX_KERN_OSREV 3 +#define LX_KERN_VERSION 4 + +int +lx_sysctl(uintptr_t raw) +{ + struct lx_sysctl_args args; + int name[2]; + size_t oldlen; + char *namebuf; + + if (uucopy((void *)raw, &args, sizeof (args)) < 0) + return (-EFAULT); + + /* + * We only allow [ CTL_KERN, KERN_* ] pairs, so reject anything that + * doesn't have exactly two values starting with LX_CTL_KERN. + */ + if (args.nlen != 2) + return (-ENOTDIR); + + if (uucopy(args.name, name, sizeof (name)) < 0) + return (-EFAULT); + + if (name[0] != LX_CTL_KERN) { + lx_debug("sysctl: read of [%d, %d] unsupported", + name[0], name[1]); + return (-ENOTDIR); + } + + /* We don't support writing new sysctl values. 
*/ + if ((args.newval != NULL) || (args.newlen != 0)) { + lx_debug("sysctl: write of [%d, %d] unsupported", + name[0], name[1]); + return (-EPERM); + } + + /* + * It may seem silly, but passing in a NULL oldval pointer and not + * writing any new values is a perfectly legal thing to do and should + * succeed. + */ + if (args.oldval == NULL) + return (0); + + /* + * Likewise, Linux specifies that setting a non-NULL oldval but a + * zero *oldlenp should result in an errno of EFAULT. + */ + if ((uucopy(args.oldlenp, &oldlen, sizeof (oldlen)) < 0) || + (oldlen == 0)) + return (-EFAULT); + + namebuf = SAFE_ALLOCA(oldlen); + if (namebuf == NULL) + return (-ENOMEM); + + switch (name[1]) { + case LX_KERN_OSTYPE: + (void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen); + break; + case LX_KERN_OSRELEASE: + (void) strlcpy(namebuf, lx_release, oldlen); + break; + case LX_KERN_VERSION: + (void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen); + break; + default: + lx_debug("sysctl: read of [CTL_KERN, %d] unsupported", name[1]); + return (-ENOTDIR); + } + + oldlen = strlen(namebuf); + + if ((uucopy(namebuf, args.oldval, oldlen) < 0) || + (uucopy(&oldlen, args.oldlenp, sizeof (oldlen)) < 0)) + return (-EFAULT); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c new file mode 100644 index 0000000000..62efcdbe3d --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c @@ -0,0 +1,893 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <strings.h> +#include <rctl.h> +#include <alloca.h> +#include <values.h> +#include <sys/syscall.h> +#include <sys/msg.h> +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/lx_debug.h> +#include <sys/lx_types.h> +#include <sys/lx_sysv_ipc.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> + +#define SLOT_SEM 0 +#define SLOT_SHM 1 +#define SLOT_MSG 2 + +static int +get_rctlval(rctlblk_t *rblk, char *name) +{ + rctl_qty_t r; + + if (getrctl(name, NULL, rblk, RCTL_FIRST) == -1) + return (-errno); + + r = rctlblk_get_value(rblk); + if (r > MAXINT) + return (-EOVERFLOW); + return (r); +} + +/* + * Given a slot number and a maximum number of ids to extract from the + * kernel, return the msgid in the provided slot. + */ +static int +slot_to_id(int type, int slot) +{ + uint_t nids, max; + int *idbuf = NULL; + int r = 0; + + nids = 0; + for (;;) { + switch (type) { + case SLOT_SEM: + r = semids(idbuf, nids, &max); + break; + case SLOT_SHM: + r = shmids(idbuf, nids, &max); + break; + case SLOT_MSG: + r = msgids(idbuf, nids, &max); + break; + } + + if (r < 0) + return (-errno); + + if (max == 0) + return (-EINVAL); + + if (max <= nids) + return (idbuf[slot]); + + nids = max; + if ((idbuf = (int *)SAFE_ALLOCA(sizeof (int) * nids)) == NULL) + return (-ENOMEM); + } +} + +/* + * Semaphore operations. 
+ */ +static int +lx_semget(key_t key, int nsems, int semflg) +{ + int sol_flag; + int r; + + lx_debug("\nsemget(%d, %d, %d)\n", key, nsems, semflg); + sol_flag = semflg & S_IAMB; + if (semflg & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (semflg & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = semget(key, nsems, sol_flag); + return ((r < 0) ? -errno : r); +} + +static int +lx_semop(int semid, struct sembuf *sops, size_t nsops) +{ + int r; + + lx_debug("\nsemop(%d, 0x%p, %u)\n", semid, sops, nsops); + if (nsops == 0) + return (-EINVAL); + + r = semop(semid, sops, nsops); + return ((r < 0) ? -errno : r); +} + +static int +lx_semctl_ipcset(int semid, void *buf) +{ + struct lx_semid_ds semds; + struct semid_ds sol_semds; + int r; + + if (uucopy(buf, &semds, sizeof (semds))) + return (-errno); + + bzero(&sol_semds, sizeof (sol_semds)); + sol_semds.sem_perm.uid = semds.sem_perm.uid; + sol_semds.sem_perm.gid = semds.sem_perm.gid; + sol_semds.sem_perm.mode = semds.sem_perm.mode; + + r = semctl(semid, 0, IPC_SET, &sol_semds); + return ((r < 0) ? 
-errno : r); +} + +static int +lx_semctl_ipcstat(int semid, void *buf) +{ + struct lx_semid_ds semds; + struct semid_ds sol_semds; + + if (semctl(semid, 0, IPC_STAT, &sol_semds) != 0) + return (-errno); + + bzero(&semds, sizeof (semds)); + semds.sem_perm.key = sol_semds.sem_perm.key; + semds.sem_perm.seq = sol_semds.sem_perm.seq; + semds.sem_perm.uid = sol_semds.sem_perm.uid; + semds.sem_perm.gid = sol_semds.sem_perm.gid; + semds.sem_perm.cuid = sol_semds.sem_perm.cuid; + semds.sem_perm.cgid = sol_semds.sem_perm.cgid; + + /* Linux only uses the bottom 9 bits */ + semds.sem_perm.mode = sol_semds.sem_perm.mode & S_IAMB; + semds.sem_otime = sol_semds.sem_otime; + semds.sem_ctime = sol_semds.sem_ctime; + semds.sem_nsems = sol_semds.sem_nsems; + + if (uucopy(&semds, buf, sizeof (semds))) + return (-errno); + + return (0); +} + +static int +lx_semctl_ipcinfo(void *buf) +{ + struct lx_seminfo i; + rctlblk_t *rblk; + int rblksz; + uint_t nids; + int idbuf; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&i, sizeof (i)); + if ((i.semmni = get_rctlval(rblk, "project.max-sem-ids")) < 0) + return (i.semmni); + if ((i.semmsl = get_rctlval(rblk, "process.max-sem-nsems")) < 0) + return (i.semmsl); + if ((i.semopm = get_rctlval(rblk, "process.max-sem-ops")) < 0) + return (i.semopm); + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. 
+ */ + i.semmap = i.semmni; + i.semmns = i.semmni * i.semmsl; + i.semmnu = INT_MAX; + i.semume = INT_MAX; + i.semvmx = LX_SEMVMX; + if (semids(&idbuf, 0, &nids) < 0) + return (-errno); + i.semusz = nids; + i.semaem = INT_MAX; + + if (uucopy(&i, buf, sizeof (i)) != 0) + return (-errno); + + return (nids); +} + +static int +lx_semctl_semstat(int slot, void *buf) +{ + int r, semid; + + semid = slot_to_id(SLOT_SEM, slot); + if (semid < 0) + return (semid); + + r = lx_semctl_ipcstat(semid, buf); + return (r < 0 ? r : semid); +} + +/* + * For the SETALL operation, we have to examine each of the semaphore + * values to be sure it is legal. + */ +static int +lx_semctl_setall(int semid, union lx_semun *arg) +{ + struct semid_ds semds; + ushort_t *vals; + int i, sz, r; + + /* + * Find out how many semaphores are involved, reserve enough + * memory for an internal copy of the array, and then copy it in + * from the process. + */ + if (semctl(semid, 0, IPC_STAT, &semds) != 0) + return (-errno); + sz = semds.sem_nsems * sizeof (ushort_t); + if ((vals = SAFE_ALLOCA(sz)) == NULL) + return (-ENOMEM); + if (uucopy(arg->sems, vals, sz)) + return (-errno); + + /* Validate each of the values. */ + for (i = 0; i < semds.sem_nsems; i++) + if (vals[i] > LX_SEMVMX) + return (-ERANGE); + + r = semctl(semid, 0, SETALL, arg->sems); + + return ((r < 0) ? -errno : r); +} + +static int +lx_semctl(int semid, int semnum, int cmd, void *ptr) +{ + union lx_semun arg; + int rval; + int opt = cmd & ~LX_IPC_64; + int use_errno = 0; + + lx_debug("\nsemctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, ptr); + + /* + * The final arg to semctl() is a pointer to a union. For some + * commands we can hand that pointer directly to the kernel. For + * these commands, we need to extract an argument from the union + * before calling into the kernel. 
+ */ + if (opt == LX_SETVAL || opt == LX_SETALL || opt == LX_GETALL || + opt == LX_IPC_SET || opt == LX_IPC_STAT || opt == LX_SEM_STAT || + opt == LX_IPC_INFO || opt == LX_SEM_INFO) + if (uucopy(ptr, &arg, sizeof (arg))) + return (-errno); + + switch (opt) { + case LX_GETVAL: + use_errno = 1; + rval = semctl(semid, semnum, GETVAL, NULL); + break; + case LX_SETVAL: + if (arg.val > LX_SEMVMX) { + rval = -ERANGE; + break; + } + use_errno = 1; + rval = semctl(semid, semnum, SETVAL, arg.val); + break; + case LX_GETPID: + use_errno = 1; + rval = semctl(semid, semnum, GETPID, NULL); + break; + case LX_GETNCNT: + use_errno = 1; + rval = semctl(semid, semnum, GETNCNT, NULL); + break; + case LX_GETZCNT: + use_errno = 1; + rval = semctl(semid, semnum, GETZCNT, NULL); + break; + case LX_GETALL: + use_errno = 1; + rval = semctl(semid, semnum, GETALL, arg.sems); + break; + case LX_SETALL: + rval = lx_semctl_setall(semid, &arg); + break; + case LX_IPC_RMID: + use_errno = 1; + rval = semctl(semid, semnum, IPC_RMID, NULL); + break; + case LX_SEM_STAT: + rval = lx_semctl_semstat(semid, arg.semds); + break; + case LX_IPC_STAT: + rval = lx_semctl_ipcstat(semid, arg.semds); + break; + + case LX_IPC_SET: + rval = lx_semctl_ipcset(semid, arg.semds); + break; + + case LX_IPC_INFO: + case LX_SEM_INFO: + rval = lx_semctl_ipcinfo(arg.semds); + break; + + default: + rval = -EINVAL; + } + + if (use_errno == 1 && rval < 0) + return (-errno); + return (rval); +} + +/* + * msg operations. + */ +static int +lx_msgget(key_t key, int flag) +{ + int sol_flag; + int r; + + lx_debug("\tlx_msgget(%d, %d)\n", key, flag); + + sol_flag = flag & S_IAMB; + if (flag & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (flag & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = msgget(key, sol_flag); + return (r < 0 ? 
-errno : r); +} + +static int +lx_msgsnd(int id, struct msgbuf *buf, size_t sz, int flag) +{ + int sol_flag = 0; + int r; + + lx_debug("\tlx_msgsnd(%d, 0x%p, %d, %d)\n", id, buf, sz, flag); + + if (flag & LX_IPC_NOWAIT) + sol_flag |= IPC_NOWAIT; + + if (((ssize_t)sz < 0) || (sz > LX_MSGMAX)) + return (-EINVAL); + + r = msgsnd(id, buf, sz, sol_flag); + return (r < 0 ? -errno : r); +} + +static int +lx_msgrcv(int id, struct msgbuf *buf, size_t sz, int flag) +{ + int sol_flag = 0; + struct { + void *msgp; + long msgtype; + } args; + int r; + + /* + * Rather than passing 5 args into ipc(2) directly, glibc passes 4 + * args and uses the buf argument to point to a structure + * containing two args: a pointer to the message and the message + * type. + */ + if (uucopy(buf, &args, sizeof (args))) + return (-errno); + + lx_debug("\tlx_msgrcv(%d, 0x%p, %d, %d, %ld, %d)\n", + id, args.msgp, sz, args.msgtype, flag); + + /* + * Check for a negative sz parameter. + * + * Unlike msgsnd(2), the Linux man page does not specify that + * msgrcv(2) should return EINVAL if (sz > MSGMAX), only if (sz < 0). + */ + if ((ssize_t)sz < 0) + return (-EINVAL); + + if (flag & LX_MSG_NOERROR) + sol_flag |= MSG_NOERROR; + if (flag & LX_IPC_NOWAIT) + sol_flag |= IPC_NOWAIT; + + r = msgrcv(id, args.msgp, sz, args.msgtype, sol_flag); + return (r < 0 ? 
-errno : r); +} + +static int +lx_msgctl_ipcstat(int msgid, void *buf) +{ + struct lx_msqid_ds msgids; + struct msqid_ds sol_msgids; + int r; + + r = msgctl(msgid, IPC_STAT, &sol_msgids); + if (r < 0) + return (-errno); + + bzero(&msgids, sizeof (msgids)); + msgids.msg_perm.key = sol_msgids.msg_perm.key; + msgids.msg_perm.seq = sol_msgids.msg_perm.seq; + msgids.msg_perm.uid = sol_msgids.msg_perm.uid; + msgids.msg_perm.gid = sol_msgids.msg_perm.gid; + msgids.msg_perm.cuid = sol_msgids.msg_perm.cuid; + msgids.msg_perm.cgid = sol_msgids.msg_perm.cgid; + + /* Linux only uses the bottom 9 bits */ + msgids.msg_perm.mode = sol_msgids.msg_perm.mode & S_IAMB; + + msgids.msg_stime = sol_msgids.msg_stime; + msgids.msg_rtime = sol_msgids.msg_rtime; + msgids.msg_ctime = sol_msgids.msg_ctime; + msgids.msg_qbytes = sol_msgids.msg_qbytes; + msgids.msg_cbytes = sol_msgids.msg_cbytes; + msgids.msg_qnum = sol_msgids.msg_qnum; + msgids.msg_lspid = sol_msgids.msg_lspid; + msgids.msg_lrpid = sol_msgids.msg_lrpid; + + if (uucopy(&msgids, buf, sizeof (msgids))) + return (-errno); + + return (0); +} + +static int +lx_msgctl_ipcinfo(int cmd, void *buf) +{ + struct lx_msginfo m; + rctlblk_t *rblk; + int idbuf, rblksz, msgseg, maxmsgs; + uint_t nids; + int rval; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&m, sizeof (m)); + if ((m.msgmni = get_rctlval(rblk, "project.max-msg-ids")) < 0) + return (m.msgmni); + if ((m.msgmnb = get_rctlval(rblk, "process.max-msg-qbytes")) < 0) + return (m.msgmnb); + + if (cmd == LX_IPC_INFO) { + if ((maxmsgs = get_rctlval(rblk, + "process.max-msg-messages")) < 0) + return (maxmsgs); + m.msgtql = maxmsgs * m.msgmni; + m.msgmap = m.msgmnb; + m.msgpool = m.msgmax * m.msgmnb; + rval = 0; + } else { + if (msgids(&idbuf, 0, &nids) < 0) + return (-errno); + m.msgpool = nids; + + /* + * For these fields, we can't even come up with a good fake + * approximation. 
These are listed as 'obsolete' or + * 'unused' in the header files, so hopefully nobody is + * relying on them anyway. + */ + m.msgtql = INT_MAX; + m.msgmap = INT_MAX; + rval = nids; + } + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. + */ + m.msgmax = m.msgmnb; + m.msgssz = 16; + msgseg = (m.msgpool * 1024) / m.msgssz; + m.msgseg = (msgseg > 0xffff) ? 0xffff : msgseg; + + if (uucopy(&m, buf, sizeof (m))) + return (-errno); + return (rval); +} + +static int +lx_msgctl_ipcset(int msgid, void *buf) +{ + struct lx_msqid_ds msgids; + struct msqid_ds sol_msgids; + int r; + + if (uucopy(buf, &msgids, sizeof (msgids))) + return (-errno); + + bzero(&sol_msgids, sizeof (sol_msgids)); + sol_msgids.msg_perm.uid = LX_UID16_TO_UID32(msgids.msg_perm.uid); + sol_msgids.msg_perm.gid = LX_UID16_TO_UID32(msgids.msg_perm.gid); + + /* Linux only uses the bottom 9 bits */ + sol_msgids.msg_perm.mode = msgids.msg_perm.mode & S_IAMB; + sol_msgids.msg_qbytes = msgids.msg_qbytes; + + r = msgctl(msgid, IPC_SET, &sol_msgids); + return (r < 0 ? -errno : r); +} + +static int +lx_msgctl_msgstat(int slot, void *buf) +{ + int r, msgid; + + lx_debug("msgstat(%d, 0x%p)\n", slot, buf); + + msgid = slot_to_id(SLOT_MSG, slot); + + if (msgid < 0) + return (msgid); + + r = lx_msgctl_ipcstat(msgid, buf); + return (r < 0 ? 
r : msgid); +} + +/* + * Split off the various msgctl's here + */ +static int +lx_msgctl(int msgid, int cmd, void *buf) +{ + int r; + + lx_debug("\tlx_msgctl(%d, %d, 0x%p)\n", msgid, cmd, buf); + switch (cmd & ~LX_IPC_64) { + case LX_IPC_RMID: + r = msgctl(msgid, IPC_RMID, NULL); + if (r < 0) + r = -errno; + break; + case LX_IPC_SET: + r = lx_msgctl_ipcset(msgid, buf); + break; + case LX_IPC_STAT: + r = lx_msgctl_ipcstat(msgid, buf); + break; + case LX_MSG_STAT: + r = lx_msgctl_msgstat(msgid, buf); + break; + + case LX_IPC_INFO: + case LX_MSG_INFO: + r = lx_msgctl_ipcinfo(cmd, buf); + break; + + default: + r = -EINVAL; + break; + } + + return (r); +} + +/* + * shm-related operations. + */ +static int +lx_shmget(key_t key, size_t size, int flag) +{ + int sol_flag; + int r; + + lx_debug("\tlx_shmget(%d, %d, %d)\n", key, size, flag); + + sol_flag = flag & S_IAMB; + if (flag & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (flag & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = shmget(key, size, sol_flag); + return (r < 0 ? 
-errno : r); +} + +static int +lx_shmat(int shmid, void *addr, int flags, void **rval) +{ + int sol_flags; + void *ptr; + + lx_debug("\tlx_shmat(%d, 0x%p, %d, 0%o)\n", shmid, addr, flags); + + sol_flags = 0; + if (flags & LX_SHM_RDONLY) + sol_flags |= SHM_RDONLY; + if (flags & LX_SHM_RND) + sol_flags |= SHM_RND; + if ((flags & LX_SHM_REMAP) && (addr == NULL)) + return (-EINVAL); + + ptr = shmat(shmid, addr, sol_flags); + if (ptr == (void *)-1) + return (-errno); + if (uucopy(&ptr, rval, sizeof (ptr)) != 0) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcinfo(void *buf) +{ + struct lx_shminfo s; + rctlblk_t *rblk; + int rblksz; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&s, sizeof (s)); + if ((s.shmmni = get_rctlval(rblk, "project.max-shm-ids")) < 0) + return (s.shmmni); + if ((s.shmmax = get_rctlval(rblk, "project.max-shm-memory")) < 0) + return (s.shmmax); + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. 
+ */ + s.shmmin = 1; + s.shmseg = INT_MAX; + s.shmall = s.shmmax / getpagesize(); + + if (uucopy(&s, buf, sizeof (s))) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcstat(int shmid, void *buf) +{ + struct lx_shmid_ds shmds; + struct shmid_ds sol_shmds; + + if (shmctl(shmid, IPC_STAT, &sol_shmds) != 0) + return (-errno); + + bzero(&shmds, sizeof (shmds)); + shmds.shm_perm.key = sol_shmds.shm_perm.key; + shmds.shm_perm.seq = sol_shmds.shm_perm.seq; + shmds.shm_perm.uid = sol_shmds.shm_perm.uid; + shmds.shm_perm.gid = sol_shmds.shm_perm.gid; + shmds.shm_perm.cuid = sol_shmds.shm_perm.cuid; + shmds.shm_perm.cgid = sol_shmds.shm_perm.cgid; + shmds.shm_perm.mode = sol_shmds.shm_perm.mode & S_IAMB; + if (sol_shmds.shm_lkcnt > 0) + shmds.shm_perm.mode |= LX_SHM_LOCKED; + shmds.shm_segsz = sol_shmds.shm_segsz; + shmds.shm_atime = sol_shmds.shm_atime; + shmds.shm_dtime = sol_shmds.shm_dtime; + shmds.shm_ctime = sol_shmds.shm_ctime; + shmds.shm_cpid = sol_shmds.shm_cpid; + shmds.shm_lpid = sol_shmds.shm_lpid; + shmds.shm_nattch = (ushort_t)sol_shmds.shm_nattch; + + if (uucopy(&shmds, buf, sizeof (shmds))) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcset(int shmid, void *buf) +{ + struct lx_shmid_ds shmds; + struct shmid_ds sol_shmds; + int r; + + if (uucopy(buf, &shmds, sizeof (shmds))) + return (-errno); + + bzero(&sol_shmds, sizeof (sol_shmds)); + sol_shmds.shm_perm.uid = shmds.shm_perm.uid; + sol_shmds.shm_perm.gid = shmds.shm_perm.gid; + sol_shmds.shm_perm.mode = shmds.shm_perm.mode & S_IAMB; + + r = shmctl(shmid, IPC_SET, &sol_shmds); + return (r < 0 ? -errno : r); +} + +/* + * Build and return a shm_info structure. We only return the bare + * essentials required by ipcs. The rest of the info is not readily + * available. 
+ */ +static int +lx_shmctl_shminfo(void *buf) +{ + struct lx_shm_info shminfo; + uint_t nids; + int idbuf; + + bzero(&shminfo, sizeof (shminfo)); + + if (shmids(&idbuf, 0, &nids) < 0) + return (-errno); + + shminfo.used_ids = nids; + if (uucopy(&shminfo, buf, sizeof (shminfo)) != 0) + return (-errno); + + return (nids); +} + +static int +lx_shmctl_shmstat(int slot, void *buf) +{ + int r, shmid; + + lx_debug("shmctl_shmstat(%d, 0x%p)\n", slot, buf); + shmid = slot_to_id(SLOT_SHM, slot); + if (shmid < 0) + return (shmid); + + r = lx_shmctl_ipcstat(shmid, buf); + return (r < 0 ? r : shmid); +} + +static int +lx_shmctl(int shmid, int cmd, void *buf) +{ + int r; + int use_errno = 0; + + lx_debug("\tlx_shmctl(%d, %d, 0x%p)\n", shmid, cmd, buf); + switch (cmd & ~LX_IPC_64) { + case LX_IPC_RMID: + use_errno = 1; + r = shmctl(shmid, IPC_RMID, NULL); + break; + + case LX_IPC_SET: + r = lx_shmctl_ipcset(shmid, buf); + break; + + case LX_IPC_STAT: + r = lx_shmctl_ipcstat(shmid, buf); + break; + + case LX_IPC_INFO: + r = lx_shmctl_ipcinfo(buf); + break; + + case LX_SHM_LOCK: + use_errno = 1; + r = shmctl(shmid, SHM_LOCK, NULL); + break; + + case LX_SHM_UNLOCK: + use_errno = 1; + r = shmctl(shmid, SHM_UNLOCK, NULL); + break; + + case LX_SHM_INFO: + r = lx_shmctl_shminfo(buf); + break; + + case LX_SHM_STAT: + r = lx_shmctl_shmstat(shmid, buf); + break; + default: + r = -EINVAL; + break; + } + + if (use_errno == 1 && r < 0) + return (-errno); + + return (r); +} + +/* + * Under Linux, glibc funnels all of the sysv IPC operations into this + * single ipc(2) system call. We need to blow that up and filter the + * remnants into the proper Solaris system calls. 
+ */ +int +lx_ipc(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, + uintptr_t arg4) +{ + int r; + void *bufptr = (void *)arg4; + + lx_debug("lx_ipc(%d, %d, %d, %d, 0x%p, %d)\n", + cmd, arg1, arg2, arg3, bufptr, arg4); + + switch (cmd) { + case LX_MSGGET: + r = lx_msgget((key_t)arg1, (int)arg2); + break; + case LX_MSGSND: + r = lx_msgsnd((int)arg1, bufptr, (size_t)arg2, (int)arg3); + break; + case LX_MSGRCV: + r = lx_msgrcv((int)arg1, bufptr, (size_t)arg2, (int)arg3); + break; + case LX_MSGCTL: + r = lx_msgctl((int)arg1, (int)arg2, bufptr); + break; + case LX_SEMCTL: + r = lx_semctl((int)arg1, (size_t)arg2, (int)arg3, bufptr); + break; + case LX_SEMOP: + /* + * 'struct sembuf' is the same on Linux and Solaris, so we + * pass bufptr straight through. + */ + r = lx_semop((int)arg1, bufptr, (size_t)arg2); + break; + case LX_SEMGET: + r = lx_semget((int)arg1, (size_t)arg2, (int)arg3); + break; + case LX_SHMAT: + r = lx_shmat((int)arg1, bufptr, (size_t)arg2, (void *)arg3); + break; + case LX_SHMDT: + r = shmdt(bufptr); + if (r < 0) + r = -errno; + break; + case LX_SHMGET: + r = lx_shmget((int)arg1, (size_t)arg2, (int)arg3); + break; + case LX_SHMCTL: + r = lx_shmctl((int)arg1, (int)arg2, bufptr); + break; + + default: + r = -EINVAL; + } + + return (r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/time.c b/usr/src/lib/brand/lx/lx_brand/common/time.c new file mode 100644 index 0000000000..16b883ec0a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/time.c @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <time.h> +#include <string.h> +#include <strings.h> +#include <sys/times.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> + +/* + * time() - This cannot be passthrough because on Linux a bad buffer will + * set errno to EFAULT, and on Solaris the failure mode is documented + * as "undefined." + * + * (At present, Solaris' time(2) will segmentation fault, as the call + * is simply a libc wrapper atop the time() syscall that will + * dereference the passed pointer if it is non-zero.) + */ +int +lx_time(uintptr_t p1) +{ + time_t ret = time((time_t *)0); + + if ((ret == (time_t)-1) || + ((p1 != 0) && (uucopy(&ret, (time_t *)p1, sizeof (ret)) != 0))) + return (-errno); + + return (ret); +} + +/* + * times() - The Linux implementation avoids writing to NULL, while Solaris + * returns EFAULT. + */ +int +lx_times(uintptr_t p1) +{ + clock_t ret; + struct tms buf, *tp = (struct tms *)p1; + + ret = times(&buf); + + if ((ret == -1) || + ((tp != NULL) && uucopy((void *)&buf, tp, sizeof (buf)) != 0)) + return (-errno); + + return ((ret == -1) ? -errno : ret); +} + +/* + * setitimer() - the Linux implementation can handle tv_usec values greater + * than 1,000,000 where Solaris would return EINVAL. 
+ * + * There's still an issue here where Linux can handle a + * tv_sec value greater than 100,000,000 but Solaris cannot, + * but that would also mean setting an interval timer to fire + * over _three years_ in the future so it's unlikely anything + * other than Linux test suites will trip over it. + */ +int +lx_setitimer(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + struct itimerval itv; + struct itimerval *itp = (struct itimerval *)p2; + + if (itp != NULL) { + if (uucopy(itp, &itv, sizeof (itv)) != 0) + return (-errno); + + /* + * Adjust any tv_usec fields >= 1,000,000 by adding any whole + * seconds so indicated to tv_sec and leaving tv_usec as the + * remainder. + */ + if (itv.it_interval.tv_usec >= MICROSEC) { + itv.it_interval.tv_sec += + itv.it_interval.tv_usec / MICROSEC; + + itv.it_interval.tv_usec %= MICROSEC; + } + if (itv.it_value.tv_usec >= MICROSEC) { + itv.it_value.tv_sec += + itv.it_value.tv_usec / MICROSEC; + + itv.it_value.tv_usec %= MICROSEC; + } + + itp = &itv; + } + + return ((setitimer((int)p1, itp, (struct itimerval *)p3) != 0) ? + -errno : 0); +} + +/* + * NOTE: The Linux man pages state this structure is obsolete and is + * unsupported, so it is declared here for sizing purposes only. + */ +struct lx_timezone { + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +/* + * lx_gettimeofday() and lx_settimeofday() are implemented here rather than + * as pass-through calls to Solaris' libc due to the need to return EFAULT + * for a bad buffer rather than die with a segmentation fault. 
+ */ +int +lx_gettimeofday(uintptr_t p1, uintptr_t p2) +{ + struct timeval tv; + struct lx_timezone tz; + + bzero(&tz, sizeof (tz)); + (void) gettimeofday(&tv, NULL); + + if ((p1 != NULL) && + (uucopy(&tv, (struct timeval *)p1, sizeof (tv)) < 0)) + return (-errno); + + /* + * The Linux man page states use of the second parameter is obsolete, + * but gettimeofday(2) should still return EFAULT if it is set + * to a bad non-NULL pointer (sigh...) + */ + if ((p2 != NULL) && + (uucopy(&tz, (struct lx_timezone *)p2, sizeof (tz)) < 0)) + return (-errno); + + return (0); +} + +int +lx_settimeofday(uintptr_t p1, uintptr_t p2) +{ + struct timeval tv; + struct lx_timezone tz; + + if ((p1 != NULL) && + (uucopy((struct timeval *)p1, &tv, sizeof (tv)) < 0)) + return (-errno); + + /* + * The Linux man page states use of the second parameter is obsolete, + * but settimeofday(2) should still return EFAULT if it is set + * to a bad non-NULL pointer (sigh...) + */ + if ((p2 != NULL) && + (uucopy((struct lx_timezone *)p2, &tz, sizeof (tz)) < 0)) + return (-errno); + + if ((p1 != NULL) && (settimeofday(&tv, NULL) < 0)) + return (-errno); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/truncate.c b/usr/src/lib/brand/lx/lx_brand/common/truncate.c new file mode 100644 index 0000000000..81fdfbac35 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/truncate.c @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/lx_types.h> +#include <sys/lx_misc.h> + +/* + * On Solaris, truncate() and ftruncate() are implemented in libc, so these are + * layered on those interfaces. + */ + +int +lx_truncate(uintptr_t path, uintptr_t length) +{ + return (truncate((const char *)path, (off_t)length) == 0 ? 0 : -errno); +} + +int +lx_ftruncate(uintptr_t fd, uintptr_t length) +{ + return (ftruncate((int)fd, (off_t)length) == 0 ? 0 : -errno); +} + +int +lx_truncate64(uintptr_t path, uintptr_t length_lo, uintptr_t length_hi) +{ + return (truncate64((const char *)path, + LX_32TO64(length_lo, length_hi)) == 0 ? 0 : -errno); +} + +int +lx_ftruncate64(uintptr_t fd, uintptr_t length_lo, uintptr_t length_hi) +{ + return (ftruncate64((int)fd, + LX_32TO64(length_lo, length_hi)) == 0 ? 0 : -errno); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c new file mode 100644 index 0000000000..33b3d49923 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -0,0 +1,288 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * wait() family of functions. + * + * The first minor difference between the Linux and Solaris family of wait() + * calls is that the values for WNOHANG and WUNTRACED are different. Solaris + * also has additional options (WCONTINUED, WNOWAIT) which should be flagged as + * invalid on Linux. Thankfully, the exit status values are identical between + * the two implementations. + * + * Things get very different and very complicated when we introduce the Linux + * threading model. Under linux, both threads and child processes are + * represented as processes. However, the behavior of wait() with respect to + * each child varies according to the flags given to clone() + * + * SIGCHLD The SIGCHLD signal should be sent on termination + * CLONE_THREAD The child shares the same thread group as the parent + * CLONE_DETACHED The parent receives no notification when the child exits + * + * The following flags control the Linux behavior w.r.t. 
the above attributes: + * + * __WALL Wait on all children, regardless of type + * __WCLONE Wait only on non-SIGCHLD children + * __WNOTHREAD Don't wait on children of other threads in this group + * + * The following chart shows whether wait() returns when the child exits: + * + * default __WCLONE __WALL + * no SIGCHLD - X X + * SIGCHLD X - X + * + * The following chart shows whether wait() returns when the grandchild exits: + * + * default __WNOTHREAD + * no CLONE_THREAD - - + * CLONE_THREAD X - + * + * The CLONE_DETACHED flag is universal - when the child exits, no state is + * stored and wait() has no effect. + * + * XXX Support the above combination of options, or some reasonable subset that + * covers at least fork() and pthread_create(). + */ + +#include <errno.h> +#include <sys/wait.h> +#include <sys/lx_types.h> +#include <sys/lx_signal.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> +#include <sys/times.h> +#include <strings.h> +#include <unistd.h> +#include <assert.h> + +/* + * Convert between Linux options and Solaris options, returning -1 if any + * invalid flags are found. 
+ */ +#define LX_WNOHANG 0x1 +#define LX_WUNTRACED 0x2 + +#define LX_WNOTHREAD 0x20000000 +#define LX_WALL 0x40000000 +#define LX_WCLONE 0x80000000 + +#define LX_P_ALL 0x0 +#define LX_P_PID 0x1 +#define LX_P_GID 0x2 + +static int +ltos_options(uintptr_t options) +{ + int newoptions = 0; + + if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WNOTHREAD | + LX_WALL | LX_WCLONE)) != 0) { + return (-1); + } + /* XXX implement LX_WNOTHREAD, LX_WALL, LX_WCLONE */ + + if (options & LX_WNOHANG) + newoptions |= WNOHANG; + if (options & LX_WUNTRACED) + newoptions |= WUNTRACED; + + return (newoptions); +} + +static int +lx_wstat(int code, int status) +{ + int stat = 0; + + switch (code) { + case CLD_EXITED: + stat = status << 8; + break; + case CLD_DUMPED: + stat = stol_signo[status]; + assert(stat != -1); + stat |= WCOREFLG; + break; + case CLD_KILLED: + stat = stol_signo[status]; + assert(stat != -1); + break; + case CLD_TRAPPED: + case CLD_STOPPED: + stat = stol_signo[status]; + assert(stat != -1); + stat <<= 8; + stat |= WSTOPFLG; + break; + case CLD_CONTINUED: + stat = WCONTFLG; + break; + } + + return (stat); +} + +/* wrapper to make solaris waitid work properly with ptrace */ +static int +lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options) +{ + do { + /* + * It's possible that we return EINVAL here if the idtype is + * P_PID or P_PGID and id is out of bounds for a valid pid or + * pgid, but Linux expects to see ECHILD. No good way occurs to + * handle this so we'll punt for now. + */ + if (waitid(idtype, id, info, options) < 0) + return (-errno); + + /* + * If the WNOHANG flag was specified and no child was found + * return 0. + */ + if ((options & WNOHANG) && info->si_pid == 0) + return (0); + + /* + * It's possible that we may have a spurious return for one of + * the child processes created by the ptrace subsystem. If + * that's the case, we simply try again. 
+ */ + } while (lx_ptrace_wait(info) == -1); + return (0); +} + +int +lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + siginfo_t info = { 0 }; + struct rusage ru = { 0 }; + idtype_t idtype; + id_t id; + int options, status = 0; + pid_t pid = (pid_t)p1; + int rval; + + if ((options = ltos_options(p3)) == -1) + return (-EINVAL); + + /* + * While not listed as a valid return code, Linux's wait4(2) does, + * in fact, get an EFAULT if either the status pointer or rusage + * pointer is invalid. Since a failed waitpid should leave child + * process in a state where a future wait4(2) will succeed, we + * check them by copying out the values their buffers originally + * contained. (We need to do this as a failed system call should + * never affect the contents of a passed buffer.) + * + * This will fail if the buffers in question are write-only. + */ + if ((void *)p2 != NULL && + ((uucopy((void *)p2, &status, sizeof (status)) != 0) || + (uucopy(&status, (void *)p2, sizeof (status)) != 0))) + return (-EFAULT); + + if ((void *)p4 != NULL) { + if ((uucopy((void *)p4, &ru, sizeof (ru)) != 0) || + (uucopy(&ru, (void *)p4, sizeof (ru)) != 0)) + return (-EFAULT); + } + + if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = getpgrp(); + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED | WTRAPPED; + + if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0) + return (rval); + /* + * If the WNOHANG flag was specified and no child was found return 0. + */ + if ((options & WNOHANG) && info.si_pid == 0) + return (0); + + status = lx_wstat(info.si_code, info.si_status); + + /* + * Unfortunately if this attempt to copy out either the status or the + * rusage fails, the process will be in an inconsistent state as + * subsequent calls to wait for the same child will fail where they + * should succeed on a Linux system. 
This, however, is rather + * unlikely since we tested the validity of both above. + */ + if (p2 != NULL && uucopy(&status, (void *)p2, sizeof (status)) != 0) + return (-EFAULT); + + if (p4 != NULL && (rval = lx_getrusage(LX_RUSAGE_CHILDREN, p4)) != 0) + return (rval); + + return (info.si_pid); +} + +int +lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lx_wait4(p1, p2, p3, NULL)); +} + +/* + * Emulate the Linux waitid(2) system call: translate the Linux option flags + * and idtype to their native equivalents, wait via lx_waitid_helper(), and + * pass the resulting siginfo and the caller's lx_siginfo_t pointer to + * stol_siginfo(). Failures detected here are reported as negative errno + * values, matching lx_wait4(). + */ +int +lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) +{ + int rval, options; + siginfo_t s_infop = {0}; + + /* ltos_options() returns -1 for unknown flags; report that as EINVAL. */ + if ((options = ltos_options(opt)) == -1) + return (-EINVAL); + + switch (idtype) { + case LX_P_ALL: + idtype = P_ALL; + break; + case LX_P_PID: + idtype = P_PID; + break; + case LX_P_GID: + /* Linux idtype 2 (P_PGID) names a process group, not a gid. */ + idtype = P_PGID; + break; + default: + return (-EINVAL); + } + if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_infop, options)) < 0) + return (rval); + + return (stol_siginfo(&s_infop, (lx_siginfo_t *)infop)); +} diff --git a/usr/src/lib/brand/lx/lx_brand/i386/Makefile b/usr/src/lib/brand/lx/lx_brand/i386/Makefile new file mode 100644 index 0000000000..fd38a056f6 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/Makefile @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/i386/Makefile + +ISASRCDIR=. + +ASFLAGS += -P -D_ASM + +include ../Makefile.com + +POFILE= lx_brand.po +MSGFILES= $(CSRCS) + +ASSYMDEP_OBJS = lx_handler.o + +$(ASSYMDEP_OBJS:%=pics/%): assym.h + +OFFSETS = ../$(MACH)/offsets.in + +assym.h: $(OFFSETS) + $(OFFSETS_CREATE) $(CTF_FLAGS) < $(OFFSETS) > $@ + +CLOBBERFILES += assym.h + +install: all $(ROOTLIBS) + +$(POFILE): $(MSGFILES) + $(BUILDPO.msgfiles) + +_msg: $(MSGDOMAINPOFILE) + +include $(SRC)/Makefile.msg.targ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s new file mode 100644 index 0000000000..c457c1c209 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> + +#if defined(lint) + +void +_start(void) +{ +} + +#else /* lint */ + + /* + * C language startup routine for the lx brand shared library. + */ + ENTRY_NP(_start) + pushl $0 / Build a stack frame. retpc = NULL + pushl $0 / fp = NULL + movl %esp, %ebp / first stack frame + + /* + * Calculate the location of the envp array by adding the size of + * the argv array to the start of the argv array. + */ + movl 8(%ebp), %eax / argc in %eax + leal 16(%ebp,%eax,4), %edx / envp in %edx + andl $-16, %esp + pushl %edx / push envp + leal 12(%ebp),%edx / compute &argv[0] + pushl %edx / push argv + pushl %eax / push argc + call lx_init + /* + * lx_init will never return. + */ + SET_SIZE(_start) + +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s new file mode 100644 index 0000000000..413ef9852d --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s @@ -0,0 +1,377 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sys/regset.h> +#include <sys/segments.h> +#include <sys/syscall.h> +#include <sys/lx_brand.h> + +#if defined(_ASM) +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#endif /* _ASM */ + +#include "assym.h" + +#define PIC_SETUP(r) \ + call 9f; \ +9: popl r; \ + addl $_GLOBAL_OFFSET_TABLE_ + [. - 9b], r + +/* + * Each JMP must occupy 16 bytes + */ +#define JMP \ + pushl $_CONST(. - lx_handler_table); \ + jmp lx_handler; \ + .align 16; + +#define JMP4 JMP; JMP; JMP; JMP +#define JMP16 JMP4; JMP4; JMP4; JMP4 +#define JMP64 JMP16; JMP16; JMP16; JMP16 +#define JMP256 JMP64; JMP64; JMP64; JMP64 + +/* + * Alternate jump table that turns on lx_traceflag before proceeding with + * the normal emulation routine. + */ +#define TJMP \ + pushl $_CONST(. - lx_handler_trace_table); \ + jmp lx_handler_trace; \ + .align 16; + +#define TJMP4 TJMP; TJMP; TJMP; TJMP +#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4 +#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16 +#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64 + + +#if defined(lint) + +#include <sys/types.h> +#include <sys/regset.h> +#include <sys/signal.h> + +void +lx_handler_table(void) +{} + +void +lx_handler(void) +{} + +/* ARGSUSED */ +void +lx_setup_clone(uintptr_t gs, void *retaddr, void *stk) +{} + +/* ARGSUSED */ +void +lx_sigdeliver(int sig, siginfo_t *sip, void *p, size_t stacksz, + void (*stack_frame_builder)(void), void (*lx_sighandler)(void), + uintptr_t gs) +{} + +/* ARGSUSED */ +void +lx_sigacthandler(int sig, siginfo_t *s, void *p) +{} + +void +lx_sigreturn_tramp(void) +{} + +void +lx_rt_sigreturn_tramp(void) +{} + +/* ARGSUSED */ +void +lx_sigreturn_tolibc(uintptr_t sp) +{} + +#else /* lint */ + + /* + * On entry to this table, %eax will hold the return address. The + * location where we enter the table is a function of the system + * call number. 
The table needs the same alignment as the individual + * entries. + */ + .align 16 + ENTRY_NP(lx_handler_trace_table) + TJMP256 + TJMP64 + SET_SIZE(lx_handler_trace_table) + + .align 16 + ENTRY_NP(lx_handler_table) + JMP256 + JMP64 + SET_SIZE(lx_handler_table) + + ENTRY_NP(lx_handler_trace) + pushl %esi + PIC_SETUP(%esi) + movl lx_traceflag@GOT(%esi), %esi + movl $1, (%esi) + popl %esi + /* + * While we could just fall through to lx_handler(), we "tail-call" it + * instead to make ourselves a little more comprehensible to trace + * tools. + */ + jmp lx_handler + SET_SIZE(lx_handler_trace) + + ALTENTRY(lx_handler) + /* + * %ebp isn't always going to be a frame pointer on Linux, but when + * it is, saving it here lets us have a coherent stack backtrace. + */ + pushl %ebp + + /* + * Fill in a lx_regs_t structure on the stack. + */ + subl $SIZEOF_LX_REGS_T, %esp + + /* + * Save %ebp and then fill it with what would be its usual value as + * the frame pointer. The value we save for %esp needs to be the + * stack pointer at the time of the interrupt so we need to skip the + * saved %ebp and (what will be) the return address. + */ + movl %ebp, LXR_EBP(%esp) + movl %esp, %ebp + addl $_CONST(SIZEOF_LX_REGS_T), %ebp + movl %ebp, LXR_ESP(%esp) + addl $_CONST(_MUL(CPTRSIZE, 2)), LXR_ESP(%esp) + + movl $0, LXR_GS(%esp) + movw %gs, LXR_GS(%esp) + movl %edi, LXR_EDI(%esp) + movl %esi, LXR_ESI(%esp) + movl %ebx, LXR_EBX(%esp) + movl %edx, LXR_EDX(%esp) + movl %ecx, LXR_ECX(%esp) + movl %eax, LXR_EIP(%esp) + + /* + * The kernel drops us into the middle of one of the tables above + * that then pushes that table offset onto the stack, and calls into + * lx_handler. That offset indicates the system call number while + * %eax holds the return address for the system call. We replace the + * value on the stack with the return address, and use the value to + * compute the system call number by dividing by the table entry size. 
+ */ + xchgl CPTRSIZE(%ebp), %eax + shrl $4, %eax + movl %eax, LXR_EAX(%esp) + + /* + * Switch to the Solaris libc's %gs. + */ + movl $LWPGS_SEL, %ebx + movw %bx, %gs + + /* + * Call lx_emulate() whose only argument is a pointer to the + * lx_regs_t structure we've placed on the stack. + */ + pushl %esp + call lx_emulate + + /* + * We use this global symbol to identify this return site when + * walking the stack backtrace. It needs to remain immediately + * after the call to lx_emulate(). + */ + ALTENTRY(lx_emulate_done) + + /* + * Clean up the argument to lx_emulate(). + */ + addl $4, %esp + + /* + * Restore the saved register state; we get %ebp, %esp and %eip from + * the ordinary locations rather than the saved state. + */ + movl LXR_EDI(%esp), %edi + movl LXR_ESI(%esp), %esi + movl LXR_EBX(%esp), %ebx + movl LXR_EDX(%esp), %edx + movl LXR_ECX(%esp), %ecx + movl LXR_EAX(%esp), %eax + movw LXR_GS(%esp), %gs + + addl $SIZEOF_LX_REGS_T, %esp + + movl %ebp, %esp + popl %ebp + ret + SET_SIZE(lx_handler) + + ENTRY_NP(lx_swap_gs) + push %eax /* save the current eax value */ + movl 0xc(%esp),%eax /* 2nd param is a pointer */ + movw %gs,(%eax) /* use the pointer to save current gs */ + movl 0x8(%esp),%eax /* first parameter is the new gs value */ + movw %ax, %gs /* switch to the new gs value */ + pop %eax /* restore eax */ + ret + SET_SIZE(lx_swap_gs) + + ENTRY_NP(lx_setup_clone) + xorl %ebp, %ebp /* terminating stack */ + popl %edx /* eat the start_clone() return address */ + popl %gs /* Switch back to the Linux libc's %gs */ + popl %edx /* Linux clone() return address */ + popl %esp /* New stack pointer */ + xorl %eax, %eax /* child returns 0 to SYS_clone() */ + jmp *%edx /* return to Linux app.
*/ + SET_SIZE(lx_setup_clone) + + /* + * lx_sigdeliver(sig, siginfo_t *, ucontext_t *, stack_size, + * stack_build_routine, signal_handler, glibc_gs) + * + * This routine allocates stack space for the Linux signal stack, + * calls a routine to build the signal stack and then calls the Linux + * signal handler. This is written in assembly because of the way + * we need to directly manipulate the stack and pass the resulting + * stack to the signal handler with the Linux signal stack on top. + * + * When the Linux signal handler is called, the stack will look + * like this: + * + * ================================================= + * | Linux signal frame built by lx_stackbuilder() | + * ================================================= + * | LX_SIGRT_MAGIC | + * ================================================= + * | %ebp | + * ================================================= + */ + ENTRY_NP(lx_sigdeliver) + pushl %ebp + movl %esp, %ebp + movl 16(%ebp), %edx /* pointer to Solaris ucontext_t */ + pushl %edx /* save ucontext_t ptr for later */ + pushl $LX_SIGRT_MAGIC /* marker value for lx_(rt)_sigreturn */ + + subl 20(%ebp), %esp /* create stack buffer */ + pushl %esp /* push stack pointer */ + pushl %edx /* push pointer to ucontext_t */ + pushl 12(%ebp) /* push pointer to siginfo_t */ + pushl 8(%ebp) /* push signal number */ + call *24(%ebp) /* lx_stackbuilder(sig, sip, ucp, sp) */ + add $16, %esp /* remove args from stack */ + movw 32(%ebp), %gs /* only low 16 bits are used */ + + mov 4(%ebp),%eax /* fetch old %ebp from stack */ + mov 28(%ebp), %edx /* get address of Linux handler */ + mov %eax, %ebp /* restore old %ebp */ + jmp *%edx /* jmp to the Linux signal handler */ + SET_SIZE(lx_sigdeliver) + + /* + * Due to the nature of signals, we need to be able to force the %gs + * value to that used by Solaris by running any Solaris code. 
+ * + * This routine does that, then calls a C routine that will save the + * %gs value at the time of the signal off into a thread-specific data + * structure. Finally, we trampoline to the libc code that would + * normally interpose itself before calling a signal handler. + * + * The libc routine that calls user signal handlers ends with a + * setcontext, so we would never return here even if we used a call + * rather than a jmp. + * + * %esi is used for the PIC as it is guaranteed by the 386 ABI to + * survive the call to lx_sigsavegs. The downside is we must also + * preserve its value for our caller. + * + * Note that because lx_sigsavegs and libc_sigacthandler are externs, + * they need to be dereferenced via the GOT. + * + * IMPORTANT: Because libc apparently gets upset if extra data is + * left on its stack, this routine needs to be crafted + * in assembly so that the jmp to the libc interposer + * doesn't leave any cruft lying around. + */ + ENTRY_NP(lx_sigacthandler) + pushl %esi /* save %esi */ + pushl %gs /* push the Linux %gs */ + pushl $LWPGS_SEL + popl %gs /* install the Solaris %gs */ + + PIC_SETUP(%esi) + movl lx_sigsavegs@GOT(%esi), %eax + call *%eax /* save the Linux %gs */ + movl libc_sigacthandler@GOT(%esi), %eax + add $4, %esp /* clear Linux %gs from stack */ + popl %esi /* restore %esi */ + jmp *(%eax) /* jmp to libc's interposer */ + SET_SIZE(lx_sigacthandler) + + /* + * Trampoline code is called by the return at the end of a Linux + * signal handler to return control to the interrupted application + * via the lx_sigreturn() or lx_rt_sigreturn() syscalls. + * + * (lx_sigreturn() is called for legacy signal handling, and + * lx_rt_sigreturn() is called for "new"-style signals.) + * + * These two routines must consist of the EXACT code sequences below + * as gdb looks at the sequence of instructions a routine will return + * to determine whether it is in a signal handler or not. 
+ */ + ENTRY_NP(lx_sigreturn_tramp) + popl %eax + movl $LX_SYS_sigreturn, %eax + int $0x80 + SET_SIZE(lx_sigreturn_tramp) + + ENTRY_NP(lx_rt_sigreturn_tramp) + movl $LX_SYS_rt_sigreturn, %eax + int $0x80 + SET_SIZE(lx_rt_sigreturn_tramp) + + /* + * Manipulate the stack in the way necessary for it to appear to libc + * that the signal handler it invoked via call_user_handler() is + * returning. + */ + ENTRY_NP(lx_sigreturn_tolibc) + movl 4(%esp), %esp /* set %esp to passed value */ + popl %ebp /* restore proper %ebp */ + ret /* return to libc interposer */ + SET_SIZE(lx_sigreturn_tolibc) +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s new file mode 100644 index 0000000000..28ec39938e --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> + +#if defined(lint) + +/*ARGSUSED*/ +void +lx_runexe(void *argv, int32_t entry) +{ +} + +#else /* lint */ + + /* + * Set our stack pointer, clear the general registers, + * and jump to the brand linker's entry point. + */ + ENTRY_NP(lx_runexe) + movl 4(%esp), %eax / %eax = &argv[0] + movl 8(%esp), %ebx / Brand linker's entry point in %ebx + subl $4, %eax / Top of stack - must point at argc + movl %eax, %esp / Set %esp to what linkers expect + + movl $0, %eax + movl $0, %ecx + movl $0, %edx + movl $0, %esi + movl $0, %edi + movl $0, %ebp + + jmp *%ebx / And away we go... + SET_SIZE(lx_runexe) + +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/offsets.in b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in new file mode 100644 index 0000000000..ac934ee76c --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in @@ -0,0 +1,40 @@ +\ +\ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +\ Use is subject to license terms. +\ +\ CDDL HEADER START +\ +\ The contents of this file are subject to the terms of the +\ Common Development and Distribution License (the "License"). +\ You may not use this file except in compliance with the License. +\ +\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +\ or http://www.opensolaris.org/os/licensing. +\ See the License for the specific language governing permissions +\ and limitations under the License. +\ +\ When distributing Covered Code, include this CDDL HEADER in each +\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+\ If applicable, add the following below this CDDL HEADER, with the +\ fields enclosed by brackets "[]" replaced with your own identifying +\ information: Portions Copyright [yyyy] [name of copyright owner] +\ +\ CDDL HEADER END +\ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/lx_brand.h> + +lx_regs_t SIZEOF_LX_REGS_T + lxr_gs + lxr_edi + lxr_esi + lxr_ebp + lxr_esp + lxr_ebx + lxr_edx + lxr_ecx + lxr_eax + lxr_eip + lxr_orig_eax diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h new file mode 100644 index 0000000000..80fb579665 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_DEBUG_H +#define _LX_DEBUG_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* initialize the debugging subsystem */ +extern void lx_debug_init(void); + +/* printf() style debug message functionality */ +extern void lx_debug(const char *, ...); + +/* set non-zero if the debugging subsystem is enabled */ +extern int lx_debug_enabled; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_DEBUG_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h new file mode 100644 index 0000000000..997f1c4589 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h @@ -0,0 +1,114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_FCNTL_H +#define _SYS_LX_FCNTL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Lx open/fcntl flags + */ +#define LX_O_RDONLY 00 +#define LX_O_WRONLY 01 +#define LX_O_RDWR 02 +#define LX_O_CREAT 0100 +#define LX_O_EXCL 0200 +#define LX_O_NOCTTY 0400 +#define LX_O_TRUNC 01000 +#define LX_O_APPEND 02000 +#define LX_O_NONBLOCK 04000 +#define LX_O_NDELAY LX_O_NONBLOCK +#define LX_O_SYNC 010000 +#define LX_O_FSYNC LX_O_SYNC +#define LX_O_ASYNC 020000 +#define LX_O_DIRECT 040000 +#define LX_O_LARGEFILE 0100000 +#define LX_O_DIRECTORY 0200000 +#define LX_O_NOFOLLOW 0400000 + +#define LX_F_DUPFD 0 +#define LX_F_GETFD 1 +#define LX_F_SETFD 2 +#define LX_F_GETFL 3 +#define LX_F_SETFL 4 +#define LX_F_GETLK 5 +#define LX_F_SETLK 6 +#define LX_F_SETLKW 7 +#define LX_F_SETOWN 8 +#define LX_F_GETOWN 9 +#define LX_F_SETSIG 10 +#define LX_F_GETSIG 11 + +#define LX_F_GETLK64 12 +#define LX_F_SETLK64 13 +#define LX_F_SETLKW64 14 + +#define LX_F_SETLEASE 1024 +#define LX_F_GETLEASE 1025 +#define LX_F_NOTIFY 1026 + +#define LX_F_RDLCK 0 +#define LX_F_WRLCK 1 +#define LX_F_UNLCK 2 + +/* + * Lx flock codes. 
+ */ +#define LX_NAME_MAX 255 +#define LX_LOCK_SH 1 /* shared */ +#define LX_LOCK_EX 2 /* exclusive */ +#define LX_LOCK_NB 4 /* non-blocking */ +#define LX_LOCK_UN 8 /* unlock */ + +#define LX_AT_FDCWD -100 +#define LX_AT_EACCESS 512 +#define LX_AT_REMOVEDIR 512 +#define LX_AT_SYMLINK_NOFOLLOW 256 +#define LX_AT_SYMLINK_FOLLOW 1024 + +struct lx_flock { + short l_type; + short l_whence; + long l_start; + long l_len; + int l_pid; +}; + +struct lx_flock64 { + short l_type; + short l_whence; + long long l_start; + long long l_len; + int l_pid; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_FCNTL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h new file mode 100644 index 0000000000..01bfb9499f --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h @@ -0,0 +1,382 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_IOCTL_H +#define _SYS_LX_IOCTL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_ioctl_init(void); + +/* + * LX_NCC must be different from LX_NCCS since while the termio and termios + * structures may look similar they are fundamentally different sizes and + * have different members. + */ +#define LX_NCC 8 +#define LX_NCCS 19 + +struct lx_termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[LX_NCC]; /* control characters */ +}; + +struct lx_termios { + uint32_t c_iflag; /* input mode flags */ + uint32_t c_oflag; /* output mode flags */ + uint32_t c_cflag; /* control mode flags */ + uint32_t c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[LX_NCCS]; /* control characters */ +}; + +/* + * c_cc characters which are valid for lx_termio and lx_termios + */ +#define LX_VINTR 0 +#define LX_VQUIT 1 +#define LX_VERASE 2 +#define LX_VKILL 3 +#define LX_VEOF 4 +#define LX_VTIME 5 +#define LX_VMIN 6 +#define LX_VSWTC 7 + +/* + * c_cc characters which are valid for lx_termios + */ +#define LX_VSTART 8 +#define LX_VSTOP 9 +#define LX_VSUSP 10 +#define LX_VEOL 11 +#define LX_VREPRINT 12 +#define LX_VDISCARD 13 +#define LX_VWERASE 14 +#define LX_VLNEXT 15 +#define LX_VEOL2 16 + +/* + * Sound formats + */ +#define LX_AFMT_QUERY 0x00000000 +#define LX_AFMT_MU_LAW 0x00000001 +#define LX_AFMT_A_LAW 0x00000002 +#define LX_AFMT_IMA_ADPCM 0x00000004 +#define LX_AFMT_U8 0x00000008 +#define LX_AFMT_S16_LE 0x00000010 +#define LX_AFMT_S16_BE 0x00000020 +#define LX_AFMT_S8 0x00000040 +#define LX_AFMT_U16_LE 0x00000080 +#define LX_AFMT_U16_BE 0x00000100 +#define LX_AFMT_MPEG 0x00000200 +#define LX_AFMT_AC3 0x00000400 + +/* + * Supported ioctls + */ 
+#define LX_TCGETS 0x5401 +#define LX_TCSETS 0x5402 +#define LX_TCSETSW 0x5403 +#define LX_TCSETSF 0x5404 +#define LX_TCGETA 0x5405 +#define LX_TCSETA 0x5406 +#define LX_TCSETAW 0x5407 +#define LX_TCSETAF 0x5408 +#define LX_TCSBRK 0x5409 +#define LX_TCXONC 0x540a +#define LX_TCFLSH 0x540b +#define LX_TIOCEXCL 0x540c +#define LX_TIOCNXCL 0x540d +#define LX_TIOCSCTTY 0x540e +#define LX_TIOCGPGRP 0x540f +#define LX_TIOCSPGRP 0x5410 +#define LX_TIOCOUTQ 0x5411 +#define LX_TIOCSTI 0x5412 +#define LX_TIOCGWINSZ 0x5413 +#define LX_TIOCSWINSZ 0x5414 +#define LX_TIOCMGET 0x5415 +#define LX_TIOCMBIS 0x5416 +#define LX_TIOCMBIC 0x5417 +#define LX_TIOCMSET 0x5418 +#define LX_TIOCGSOFTCAR 0x5419 +#define LX_TIOCSSOFTCAR 0x541a +#define LX_FIONREAD 0x541b +#define LX_TIOCPKT 0x5420 +#define LX_FIONBIO 0x5421 +#define LX_TIOCNOTTY 0x5422 +#define LX_TIOCSETD 0x5423 +#define LX_TIOCGETD 0x5424 +#define LX_TCSBRKP 0x5425 +#define LX_TIOCGSID 0x5429 +#define LX_TIOCGPTN 0x80045430 +#define LX_TIOCSPTLCK 0x40045431 +#define LX_FIONCLEX 0x5450 +#define LX_FIOCLEX 0x5451 +#define LX_FIOASYNC 0x5452 +#define LX_FIOSETOWN 0x8901 +#define LX_SIOCSPGRP 0x8902 +#define LX_FIOGETOWN 0x8903 +#define LX_SIOCGPGRP 0x8904 +#define LX_SIOCATMARK 0x8905 +#define LX_SIOCGIFCONF 0x8912 +#define LX_SIOCGIFFLAGS 0x8913 +#define LX_SIOCSIFFLAGS 0x8914 +#define LX_SIOCGIFADDR 0x8915 +#define LX_SIOCSIFADDR 0x8916 +#define LX_SIOCGIFDSTADDR 0x8917 +#define LX_SIOCSIFDSTADDR 0x8918 +#define LX_SIOCGIFBRDADDR 0x8919 +#define LX_SIOCSIFBRDADDR 0x891a +#define LX_SIOCGIFNETMASK 0x891b +#define LX_SIOCSIFNETMASK 0x891c +#define LX_SIOCGIFMETRIC 0x891d +#define LX_SIOCSIFMETRIC 0x891e +#define LX_SIOCGIFMEM 0x891f +#define LX_SIOCSIFMEM 0x8920 +#define LX_SIOCGIFMTU 0x8921 +#define LX_SIOCSIFMTU 0x8922 +#define LX_SIOCSIFHWADDR 0x8924 +#define LX_SIOCGIFHWADDR 0x8927 + +/* + * /dev/dsp ioctls - supported + */ +#define LX_OSS_SNDCTL_DSP_RESET 0x5000 +#define LX_OSS_SNDCTL_DSP_SYNC 0x5001 +#define 
LX_OSS_SNDCTL_DSP_SPEED 0xc0045002 +#define LX_OSS_SNDCTL_DSP_STEREO 0xc0045003 +#define LX_OSS_SNDCTL_DSP_GETBLKSIZE 0xc0045004 +#define LX_OSS_SNDCTL_DSP_SETFMTS 0xc0045005 +#define LX_OSS_SNDCTL_DSP_CHANNELS 0xc0045006 +#define LX_OSS_SNDCTL_DSP_SETFRAGMENT 0xc004500a +#define LX_OSS_SNDCTL_DSP_GETFMTS 0x8004500b +#define LX_OSS_SNDCTL_DSP_GETOSPACE 0x8010500c +#define LX_OSS_SNDCTL_DSP_GETCAPS 0x8004500f +#define LX_OSS_SNDCTL_DSP_SETTRIGGER 0x40045010 +#define LX_OSS_SNDCTL_DSP_GETOPTR 0x800c5012 +#define LX_OSS_SNDCTL_DSP_GETISPACE 0x8010500d + +/* + * support for /dev/dsp SNDCTL_DSP_GETFMTS and SNDCTL_DSP_SETFMTS + */ +#define LX_OSS_AFMT_QUERY 0x0000 +#define LX_OSS_AFMT_MU_LAW 0x0001 +#define LX_OSS_AFMT_A_LAW 0x0002 +#define LX_OSS_AFMT_IMA_ADPCM 0x0004 +#define LX_OSS_AFMT_U8 0x0008 +#define LX_OSS_AFMT_S16_LE 0x0010 +#define LX_OSS_AFMT_S16_BE 0x0020 +#define LX_OSS_AFMT_S8 0x0040 +#define LX_OSS_AFMT_U16_LE 0x0080 +#define LX_OSS_AFMT_U16_BE 0x0100 +#define LX_OSS_AFMT_MPEG 0x0200 + +#ifdef _LITTLE_ENDIAN +#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_LE +#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_LE +#elif defined(_BIG_ENDIAN) +#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_BE +#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_BE +#else /* _LITTLE_ENDIAN */ +#error NO ENDIAN defined. +#endif /* _LITTLE_ENDIAN */ + +/* + * support for /dev/dsp SNDCTL_DSP_GETISPACE and SNDCTL_DSP_GETOSPACE + */ +typedef struct lx_oss_audio_buf_info { + int fragments; /* fragments that can be rd/wr without blocking */ + int fragstotal; /* total number of fragments allocated for buffering */ + int fragsize; /* size of fragments, same as SNDCTL_DSP_GETBLKSIZE */ + int bytes; /* what can be rd/wr immediatly without blocking */ +} lx_oss_audio_buf_info_t; + +/* + * support for /dev/dsp SNDCTL_DSP_GETOPTR + */ +typedef struct lx_oss_count_info { + /* # of bytes processed since opening the device */ + int bytes; + + /* + * # of fragment transitions since last call to this function. 
+ * only valid for mmap access mode. + */ + int blocks; + + /* + * byte offset of the current recording/playback position from + * the beginning of the audio buffer. only valid for mmap access + * mode. + */ + int ptr; +} lx_oss_count_info_t; + +/* + * support for /dev/dsp SNDCTL_DSP_GETCAPS + */ +#define LX_OSS_DSP_CAP_TRIGGER 0x1000 +#define LX_OSS_DSP_CAP_MMAP 0x2000 + +/* + * support for /dev/dsp SNDCTL_DSP_SETTRIGGER + */ +#define LX_OSS_PCM_DISABLE_OUTPUT 0 +#define LX_OSS_PCM_ENABLE_OUTPUT 2 + +/* + * /dev/mixer ioctl macros + */ +#define LX_OSS_SM_NRDEVICES 25 +#define LX_OSS_SM_READ(x) (0x80044d00 | (x)) +#define LX_OSS_SM_WRITE(x) (0xc0044d00 | (x)) + +/* + * /dev/mixer ioctls - supported + */ +#define LX_OSS_SOUND_MIXER_READ_VOLUME LX_OSS_SM_READ(LX_OSS_SM_VOLUME) +#define LX_OSS_SOUND_MIXER_READ_PCM LX_OSS_SM_READ(LX_OSS_SM_PCM) +#define LX_OSS_SOUND_MIXER_READ_MIC LX_OSS_SM_READ(LX_OSS_SM_MIC) +#define LX_OSS_SOUND_MIXER_READ_IGAIN LX_OSS_SM_READ(LX_OSS_SM_IGAIN) +#define LX_OSS_SOUND_MIXER_WRITE_VOLUME LX_OSS_SM_WRITE(LX_OSS_SM_VOLUME) +#define LX_OSS_SOUND_MIXER_WRITE_PCM LX_OSS_SM_WRITE(LX_OSS_SM_PCM) +#define LX_OSS_SOUND_MIXER_WRITE_MIC LX_OSS_SM_WRITE(LX_OSS_SM_MIC) +#define LX_OSS_SOUND_MIXER_WRITE_IGAIN LX_OSS_SM_WRITE(LX_OSS_SM_IGAIN) +#define LX_OSS_SOUND_MIXER_READ_STEREODEVS LX_OSS_SM_READ(LX_OSS_SM_STEREODEVS) +#define LX_OSS_SOUND_MIXER_READ_RECMASK LX_OSS_SM_READ(LX_OSS_SM_RECMASK) +#define LX_OSS_SOUND_MIXER_READ_DEVMASK LX_OSS_SM_READ(LX_OSS_SM_DEVMASK) +#define LX_OSS_SOUND_MIXER_READ_RECSRC LX_OSS_SM_READ(LX_OSS_SM_RECSRC) + +/* + * /dev/mixer channels + */ +#define LX_OSS_SM_VOLUME 0 +#define LX_OSS_SM_BASS 1 +#define LX_OSS_SM_TREBLE 2 +#define LX_OSS_SM_SYNTH 3 +#define LX_OSS_SM_PCM 4 +#define LX_OSS_SM_SPEAKER 5 +#define LX_OSS_SM_LINE 6 +#define LX_OSS_SM_MIC 7 +#define LX_OSS_SM_CD 8 +#define LX_OSS_SM_MIX 9 +#define LX_OSS_SM_PCM2 10 +#define LX_OSS_SM_REC 11 +#define LX_OSS_SM_IGAIN 12 +#define LX_OSS_SM_OGAIN 13 +#define
LX_OSS_SM_LINE1 14 +#define LX_OSS_SM_LINE2 15 +#define LX_OSS_SM_LINE3 16 +#define LX_OSS_SM_DIGITAL1 17 +#define LX_OSS_SM_DIGITAL2 18 +#define LX_OSS_SM_DIGITAL3 19 +#define LX_OSS_SM_PHONEIN 20 +#define LX_OSS_SM_PHONEOUT 21 +#define LX_OSS_SM_VIDEO 22 +#define LX_OSS_SM_RADIO 23 +#define LX_OSS_SM_MONITOR 24 + +/* + * /dev/mixer operations + */ +#define LX_OSS_SM_STEREODEVS 251 +#define LX_OSS_SM_CAPS 252 +#define LX_OSS_SM_RECMASK 253 +#define LX_OSS_SM_DEVMASK 254 +#define LX_OSS_SM_RECSRC 255 + +/* + * /dev/mixer value conversion macros + * + * solaris expects gain level on a scale of 0 - 255 + * oss expects gain level on a scale of 0 - 100 + * + * oss also encodes multiple channels volume values in a single int, + * one channel value per byte. + */ +#define LX_OSS_S2L_GAIN(v) (((v) * 100) / 255) +#define LX_OSS_L2S_GAIN(v) (((v) * 255) / 100) +#define LX_OSS_MIXER_DEC1(v) ((v) & 0xff) +#define LX_OSS_MIXER_DEC2(v) (((v) >> 8) & 0xff) +#define LX_OSS_MIXER_ENC2(v1, v2) (((v2) << 8) | (v1)) + +/* + * /dev/mixer value verification macros + */ +#define LX_OSS_MIXER_VCHECK(x) (((int)(x) >= 0) && ((int)(x) <= 100)) +#define LX_OSS_MIXER_1CH_OK(x) ((((x) & ~0xff) == 0) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x))) +#define LX_OSS_MIXER_2CH_OK(x) ((((x) & ~0xffff) == 0) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x)) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC2(x))) + +/* + * Unsupported ioctls (NOT a comprehensive list) + */ +#define LX_TIOCLINUX 0x541c +#define LX_TIOCCONS 0x541d +#define LX_TIOCGSERIAL 0x541e +#define LX_TIOCSSERIAL 0x541f +#define LX_TIOCTTYGSTRUCT 0x5426 +#define LX_TIOCSERCONFIG 0x5453 +#define LX_TIOCSERGWILD 0x5454 +#define LX_TIOCSERSWILD 0x5455 +#define LX_TIOCGLCKTRMIOS 0x5456 +#define LX_TIOCSLCKTRMIOS 0x5457 +#define LX_TIOCSERGSTRUCT 0x5458 +#define LX_TIOCSERGETLSR 0x5459 +#define LX_TIOCSERGETMULTI 0x545a +#define LX_TIOCSERSETMULTI 0x545b +#define LX_OLD_SIOCGIFHWADDR 0x8923 +#define LX_SIOCSIFENCAP 0x8926 +#define 
LX_SIOCGIFSLAVE 0x8929 +#define LX_SIOCSIFSLAVE 0x8930 +#define LX_SIOCADDMULTI 0x8931 +#define LX_SIOCDELMULTI 0x8932 +#define LX_SIOCADDRTOLD 0x8940 +#define LX_SIOCDELRTOLD 0x8941 +#define LX_SIOCGIFTXQLEN 0x8942 +#define LX_SIOCDARP 0x8950 +#define LX_SIOCGARP 0x8951 +#define LX_SIOCSARP 0x8952 +#define LX_SIOCDRARP 0x8960 +#define LX_SIOCGRARP 0x8961 +#define LX_SIOCSRARP 0x8962 +#define LX_SIOCGIFMAP 0x8970 +#define LX_SIOCSIFMAP 0x8971 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_IOCTL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h new file mode 100644 index 0000000000..307952656d --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h @@ -0,0 +1,153 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_H +#define _SYS_LX_H + +#include <stdio.h> +#include <alloca.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/lwp.h> + +#include <sys/lx_brand.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern char lx_release[128]; +extern pid_t zoneinit_pid; + +/* + * Support for the unfortunate RPM race condition workaround. + */ +extern int lx_rpm_delay; +extern boolean_t lx_is_rpm; + +/* + * Values Linux expects for init + */ +#define LX_INIT_PGID 0 +#define LX_INIT_SID 0 +#define LX_INIT_PID 1 + +/* + * Codes to reboot(2). + */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 672274793 +#define LINUX_REBOOT_MAGIC2A 85072278 +#define LINUX_REBOOT_MAGIC2B 369367448 +#define LINUX_REBOOT_MAGIC2C 537993216 + +/* + * This was observed as coming from Red Hat's init process, but it's not in + * their reboot(2) man page. + */ +#define LINUX_REBOOT_MAGIC2D 0x28121969 + +#define LINUX_REBOOT_CMD_RESTART 0x1234567 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART2 0xa1b2c3d4 +#define LINUX_REBOOT_CMD_CAD_ON 0x89abcdef +#define LINUX_REBOOT_CMD_CAD_OFF 0 + +/* + * the maximum length of messages to be output with lx_msg(), lx_err(), + * lx_debug(), or lx_unsupported(). + */ +#define LX_MSG_MAXLEN (128 + MAXPATHLEN) + +/* + * Linux scheduler priority ranges. + */ +#define LX_SCHED_PRIORITY_MIN_OTHER 0 +#define LX_SCHED_PRIORITY_MAX_OTHER 0 +#define LX_SCHED_PRIORITY_MIN_RRFIFO 1 +#define LX_SCHED_PRIORITY_MAX_RRFIFO 99 + +/* + * Constants to indicate who getrusage() should return information about. + */ +#define LX_RUSAGE_SELF 0 +#define LX_RUSAGE_CHILDREN (-1) + +/* + * normally we never want to write to stderr or stdout because it's unsafe + * to make assumptions about the underlying file descriptors. to protect + * against writes to these file descriptors we go ahead and close them in + * our brand process initialization code.
but there are still occasions + * where we are willing to make assumptions about our file descriptors + * and write to them. at these times we should use lx_msg() or + * lx_err() + */ +extern void lx_msg(char *, ...); +extern void lx_err(char *, ...); +extern void lx_err_fatal(char *, ...); +extern void lx_unsupported(char *, ...); + +struct ucontext; + +extern void lx_handler_table(void); +extern void lx_handler_trace_table(void); +extern void lx_emulate_done(void); +extern lx_regs_t *lx_syscall_regs(void); + +extern char *lx_fd_to_path(int fd, char *buf, int buf_size); +extern int lx_lpid_to_spair(pid_t, pid_t *, lwpid_t *); +extern int lx_lpid_to_spid(pid_t, pid_t *); + +extern int lx_ptrace_wait(siginfo_t *); +extern void lx_ptrace_fork(void); + +extern int lx_get_kern_version(void); + +extern int lx_check_alloca(size_t); +#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) + +extern int ltos_at_flag(int lflag, int allow); + +/* + * NO_UUCOPY disables calls to the uucopy* system calls to help with + * debugging brand library accesses to linux application memory. + */ +#ifdef NO_UUCOPY + +int uucopy_unsafe(const void *src, void *dst, size_t n); +int uucopystr_unsafe(const void *src, void *dst, size_t n); + +#define uucopy(src, dst, n) uucopy_unsafe((src), (dst), (n)) +#define uucopystr(src, dst, n) uucopystr_unsafe((src), (dst), (n)) + +#endif /* NO_UUCOPY */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h new file mode 100644 index 0000000000..41db8cca03 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h @@ -0,0 +1,139 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_MOUNT_H +#define _LX_MOUNT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rpc/rpc.h> +#include <nfs/nfs.h> + +/* + * mount() is significantly different between Linux and Solaris. The main + * difference is between the set of flags. Some flags on Linux can be + * translated to a Solaris equivalent, some are converted to a + * filesystem-specific option, while others have no equivalent whatsoever. 
+ */ +#define LX_MS_MGC_VAL 0xC0ED0000 +#define LX_MS_RDONLY 0x00000001 +#define LX_MS_NOSUID 0x00000002 +#define LX_MS_NODEV 0x00000004 +#define LX_MS_NOEXEC 0x00000008 +#define LX_MS_SYNCHRONOUS 0x00000010 +#define LX_MS_REMOUNT 0x00000020 +#define LX_MS_MANDLOCK 0x00000040 +#define LX_MS_NOATIME 0x00000400 +#define LX_MS_NODIRATIME 0x00000800 +#define LX_MS_BIND 0x00001000 +#define LX_MS_SUPPORTED (LX_MS_MGC_VAL | \ + LX_MS_RDONLY | LX_MS_NOSUID | \ + LX_MS_NODEV | LX_MS_NOEXEC | \ + LX_MS_REMOUNT | LX_MS_NOATIME | \ + LX_MS_BIND) + +/* + * support for nfs mounts + */ +#define LX_NMD_MAXHOSTNAMELEN 256 + +#define LX_NFS_MOUNT_SOFT 0x00000001 +#define LX_NFS_MOUNT_INTR 0x00000002 +#define LX_NFS_MOUNT_SECURE 0x00000004 +#define LX_NFS_MOUNT_POSIX 0x00000008 +#define LX_NFS_MOUNT_NOCTO 0x00000010 +#define LX_NFS_MOUNT_NOAC 0x00000020 +#define LX_NFS_MOUNT_TCP 0x00000040 +#define LX_NFS_MOUNT_VER3 0x00000080 +#define LX_NFS_MOUNT_KERBEROS 0x00000100 +#define LX_NFS_MOUNT_NONLM 0x00000200 +#define LX_NFS_MOUNT_BROKEN_SUID 0x00000400 +#define LX_NFS_MOUNT_SUPPORTED (LX_NFS_MOUNT_SOFT | \ + LX_NFS_MOUNT_INTR | \ + LX_NFS_MOUNT_POSIX | \ + LX_NFS_MOUNT_NOCTO | \ + LX_NFS_MOUNT_NOAC | \ + LX_NFS_MOUNT_TCP | \ + LX_NFS_MOUNT_VER3 | \ + LX_NFS_MOUNT_NONLM) + +#define LX_NMD_DEFAULT_RSIZE 0 +#define LX_NMD_DEFAULT_WSIZE 0 + +/* + * the nfs v3 file handle structure definitions are _almost_ the same + * on linux and solaris. the key differences are: + * + * 1) on linux fh3_length is an unsigned short whereas on solaris it's + * an int. + * + * 2) on linux the file handle data doesn't have 32 bit members, so the structure + * is not 32 bit aligned. (whereas on solaris it is.) + * + * so rather than defining a structure that would allow us to interpret + * all the contents of the nfs v3 file handle here, we decide to treat + * the file handle as an array of chars.
this works just fine since it + * avoids the alignment issues and the actual file handle contents + * are defined by the nfs specification so they are common across solaris + * and linux. we do the same thing for nfs v2 file handles. + */ +/* + * NOTE(review): the trailing declarators on the two structs below + * (lx_nfs_fh2 and lx_nfs_fh3) define file-scope objects in every source + * file that includes this header. presumably a bare struct definition + * or a typedef was intended -- confirm before changing. + */ +struct lx_nfs_fh2 { + unsigned char lx_fh_data[NFS_FHSIZE]; +} lx_nfs_fh2; + +struct lx_nfs_fh3 { + unsigned short lx_fh3_length; + unsigned char lx_fh3_data[NFS3_FHSIZE]; +} lx_nfs_fh3; + +typedef struct lx_nfs_mount_data { + int nmd_version; + int nmd_fd; + struct lx_nfs_fh2 nmd_old_root; + int nmd_flags; + int nmd_rsize; + int nmd_wsize; + int nmd_timeo; + int nmd_retrans; + int nmd_acregmin; + int nmd_acregmax; + int nmd_acdirmin; + int nmd_acdirmax; + struct sockaddr_in nmd_addr; + char nmd_hostname[LX_NMD_MAXHOSTNAMELEN]; + int nmd_namlen; + uint_t nmd_bsize; + struct lx_nfs_fh3 nmd_root; +} lx_nfs_mount_data_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_MOUNT_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h new file mode 100644 index 0000000000..cb5706fab2 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_POLL_H +#define _SYS_LX_POLL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * These events are identical between Linux and Solaris + */ +#define LX_POLLIN 0x001 +#define LX_POLLPRI 0x002 +#define LX_POLLOUT 0x004 +#define LX_POLLERR 0x008 +#define LX_POLLHUP 0x010 +#define LX_POLLNVAL 0x020 +#define LX_POLLRDNORM 0x040 +#define LX_POLLRDBAND 0x080 + +#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \ + LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND) + +/* + * These events differ between Linux and Solaris + */ +#define LX_POLLWRNORM 0x100 +#define LX_POLLWRBAND 0x200 + +#define LX_POLL_SUPPORTED_EVENTS \ + (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_POLL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h new file mode 100644 index 0000000000..b7609962ae --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -0,0 +1,299 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_SIGNAL_H +#define _SYS_LX_SIGNAL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if !defined(_ASM) +#include <sys/lx_types.h> +#include <lx_signum.h> + +#endif /* !defined(_ASM) */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Linux sigaction flags + */ +#define LX_SA_NOCLDSTOP 0x00000001 +#define LX_SA_NOCLDWAIT 0x00000002 +#define LX_SA_SIGINFO 0x00000004 +#define LX_SA_RESTORER 0x04000000 +#define LX_SA_ONSTACK 0x08000000 +#define LX_SA_RESTART 0x10000000 +#define LX_SA_NODEFER 0x40000000 +#define LX_SA_RESETHAND 0x80000000 +#define LX_SA_NOMASK LX_SA_NODEFER +#define LX_SA_ONESHOT LX_SA_RESETHAND + +#define LX_SIG_BLOCK 0 +#define LX_SIG_UNBLOCK 1 +#define LX_SIG_SETMASK 2 + +#define LX_MINSIGSTKSZ 2048 +#define LX_SS_ONSTACK 1 +#define LX_SS_DISABLE 2 + +#define LX_SIGRT_MAGIC 0xdeadf00d + +#if !defined(_ASM) + +/* + * NOTE: Linux uses different definitions for sigset_ts and sigaction_ts + * depending on whether the definition is for user space or the kernel. + * + * The definitions below MUST correspond to the Linux kernel versions, + * as glibc will do the necessary translation from the Linux user + * versions. 
+ */ +typedef struct { + ulong_t __bits[LX_NSIG_WORDS]; +} lx_sigset_t; + +#define LX_NBITS (sizeof (ulong_t) * NBBY) +#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS)) +#define lx_sigword(n) (((ulong_t)((n) - 1))>>5) +#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)]) +#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n)) + +typedef struct lx_sigaction { + void (*lxsa_handler)(); + int lxsa_flags; + void (*lxsa_restorer)(void); + lx_sigset_t lxsa_mask; +} lx_sigaction_t; + +typedef uint32_t lx_osigset_t; + +#define OSIGSET_NBITS (sizeof (lx_osigset_t) * NBBY) +#define OSIGSET_BITSET(sig) (1U << (((sig) - 1) % OSIGSET_NBITS)) + +/* + * Flag settings to determine whether common routines should operate on + * lx_sigset_ts or lx_osigset_ts. + */ +#define USE_OSIGSET 0 +#define USE_SIGSET 1 + +typedef struct lx_osigaction { + void (*lxsa_handler)(); + lx_osigset_t lxsa_mask; + int lxsa_flags; + void (*lxsa_restorer)(void); +} lx_osigaction_t; + +#define LX_SI_MAX_SIZE 128 +#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 3) + +typedef struct lx_siginfo { + int lsi_signo; + int lsi_errno; + int lsi_code; + union { + int _pad[LX_SI_PAD_SIZE]; + + struct { + pid_t _pid; + lx_uid16_t _uid; + } _kill; + + struct { + uint_t _timer1; + uint_t _timer2; + } _timer; + + struct { + pid_t _pid; /* sender's pid */ + lx_uid16_t _uid; /* sender's uid */ + union sigval _sigval; + } _rt; + + struct { + pid_t _pid; /* which child */ + lx_uid16_t _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + struct { + void *_addr; /* faulting insn/memory ref. 
*/ + } _sigfault; + + struct { + int _band; /* POLL_IN,POLL_OUT,POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} lx_siginfo_t; + +/* + * lx_siginfo_t lsi_code values + * + * LX_SI_ASYNCNL: Sent by asynch name lookup completion + * LX_SI_TKILL: Sent by tkill + * LX_SI_SIGIO: Sent by queued SIGIO + * LX_SI_ASYNCIO: Sent by asynchronous I/O completion + * LX_SI_MESGQ: Sent by real time message queue state change + * LX_SI_TIMER: Sent by timer expiration + * LX_SI_QUEUE: Sent by sigqueue + * LX_SI_USER: Sent by kill, sigsend, raise, etc. + * LX_SI_KERNEL: Sent by kernel + * + * At present, LX_SI_ASYNCNL and LX_SI_SIGIO are unused by BrandZ. + */ +#define LX_SI_ASYNCNL (-60) +#define LX_SI_TKILL (-6) +#define LX_SI_SIGIO (-5) +#define LX_SI_ASYNCIO (-4) +#define LX_SI_MESGQ (-3) +#define LX_SI_TIMER (-2) +#define LX_SI_QUEUE (-1) +#define LX_SI_USER (0) +#define LX_SI_KERNEL (0x80) + +typedef struct lx_sighandlers { + struct lx_sigaction lx_sa[LX_NSIG]; +} lx_sighandlers_t; + +typedef struct lx_sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} lx_stack_t; + +struct lx_fpreg { + ushort_t significand[4]; + ushort_t exponent; +}; + +struct lx_fpxreg { + ushort_t significand[4]; + ushort_t exponent; + ushort_t padding[3]; +}; + +struct lx_xmmreg { + uint32_t element[4]; +}; + +#define LX_X86_FXSR_MAGIC 0x0000 +#define LX_X86_FXSR_NONE 0xffff + +typedef struct lx_fpstate { + /* Regular FPU environment */ + ulong_t cw; + ulong_t sw; + ulong_t tag; + ulong_t ipoff; + ulong_t cssel; + ulong_t dataoff; + ulong_t datasel; + struct lx_fpreg _st[8]; + ushort_t status; + ushort_t magic; /* 0xffff = regular FPU data */ + + /* FXSR FPU environment */ + ulong_t _fxsr_env[6]; /* env is ignored */ + ulong_t mxcsr; + ulong_t reserved; + struct lx_fpxreg _fxsr_st[8]; /* reg data is ignored */ + struct lx_xmmreg _xmm[8]; + ulong_t padding[56]; +} lx_fpstate_t; + +typedef struct lx_sigcontext { + ulong_t sc_gs; + ulong_t sc_fs; + ulong_t sc_es; + ulong_t sc_ds; + 
ulong_t sc_edi; + ulong_t sc_esi; + ulong_t sc_ebp; + ulong_t sc_esp; + ulong_t sc_ebx; + ulong_t sc_edx; + ulong_t sc_ecx; + ulong_t sc_eax; + ulong_t sc_trapno; + ulong_t sc_err; + ulong_t sc_eip; + ulong_t sc_cs; + ulong_t sc_eflags; + ulong_t sc_esp_at_signal; + ulong_t sc_ss; + lx_fpstate_t *sc_fpstate; + ulong_t sc_mask; + ulong_t sc_cr2; +} lx_sigcontext_t; + +typedef struct lx_ucontext { + ulong_t uc_flags; + struct lx_ucontext *uc_link; + lx_stack_t uc_stack; + lx_sigcontext_t uc_sigcontext; + lx_sigset_t uc_sigmask; +} lx_ucontext_t; + +#define LX_SI_MAX_SIZE 128 +#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 3) + +#define lsi_pid _sifields._kill._pid +#define lsi_uid _sifields._kill._uid +#define lsi_status _sifields._sigchld._status +#define lsi_utime _sifields._sigchld._utime +#define lsi_stime _sifields._sigchld._stime +#define lsi_value _sifields._rt._sigval +#define lsi_int _sifields._rt._sigval.sivalx_int +#define lsi_ptr _sifields._rt._sigval.sivalx_ptr +#define lsi_addr _sifields._sigfault._addr +#define lsi_band _sifields._sigpoll._band +#define lsi_fd _sifields._sigpoll._fd + +extern const int ltos_signo[]; +extern const int stol_signo[]; + +extern void setsigacthandler(void (*)(int, siginfo_t *, void *), + void (**)(int, siginfo_t *, void *)); + +extern int lx_siginit(void); + +extern void lx_sigreturn_tolibc(uintptr_t); +extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(), + void (*)(), uintptr_t); + +extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop); + +#endif /* !defined(_ASM) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SIGNAL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h new file mode 100644 index 0000000000..ce8d71c962 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h @@ -0,0 +1,258 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_SOCKET_H +#define _SYS_LX_SOCKET_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/lx_types.h> + +/* + * Linux address family definitions + * Some of these are not supported + */ +#define LX_AF_UNSPEC 0 /* Unspecified */ +#define LX_AF_UNIX 1 /* local file/pipe name */ +#define LX_AF_INET 2 /* IP protocol family */ +#define LX_AF_AX25 3 /* Amateur Radio AX.25 */ +#define LX_AF_IPX 4 /* Novell Internet Protocol */ +#define LX_AF_APPLETALK 5 /* Appletalk */ +#define LX_AF_NETROM 6 /* Amateur radio */ +#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */ +#define LX_AF_ATMPVC 8 /* ATM PVCs */ +#define LX_AF_X25 9 /* X.25 */ +#define LX_AF_INET6 10 /* IPV 6 */ +#define LX_AF_ROSE 11 /* Amateur Radio X.25 */ +#define LX_AF_DECnet 12 /* DECnet */ +#define LX_AF_NETBEUI 13 /* 802.2LLC */ +#define LX_AF_SECURITY 14 /* Security callback */ +#define LX_AF_KEY 15 /* key management */ +#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */ +#define LX_AF_PACKET 17 /* Packet family */ +#define LX_AF_ASH 18 /* Ash ? 
*/ +#define LX_AF_ECONET 19 /* Acorn Econet */ +#define LX_AF_ATMSVC 20 /* ATM SVCs */ +#define LX_AF_SNA 22 /* Linux SNA */ +#define LX_AF_IRDA 23 /* IRDA sockets */ +#define LX_AF_PPPOX 24 /* PPPoX sockets */ +#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */ +#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */ +#define LX_AF_MAX 32 /* MAX socket type */ + +/* negative values are parenthesized so the macros expand safely in any expression */ +#define AF_NOTSUPPORTED (-1) +#define AF_INVAL (-2) + +/* + * Linux ARP protocol hardware identifiers + */ +#define LX_ARPHRD_ETHER 1 /* Ethernet */ +#define LX_ARPHRD_LOOPBACK 772 /* Loopback */ +#define LX_ARPHRD_VOID 0xffff /* Unknown */ + +/* + * Linux socket type definitions + */ +#define LX_SOCK_STREAM 1 /* Connection-based byte streams */ +#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */ +#define LX_SOCK_RAW 3 /* Raw protocol interface */ +#define LX_SOCK_RDM 4 /* Reliably-delivered message */ +#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */ +#define LX_SOCK_PACKET 10 /* Linux specific */ +#define LX_SOCK_MAX 11 + +/* negative values are parenthesized so the macros expand safely in any expression */ +#define SOCK_NOTSUPPORTED (-1) +#define SOCK_INVAL (-2) + +/* + * Options for use with [gs]etsockopt at the IP level. + * IPPROTO_IP + */ +#define LX_IP_TOS 1 +#define LX_IP_TTL 2 +#define LX_IP_HDRINCL 3 +#define LX_IP_OPTIONS 4 +#define LX_IP_ROUTER_ALERT 5 +#define LX_IP_RECVOPTS 6 +#define LX_IP_RETOPTS 7 +#define LX_IP_PKTINFO 8 +#define LX_IP_PKTOPTIONS 9 +#define LX_IP_MTU_DISCOVER 10 +#define LX_IP_RECVERR 11 +#define LX_IP_RECVTTL 12 +#define LX_IP_RECVTOS 13 +#define LX_IP_MTU 14 +#define LX_IP_FREEBIND 15 +#define LX_IP_MULTICAST_IF 32 +#define LX_IP_MULTICAST_TTL 33 +#define LX_IP_MULTICAST_LOOP 34 +#define LX_IP_ADD_MEMBERSHIP 35 +#define LX_IP_DROP_MEMBERSHIP 36 + +/* + * Options for use with [gs]etsockopt at the TCP level.
+ * IPPROTO_TCP + */ +#define LX_TCP_NODELAY 1 /* Don't delay send to coalesce packets */ +#define LX_TCP_MAXSEG 2 /* Set maximum segment size */ +#define LX_TCP_CORK 3 /* Control sending of partial frames */ +#define LX_TCP_KEEPIDLE 4 /* Start keepalives after this period */ +#define LX_TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define LX_TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define LX_TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define LX_TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define LX_TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define LX_TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define LX_TCP_INFO 11 /* Information about this connection. */ +#define LX_TCP_QUICKACK 12 /* Block/reenable quick ACKs. */ + +/* + * Options for use with [gs]etsockopt at the IGMP level. + * IPPROTO_IGMP + */ +#define LX_IGMP_MINLEN 8 +#define LX_IGMP_MAX_HOST_REPORT_DELAY 10 +#define LX_IGMP_HOST_MEMBERSHIP_QUERY 0x11 +#define LX_IGMP_HOST_MEMBERSHIP_REPORT 0x12 +#define LX_IGMP_DVMRP 0x13 +#define LX_IGMP_PIM 0x14 +#define LX_IGMP_TRACE 0x15 +#define LX_IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 +#define LX_IGMP_HOST_LEAVE_MESSAGE 0x17 +#define LX_IGMP_MTRACE_RESP 0x1e +#define LX_IGMP_MTRACE 0x1f + +/* + * Options for use with [gs]etsockopt at the SOL_SOCKET level.
+ */ +#define LX_SOL_SOCKET 1 + +#define LX_SCM_RIGHTS 1 +#define LX_SCM_CRED 2 + +#define LX_SO_DEBUG 1 +#define LX_SO_REUSEADDR 2 +#define LX_SO_TYPE 3 +#define LX_SO_ERROR 4 +#define LX_SO_DONTROUTE 5 +#define LX_SO_BROADCAST 6 +#define LX_SO_SNDBUF 7 +#define LX_SO_RCVBUF 8 +#define LX_SO_KEEPALIVE 9 +#define LX_SO_OOBINLINE 10 +#define LX_SO_NO_CHECK 11 +#define LX_SO_PRIORITY 12 +#define LX_SO_LINGER 13 +#define LX_SO_BSDCOMPAT 14 +/* To add :#define LX_SO_REUSEPORT 15 */ +#define LX_SO_PASSCRED 16 +#define LX_SO_PEERCRED 17 +#define LX_SO_RCVLOWAT 18 +#define LX_SO_SNDLOWAT 19 +#define LX_SO_RCVTIMEO 20 +#define LX_SO_SNDTIMEO 21 +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define LX_SO_SECURITY_AUTHENTICATION 22 +#define LX_SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define LX_SO_SECURITY_ENCRYPTION_NETWORK 24 +#define LX_SO_BINDTODEVICE 25 +/* Socket filtering */ +#define LX_SO_ATTACH_FILTER 26 +#define LX_SO_DETACH_FILTER 27 +#define LX_SO_PEERNAME 28 +#define LX_SO_TIMESTAMP 29 +#define LX_SCM_TIMESTAMP LX_SO_TIMESTAMP +#define LX_SO_ACCEPTCONN 30 + +/* + * Linux socketcall indices. + * These constitute all 17 socket related system calls + * + * These system calls are called via a single system call socketcall(). 
+ * The first arg being the index of the system call type + */ +#define LX_SOCKET 1 +#define LX_BIND 2 +#define LX_CONNECT 3 +#define LX_LISTEN 4 +#define LX_ACCEPT 5 +#define LX_GETSOCKNAME 6 +#define LX_GETPEERNAME 7 +#define LX_SOCKETPAIR 8 +#define LX_SEND 9 +#define LX_RECV 10 +#define LX_SENDTO 11 +#define LX_RECVFROM 12 +#define LX_SHUTDOWN 13 +#define LX_SETSOCKOPT 14 +#define LX_GETSOCKOPT 15 +#define LX_SENDMSG 16 +#define LX_RECVMSG 17 + +/* + * Linux socket flags for use with recv(2)/send(2)/recvmsg(2)/sendmsg(2) + */ +#define LX_MSG_OOB 1 +#define LX_MSG_PEEK 2 +#define LX_MSG_DONTROUTE 4 +#define LX_MSG_CTRUNC 8 +#define LX_MSG_PROXY 0x10 +#define LX_MSG_TRUNC 0x20 +#define LX_MSG_DONTWAIT 0x40 +#define LX_MSG_EOR 0x80 +#define LX_MSG_WAITALL 0x100 +#define LX_MSG_FIN 0x200 +#define LX_MSG_SYN 0x400 +#define LX_MSG_CONFIRM 0x800 +#define LX_MSG_RST 0x1000 +#define LX_MSG_ERRQUEUE 0x2000 +#define LX_MSG_NOSIGNAL 0x4000 +#define LX_MSG_MORE 0x8000 + +struct lx_msghdr { + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; + +struct lx_ucred { + pid_t lxu_pid; + lx_uid_t lxu_uid; + lx_gid_t lxu_gid; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SOCKET_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h new file mode 100644 index 0000000000..67aa86bb1c --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License.
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_STAT_H +#define _SYS_LX_STAT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/lx_types.h> +#include <sys/stat.h> + +#define LX_MAJORSHIFT 8 +#define LX_MINORMASK ((1 << LX_MAJORSHIFT) - 1) +#define LX_MAKEDEVICE(lx_maj, lx_min) \ + ((lx_dev_t)((lx_maj) << LX_MAJORSHIFT | ((lx_min) & LX_MINORMASK))) + +#define LX_GETMAJOR(lx_dev) ((lx_dev) >> LX_MAJORSHIFT) +#define LX_GETMINOR(lx_dev) ((lx_dev) & LX_MINORMASK) + +#undef st_atime +#undef st_mtime +#undef st_ctime + +struct lx_stat { + lx_dev16_t st_dev; + uint16_t st_pad1; + lx_ino_t st_ino; + lx_mode16_t st_mode; + uint16_t st_nlink; + lx_uid16_t st_uid; + lx_gid16_t st_gid; + lx_dev16_t st_rdev; + uint16_t st_pad2; + lx_off_t st_size; + lx_blksize_t st_blksize; + lx_blkcnt_t st_blocks; + struct lx_timespec st_atime; + struct lx_timespec st_mtime; + struct lx_timespec st_ctime; + uint32_t st_pad3; + uint32_t st_pad4; +}; + +struct lx_stat64 { + lx_dev_t st_dev; + uint32_t st_pad1; + lx_ino_t st_small_ino; + lx_mode_t st_mode; + uint_t st_nlink; + lx_uid_t st_uid; + lx_gid_t st_gid; + lx_dev_t st_rdev; + uint32_t st_pad2; + lx_off64_t st_size; + lx_blksize_t st_blksize; + lx_blkcnt64_t st_blocks; + struct lx_timespec st_atime; + struct 
lx_timespec st_mtime; + struct lx_timespec st_ctime; + lx_ino64_t st_ino; +}; + +extern int lx_stat_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_STAT_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h new file mode 100644 index 0000000000..839d36d7ae --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_STATFS_H +#define _LX_STATFS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_statfs_init(void); + +struct lx_statfs { + int f_type; + int f_bsize; + ulong_t f_blocks; + ulong_t f_bfree; + ulong_t f_bavail; + ulong_t f_files; + ulong_t f_ffree; + u_longlong_t f_fsid; + int f_namelen; + int f_frsize; + int f_spare[5]; +}; + +struct lx_statfs64 { + int f_type; + int f_bsize; + u_longlong_t f_blocks; + u_longlong_t f_bfree; + u_longlong_t f_bavail; + u_longlong_t f_files; + u_longlong_t f_ffree; + u_longlong_t f_fsid; + int f_namelen; + int f_frsize; + int f_spare[5]; +}; + +/* + * These magic values are taken mostly from statfs(2). + */ +#define LX_ISOFS_SUPER_MAGIC 0x9660 +#define LX_NFS_SUPER_MAGIC 0x6969 +#define LX_MSDOS_SUPER_MAGIC 0x4d44 +#define LX_PROC_SUPER_MAGIC 0x9fa0 +#define LX_UFS_MAGIC 0x00011954 +#define LX_DEVPTS_SUPER_MAGIC 0x1cd1 + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_STATFS_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h new file mode 100644 index 0000000000..b0d4cf2eb6 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h @@ -0,0 +1,556 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_SYSCALL_H +#define _SYS_LX_SYSCALL_H + +#if !defined(_ASM) + +#include <sys/types.h> +#include <sys/procset.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_install; + +extern int lx_openat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_mkdirat(uintptr_t, uintptr_t, uintptr_t); +extern int lx_mknodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchownat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_futimesat(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fstatat64(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_unlinkat(uintptr_t, uintptr_t, uintptr_t); +extern int lx_renameat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_linkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_symlinkat(uintptr_t, uintptr_t, uintptr_t); +extern int lx_readlinkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchmodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_faccessat(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_stat(uintptr_t, uintptr_t); +extern int lx_fstat(uintptr_t, uintptr_t); +extern int lx_lstat(uintptr_t, uintptr_t); +extern int lx_stat64(uintptr_t, uintptr_t); +extern int lx_fstat64(uintptr_t, uintptr_t); +extern int lx_lstat64(uintptr_t, uintptr_t); +extern int lx_fcntl(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fcntl64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_flock(uintptr_t, uintptr_t); +extern int lx_open(uintptr_t, uintptr_t, uintptr_t); +extern int lx_readdir(uintptr_t, uintptr_t, uintptr_t); +extern int 
lx_getdents64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getpid(void); +extern int lx_execve(uintptr_t, uintptr_t, uintptr_t); +extern int lx_dup2(uintptr_t, uintptr_t); +extern int lx_ioctl(uintptr_t, uintptr_t, uintptr_t); +extern int lx_vhangup(void); + +extern int lx_read(uintptr_t, uintptr_t, uintptr_t); +extern int lx_readv(uintptr_t, uintptr_t, uintptr_t); +extern int lx_writev(uintptr_t, uintptr_t, uintptr_t); +extern int lx_pread64(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_pwrite64(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_socketcall(uintptr_t, uintptr_t); +extern int lx_select(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_poll(uintptr_t, uintptr_t, uintptr_t); +extern int lx_oldgetrlimit(uintptr_t, uintptr_t); +extern int lx_getrlimit(uintptr_t, uintptr_t); +extern int lx_setrlimit(uintptr_t, uintptr_t); +extern int lx_gettimeofday(uintptr_t, uintptr_t); +extern int lx_settimeofday(uintptr_t, uintptr_t); +extern int lx_getrusage(uintptr_t, uintptr_t); +extern int lx_mknod(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getpgrp(void); +extern int lx_getpgid(uintptr_t); +extern int lx_setpgid(uintptr_t, uintptr_t); +extern int lx_getsid(uintptr_t); +extern int lx_setsid(void); +extern int lx_setgroups(uintptr_t, uintptr_t); + + +extern int lx_waitpid(uintptr_t, uintptr_t, uintptr_t); +extern int lx_waitid(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getuid16(void); +extern int lx_getgid16(void); +extern int lx_geteuid16(void); +extern int lx_getegid16(void); +extern int lx_geteuid(void); +extern int lx_getegid(void); +extern int lx_getresuid16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresgid16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresuid(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresgid(uintptr_t, uintptr_t, uintptr_t); + +extern int 
lx_setuid16(uintptr_t); +extern int lx_setreuid16(uintptr_t, uintptr_t); +extern int lx_setregid16(uintptr_t, uintptr_t); +extern int lx_setgid16(uintptr_t); +extern int lx_setfsuid16(uintptr_t); +extern int lx_setfsgid16(uintptr_t); + +extern int lx_setfsuid(uintptr_t); +extern int lx_setfsgid(uintptr_t); + +extern int lx_clock_settime(int, struct timespec *); +extern int lx_clock_gettime(int, struct timespec *); +extern int lx_clock_getres(int, struct timespec *); +extern int lx_clock_nanosleep(int, int flags, struct timespec *, + struct timespec *); + +extern int lx_truncate(uintptr_t, uintptr_t); +extern int lx_ftruncate(uintptr_t, uintptr_t); +extern int lx_truncate64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_ftruncate64(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sysctl(uintptr_t); +extern int lx_fsync(uintptr_t); +extern int lx_fdatasync(uintptr_t); +extern int lx_pipe(uintptr_t); +extern int lx_link(uintptr_t, uintptr_t); +extern int lx_unlink(uintptr_t); +extern int lx_rmdir(uintptr_t); +extern int lx_chown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_lchown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_chown(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchown(uintptr_t, uintptr_t, uintptr_t); +extern int lx_chmod(uintptr_t, uintptr_t); +extern int lx_rename(uintptr_t, uintptr_t); +extern int lx_utime(uintptr_t, uintptr_t); +extern int lx_llseek(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_lseek(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sysfs(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getcwd(uintptr_t, uintptr_t); +extern int lx_uname(uintptr_t); +extern int lx_reboot(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_getgroups16(uintptr_t, uintptr_t); +extern int lx_setgroups16(uintptr_t, uintptr_t); +extern int lx_personality(uintptr_t); + +extern int lx_query_module(uintptr_t, uintptr_t, uintptr_t, uintptr_t, + 
uintptr_t); + +extern int lx_time(uintptr_t); +extern int lx_times(uintptr_t); +extern int lx_setitimer(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_clone(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_exit(uintptr_t); +extern int lx_group_exit(uintptr_t); + +extern int lx_mlock(uintptr_t, uintptr_t); +extern int lx_mlockall(uintptr_t); +extern int lx_munlock(uintptr_t, uintptr_t); +extern int lx_munlockall(void); +extern int lx_msync(uintptr_t, uintptr_t, uintptr_t); +extern int lx_madvise(uintptr_t, uintptr_t, uintptr_t); +extern int lx_mprotect(uintptr_t, uintptr_t, uintptr_t); +extern int lx_mmap(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +extern int lx_mmap2(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); + +extern int lx_mount(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_umount(uintptr_t); +extern int lx_umount2(uintptr_t, uintptr_t); + +extern int lx_statfs(uintptr_t, uintptr_t); +extern int lx_fstatfs(uintptr_t, uintptr_t); +extern int lx_statfs64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fstatfs64(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sigreturn(void); +extern int lx_rt_sigreturn(void); +extern int lx_signal(uintptr_t, uintptr_t); +extern int lx_sigaction(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigaction(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_sigaltstack(uintptr_t, uintptr_t); +extern int lx_sigpending(uintptr_t); +extern int lx_rt_sigpending(uintptr_t, uintptr_t); +extern int lx_sigprocmask(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigprocmask(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_sigsuspend(uintptr_t); +extern int lx_rt_sigsuspend(uintptr_t, uintptr_t); +extern int lx_sigwaitinfo(uintptr_t, uintptr_t); +extern int lx_rt_sigwaitinfo(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sigtimedwait(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigtimedwait(uintptr_t, 
uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sync(void); + +extern int lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); + +extern int lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +extern int lx_tgkill(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sethostname(uintptr_t, uintptr_t); +extern int lx_setdomainname(uintptr_t, uintptr_t); + +extern int lx_sendfile(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_sendfile64(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_fork(void); +extern int lx_vfork(void); +extern int lx_exec(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getpriority(uintptr_t, uintptr_t); +extern int lx_setpriority(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sched_getaffinity(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_setaffinity(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_getparam(uintptr_t, uintptr_t); +extern int lx_sched_setparam(uintptr_t, uintptr_t); +extern int lx_sched_rr_get_interval(uintptr_t pid, uintptr_t); +extern int lx_sched_getscheduler(uintptr_t); +extern int lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_get_priority_min(uintptr_t); +extern int lx_sched_get_priority_max(uintptr_t); + +extern int lx_keyctl(void); + +extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +#endif /* !defined(_ASM) */ + +#define EBP_HAS_ARG6 0x01 + +/* + * Linux syscall numbers + */ +#define LX_SYS_exit 1 +#define LX_SYS_fork 2 +#define LX_SYS_read 3 +#define LX_SYS_write 4 +#define LX_SYS_open 5 +#define LX_SYS_close 6 +#define LX_SYS_waitpid 7 +#define LX_SYS_creat 8 +#define LX_SYS_link 9 +#define LX_SYS_unlink 10 +#define LX_SYS_execve 11 +#define LX_SYS_chdir 12 +#define LX_SYS_time 13 +#define LX_SYS_mknod 14 +#define LX_SYS_chmod 15 +#define LX_SYS_lchown 16 +#define LX_SYS_break 17 +#define 
LX_SYS_oldstat 18 +#define LX_SYS_lseek 19 +#define LX_SYS_getpid 20 +#define LX_SYS_mount 21 +#define LX_SYS_umount 22 +#define LX_SYS_setuid 23 +#define LX_SYS_getuid 24 +#define LX_SYS_stime 25 +#define LX_SYS_ptrace 26 +#define LX_SYS_alarm 27 +#define LX_SYS_oldfstat 28 +#define LX_SYS_pause 29 +#define LX_SYS_utime 30 +#define LX_SYS_stty 31 +#define LX_SYS_gtty 32 +#define LX_SYS_access 33 +#define LX_SYS_nice 34 +#define LX_SYS_ftime 35 +#define LX_SYS_sync 36 +#define LX_SYS_kill 37 +#define LX_SYS_rename 38 +#define LX_SYS_mkdir 39 +#define LX_SYS_rmdir 40 +#define LX_SYS_dup 41 +#define LX_SYS_pipe 42 +#define LX_SYS_times 43 +#define LX_SYS_prof 44 +#define LX_SYS_brk 45 +#define LX_SYS_setgid 46 +#define LX_SYS_getgid 47 +#define LX_SYS_signal 48 +#define LX_SYS_geteuid 49 +#define LX_SYS_getegid 50 +#define LX_SYS_acct 51 +#define LX_SYS_umount2 52 +#define LX_SYS_lock 53 +#define LX_SYS_ioctl 54 +#define LX_SYS_fcntl 55 +#define LX_SYS_mpx 56 +#define LX_SYS_setpgid 57 +#define LX_SYS_ulimit 58 +#define LX_SYS_oldolduname 59 +#define LX_SYS_umask 60 +#define LX_SYS_chroot 61 +#define LX_SYS_ustat 62 +#define LX_SYS_dup2 63 +#define LX_SYS_getppid 64 +#define LX_SYS_getpgrp 65 +#define LX_SYS_setsid 66 +#define LX_SYS_sigaction 67 +#define LX_SYS_sgetmask 68 +#define LX_SYS_ssetmask 69 +#define LX_SYS_setreuid 70 +#define LX_SYS_setregid 71 +#define LX_SYS_sigsuspend 72 +#define LX_SYS_sigpending 73 +#define LX_SYS_sethostname 74 +#define LX_SYS_setrlimit 75 +#define LX_SYS_getrlimit 76 +#define LX_SYS_getrusage 77 +#define LX_SYS_gettimeofday 78 +#define LX_SYS_settimeofday 79 +#define LX_SYS_getgroups 80 +#define LX_SYS_setgroups 81 +#define LX_SYS_select 82 +#define LX_SYS_symlink 83 +#define LX_SYS_oldlstat 84 +#define LX_SYS_readlink 85 +#define LX_SYS_uselib 86 +#define LX_SYS_swapon 87 +#define LX_SYS_reboot 88 +#define LX_SYS_readdir 89 +#define LX_SYS_mmap 90 +#define LX_SYS_munmap 91 +#define LX_SYS_truncate 92 +#define LX_SYS_ftruncate 93 
+#define LX_SYS_fchmod 94 +#define LX_SYS_fchown 95 +#define LX_SYS_getpriority 96 +#define LX_SYS_setpriority 97 +#define LX_SYS_profil 98 +#define LX_SYS_statfs 99 +#define LX_SYS_fstatfs 100 +#define LX_SYS_ioperm 101 +#define LX_SYS_socketcall 102 +#define LX_SYS_syslog 103 +#define LX_SYS_setitimer 104 +#define LX_SYS_getitimer 105 +#define LX_SYS_stat 106 +#define LX_SYS_lstat 107 +#define LX_SYS_fstat 108 +#define LX_SYS_olduname 109 +#define LX_SYS_iopl 110 +#define LX_SYS_vhangup 111 +#define LX_SYS_idle 112 +#define LX_SYS_vm86old 113 +#define LX_SYS_wait4 114 +#define LX_SYS_swapoff 115 +#define LX_SYS_sysinfo 116 +#define LX_SYS_ipc 117 +#define LX_SYS_fsync 118 +#define LX_SYS_sigreturn 119 +#define LX_SYS_clone 120 +#define LX_SYS_setdomainname 121 +#define LX_SYS_uname 122 +#define LX_SYS_modify_ldt 123 +#define LX_SYS_adjtimex 124 +#define LX_SYS_mprotect 125 +#define LX_SYS_sigprocmask 126 +#define LX_SYS_create_module 127 +#define LX_SYS_init_module 128 +#define LX_SYS_delete_module 129 +#define LX_SYS_get_kernel_syms 130 +#define LX_SYS_quotactl 131 +#define LX_SYS_getpgid 132 +#define LX_SYS_fchdir 133 +#define LX_SYS_sysfs 135 +#define LX_SYS_setfsuid 138 +#define LX_SYS_setfsgid 139 +#define LX_SYS_llseek 140 +#define LX_SYS_getdents 141 +#define LX_SYS_newselect 142 +#define LX_SYS_flock 143 +#define LX_SYS_msync 144 +#define LX_SYS_readv 145 +#define LX_SYS_writev 146 +#define LX_SYS_getsid 147 +#define LX_SYS_fdatasync 148 +#define LX_SYS_sysctl 149 +#define LX_SYS_mlock 150 +#define LX_SYS_munlock 151 +#define LX_SYS_mlockall 152 +#define LX_SYS_munlockall 153 +#define LX_SYS_sched_setparam 154 +#define LX_SYS_sched_getparam 155 +#define LX_SYS_sched_setscheduler 156 +#define LX_SYS_sched_getscheduler 157 +#define LX_SYS_sched_yield 158 +#define LX_SYS_sched_get_priority_max 159 +#define LX_SYS_sched_get_priority_min 160 +#define LX_SYS_sched_rr_get_interval 161 +#define LX_SYS_nanosleep 162 +#define LX_SYS_mremap 163 +#define 
LX_SYS_setresuid 164 +#define LX_SYS_getresuid 165 +#define LX_SYS_poll 168 +#define LX_SYS_setresgid 170 +#define LX_SYS_getresgid 171 +#define LX_SYS_prctl 172 +#define LX_SYS_rt_sigreturn 173 +#define LX_SYS_rt_sigaction 174 +#define LX_SYS_rt_sigprocmask 175 +#define LX_SYS_rt_sigpending 176 +#define LX_SYS_rt_sigtimedwait 177 +#define LX_SYS_rt_sigqueueinfo 178 +#define LX_SYS_rt_sigsuspend 179 +#define LX_SYS_pread 180 +#define LX_SYS_pwrite 181 +#define LX_SYS_chown 182 +#define LX_SYS_getcwd 183 +#define LX_SYS_capget 184 +#define LX_SYS_capset 185 +#define LX_SYS_sigaltstack 186 +#define LX_SYS_sendfile 187 +#define LX_SYS_getpmsg 188 +#define LX_SYS_putpmsg 189 +#define LX_SYS_vfork 190 +#define LX_SYS_ugetrlimit 191 +#define LX_SYS_mmap2 192 +#define LX_SYS_truncate64 193 +#define LX_SYS_ftruncate64 194 +#define LX_SYS_stat64 195 +#define LX_SYS_lstat64 196 +#define LX_SYS_fstat64 197 +#define LX_SYS_lchown32 198 +#define LX_SYS_getuid32 199 +#define LX_SYS_getgid32 200 +#define LX_SYS_geteuid32 201 +#define LX_SYS_getegid32 202 +#define LX_SYS_setreuid32 203 +#define LX_SYS_setregid32 204 +#define LX_SYS_getgroups32 205 +#define LX_SYS_setgroups32 206 +#define LX_SYS_fchown32 207 +#define LX_SYS_setresuid32 208 +#define LX_SYS_getresuid32 209 +#define LX_SYS_setresgid32 210 +#define LX_SYS_getresgid32 211 +#define LX_SYS_chown32 212 +#define LX_SYS_setuid32 213 +#define LX_SYS_setgid32 214 +#define LX_SYS_setfsuid32 215 +#define LX_SYS_setfsgid32 216 +#define LX_SYS_mincore 218 +#define LX_SYS_madvise 219 +#define LX_SYS_getdents64 220 +#define LX_SYS_fcntl64 221 +#define LX_SYS_gettid 224 +#define LX_SYS_readahead 225 +#define LX_SYS_setxattr 226 +#define LX_SYS_lsetxattr 227 +#define LX_SYS_fsetxattr 228 +#define LX_SYS_getxattr 229 +#define LX_SYS_lgetxattr 230 +#define LX_SYS_fgetxattr 231 +#define LX_SYS_listxattr 232 +#define LX_SYS_llistxattr 233 +#define LX_SYS_flistxattr 234 +#define LX_SYS_removexattr 235 +#define LX_SYS_lremovexattr 236 
+#define LX_SYS_fremovexattr 237 +#define LX_SYS_tkill 238 +#define LX_SYS_sendfile64 239 +#define LX_SYS_futex 240 +#define LX_SYS_sched_setaffinity 241 +#define LX_SYS_sched_getaffinity 242 +#define LX_SYS_set_thread_area 243 +#define LX_SYS_get_thread_area 244 +#define LX_SYS_fadvise64 250 +#define LX_SYS_exit_group 252 +#define LX_SYS_remap_file_pages 257 +#define LX_SYS_set_tid_address 258 +#define LX_SYS_timer_create 259 +#define LX_SYS_timer_settime 260 +#define LX_SYS_timer_gettime 261 +#define LX_SYS_timer_getoverrun 262 +#define LX_SYS_timer_delete 263 +#define LX_SYS_clock_settime 264 +#define LX_SYS_clock_gettime 265 +#define LX_SYS_clock_getres 266 +#define LX_SYS_clock_nanosleep 267 +#define LX_SYS_tgkill 270 +/* the following syscalls are for 2.6 and later kernels */ +#define LX_SYS_utimes 271 +#define LX_SYS_fadvise64_64 272 +#define LX_SYS_vserver 273 +#define LX_SYS_mbind 274 +#define LX_SYS_get_mempolicyd 275 +#define LX_SYS_set_mempolicy 276 +#define LX_SYS_mq_open 277 +#define LX_SYS_mq_unlink 278 +#define LX_SYS_mq_timedsend 279 +#define LX_SYS_mq_timedreceive 280 +#define LX_SYS_mq_notify 281 +#define LX_SYS_mq_getsetattr 282 +#define LX_SYS_kexec_load 283 +#define LX_SYS_waitid 284 +#define LX_SYS_setaltroot 285 +#define LX_SYS_add_key 286 +#define LX_SYS_request_key 287 +#define LX_SYS_keyctl 288 +#define LX_SYS_ioprio_set 289 +#define LX_SYS_ioprio_get 290 +#define LX_SYS_inotify_init 291 +#define LX_SYS_inotify_add_watch 292 +#define LX_SYS_inotify_rm_watch 293 +#define LX_SYS_migrate_pages 294 +#define LX_SYS_openat 295 +#define LX_SYS_mkdirat 296 +#define LX_SYS_mknodat 297 +#define LX_SYS_fchownat 298 +#define LX_SYS_futimesat 299 +#define LX_SYS_fstatat64 300 +#define LX_SYS_unlinkat 301 +#define LX_SYS_renameat 302 +#define LX_SYS_linkat 303 +#define LX_SYS_symlinkat 304 +#define LX_SYS_readlinkat 305 +#define LX_SYS_fchmodat 306 +#define LX_SYS_faccessat 307 +#define LX_SYS_pselect6 308 +#define LX_SYS_ppoll 309 +#define 
LX_SYS_unshare 310 +#define LX_SYS_set_robust_list 311 +#define LX_SYS_get_robust_list 312 +#define LX_SYS_splice 313 +#define LX_SYS_sync_file_range 314 +#define LX_SYS_tee 315 +#define LX_SYS_vmsplice 316 +#define LX_SYS_move_pages 317 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SYSCALL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h new file mode 100644 index 0000000000..08c824d5b4 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h @@ -0,0 +1,211 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_SYSV_IPC_H +#define _LX_SYSV_IPC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * msg-related definitions. 
+ */ +#define LX_IPC_CREAT 00001000 +#define LX_IPC_EXCL 00002000 +#define LX_IPC_NOWAIT 00004000 + +#define LX_IPC_RMID 0 +#define LX_IPC_SET 1 +#define LX_IPC_STAT 2 +#define LX_IPC_INFO 3 + +#define LX_IPC_64 0x0100 + +#define LX_SEMOP 1 +#define LX_SEMGET 2 +#define LX_SEMCTL 3 +#define LX_MSGSND 11 +#define LX_MSGRCV 12 +#define LX_MSGGET 13 +#define LX_MSGCTL 14 +#define LX_SHMAT 21 +#define LX_SHMDT 22 +#define LX_SHMGET 23 +#define LX_SHMCTL 24 + +#define LX_MSG_STAT 11 +#define LX_MSG_INFO 12 + +#define LX_MSG_NOERROR 010000 + +/* + * Linux hard codes the maximum msgbuf length to be 8192 bytes. Really. + */ +#define LX_MSGMAX 8192 + +struct lx_ipc_perm { + key_t key; + uid_t uid; + uid_t gid; + uid_t cuid; + uid_t cgid; + ushort_t mode; + ushort_t _pad1; + ushort_t seq; + ushort_t _pad2; + ulong_t _unused1; + ulong_t _unused2; +}; + +struct lx_msqid_ds { + struct lx_ipc_perm msg_perm; + time_t msg_stime; + ulong_t _unused1; + time_t msg_rtime; + ulong_t _unused2; + time_t msg_ctime; + ulong_t _unused3; + ulong_t msg_cbytes; + ulong_t msg_qnum; + ulong_t msg_qbytes; + pid_t msg_lspid; + pid_t msg_lrpid; + ulong_t _unused4; + ulong_t _unused5; +}; + +struct lx_msginfo { + int msgpool; + int msgmap; + int msgmax; + int msgmnb; + int msgmni; + int msgssz; + int msgtql; + ushort_t msgseg; +}; + +/* + * semaphore-related definitions. 
+ */ +#define LX_GETPID 11 +#define LX_GETVAL 12 +#define LX_GETALL 13 +#define LX_GETNCNT 14 +#define LX_GETZCNT 15 +#define LX_SETVAL 16 +#define LX_SETALL 17 +#define LX_SEM_STAT 18 +#define LX_SEM_INFO 19 +#define LX_SEM_UNDO 0x1000 +#define LX_SEMVMX 32767 + +struct lx_semid_ds { + struct lx_ipc_perm sem_perm; + time_t sem_otime; + ulong_t _unused1; + time_t sem_ctime; + ulong_t _unused2; + ulong_t sem_nsems; + ulong_t _unused3; + ulong_t _unused4; +}; + +struct lx_seminfo { + int semmap; + int semmni; + int semmns; + int semmnu; + int semmsl; + int semopm; + int semume; + int semusz; + int semvmx; + int semaem; +}; + +union lx_semun { + int val; + struct lx_semid_ds *semds; + ushort_t *sems; + struct lx_seminfo *info; + uintptr_t dummy; +}; + +/* + * shm-related definitions + */ +#define LX_SHM_LOCKED 02000 +#define LX_SHM_RDONLY 010000 +#define LX_SHM_RND 020000 +#define LX_SHM_REMAP 040000 + +#define LX_SHM_LOCK 11 +#define LX_SHM_UNLOCK 12 +#define LX_SHM_STAT 13 +#define LX_SHM_INFO 14 + +struct lx_shmid_ds { + struct lx_ipc_perm shm_perm; + size_t shm_segsz; + time_t shm_atime; + ulong_t _unused1; + time_t shm_dtime; + ulong_t _unused2; + time_t shm_ctime; + ulong_t _unused3; + pid_t shm_cpid; + pid_t shm_lpid; + ushort_t shm_nattch; + ulong_t _unused4; + ulong_t _unused5; +}; + +struct lx_shm_info { + int used_ids; + ulong_t shm_tot; + ulong_t shm_rss; + ulong_t shm_swp; + ulong_t swap_attempts; + ulong_t swap_successes; +}; + +struct lx_shminfo { + int shmmax; + int shmmin; + int shmmni; + int shmseg; + int shmall; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SYSV_IPC_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h new file mode 100644 index 0000000000..9c1952bfe7 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution 
License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_THREAD_H +#define _SYS_LX_THREAD_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <thread.h> + +typedef struct lx_tsd { + uintptr_t lxtsd_gs; + int lxtsd_exit; + int lxtsd_exit_status; + ucontext_t lxtsd_exit_context; +} lx_tsd_t; + +extern thread_key_t lx_tsd_key; /* thread-specific Linux %gs value */ + +extern void lx_swap_gs(long, long *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_THREAD_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h new file mode 100644 index 0000000000..a56fe8eeb3 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_THUNK_SERVER_H +#define _LX_THUNK_SERVER_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <netdb.h> +#include <procfs.h> + +/* + * Binary that should be exec'd to start up the thunking server + */ +#define LXT_SERVER_BINARY "/native/usr/lib/brand/lx/lx_thunk" + +/* + * When the thunking server is started it will need to communicate + * to the client via two fifos. These fifos will be passed to the + * thunking server via the following file descriptors: + */ +#define LXT_SERVER_FIFO_RD_FD 3 +#define LXT_SERVER_FIFO_WR_FD 4 + +/* + * Operations supported by the thunking server + */ +#define LXT_SERVER_OP_MIN 0 +#define LXT_SERVER_OP_PING 0 +#define LXT_SERVER_OP_NAME2HOST 1 +#define LXT_SERVER_OP_ADDR2HOST 2 +#define LXT_SERVER_OP_NAME2SERV 3 +#define LXT_SERVER_OP_PORT2SERV 4 +#define LXT_SERVER_OP_OPENLOG 5 +#define LXT_SERVER_OP_SYSLOG 6 +#define LXT_SERVER_OP_CLOSELOG 7 +#define LXT_SERVER_OP_MAX 8 + +/* + * Macros used to translate pointers into offsets when they are + * being transmitted between the client and server processes. + * + * NOTE: We're going to add 1 to every offset value. 
The reason + * for this is that some of the pointers we're converting to offsets are + * stored in NULL terminated arrays, and if one of the members of + * one of these arrays happened to be at the beginning of the storage + * buffer it would have an offset of 0 and when the client tries to + * translate the offsets back into pointers it wouldn't be able + * to differentiate between the 0 offset and the end of the array. + */ +#define LXT_PTR_TO_OFFSET(ptr, base) \ + ((void *)((uintptr_t)(ptr) - (uintptr_t)(base) + 1)) +#define LXT_OFFSET_TO_PTR(offset, base) \ + ((void *)((uintptr_t)(offset) + (uintptr_t)(base) - 1)) + +/* + * Structures passed to the thunking server via door calls + */ +typedef struct lxt_server_arg { + int lxt_sa_op; + int lxt_sa_success; + int lxt_sa_errno; + char lxt_sa_data[1]; +} lxt_server_arg_t; + +typedef struct lxt_gethost_arg { + struct hostent lxt_gh_result; + + int lxt_gh_h_errno; + + int lxt_gh_type; + int lxt_gh_token_len; + int lxt_gh_buf_len; + + int lxt_gh_storage_len; + char lxt_gh_storage[1]; +} lxt_gethost_arg_t; + +typedef struct lxt_getserv_arg { + struct servent lxt_gs_result; + + int lxt_gs_token_len; + int lxt_gs_buf_len; + char lxt_gs_proto[5]; + + int lxt_gs_storage_len; + char lxt_gs_storage[1]; +} lxt_getserv_arg_t; + +typedef struct lxt_openlog_arg { + int lxt_ol_logopt; + int lxt_ol_facility; + char lxt_ol_ident[128]; +} lxt_openlog_arg_t; + +typedef struct lxt_syslog_arg { + int lxt_sl_priority; + pid_t lxt_sl_pid; + char lxt_sl_progname[PRFNSZ]; + char lxt_sl_message[1024]; +} lxt_syslog_arg_t; + + +/* + * Functions called by the brand library to manage startup of the + * thunk server process. 
+ */ +void lxt_server_init(int, char *[]); +int lxt_server_pid(int *pid); +void lxt_server_exec_check(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_THUNK_SERVER_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h new file mode 100644 index 0000000000..6152634459 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_TYPES_H +#define _SYS_LX_TYPES_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SHRT_MIN (-32768) /* min value of a "short int" */ +#define SHRT_MAX 32767 /* max value of a "short int" */ +#define USHRT_MAX 65535 /* max of "unsigned short int" */ +#define INT_MIN (-2147483647-1) /* min value of an "int" */ +#define INT_MAX 2147483647 /* max value of an "int" */ +#define UINT_MAX 4294967295U /* max value of an "unsigned int" */ +#define LONG_MIN (-2147483647L-1L) + /* min value of a "long int" */ +#define LONG_MAX 2147483647L /* max value of a "long int" */ +#define ULONG_MAX 4294967295UL /* max of "unsigned long int" */ + +#define LX_SYS_UTS_LN 65 + +struct lx_utsname { + char sysname[LX_SYS_UTS_LN]; + char nodename[LX_SYS_UTS_LN]; + char release[LX_SYS_UTS_LN]; + char version[LX_SYS_UTS_LN]; + char machine[LX_SYS_UTS_LN]; + char domainname[LX_SYS_UTS_LN]; +}; + +typedef uint64_t lx_dev_t; +typedef uint16_t lx_dev16_t; +typedef uint32_t lx_ino_t; +typedef uint64_t lx_ino64_t; +typedef uint32_t lx_uid_t; +typedef uint16_t lx_uid16_t; +typedef uint32_t lx_gid_t; +typedef uint16_t lx_gid16_t; +typedef uint32_t lx_off_t; +typedef uint64_t lx_off64_t; +typedef uint32_t lx_blksize_t; +typedef uint32_t lx_blkcnt_t; +typedef uint64_t lx_blkcnt64_t; +typedef ulong_t lx_mode_t; +typedef uint16_t lx_mode16_t; + +#define LX_UID16_TO_UID32(uid16) \ + (((uid16) == (lx_uid16_t)-1) ? ((lx_uid_t)-1) : (lx_uid_t)(uid16)) + +#define LX_GID16_TO_GID32(gid16) \ + (((gid16) == (lx_gid16_t)-1) ? ((lx_gid_t)-1) : (lx_gid_t)(gid16)) + +/* Overflow values default to NFS nobody. */ + +#define UID16_OVERFLOW ((lx_uid16_t)65534) +#define GID16_OVERFLOW ((lx_gid16_t)65534) + +/* + * All IDs with high word non-zero are converted to default overflow values to + * avoid inadvertent truncation to zero (root) (!). + */ +#define LX_UID32_TO_UID16(uid32) \ + ((((uid32) & 0xffff0000) == 0) ? 
((lx_uid16_t)(uid32)) : \ + (((uid32) == ((lx_uid_t)-1)) ? ((lx_uid16_t)-1) : UID16_OVERFLOW)) + +#define LX_GID32_TO_GID16(gid32) \ + ((((gid32) & 0xffff0000) == 0) ? ((lx_gid16_t)(gid32)) : \ + (((gid32) == ((lx_gid_t)-1)) ? ((lx_gid16_t)-1) : GID16_OVERFLOW)) + +struct lx_timespec { + time_t ts_sec; + long ts_nsec; +}; + +#define LX_32TO64(lo, hi) \ + ((uint64_t)((uint64_t)(lo) | ((uint64_t)(hi) << 32))) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_TYPES_H */ diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile new file mode 100644 index 0000000000..f69dcec561 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +LINT_SUBDIRS = $(MACH) +$(BUILD64)LINT_SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber: $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com new file mode 100644 index 0000000000..a0fd9da3fe --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com @@ -0,0 +1,68 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = lx_nametoaddr.a +VERS = .1 + +COBJS = lx_nametoaddr.o +OBJECTS = $(COBJS) + +include ../../../../Makefile.lib +include ../../Makefile.lx + +MAPFILES = ../common/mapfile-vers +MAPOPTS = $(MAPFILES:%=-M%) + +CSRCS = $(COBJS:%o=../common/%c) +SRCS = $(CSRCS) + +SRCDIR = ../common +LX_THUNK = ../../lx_thunk + +ASFLAGS += -P -D_ASM +LDLIBS += -lc -lnsl +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(LX_THUNK) +DYNFLAGS += $(MAPOPTS) '-R$$ORIGIN' + +LIBS = $(DYNLIB) + +LINTFLAGS += $(LX_THUNK)/$(MACH)/llib-llx_thunk.ln +LINTFLAGS64 += $(LX_THUNK)/$(MACH64)/llib-llx_thunk.ln + +CLEANFILES = $(DYNLIB) +ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx +ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64) + +.KEEP_STATE: + +all: $(DYNLIB) + +lint: lintcheck + +include ../../../../Makefile.targ diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile new file mode 100644 index 0000000000..a526d34834 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +DYNFLAGS += $(LX_THUNK)/$(MACH64)/lx_thunk.so.1 +CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB) + +install: all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c new file mode 100644 index 0000000000..4b6a0532a9 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c @@ -0,0 +1,479 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * BrandZ lx name services translation library. + * + * This library is specified as the default name services translation + * library in a custom netconfig(4) file that is only used when running + * native solaris processes in a Linux branded zone. 
+ * + * What this means is that when a native solaris process runs in a + * Linux branded zone and issues a name service request to libnsl.so + * (either directly or indirectly via any libraries the program may + * be linked against) libnsl.so will dlopen(3c) this library and call + * into it to service these requests. + * + * This library is in turn linked against lx_thunk.so and will attempt + * to call interfaces in lx_thunk.so to resolve these requests. The + * functions that are called in lx_thunk.so are designed to have the + * same signature and behavior as the existing solaris name service + * interfaces. The name services interfaces we call are: + * + * Native Interface -> lx_thunk.so Interface + * ---------------- -> --------------------- + * gethostbyname_r -> lxt_gethostbyname_r + * gethostbyaddr_r -> lxt_gethostbyaddr_r + * getservbyname_r -> lxt_getservbyname_r + * getservbyport_r -> lxt_getservbyport_r + * + * This library also uses one additional interface from lx_thunk.so: + * lxt_debug + * Informational debugging messages are sent to lx_thunk.so via this + * interface and that library can decide if it wants to drop the + * messages or output them somewhere. + */ + +#include <assert.h> +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netdir.h> +#include <nss_dbdefs.h> +#include <rpc/clnt.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/varargs.h> +#include <sys/wait.h> +#include <thread.h> +#include <tiuser.h> +#include <unistd.h> +#include <sys/lx_thunk.h> + + +/* + * Private nametoaddr library interfaces. 
+ */ +static int +netconfig_is_ipv4(struct netconfig *config) +{ + int i; + /* + * If we look at the rpc services registered on a Linux system + * (this can be done via rpcinfo(1M)) for both on the loopback + * interface and on any remote interfaces we only see services + * registered for tcp and udp. So here we'll limit our support + * to these transports. + */ + char *ipv4_netids[] = { + "tcp", + "udp", + NULL + }; + + for (i = 0; ipv4_netids[i] != NULL; i++) { + if (strcmp(ipv4_netids[i], config->nc_netid) == 0) + return (1); + } + return (0); +} + +/* + * Public nametoaddr library interfaces. + * + * These are the functional entry points that libnsl will lookup (via + * the symbol names) when it loads this nametoaddr translation library. + */ + +/* + * _netdir_getbyname() returns all of the addresses for + * a specified host and service. + */ +struct nd_addrlist * +_netdir_getbyname(struct netconfig *netconfigp, + struct nd_hostserv *nd_hostservp) +{ + struct nd_addrlist *rp = NULL; + struct netbuf *nbp = NULL; + struct sockaddr_in *sap = NULL; + struct hostent n2h_result; + struct servent n2s_result; + char *n2h_buf = NULL, *n2s_buf = NULL; + int h_errno, i, host_self = 0, r_count; + int n2h_count = 0, n2s_count = 0; + + lxt_debug("_netdir_getbyname: request recieved\n"); + + /* Make sure this is an ipv4 request. */ + if (!netconfig_is_ipv4(netconfigp)) { + _nderror = ND_BADARG; + goto fail; + } + + /* Allocate memory for the queries. */ + if (((n2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) || + ((n2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL)) + goto malloc_fail; + + /* Check if the host name specified is HOST_SELF. */ + if (strcmp(nd_hostservp->h_host, HOST_SELF) == 0) + host_self = 1; + + /* + * If the hostname specified is HOST_SELF, the we're just + * just doing a service lookup so don't bother with trying + * to lookup the host name. + */ + if (!host_self) { + /* Resolve the hostname. 
*/ + lxt_debug("_netdir_getbyname: " + "resolving host name: %s\n", nd_hostservp->h_host); + if (lxt_gethostbyname_r(nd_hostservp->h_host, &n2h_result, + n2h_buf, NSS_BUFLEN_HOSTS, &h_errno) == NULL) { + if (errno == ERANGE) { + _nderror = ND_SYSTEM; + } else if (h_errno == HOST_NOT_FOUND) { + _nderror = ND_NOHOST; + } else if (h_errno == TRY_AGAIN) { + _nderror = ND_TRY_AGAIN; + } else if (h_errno == NO_RECOVERY) { + _nderror = ND_NO_RECOVERY; + } else if (h_errno == NO_DATA) { + _nderror = ND_NO_DATA; + } else { + _nderror = ND_SYSTEM; + } + goto fail; + } + while (n2h_result.h_addr_list[n2h_count++] != NULL); + n2h_count--; + } + + if (nd_hostservp->h_serv != NULL) { + /* Resolve the service name */ + lxt_debug("_netdir_getbyname: " + "resolving service name: %s\n", nd_hostservp->h_serv); + if (lxt_getservbyname_r(nd_hostservp->h_serv, + netconfigp->nc_proto, &n2s_result, + n2s_buf, NSS_BUFLEN_SERVICES) == NULL) { + _nderror = ND_SYSTEM; + goto fail; + } + n2s_count = 1; + } + + /* Make sure we got some results. */ + if ((n2h_count + n2s_count) == 0) { + lxt_debug("_netdir_getbyname: no results!\n"); + goto exit; + } + r_count = (n2h_count != 0) ? n2h_count : 1; + + /* + * Allocate the return buffers. These buffers will be free'd + * by libnsl`netdir_free(), so we need to allocate them in the + * way that libnsl`netdir_free() expects. + */ + if (((rp = calloc(1, sizeof (struct nd_addrlist))) == NULL) || + ((nbp = calloc(1, sizeof (struct netbuf) * r_count)) == NULL) || + ((sap = calloc(1, sizeof (struct sockaddr_in) * r_count)) == NULL)) + goto malloc_fail; + + /* Initialize the structures we're going to return. */ + rp->n_cnt = r_count; + rp->n_addrs = nbp; + for (i = 0; i < r_count; i++) { + + /* Initialize the netbuf. */ + nbp[i].maxlen = nbp[i].len = sizeof (struct sockaddr_in); + nbp[i].buf = (char *)&sap[i]; + + /* Initialize the sockaddr_in. */ + sap[i].sin_family = AF_INET; + + /* If we looked up any host address copy them out. 
*/ + if (!host_self) + bcopy(n2h_result.h_addr_list[i], &sap[i].sin_addr, + sizeof (sap[i].sin_addr)); + + /* If we looked up any service ports copy them out. */ + if (nd_hostservp->h_serv != NULL) + sap[i].sin_port = n2s_result.s_port; + } + + /* We're finally done. */ + lxt_debug("_netdir_getbyname: success\n"); + return (rp); + +malloc_fail: + _nderror = ND_NOMEM; + +fail: + lxt_debug("_netdir_getbyname: failed!\n"); + +exit: + if (n2h_buf == NULL) + free(n2h_buf); + if (n2s_buf == NULL) + free(n2s_buf); + if (rp == NULL) + free(rp); + if (nbp == NULL) + free(nbp); + if (sap == NULL) + free(sap); + return (NULL); +} + +/* + * _netdir_getbyaddr() takes an address (hopefully obtained from + * someone doing a _netdir_getbyname()) and returns all hosts with + * that address. + */ +struct nd_hostservlist * +/*ARGSUSED*/ +_netdir_getbyaddr(struct netconfig *netconfigp, struct netbuf *nbp) +{ + struct nd_hostservlist *rp = NULL; + struct nd_hostserv *hsp = NULL; + struct sockaddr_in *sap; + struct servent p2s_result; + struct hostent a2h_result; + char *a2h_buf = NULL, *p2s_buf = NULL; + int h_errno, i; + int r_count = 0; + int a2h_count = 0, p2s_count = 0; + + lxt_debug("_netdir_getbyaddr: request recieved\n"); + + /* Make sure this is an ipv4 request. */ + if (!netconfig_is_ipv4(netconfigp)) { + _nderror = ND_BADARG; + goto fail; + } + + /* + * Make sure the netbuf contains one struct sockaddr_in of + * type AF_INET. + */ + if ((nbp->len != sizeof (struct sockaddr_in)) || + (nbp->len < nbp->maxlen)) { + _nderror = ND_BADARG; + goto fail; + } + /*LINTED*/ + sap = (struct sockaddr_in *)nbp->buf; + if (sap->sin_family != AF_INET) { + _nderror = ND_BADARG; + goto fail; + } + + /* Allocate memory for the queries. 
*/ + if (((a2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) || + ((p2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL)) + goto malloc_fail; + + if (sap->sin_addr.s_addr != INADDR_ANY) { + lxt_debug("_netdir_getbyaddr: " + "resolving host address: 0x%x\n", sap->sin_addr.s_addr); + if (lxt_gethostbyaddr_r((char *)&sap->sin_addr.s_addr, + sizeof (sap->sin_addr.s_addr), AF_INET, + &a2h_result, a2h_buf, NSS_BUFLEN_HOSTS, + &h_errno) == NULL) { + if (errno == ERANGE) { + _nderror = ND_SYSTEM; + } else if (h_errno == HOST_NOT_FOUND) { + _nderror = ND_NOHOST; + } else if (h_errno == TRY_AGAIN) { + _nderror = ND_TRY_AGAIN; + } else if (h_errno == NO_RECOVERY) { + _nderror = ND_NO_RECOVERY; + } else if (h_errno == NO_DATA) { + _nderror = ND_NO_DATA; + } else { + _nderror = ND_SYSTEM; + } + goto fail; + } + while (a2h_result.h_aliases[a2h_count++] != NULL); + /* + * We need to count a2h_result.h_name as a valid name for + * for the address we just looked up. Of course a2h_count + * is actually over estimated by one, so instead of + * decrementing it here we'll just leave it as it to + * account for a2h_result.h_name. + */ + } + + if (sap->sin_port != 0) { + lxt_debug("_netdir_getbyaddr: " + "resolving service port: 0x%x\n", sap->sin_port); + if (lxt_getservbyport_r(sap->sin_port, + netconfigp->nc_proto, &p2s_result, + p2s_buf, NSS_BUFLEN_SERVICES) == NULL) { + _nderror = ND_SYSTEM; + goto fail; + } + p2s_count = 1; + } + + /* Make sure we got some results. */ + if ((a2h_count + p2s_count) == 0) { + lxt_debug("_netdir_getbyaddr: no results!\n"); + goto exit; + } + r_count = (a2h_count != 0) ? a2h_count : 1; + + /* + * Allocate the return buffers. These buffers will be free'd + * by libnsl`netdir_free(), so we need to allocate them in the + * way that libnsl`netdir_free() expects. 
+ */ + if (((rp = calloc(1, sizeof (struct nd_hostservlist))) == NULL) || + ((hsp = calloc(1, sizeof (struct nd_hostserv) * r_count)) == NULL)) + goto malloc_fail; + + lxt_debug("_netdir_getbyaddr: hahaha0 - %d\n", r_count); + rp->h_cnt = r_count; + rp->h_hostservs = hsp; + for (i = 0; i < r_count; i++) { + /* If we looked up any host names copy them out. */ + lxt_debug("_netdir_getbyaddr: hahaha1 - %d\n", r_count); + if ((a2h_count > 0) && (i == 0) && + ((hsp[i].h_host = strdup(a2h_result.h_name)) == NULL)) + goto malloc_fail; + + if ((a2h_count > 0) && (i > 0) && + ((hsp[i].h_host = + strdup(a2h_result.h_aliases[i - 1])) == NULL)) + goto malloc_fail; + + lxt_debug("_netdir_getbyaddr: hahaha2 - %d\n", r_count); + /* If we looked up any service names copy them out. */ + if ((p2s_count > 0) && + ((hsp[i].h_serv = strdup(p2s_result.s_name)) == NULL)) + goto malloc_fail; + lxt_debug("_netdir_getbyaddr: hahaha3 - %d\n", r_count); + } + + /* We're finally done. */ + lxt_debug("_netdir_getbyaddr: success\n"); + return (rp); + +malloc_fail: + _nderror = ND_NOMEM; + +fail: + lxt_debug("_netdir_getbyaddr: failed!\n"); + +exit: + if (a2h_buf == NULL) + free(a2h_buf); + if (p2s_buf == NULL) + free(p2s_buf); + if (rp == NULL) + free(rp); + if (hsp != NULL) { + for (i = 0; i < r_count; i++) { + if (hsp[i].h_host != NULL) + free(hsp[i].h_host); + if (hsp[i].h_serv != NULL) + free(hsp[i].h_serv); + } + free(hsp); + } + return (NULL); +} + +char * +/* ARGSUSED */ +_taddr2uaddr(struct netconfig *netconfigp, struct netbuf *nbp) +{ + extern char *inet_ntoa_r(); + + struct sockaddr_in *sa; + char tmp[RPC_INET6_MAXUADDRSIZE]; + unsigned short myport; + + if (netconfigp == NULL || nbp == NULL || nbp->buf == NULL) { + _nderror = ND_BADARG; + return (NULL); + } + + if (strcmp(netconfigp->nc_protofmly, NC_INET) != 0) { + /* we only support inet address translation */ + assert(0); + _nderror = ND_SYSTEM; + return (NULL); + } + + /* LINTED pointer cast */ + sa = (struct sockaddr_in 
*)(nbp->buf); + myport = ntohs(sa->sin_port); + (void) inet_ntoa_r(sa->sin_addr, tmp); + + (void) sprintf(tmp + strlen(tmp), ".%d.%d", + myport >> 8, myport & 255); + return (strdup(tmp)); /* Doesn't return static data ! */ +} + +/* + * _uaddr2taddr() translates a universal address back into a + * netaddr structure. Since the universal address is a string, + * put that into the TLI buffer (making sure to change all \ddd + * characters back and strip off the trailing \0 character). + */ +struct netbuf * +/* ARGSUSED */ +_uaddr2taddr(struct netconfig *netconfigp, char *uaddr) +{ + assert(0); + _nderror = ND_SYSTEM; + return (NULL); +} + +/* + * _netdir_options() is a "catch-all" routine that does + * transport specific things. The only thing that these + * routines have to worry about is ND_MERGEADDR. + */ +int +/* ARGSUSED */ +_netdir_options(struct netconfig *netconfigp, int option, int fd, void *par) +{ + assert(0); + _nderror = ND_SYSTEM; + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers new file mode 100644 index 0000000000..3ed165195b --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers @@ -0,0 +1,51 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +SUNWprivate_1.1 { + global: + _netdir_getbyname; + _netdir_getbyaddr; + _taddr2uaddr; + _uaddr2taddr; + _netdir_options; + + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile new file mode 100644 index 0000000000..67545e46cd --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +DYNFLAGS += $(LX_THUNK)/$(MACH)/lx_thunk.so.1 +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB) + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_support/Makefile b/usr/src/lib/brand/lx/lx_support/Makefile new file mode 100644 index 0000000000..e7c958e13a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_support/Makefile @@ -0,0 +1,54 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +PROG = lx_support +PROGS = $(PROG) +OBJS = lx_support + +all: $(PROG) + +include ../Makefile.lx +include $(SRC)/cmd/Makefile.cmd + +# override the install directory +ROOTBIN = $(ROOTBRANDDIR) +CLOBBERFILES = $(OBJS) $(ROOTPROGS) + +UTSBASE = $(SRC)/uts + +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx +LDLIBS += -lzonecfg + +.KEEP_STATE: + +install: all $(ROOTPROGS) + +clean: + $(RM) $(PROG) $(OBJS) + +lint: lint_PROG + +include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c new file mode 100644 index 0000000000..70de13dd71 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_support/lx_support.c @@ -0,0 +1,578 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * lx_support is a small cli utility used to perform some brand-specific + * tasks when booting, halting, or verifying a zone. This utility is not + * intended to be called by users - it is intended to be invoked by the + * zones utilities. 
+ */ + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <stropts.h> +#include <sys/ioccom.h> +#include <sys/stat.h> +#include <sys/systeminfo.h> +#include <sys/types.h> +#include <sys/varargs.h> +#include <unistd.h> +#include <libintl.h> +#include <locale.h> + +#include <libzonecfg.h> +#include <sys/lx_audio.h> +#include <sys/lx_brand.h> + +static void lxs_err(char *msg, ...) __NORETURN; +static void usage(void) __NORETURN; + +#define CP_CMD "/usr/bin/cp" +#define MOUNT_CMD "/sbin/mount" + +#define LXA_AUDIO_DEV "/dev/brand/lx/audio_devctl" +#define INTSTRLEN 32 +#define KVSTRLEN 10 + +static char *bname = NULL; +static char *zonename = NULL; +static char *zoneroot = NULL; + +#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ +#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ +#endif + +static void +lxs_err(char *msg, ...) +{ + char buf[1024]; + va_list ap; + + va_start(ap, msg); + /*LINTED*/ + (void) vsnprintf(buf, sizeof (buf), msg, ap); + va_end(ap); + + (void) printf("%s error: %s\n", bname, buf); + + exit(1); + /*NOTREACHED*/ +} + +/* + * The Linux init(1M) command requires communication over the /dev/initctl + * FIFO. Since any attempt to create a file in /dev will fail, we must + * create it here. 
+ */ +static void +lxs_make_initctl() +{ + char cmdbuf[ARG_MAX]; + char path[MAXPATHLEN]; + char special[MAXPATHLEN]; + struct stat buf; + int err; + + if (snprintf(special, sizeof (special), "%s/dev/initctl", zoneroot) >= + sizeof (special)) + lxs_err("%s: %s", gettext("Failed to create /dev/initctl"), + gettext("zoneroot is too long")); + + if (snprintf(path, sizeof (path), "%s/root/dev/initctl", zoneroot) >= + sizeof (path)) + lxs_err("%s: %s", gettext("Failed to create /dev/initctl"), + gettext("zoneroot is too long")); + + /* create the actual fifo as <zoneroot>/dev/initctl */ + if (stat(special, &buf) != 0) { + err = errno; + if (err != ENOENT) + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + strerror(err)); + if (mkfifo(special, 0644) < 0) { + err = errno; + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + strerror(err)); + } + } else { + if ((buf.st_mode & S_IFIFO) == 0) + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + gettext("It already exists, and is not a FIFO.")); + } + + /* + * now lofs mount the <zoneroot>/dev/initctl fifo onto + * <zoneroot>/root/dev/initctl + */ + if (snprintf(cmdbuf, sizeof (cmdbuf), "%s -F lofs %s %s", MOUNT_CMD, + special, path) >= sizeof (cmdbuf)) + lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"), + gettext("zoneroot is too long")); + + if (system(cmdbuf) < 0) { + err = errno; + lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"), + strerror(err)); + } +} + +/* + * fsck gets really confused when run inside a zone. 
Removing this file + * prevents it from running + */ +static void +lxs_remove_autofsck() +{ + char path[MAXPATHLEN]; + int err; + + if (snprintf(path, MAXPATHLEN, "%s/root/.autofsck", zoneroot) >= + MAXPATHLEN) + lxs_err("%s: %s", gettext("Failed to remove /.autofsck"), + gettext("zoneroot is too long")); + + if (unlink(path) < 0) { + err = errno; + if (err != ENOENT) + lxs_err("%s: %s", + gettext("Failed to remove /.autofsck"), + strerror(err)); + } +} + +/* + * Extract any lx-supported attributes from the zone configuration file. + */ +static void +lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio, + char **idev, char **odev, char **kvers) +{ + struct zone_attrtab attrtab; + int err; + + /* initialize the attribute iterator */ + if (zonecfg_setattrent(zdh) != Z_OK) { + zonecfg_fini_handle(zdh); + lxs_err(gettext("error accessing zone configuration")); + } + + *idev = (char *)malloc(INTSTRLEN); + *odev = (char *)malloc(INTSTRLEN); + *kvers = (char *)malloc(KVSTRLEN); + if (*idev == NULL || *odev == NULL || *kvers == NULL) + lxs_err(gettext("out of memory")); + + *audio = B_FALSE; + *restart = B_FALSE; + bzero(*idev, INTSTRLEN); + bzero(*odev, INTSTRLEN); + bzero(*kvers, KVSTRLEN); + while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) { + if ((strcmp(attrtab.zone_attr_name, "init-restart") == 0) && + (zonecfg_get_attr_boolean(&attrtab, restart) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio") == 0) && + (zonecfg_get_attr_boolean(&attrtab, audio) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio-inputdev") == 0) && + (zonecfg_get_attr_string(&attrtab, *idev, + INTSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio-outputdev") == 0) && + 
(zonecfg_get_attr_string(&attrtab, *odev, + INTSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "kernel-version") == 0) && + (zonecfg_get_attr_string(&attrtab, *kvers, + KVSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + } + + if (strlen(*kvers) == 0) { + free(*kvers); + *kvers = NULL; + } + + /* some kind of error while looking up attributes */ + if (err != Z_NO_ENTRY) + lxs_err(gettext("error accessing zone configuration")); +} + +static int +lxs_iodev_ok(char *dev) +{ + int i, j; + + if ((j = strlen(dev)) == 0) + return (1); + if (strcmp(dev, "default") == 0) + return (1); + if (strcmp(dev, "none") == 0) + return (1); + for (i = 0; i < j; i++) { + if (!isdigit(dev[i])) + return (0); + } + return (1); +} + +/* + * The audio configuration settings are read from the zone configuration + * file. Audio configuration is specified via the following attributes + * (settable via zonecfg): + * attr name: audio + * attr type: boolean + * + * attr name: audio-inputdev + * attr type: string + * attr values: "default" | "none" | [0-9]+ + * + * attr name: audio-outputdev + * attr type: string + * attr values: "default" | "none" | [0-9]+ + * + * The user can enable linux brand audio devices (i.e. /dev/dsp and /dev/mixer) + * for a zone by setting the "audio" attribute to true. (The absence of + * this attribute leads to an assumed value of false.) + * + * If the "audio" attribute is set to true and "audio-inputdev" and + * "audio-outputdev" are not set, then when a Linux application accesses + * audio devices these accesses will be mapped to the system default audio + * device, i.e. /dev/audio and /dev/audioctl. + * + * If "audio-inputdev" is set to none, then audio input will be disabled. 
+ * If "audio-inputdev" is set to an integer, then when a Linux application + * attempts to access audio devices these access will be mapped to + * /dev/sound/<audio-inputdev attribute value>. The same behavior will + * apply to the "audio-outputdev" attribute for linux audio output + * device accesses. + * + * If "audio-inputdev" or "audio-outputdev" exist but the audio attribute + * is missing (or set to false) audio will not be enabled for the zone. + */ +static void +lxs_init_audio(char *idev, char *odev) +{ + int err, fd; + lxa_zone_reg_t lxa_zr; + + /* sanity check the input and output device properties */ + if (!lxs_iodev_ok(idev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-inputdev"); + + if (!lxs_iodev_ok(odev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-outputdev"); + + /* initialize the zone name in the ioctl request */ + bzero(&lxa_zr, sizeof (lxa_zr)); + (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename, + sizeof (lxa_zr.lxa_zr_zone_name)); + + /* initialize the input device property in the ioctl request */ + (void) strlcpy(lxa_zr.lxa_zr_inputdev, idev, + sizeof (lxa_zr.lxa_zr_inputdev)); + if (lxa_zr.lxa_zr_inputdev[0] == '\0') { + /* + * if no input device was specified, set the input device + * to "default" + */ + (void) strlcpy(lxa_zr.lxa_zr_inputdev, "default", + sizeof (lxa_zr.lxa_zr_inputdev)); + } + + /* initialize the output device property in the ioctl request */ + (void) strlcpy(lxa_zr.lxa_zr_outputdev, odev, + sizeof (lxa_zr.lxa_zr_outputdev)); + if (lxa_zr.lxa_zr_outputdev[0] == '\0') { + /* + * if no output device was specified, set the output device + * to "default" + */ + (void) strlcpy(lxa_zr.lxa_zr_outputdev, "default", + sizeof (lxa_zr.lxa_zr_outputdev)); + } + + /* open the audio device control node */ + if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0) + lxs_err(gettext("error accessing lx_audio device")); + + /* enable audio for this zone */ + err = ioctl(fd, LXA_IOC_ZONE_REG, &lxa_zr); + 
(void) close(fd); + if (err != 0) + lxs_err(gettext("error configuring lx_audio device")); +} + +static int +lxs_boot() +{ + zoneid_t zoneid; + zone_dochandle_t zdh; + boolean_t audio, restart; + char *idev, *odev, *kvers; + int kversnum; + + lxs_make_initctl(); + lxs_remove_autofsck(); + + if ((zdh = zonecfg_init_handle()) == NULL) + lxs_err(gettext("unable to initialize zone handle")); + + if (zonecfg_get_handle((char *)zonename, zdh) != Z_OK) { + zonecfg_fini_handle(zdh); + lxs_err(gettext("unable to load zone configuration")); + } + + /* Extract any relevant attributes from the config file. */ + lxs_getattrs(zdh, &restart, &audio, &idev, &odev, &kvers); + zonecfg_fini_handle(zdh); + + /* Configure the zone's audio support (if any). */ + if (audio == B_TRUE) + lxs_init_audio(idev, odev); + + /* + * Let the kernel know whether or not this zone's init process + * should be automatically restarted on its death. + */ + if ((zoneid = getzoneidbyname(zonename)) < 0) + lxs_err(gettext("unable to get zoneid")); + if (zone_setattr(zoneid, LX_ATTR_RESTART_INIT, &restart, + sizeof (boolean_t)) == -1) + lxs_err(gettext("error setting zone's restart_init property")); + + if ((kvers != NULL) && (strcmp(kvers, "2.6") == 0)) + kversnum = LX_KERN_2_6; + else + kversnum = LX_KERN_2_4; + + if (zone_setattr(zoneid, LX_KERN_VERSION_NUM, &kversnum, + sizeof (int)) < 0) + lxs_err(gettext("unable to set kernel version")); + + return (0); +} + +static int +lxs_halt() +{ + lxa_zone_reg_t lxa_zr; + int fd, rv; + + /* + * We don't bother to check if audio is configured for this zone + * before issuing a request to unconfigure it. There's no real + * reason to do this, it would require looking up the xml zone and + * brand configuration information (which could have been changed + * since the zone was booted), and it would involve more library + * calls there by increasing chances for failure. 
+ */ + + /* initialize the zone name in the ioctl request */ + bzero(&lxa_zr, sizeof (lxa_zr)); + (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename, + sizeof (lxa_zr.lxa_zr_zone_name)); + + /* open the audio device control node */ + if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0) + lxs_err(gettext("error accessing lx_audio device")); + + /* + * disable audio for this zone + * + * we ignore ENOENT errors here because it's possible that + * audio is not configured for this zone. (either it was + * already unconfigured or someone could have added the + * audio resource to this zone after it was booted.) + */ + rv = ioctl(fd, LXA_IOC_ZONE_UNREG, &lxa_zr); + (void) close(fd); + if ((rv == 0) || (errno == ENOENT)) + return (0); + lxs_err(gettext("error unconfiguring lx_audio device: %s"), + strerror(errno)); + /*NOTREACHED*/ + return (0); +} + +static int +lxs_verify(char *xmlfile) +{ + zone_dochandle_t handle; + struct zone_dstab dstab; + struct zone_devtab devtab; + boolean_t audio, restart; + char *idev, *odev, *kvers; + zone_iptype_t iptype; + char hostidp[HW_HOSTID_LEN]; + + if ((handle = zonecfg_init_handle()) == NULL) + lxs_err(gettext("internal libzonecfg.so.1 error"), 0); + + if (zonecfg_get_xml_handle(xmlfile, handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + /* + * Check to see whether the zone has any ZFS datasets configured. + */ + if (zonecfg_setdsent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (zonecfg_getdsent(handle, &dstab) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support ZFS datasets")); + } + + /* + * Check to see whether the zone has any devices configured. 
+ */ + if (zonecfg_setdevent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (zonecfg_getdevent(handle, &devtab) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support added devices")); + } + + /* + * Check to see whether the zone has ip-type configured as exclusive + */ + if (zonecfg_get_iptype(handle, &iptype) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (iptype == ZS_EXCLUSIVE) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support an 'exclusive' " + "ip-type")); + } + + /* + * Check to see whether the zone has hostid emulation enabled. + */ + if (zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support hostid emulation")); + } + + /* Extract any relevant attributes from the config file. */ + lxs_getattrs(handle, &restart, &audio, &idev, &odev, &kvers); + zonecfg_fini_handle(handle); + + if (audio) { + /* sanity check the input and output device properties */ + if (!lxs_iodev_ok(idev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-inputdev"); + + if (!lxs_iodev_ok(odev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-outputdev"); + } + if (kvers) { + if ((strcmp(kvers, "2.4")) != 0 && (strcmp(kvers, "2.6") != 0)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "kernel-version"); + } + return (0); +} + +static void +usage() +{ + + (void) fprintf(stderr, + gettext("usage:\t%s boot <zoneroot> <zonename>\n"), bname); + (void) fprintf(stderr, + gettext(" \t%s halt <zoneroot> <zonename>\n"), bname); + (void) fprintf(stderr, + gettext(" \t%s verify <xml file>\n\n"), bname); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); + + bname = basename(argv[0]); + + if (argc < 3) + 
usage(); + + if (strcmp(argv[1], "boot") == 0) { + if (argc != 4) + lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"), + bname, argv[1]); + zoneroot = argv[2]; + zonename = argv[3]; + return (lxs_boot()); + } + + if (strcmp(argv[1], "halt") == 0) { + if (argc != 4) + lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"), + bname, argv[1]); + zoneroot = argv[2]; + zonename = argv[3]; + return (lxs_halt()); + } + + if (strcmp(argv[1], "verify") == 0) { + if (argc != 3) + lxs_err(gettext("usage: %s verify <xml file>"), + bname); + return (lxs_verify(argv[2])); + } + + usage(); + /*NOTREACHED*/ +} diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile b/usr/src/lib/brand/lx/lx_thunk/Makefile new file mode 100644 index 0000000000..f69dcec561 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +LINT_SUBDIRS = $(MACH) +$(BUILD64)LINT_SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber: $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile.com b/usr/src/lib/brand/lx/lx_thunk/Makefile.com new file mode 100644 index 0000000000..75629a6d61 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/Makefile.com @@ -0,0 +1,74 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = lx_thunk.a +VERS = .1 + +COBJS = lx_thunk.o +OBJECTS = $(COBJS) + +include ../../../../Makefile.lib +include ../../Makefile.lx + +# +# Since our name doesn't start with "lib", Makefile.lib incorrectly +# calculates LIBNAME. Therefore, we set it here. 
+# +LIBNAME = lx_thunk + +MAPFILES = ../common/mapfile-vers +MAPOPTS = $(MAPFILES:%=-M%) + +CSRCS = $(COBJS:%o=../common/%c) +SRCS = $(CSRCS) + +SRCDIR = ../common +UTSBASE = ../../../../../uts + +ASFLAGS += -P -D_ASM +LDLIBS += -lc +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I ../../lx_brand \ + -I$(UTSBASE)/common/brand/lx + +# lx_thunk.so.1 interposes on a number of libc.so.1 routines. +DYNFLAGS += $(MAPOPTS) $(ZINTERPOSE) + +LIBS = $(DYNLIB) + +CLEANFILES = $(DYNLIB) +ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx +ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64) + +.KEEP_STATE: + +all: $(DYNLIB) + +lint: $(LINTLIB) lintcheck + +include ../../../../Makefile.targ diff --git a/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile new file mode 100644 index 0000000000..dbb283dff1 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB) + +install: all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c new file mode 100644 index 0000000000..130f8fdc86 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c @@ -0,0 +1,1123 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The BrandZ Linux thunking library. + * + * The interfaces defined in this file form the client side of a bridge + * to allow native Solaris processes to access Linux services. Currently + * the Linux services that are made accessible by these interfaces + * are: + * - Linux host <-> address naming services + * - Linux service <-> port naming services + * - Linux syslog + * + * Currently, to use this library it must be LD_PRELOADed into the + * application that needs to access Linux services. 
Once loaded + * Linux services are accessed by the client application in two + * different ways: + * + * - Direct library calls: + * lxt_gethostbyname_r + * lxt_gethostbyaddr_r + * lxt_getservbyname_r + * lxt_getservbyport_r + * lxt_debug + * + * These library functions are used by the BrandZ lx name services + * translation library (lx_nametoaddr.so) to handle libnsl.so name + * service requests. + * + * - Intercepted library calls: + * openlog(3c) + * syslog(3c) + * vsyslog(3c) + * closelog(3c) + * + * Via the LD_PRELOAD mechanism this library interposes itself on + * these interfaces and when the application calls these interfaces + * (either directly or indirectly via any libraries the program may + * be linked against) this library intercepts the request and passes + * it onto a Linux process to handle the request. + * + * Once this library receives a request that needs to be serviced by a + * Linux process, it packs up that request and attempts to send it + * to a doors server. The door server interfaces are defined in + * lx_thunk_server.h. If the doors server is not running or not + * responding, this library will attempt to spawn a new doors server + * by forking and executing the following shell script (which runs as + * a native /bin/sh Linux process): + * /native/usr/lib/brand/lx/lx_thunk + * + * Notes: + * - This library also intercepts the following system calls: + * close(2) - We intercept close(2) to prevent the caller from + * accidentally closing any of the file descriptors we + * need to do our work. + * + * setppriv(2) - We intercept setppriv(2) to prevent a process + * from dropping any of the privileges we'll need to create + * a new lx_thunk server process and to deal with service + * requests. + * + * - To facilitate the running of native Solaris programs and libraries + * when this library is preloaded into an application it will chroot() + * into /native. 
This way the Solaris application and libraries can + * access files via their expected paths and we can avoid having to + * either do path mapping or modifying all libraries to make them + * aware of "/native" so that they can pre-pend it to all their + * filesystem operations. + * + * - This library can only be used with processes that are initially + * run by root in a zone. The reason is that we use the chroot() + * system call and this requires the PRIV_PROC_CHROOT privilege, + * which non-root users don't have. + */ + +#include <alloca.h> +#include <assert.h> +#include <dlfcn.h> +#include <door.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netdir.h> +#include <priv.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <synch.h> +#include <sys/brand.h> +#include <sys/fcntl.h> +#include <sys/lx_thunk_server.h> +#include <sys/lx_thunk.h> +#include <sys/mman.h> +#include <sys/priv_impl.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <thread.h> +#include <unistd.h> +#include <sys/varargs.h> + +#define LXT_DOOR_DIR "/tmp" +#define LXT_DOOR_PREFIX "lxt" +#define LXT_MSG_MAXLEN (128 + MAXPATHLEN) + +#pragma init(init) + +typedef uintptr_t (*fp1_t)(uintptr_t); +typedef uintptr_t (*fp3_t)(uintptr_t, uintptr_t, uintptr_t); + +static char *lxt_debug_path = NULL; /* debug output file path */ +static char lxt_debug_path_buf[MAXPATHLEN]; +static int root_fd; +static int debug_fd = -1; + +void lxt_debug(const char *msg, ...); + +void +init(void) +{ + if (getenv("LX_DEBUG") != NULL) { + + /* check if there's a debug log file specified */ + lxt_debug_path = getenv("LX_DEBUG_FILE"); + if (lxt_debug_path == NULL) { + /* send all debugging output to /dev/tty */ + lxt_debug_path = "/dev/tty"; + } + + (void) strlcpy(lxt_debug_path_buf, lxt_debug_path, + sizeof (lxt_debug_path_buf)); + lxt_debug_path = lxt_debug_path_buf; + + /* + * 
Open the debugging output file. We need to open it + * and hold it open because we're going to call chroot() + * in just a second, so we won't be able to open it later. + */ + if ((debug_fd = open(lxt_debug_path, + O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, + 0666)) != -1) { + (void) fchmod(debug_fd, 0666); + } + } + lxt_debug("lxt_init: executing native process"); + + /* Get a fd that points to the root directory */ + if ((root_fd = open("/", O_RDONLY)) < 0) { + lxt_debug("lxt_init(): " + "failed to open root directory: %s", strerror(errno)); + exit(-1); + } + + /* + * Now, so that we can avoid having to do path mapping, + * just chdir() and chroot() into /native. + */ + if (chdir("/native") != 0) { + lxt_debug("lxt_init(): " + "failed to chdir to /native: %s", strerror(errno)); + exit(-1); + } + if (chroot("/native") != 0) { + lxt_debug("lxt_init(): " + "failed to chroot to /native: %s", strerror(errno)); + exit(-1); + } +} + +/* + * Linux Thunking Interfaces - Client Side + */ +static mutex_t lxt_door_lock = DEFAULTMUTEX; +static int lxt_door_fd = -1; + +static void +lxt_server_exec(int fifo_wr, int fifo_rd) +{ + extern const char **environ; + char *nullist[] = { NULL }; + + lxt_debug("lxt_server_exec: server starting"); + + /* + * First we need to dup our fifos to the file descriptors + * the brand library is expecting them to be at. + */ + + /* Check if the write fifo needs to be moved aside */ + if ((fifo_wr == LXT_SERVER_FIFO_RD_FD) && + ((fifo_wr = dup(fifo_wr)) < 0)) + return; + + /* Check if the read fifo needs to be moved aside */ + if ((fifo_rd == LXT_SERVER_FIFO_WR_FD) && + ((fifo_rd = dup(fifo_rd)) < 0)) + return; + + if ((fifo_wr != LXT_SERVER_FIFO_WR_FD) && + (dup2(fifo_wr, LXT_SERVER_FIFO_WR_FD) < 0)) + return; + if ((fifo_rd != LXT_SERVER_FIFO_RD_FD) && + (dup2(fifo_rd, LXT_SERVER_FIFO_RD_FD) < 0)) + return; + + /* + * We're about to execute a native Linux process. 
+ * Since we've been loaded into a Solaris process with + * LD_PRELOAD and LD_LIBRARY_PATH we should clear these + * variables from the environment before calling exec. + */ + (void) unsetenv("LD_PRELOAD"); + (void) unsetenv("LD_LIBRARY_PATH"); + + /* + * Now we need to exec the thunk server process. This is a + * branded Linux process that will act as a doors server and + * service our requests to perform native Linux operations. + * Since we're currently running as a native Solaris process, + * to start up the server we'll use the brand system call to + * tell the kernel that the target of the exec will be a branded + * process. + */ + lxt_debug("lxt_server_exec: execing as Linux process"); + (void) syscall(SYS_brand, B_EXEC_BRAND, + LXT_SERVER_BINARY, nullist, environ); +} + + +static void * +lxt_door_waitpid(void *arg) +{ + pid_t child_pid = (pid_t)(uintptr_t)arg; + int stat; + + (void) waitpid(child_pid, &stat, 0); + return (NULL); +} + +static char * +lxt_door_mkfifo() +{ + char *path; + + for (;;) { + path = tempnam(LXT_DOOR_DIR, LXT_DOOR_PREFIX); + if (path == NULL) + return (NULL); + if (mkfifo(path, S_IWUSR | S_IRUSR) != 0) { + if (errno != EEXIST) { + free(path); + return (NULL); + } + /* This file path exists, pick a new name. */ + free(path); + continue; + } + /* We successfully created the fifo */ + break; + } + return (path); +} + +static void +lxt_door_init() +{ + char *fifo1_path = NULL, *fifo2_path = NULL; + char fifo1_path_native[MAXPATHLEN]; + int fifo1_rd = -1, fifo1_wr = -1; + int fifo2_rd = -1, fifo2_wr = -1; + int junk; + pid_t child_pid; + thread_t tid; + + lxt_debug("lxt_door_init: preparint to start server"); + + /* Create two new fifos. */ + if (((fifo1_path = lxt_door_mkfifo()) == NULL) || + ((fifo2_path = lxt_door_mkfifo()) == NULL)) + goto fail; + + (void) snprintf(fifo1_path_native, sizeof (fifo1_path_native), + "/native%s", fifo1_path); + + /* + * Open both fifos for reading and writing. 
We have to open + * the read side of the fifo first (because the write side will + * fail to open if there is no reader) and we have to use the + * O_NONBLOCK flag (because the read open will hang without it). + */ + if (((fifo1_rd = open(fifo1_path, O_RDONLY | O_NONBLOCK)) < 0) || + ((fifo1_wr = open(fifo1_path, O_WRONLY)) < 0) || + ((fifo2_rd = open(fifo2_path, O_RDONLY | O_NONBLOCK)) < 0) || + ((fifo2_wr = open(fifo2_path, O_WRONLY)) < 0)) + goto fail; + + /* + * Now we have to close the read side of fifo1 and fifo2 and re-open + * them without the O_NONBLOCK flag. This is because we're using + * the fifos for synchronization and when we actually try to read + * from them we want to block. + */ + (void) close(fifo1_rd); + if ((fifo1_rd = open(fifo1_path, O_RDONLY)) < 0) + goto fail; + (void) close(fifo2_rd); + if ((fifo2_rd = open(fifo2_path, O_RDONLY)) < 0) + goto fail; + + /* + * Once fifo2 is opened no one will ever need to open it again + * so delete it now. + */ + (void) unlink(fifo2_path); + free(fifo2_path); + fifo2_path = NULL; + + /* Attempt to fork and start the door server */ + lxt_debug("lxt_door_init: starting server"); + switch (child_pid = fork1()) { + case -1: + /* fork1() failed. */ + goto fail; + case 0: + /* Child process - new door server. */ + (void) close(fifo1_rd); + (void) close(fifo2_wr); + + /* Need to chroot back to the real root directory */ + if (fchroot(root_fd) != 0) { + lxt_debug("lxt_server_exec: " + "failed fchroot(\"/\"): %s", strerror(errno)); + exit(-1); + } + (void) close(root_fd); + + /* Start the server */ + lxt_server_exec(fifo1_wr, fifo2_rd); + lxt_debug("lxt_server_exec: server init failed"); + exit(-1); + /*NOTREACHED*/ + } + /* Parent process - door client. */ + + /* + * fifo2 is used to send the door path to the child. + * (We can't simply pass it via the address space since the + * child will need to exec.) 
We'll write the name of the door + * file to fifo2 before we close the read end of the fifo2 so + * that if the child has exited for some reason we won't get + * a SIGPIPE. Note that we're reusing the name of fifo1 as + * the door path. Also note that we've pre-pended /native + * to the fifo/door path. The reason is that we're chroot'ed + * to /native, but when the thunking server executes it will + * be chroot'ed back to the real root directory. + */ + (void) write(fifo2_wr, + fifo1_path_native, strlen(fifo1_path_native) + 1); + (void) close(fifo2_wr); + (void) close(fifo2_rd); + + /* + * Start up a thread that will perform a waitpid() on the child + * door server process. We do this because if the calling + * application that is using our interfaces is forking its own + * children and using wait(), then it won't expect to see our + * children. We take advantage of the fact that if there are + * wait() and waitpid() calls in progress at the same time + * when a child exits, preference will be given to any + * waitpid() calls that are explicitly waiting for that child. + * There is of course a window of time where the child could + * exit after we've forked it but before we've called waitpid() + * where another wait() in this process could collect the result. + * There's nothing we can really do to prevent this short of + * stopping all the other threads in this process. + */ + (void) thr_create(NULL, 0, + lxt_door_waitpid, (void *)(uintptr_t)child_pid, THR_DAEMON, &tid); + + /* + * fifo1 is used for the child process to signal us that the + * door server is ready to take requests. + */ + (void) close(fifo1_wr); + (void) read(fifo1_rd, &junk, 1); + (void) close(fifo1_rd); + + /* If there was a door that was open, close it now. */ + + if (lxt_door_fd >= 0) + (void) close(lxt_door_fd); + /* + * The server should be started up by now and have fattach()ed the door + * server to the fifo/door path, so if we re-open that path now we + * should get a fd to the door server. 
+	 */
+	lxt_door_fd = open(fifo1_path, O_RDWR);
+
+	lxt_debug("lxt_door_init: new server door = %d", lxt_door_fd);
+
+	/* We don't need the fifo/door anymore so delete it. */
+	(void) unlink(fifo1_path);
+	free(fifo1_path);
+	return;
+
+fail:
+	if (fifo1_path != NULL)
+		(void) unlink(fifo1_path);
+	if (fifo2_path != NULL)
+		(void) unlink(fifo2_path);
+	if (fifo1_rd != -1)
+		(void) close(fifo1_rd);
+	if (fifo1_wr != -1)
+		(void) close(fifo1_wr);
+	if (fifo2_rd != -1)
+		(void) close(fifo2_rd);
+	if (fifo2_wr != -1)
+		(void) close(fifo2_wr);
+}
+
+/*
+ * Issue a single door call on the current server door.
+ *
+ * lxt_door_fd is sampled under lxt_door_lock.  Pass a non-zero lock_held
+ * when the caller already owns that lock so it is not taken a second time.
+ *
+ * Returns 0 when the call succeeded and produced a result buffer, -1 when
+ * there is no door, door_call() failed, or no result buffer came back.
+ */
+static int
+lxt_door_call(door_arg_t *door_arg, int lock_held)
+{
+	int fd;
+
+	if (!lock_held)
+		(void) mutex_lock(&lxt_door_lock);
+
+	/* Get a copy of lxt_door_fd */
+	fd = lxt_door_fd;
+
+	if (!lock_held)
+		(void) mutex_unlock(&lxt_door_lock);
+
+	if (fd == -1) {
+		lxt_debug("lxt_door_call: no door available");
+		return (-1);
+	}
+
+	if (door_call(fd, door_arg) != 0) {
+		lxt_debug("lxt_door_call: call failed");
+		return (-1);
+	}
+	if (door_arg->rbuf == NULL) {
+		lxt_debug("lxt_door_call: call returned NULL");
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Send a request to the thunk server, restarting the server if needed.
+ *
+ * The request is tried once as-is.  If that fails, the server is pinged
+ * with lxt_door_lock held; when the ping does not report success a new
+ * server is started through lxt_door_init().  The original request is then
+ * retried exactly once.
+ *
+ * Returns the lxt_door_call() status of the last attempt (0 or -1).
+ */
+static int
+lxt_door_request(door_arg_t *door_arg)
+{
+	door_arg_t door_ping;
+	lxt_server_arg_t ping_request, *ping_result;
+	int ping_success = 0;
+
+	/* First just try the door call. */
+	lxt_debug("lxt_door_request: calling server");
+	if (lxt_door_call(door_arg, 0) == 0)
+		return (0);
+
+	/* Prepare a door server ping request. */
+	bzero(&door_ping, sizeof (door_ping));
+	bzero(&ping_request, sizeof (ping_request));
+	door_ping.data_ptr = (char *)&ping_request;
+	door_ping.data_size = sizeof (ping_request);
+	ping_request.lxt_sa_op = LXT_SERVER_OP_PING;
+
+	(void) mutex_lock(&lxt_door_lock);
+
+	/* Ping the doors server. */
+	lxt_debug("lxt_door_request: pinging server");
+	if (lxt_door_call(&door_ping, 1) == 0) {
+		/*LINTED*/
+		ping_result = (lxt_server_arg_t *)door_ping.rbuf;
+		ping_success = ping_result->lxt_sa_success;
+		(void) munmap(door_ping.rbuf, door_ping.rsize);
+	}
+
+	if (!ping_success) {
+		/* The server is not responding so start up a new one. */
+		lxt_door_init();
+	}
+	(void) mutex_unlock(&lxt_door_lock);
+
+	/* Retry the original request; its status is our result. */
+	lxt_debug("lxt_door_request: calling server, retry");
+	return (lxt_door_call(door_arg, 0));
+}
+
+/*
+ * Common back end for the host lookups.
+ *
+ * Packs op, type and the token_len bytes at token into a server request,
+ * sends it through lxt_door_request() and unpacks the reply into the
+ * caller supplied result and buf (buf_len bytes).
+ *
+ * Returns result on success.  Returns NULL on failure with *h_errnop set:
+ * TRY_AGAIN when the request could not be allocated, ND_SYSTEM (and
+ * errno cleared) when the door call failed, or the server's h_errno and
+ * errno when the remote lookup itself failed.
+ */
+static struct hostent *
+lxt_gethost(int op, const char *token, int token_len, int type,
+    struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+	door_arg_t door_arg;
+	lxt_gethost_arg_t *data;
+	lxt_server_arg_t *request;
+	int request_size, errno_tmp, i;
+
+	lxt_debug("lxt_gethost: request caught");
+
+	/* The storage area carries the token followed by buf_len bytes. */
+	request_size = sizeof (*request) + sizeof (*data) +
+	    token_len + buf_len - 1;
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_gethost: calloc() failed");
+		*h_errnop = TRY_AGAIN;
+		return (NULL);
+	}
+	/*LINTED*/
+	data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = op;
+	data->lxt_gh_type = type;
+	data->lxt_gh_token_len = token_len;
+	data->lxt_gh_buf_len = buf_len;
+	/*
+	 * NOTE(review): request_size above reserves token_len + buf_len
+	 * bytes of storage, yet the length recorded here is
+	 * token_len + token_len.  Left unchanged; confirm against the
+	 * server's use of lxt_gh_storage_len before altering it.
+	 */
+	data->lxt_gh_storage_len = token_len + token_len;
+	bcopy(token, &data->lxt_gh_storage[0], token_len);
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_gethost: door_call() failed");
+		free(request);
+		/*
+		 * Don't know what caused the error so clear errno.  This
+		 * is done after free() so free() cannot overwrite it.
+		 */
+		errno = 0;
+		*h_errnop = ND_SYSTEM;
+		return (NULL);
+	}
+
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_gethost: door_call() returned NULL");
+		/* Don't know what caused the error so clear errno. */
+		errno = 0;
+		*h_errnop = ND_SYSTEM;
+		return (NULL);
+	}
+
+	/* From here on request and data refer to the server's reply. */
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+	/*LINTED*/
+	data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0];
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_gethost: remote function call failed");
+		/* Read the error codes before the reply is unmapped. */
+		errno_tmp = request->lxt_sa_errno;
+		*h_errnop = data->lxt_gh_h_errno;
+		(void) munmap(door_arg.rbuf, door_arg.rsize);
+		errno = errno_tmp;
+		return (NULL);
+	}
+
+	/* Copy out the results and output buffer. */
+	bcopy(&data->lxt_gh_result, result, sizeof (*result));
+	bcopy(&data->lxt_gh_storage[token_len], buf, buf_len);
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+
+	/* Now go through the results and convert all offsets to pointers */
+	result->h_name = LXT_OFFSET_TO_PTR(result->h_name, buf);
+	result->h_aliases = LXT_OFFSET_TO_PTR(result->h_aliases, buf);
+	result->h_addr_list = LXT_OFFSET_TO_PTR(result->h_addr_list, buf);
+	for (i = 0; result->h_aliases[i] != NULL; i++) {
+		result->h_aliases[i] =
+		    LXT_OFFSET_TO_PTR(result->h_aliases[i], buf);
+	}
+	for (i = 0; result->h_addr_list[i] != NULL; i++) {
+		result->h_addr_list[i] =
+		    LXT_OFFSET_TO_PTR(result->h_addr_list[i], buf);
+	}
+
+	return (result);
+}
+
+/*
+ * Common back end for the service lookups.
+ *
+ * Packs op, the token_len bytes at token and the optional proto string
+ * into a server request, sends it through lxt_door_request() and unpacks
+ * the reply into the caller supplied result and buf (buf_len bytes).
+ *
+ * Returns result on success and NULL on failure.  errno is cleared when
+ * the door call failed and set to the server's errno when the remote
+ * lookup itself failed.
+ */
+static struct servent *
+lxt_getserv(int op, const char *token, const int token_len, const char *proto,
+    struct servent *result, char *buf, int buf_len)
+{
+	door_arg_t door_arg;
+	lxt_getserv_arg_t *data;
+	lxt_server_arg_t *request;
+	int request_size, errno_tmp, i;
+
+	lxt_debug("lxt_getserv: request caught");
+
+	/* The storage area carries the token followed by buf_len bytes. */
+	request_size = sizeof (*request) + sizeof (*data) +
+	    token_len + buf_len - 1;
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_getserv: calloc() failed");
+		return (NULL);
+	}
+	/*LINTED*/
+	data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = op;
+	data->lxt_gs_token_len = token_len;
+	data->lxt_gs_buf_len = buf_len;
+	/*
+	 * NOTE(review): request_size above reserves token_len + buf_len
+	 * bytes of storage, yet the length recorded here is
+	 * token_len + token_len.  Left unchanged; confirm against the
+	 * server's use of lxt_gs_storage_len before altering it.
+	 */
+	data->lxt_gs_storage_len = token_len + token_len;
+	bcopy(token, &data->lxt_gs_storage[0], token_len);
+
+	/*
+	 * Copy at most size - 1 bytes into the zeroed field so that an
+	 * over-long proto is truncated but always NUL terminated.
+	 */
+	bzero(data->lxt_gs_proto, sizeof (data->lxt_gs_proto));
+	if (proto != NULL)
+		(void) strncpy(data->lxt_gs_proto, proto,
+		    sizeof (data->lxt_gs_proto) - 1);
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_getserv: door_call() failed");
+		free(request);
+		/*
+		 * Don't know what caused the error so clear errno.  This
+		 * is done after free() so free() cannot overwrite it.
+		 */
+		errno = 0;
+		return (NULL);
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_getserv: door_call() returned NULL");
+		/* Don't know what caused the error so clear errno */
+		errno = 0;
+		return (NULL);
+	}
+	/* From here on request and data refer to the server's reply. */
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+	/*LINTED*/
+	data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0];
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_getserv: remote function call failed");
+		/* Read the error code before the reply is unmapped. */
+		errno_tmp = request->lxt_sa_errno;
+		(void) munmap(door_arg.rbuf, door_arg.rsize);
+		errno = errno_tmp;
+		return (NULL);
+	}
+
+	/* Copy out the results and output buffer. */
+	bcopy(&data->lxt_gs_result, result, sizeof (*result));
+	bcopy(&data->lxt_gs_storage[token_len], buf, buf_len);
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+
+	/*
+	 * Now go through the results and convert all offsets to pointers.
+	 * See the comments in lxt_server_getserv() for why we need
+	 * to subtract 1 from each offset.
+	 */
+	result->s_name = LXT_OFFSET_TO_PTR(result->s_name, buf);
+	result->s_proto = LXT_OFFSET_TO_PTR(result->s_proto, buf);
+	result->s_aliases = LXT_OFFSET_TO_PTR(result->s_aliases, buf);
+	for (i = 0; result->s_aliases[i] != NULL; i++) {
+		result->s_aliases[i] =
+		    LXT_OFFSET_TO_PTR(result->s_aliases[i], buf);
+	}
+
+	return (result);
+}
+
+/*
+ * Forward an openlog() request to the thunk server.
+ *
+ * ident is copied (and truncated if necessary) into the request; a NULL
+ * ident is sent as an empty string.  Failures are only reported through
+ * lxt_debug() since openlog() has no way to return an error.
+ */
+static void
+lxt_openlog(const char *ident, int logopt, int facility)
+{
+	door_arg_t door_arg;
+	lxt_openlog_arg_t *data;
+	lxt_server_arg_t *request;
+	int request_size;
+
+	request_size = sizeof (*request) + sizeof (*data);
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_openlog: calloc() failed");
+		return;
+	}
+	/*LINTED*/
+	data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = LXT_SERVER_OP_OPENLOG;
+	data->lxt_ol_facility = facility;
+	data->lxt_ol_logopt = logopt;
+	/* The request is zero filled, so skipping the copy leaves "". */
+	if (ident != NULL) {
+		(void) strlcpy(data->lxt_ol_ident, ident,
+		    sizeof (data->lxt_ol_ident));
+	}
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_openlog: door_call() failed");
+		free(request);
+		return;
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_openlog: door_call() returned NULL");
+		return;
+	}
+
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_openlog: remote function call failed");
+	}
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+/*
+ * Format a syslog message locally and forward it to the thunk server.
+ *
+ * The format string is pre-processed ("%m" and "%%", see below), expanded
+ * with vsnprintf() into the request, and sent together with the priority,
+ * our pid and, when it can be read from /proc, our program name.
+ * Failures are only reported through lxt_debug().
+ */
+static void
+lxt_vsyslog(int priority, const char *message, va_list va)
+{
+	door_arg_t door_arg;
+	lxt_syslog_arg_t *data;
+	lxt_server_arg_t *request;
+	psinfo_t p;
+	/* Sized for the longest path an int pid can yield, NUL included. */
+	char procfile[sizeof ("/proc/2147483647/psinfo")];
+	char *buf = NULL, *estr;
+	int buf_len, buf_i, estr_len, request_size, procfd;
+	int i, key, err_count = 0, tok_count = 0;
+	int errno_backup = errno;
+
+	/*
+	 * Here we're going to use vsnprintf() to expand the message
+	 * string passed in before we hand it off to a Linux process.
+	 * Before we can call vsnprintf() we'll need to modify the
+	 * string to deal with certain special tokens.
+	 *
+	 * syslog() supports a special '%m' format token that expands to
+	 * the error message string associated with the current value
+	 * of errno.  Unfortunately if we pass this token to vsnprintf()
+	 * it will choke so we need to expand that token manually here.
+	 *
+	 * We also need to expand any "%%" characters into "%%%%".
+	 * The reason is that we'll be calling vsnprintf() which will
+	 * translate "%%%%" back to "%%", which is safe to pass to the
+	 * Linux version of syslog.  If we didn't do this then vsnprintf()
+	 * would translate "%%" to "%" and then the Linux syslog would
+	 * attempt to interpret "%" and whatever character follows it
+	 * as a printf format style token.
+	 */
+	for (key = i = 0; message[i] != '\0'; i++) {
+		if (!key && message[i] == '%') {
+			key = 1;
+			continue;
+		}
+		if (key && message[i] == '%')
+			tok_count++;
+		if (key && message[i] == 'm')
+			err_count++;
+		key = 0;
+	}
+
+	/* We found some tokens that we need to expand. */
+	if (err_count || tok_count) {
+		estr = strerror(errno_backup);
+		estr_len = strlen(estr);
+		assert(estr_len >= 2);
+
+		/*
+		 * Allocate a buffer to hold the expanded string: each "%%"
+		 * grows by two bytes and each "%m" (two bytes) is replaced
+		 * by the estr_len bytes of the error string.
+		 */
+		buf_len = i + 1 +
+		    (tok_count * 2) + (err_count * (estr_len - 2));
+		if ((buf = calloc(1, buf_len)) == NULL) {
+			lxt_debug("lxt_vsyslog: calloc() failed");
+			return;
+		}
+
+		/* Finally, expand %% and %m. */
+		for (key = buf_i = i = 0; message[i] != '\0'; i++) {
+			assert(buf_i < buf_len);
+			if (!key && message[i] == '%') {
+				buf[buf_i++] = '%';
+				key = 1;
+				continue;
+			}
+			/*
+			 * For the two special tokens, overwrite the '%'
+			 * that was already emitted (hence buf_i - 1).
+			 */
+			if (key && message[i] == 'm') {
+				(void) bcopy(estr, &buf[buf_i - 1], estr_len);
+				buf_i += estr_len - 1;
+			} else if (key && message[i] == '%') {
+				(void) bcopy("%%%%", &buf[buf_i - 1], 4);
+				buf_i += 4 - 1;
+			} else {
+				buf[buf_i++] = message[i];
+			}
+			key = 0;
+		}
+		assert(buf[buf_i] == '\0');
+		assert(buf_i == (buf_len - 1));
+
+		/* Use the expanded buffer as our format string. */
+		message = buf;
+	}
+
+	/* Allocate the request we're going to send to the server */
+	request_size = sizeof (*request) + sizeof (*data);
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_vsyslog: calloc() failed");
+		/* Don't leak the expanded format string (may be NULL). */
+		free(buf);
+		return;
+	}
+
+	/*LINTED*/
+	data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0];
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = LXT_SERVER_OP_SYSLOG;
+	data->lxt_sl_priority = priority;
+	data->lxt_sl_pid = getpid();
+	(void) vsnprintf(data->lxt_sl_message, sizeof (data->lxt_sl_message),
+	    message, va);
+
+	/*
+	 * The intermediate buffer is no longer needed.  buf is still NULL
+	 * if no token expansion took place, which free() accepts.
+	 */
+	free(buf);
+
+	/*
+	 * Add the current program name into the request.
+	 *
+	 * NOTE(review): an alternative "/native/proc/<pid>/psinfo" path was
+	 * left commented out here; confirm which /proc should be read.
+	 */
+	(void) snprintf(procfile, sizeof (procfile), "/proc/%d/psinfo",
+	    (int)getpid());
+	if ((procfd = open(procfile, O_RDONLY)) >= 0) {
+		/* Only trust pr_fname if the whole structure was read. */
+		if (read(procfd, &p, sizeof (p)) == sizeof (p)) {
+			/*
+			 * The request was zero filled by calloc(), so
+			 * copying at most size - 1 bytes always leaves
+			 * the name NUL terminated.
+			 */
+			(void) strncpy(data->lxt_sl_progname, p.pr_fname,
+			    sizeof (data->lxt_sl_progname) - 1);
+		}
+		(void) close(procfd);
+	}
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_vsyslog: door_call() failed");
+		free(request);
+		return;
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_vsyslog: door_call() returned NULL");
+		return;
+	}
+
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_vsyslog: remote function call failed");
+	}
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+/*
+ * Forward a closelog() request to the thunk server.  The request carries
+ * no payload beyond the operation code.  Failures are only reported
+ * through lxt_debug().
+ */
+static void
+lxt_closelog(void)
+{
+	door_arg_t door_arg;
+	lxt_server_arg_t *request;
+	int request_size;
+
+	request_size = sizeof (*request);
+	if ((request = calloc(1, request_size)) == NULL) {
+		lxt_debug("lxt_closelog: calloc() failed");
+		return;
+	}
+
+	/* Initialize the server request. */
+	request->lxt_sa_op = LXT_SERVER_OP_CLOSELOG;
+
+	/* Initialize door_call() arguments. */
+	bzero(&door_arg, sizeof (door_arg));
+	door_arg.data_ptr = (char *)request;
+	door_arg.data_size = request_size;
+
+	/* Call the doors server */
+	if (lxt_door_request(&door_arg) != 0) {
+		lxt_debug("lxt_closelog: door_call() failed");
+		free(request);
+		return;
+	}
+	free(request);
+
+	if (door_arg.rbuf == NULL) {
+		lxt_debug("lxt_closelog: door_call() returned NULL");
+		return;
+	}
+
+	/*LINTED*/
+	request = (lxt_server_arg_t *)door_arg.rbuf;
+
+	/* Check if the remote procedure call failed */
+	if (!request->lxt_sa_success) {
+		lxt_debug("lxt_closelog: remote function call failed");
+	}
+	(void) munmap(door_arg.rbuf, door_arg.rsize);
+}
+
+/*
+ * Edit pset so that applying it with op cannot take priv away.
+ *
+ * With PRIV_OFF a listed privilege would be turned off, so priv is removed
+ * from pset.  With PRIV_SET an unlisted privilege would be dropped, so priv
+ * is added to pset.  Any other combination leaves pset untouched.  type is
+ * only used in the debug message.
+ */
+static void
+lxt_pset_keep(priv_op_t op, priv_ptype_t type, priv_set_t *pset,
+    const char *priv)
+{
+	if (priv_ismember(pset, priv) == B_TRUE) {
+		if (op == PRIV_OFF) {
+			(void) priv_delset(pset, priv);
+			lxt_debug("lxt_pset_keep: "
+			    "preventing drop of \"%s\" from \"%s\" set",
+			    priv, type);
+		}
+	} else {
+		if (op == PRIV_SET) {
+			(void) priv_addset(pset, priv);
+			lxt_debug("lxt_pset_keep: "
+			    "preventing drop of \"%s\" from \"%s\" set",
+			    priv, type);
+		}
+	}
+}
+
+/*
+ * Public interfaces - used by lx_nametoaddr
+ */
+
+/*
+ * Write one debug message, prefixed with "pid/tid: " and terminated by a
+ * newline, to debug_fd.  Does nothing when debug_fd is -1.  A message that
+ * cannot be formatted, or that leaves no room for the trailing newline, is
+ * silently discarded.
+ */
+void
+lxt_vdebug(const char *msg, va_list va)
+{
+	char buf[LXT_MSG_MAXLEN + 1];
+	int rv, n;
+
+	if (debug_fd == -1)
+		return;
+
+	/* Prefix the message with pid/tid. */
+	if ((n = snprintf(buf, sizeof (buf), "%u/%u: ",
+	    getpid(), thr_self())) == -1)
+		return;
+
+	/* Format the message. */
+	if (vsnprintf(&buf[n], sizeof (buf) - n, msg, va) == -1)
+		return;
+
+	/* Add a newline if there isn't one already. */
+	if ((buf[strlen(buf) - 1] != '\n') &&
+	    (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf)))
+		return;
+
+	/* We retry in case of EINTR */
+	do {
+		rv = write(debug_fd, buf, strlen(buf));
+	} while ((rv == -1) && (errno == EINTR));
+}
+
+/*
+ * printf-style front end to lxt_vdebug().  errno is saved on entry and
+ * restored on exit, so emitting a debug message does not disturb it.
+ */
+void
+lxt_debug(const char *msg, ...)
+{
+	va_list va;
+	int errno_backup;
+
+	/* Nothing to do when there is no debug descriptor to write to. */
+	if (debug_fd == -1)
+		return;
+
+	/* Save errno so the write below cannot change what callers see. */
+	errno_backup = errno;
+	va_start(va, msg);
+	lxt_vdebug(msg, va);
+	va_end(va);
+	errno = errno_backup;
+}
+
+/*
+ * Address-to-host lookup.  Forwards addr (addr_len bytes) and type to
+ * lxt_gethost() as an LXT_SERVER_OP_ADDR2HOST request and returns its
+ * result unchanged.
+ */
+struct hostent *
+lxt_gethostbyaddr_r(const char *addr, int addr_len, int type,
+    struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+	lxt_debug("lxt_gethostbyaddr_r: request recieved");
+	return (lxt_gethost(LXT_SERVER_OP_ADDR2HOST,
+	    addr, addr_len, type, result, buf, buf_len, h_errnop));
+}
+
+/*
+ * Name-to-host lookup.  Forwards name, including its terminating NUL, to
+ * lxt_gethost() as an LXT_SERVER_OP_NAME2HOST request with a type of 0.
+ * name must not be NULL since its length is taken with strlen().
+ */
+struct hostent *
+lxt_gethostbyname_r(const char *name,
+    struct hostent *result, char *buf, int buf_len, int *h_errnop)
+{
+	lxt_debug("lxt_gethostbyname_r: request recieved");
+	return (lxt_gethost(LXT_SERVER_OP_NAME2HOST,
+	    name, strlen(name) + 1, 0, result, buf, buf_len, h_errnop));
+}
+
+/*
+ * Port-to-service lookup.  The port is handed to lxt_getserv() as the raw
+ * bytes of the int, in an LXT_SERVER_OP_PORT2SERV request.  proto may be
+ * NULL.
+ */
+struct servent *
+lxt_getservbyport_r(int port, const char *proto,
+    struct servent *result, char *buf, int buf_len)
+{
+	lxt_debug("lxt_getservbyport_r: request recieved");
+	return (lxt_getserv(LXT_SERVER_OP_PORT2SERV,
+	    (const char *)&port, sizeof (int), proto, result, buf, buf_len));
+}
+
+/*
+ * Name-to-service lookup.  Forwards name, including its terminating NUL,
+ * to lxt_getserv() as an LXT_SERVER_OP_NAME2SERV request.  name must not
+ * be NULL since its length is taken with strlen(); proto may be NULL.
+ */
+struct servent *
+lxt_getservbyname_r(const char *name, const char *proto,
+    struct servent *result, char *buf, int buf_len)
+{
+	lxt_debug("lxt_getservbyname_r: request recieved");
+	return (lxt_getserv(LXT_SERVER_OP_NAME2SERV,
+	    name, strlen(name) + 1, proto, result, buf, buf_len));
+}
+
+/*
+ * "Public" interfaces - used to override public existing interfaces
+ */
+#pragma weak _close = close
+int
+close(int fd)
+{
+	static fp1_t fp = NULL;
+
+	/*
+	 * Don't let the process close our file descriptor that points
+	 * back to the root directory.
+	 */
+	if (fd == root_fd)
+		return (0);
+	/* Likewise pretend to close, but keep, the debug descriptor. */
+	if (fd == debug_fd)
+		return (0);
+
+	/* Look up the next "close" in search order once, then reuse it. */
+	if (fp == NULL)
+		fp = (fp1_t)dlsym(RTLD_NEXT, "close");
+	return (fp((uintptr_t)fd));
+}
+
+/*
+ * Interposed _setppriv().  Works on a private copy of the caller's set,
+ * edited with lxt_pset_keep() so that the request cannot remove the six
+ * privileges listed below, then hands that copy to the next "_setppriv"
+ * found through dlsym(RTLD_NEXT) and returns its result.
+ */
+int
+_setppriv(priv_op_t op, priv_ptype_t type, const priv_set_t *pset)
+{
+	static fp3_t fp = NULL;
+	priv_set_t *pset_new;
+	int rv;
+
+	lxt_debug("_setppriv: request caught");
+
+	if (fp == NULL)
+		fp = (fp3_t)dlsym(RTLD_NEXT, "_setppriv");
+
+	/* Keep retrying, once a second, until a set can be allocated. */
+	while ((pset_new = priv_allocset()) == NULL)
+		(void) sleep(1);
+
+	priv_copyset(pset, pset_new);
+	lxt_pset_keep(op, type, pset_new, PRIV_PROC_EXEC);
+	lxt_pset_keep(op, type, pset_new, PRIV_PROC_FORK);
+	lxt_pset_keep(op, type, pset_new, PRIV_PROC_CHROOT);
+	lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_READ);
+	lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_WRITE);
+	lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_SEARCH);
+
+	rv = fp(op, (uintptr_t)type, (uintptr_t)pset_new);
+	priv_freeset(pset_new);
+	return (rv);
+}
+
+/* Interposed openlog(): forwarded to the thunk server by lxt_openlog(). */
+void
+openlog(const char *ident, int logopt, int facility)
+{
+	lxt_debug("openlog: request caught");
+	lxt_openlog(ident, logopt, facility);
+}
+
+/* Interposed syslog(): collects the varargs and defers to lxt_vsyslog(). */
+void
+syslog(int priority, const char *message, ...)
+{
+	va_list va;
+
+	lxt_debug("syslog: request caught");
+	va_start(va, message);
+	lxt_vsyslog(priority, message, va);
+	va_end(va);
+}
+
+/* Interposed vsyslog(): forwarded to the thunk server by lxt_vsyslog(). */
+void
+vsyslog(int priority, const char *message, va_list va)
+{
+	lxt_debug("vsyslog: request caught");
+	lxt_vsyslog(priority, message, va);
+}
+
+/* Interposed closelog(): forwarded to the thunk server by lxt_closelog(). */
+void
+closelog(void)
+{
+	lxt_debug("closelog: request caught");
+	lxt_closelog();
+}
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
new file mode 100644
index 0000000000..a898b55613
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers
@@ -0,0 +1,58 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. +# +# MAPFILE HEADER END +# + +SUNWprivate_1.1 { + global: + lxt_vdebug; + lxt_debug; + lxt_gethostbyaddr_r; + lxt_gethostbyname_r; + lxt_getservbyport_r; + lxt_getservbyname_r; + _close; + _setppriv; + openlog; + syslog; + vsyslog; + closelog; + + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_thunk/i386/Makefile b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile new file mode 100644 index 0000000000..c4b6c71027 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB) + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h new file mode 100644 index 0000000000..b19c91873a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_THUNK_H +#define _LX_THUNK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +struct hostent *lxt_gethostbyaddr_r(const char *addr, int addr_len, int type, + struct hostent *result, char *buf, int buf_len, int *h_errnop); +struct hostent *lxt_gethostbyname_r(const char *name, + struct hostent *result, char *buf, int buf_len, int *h_errnop); +struct servent *lxt_getservbyport_r(int port, const char *proto, + struct servent *result, char *buf, int buf_len); +struct servent *lxt_getservbyname_r(const char *name, const char *proto, + struct servent *result, char *buf, int buf_len); + +void openlog(const char *ident, int logopt, int facility); +void syslog(int priority, const char *message, ...); +void closelog(void); + +void lxt_debug(const char *msg, ...); +void lxt_vdebug(const char *msg, va_list va); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_THUNK_H */ diff --git a/usr/src/lib/brand/lx/netfiles/Makefile b/usr/src/lib/brand/lx/netfiles/Makefile new file mode 100644 index 0000000000..47be18db0f --- /dev/null +++ b/usr/src/lib/brand/lx/netfiles/Makefile @@ -0,0 +1,48 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +TXTS = etc_netconfig etc_default_nfs +NFS_DFL = ../../../../cmd/fs.d/nfs/etc/nfs.dfl + +all: $(TXTS) + +include ../Makefile.lx + +lint: + +install: $(ROOTTXTS) + +clean: + -$(RM) etc_default_nfs + +clobber: clean + -$(RM) $(ROOTXMLDOCS) $(ROOTTXTS) + +etc_default_nfs: $(NFS_DFL) + $(RM) $@ + $(CP) $(NFS_DFL) $@ diff --git a/usr/src/lib/brand/lx/netfiles/etc_netconfig b/usr/src/lib/brand/lx/netfiles/etc_netconfig new file mode 100644 index 0000000000..56222abf56 --- /dev/null +++ b/usr/src/lib/brand/lx/netfiles/etc_netconfig @@ -0,0 +1,38 @@ +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# The "Network Configuration" File. 
+# +# Each entry is of the form: +# +# <network_id> <semantics> <flags> <protofamily> <protoname> \ +# <device> <nametoaddr_libs> +# +# For running solaris daemons in a linux zone we use this non-default +# /etc/netconfig. The reason is that all name resolution has to be +# done via linux name service interfaces. To do this we specify a custom +# nametoaddr library that libnsl will invoke to do name service lookups. +# +udp tpi_clts v inet udp /dev/udp lx_nametoaddr.so.1 +tcp tpi_cots_ord v inet tcp /dev/tcp lx_nametoaddr.so.1 diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile new file mode 100644 index 0000000000..4ae4f128b5 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/Makefile @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +PROGS = lx_install lx_distro_install lx_init_zone +SUBDIRS = distros +XMLDOCS = config.xml platform.xml +TEMPLATES = SUNWlx.xml SUNWlx26.xml + +all: $(PROGS) + +include $(SRC)/cmd/Makefile.cmd +include ../Makefile.lx + +all := TARGET= all +install := TARGET= install +clobber := TARGET= clobber + +POFILES= $(PROGS:%=%.po) +POFILE= lx_zone.po + +$(POFILE): $(POFILES) + $(RM) $@ + $(BUILDPO.pofiles) + +_msg: $(MSGDOMAINPOFILE) + +install: $(PROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) $(ROOTPROGS) $(SUBDIRS) + +lint: + +clean: + -$(RM) $(PROGS) + +clobber: clean $(SUBDIRS) + -$(RM) $(ROOTXMLDOCS) $(ROOTPROGS) $(ROOTTEMPLATES) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/Makefile.msg.targ diff --git a/usr/src/lib/brand/lx/zone/SUNWlx.xml b/usr/src/lib/brand/lx/zone/SUNWlx.xml new file mode 100644 index 0000000000..04c38873de --- /dev/null +++ b/usr/src/lib/brand/lx/zone/SUNWlx.xml @@ -0,0 +1,34 @@ +<?xml version="1.0"?> + +<!-- + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. Use zonecfg(1M) instead. 
+--> + +<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1"> + +<zone name="default" zonepath="" autoboot="false" brand="lx"> +</zone> diff --git a/usr/src/lib/brand/lx/zone/SUNWlx26.xml b/usr/src/lib/brand/lx/zone/SUNWlx26.xml new file mode 100644 index 0000000000..9bd8af4d92 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/SUNWlx26.xml @@ -0,0 +1,35 @@ +<?xml version="1.0"?> + +<!-- + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. Use zonecfg(1M) instead. 
+--> + +<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1"> + +<zone name="default" zonepath="" autoboot="false" brand="lx"> + <attr name="kernel-version" type="string" value="2.6"/> +</zone> diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml new file mode 100644 index 0000000000..b28fbcd2c2 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/config.xml @@ -0,0 +1,94 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + + DO NOT EDIT THIS FILE. 
+--> + +<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN" + "file:///usr/share/lib/xml/dtd/brand.dtd.1"> + +<brand name="lx"> + <modname>lx_brand</modname> + + <initname>/sbin/init</initname> + <login_cmd>/bin/login -h zone:%Z %u</login_cmd> + <forcedlogin_cmd>/bin/login -h zone:%Z -f %u</forcedlogin_cmd> + <user_cmd>/usr/bin/getent passwd %u</user_cmd> + + <install>/usr/lib/brand/lx/lx_install %z %R</install> + <installopts>d:hsvX</installopts> + <boot>/usr/lib/brand/lx/lx_support boot %R %z</boot> + <halt>/usr/lib/brand/lx/lx_support halt %R %z</halt> + <verify_cfg>/usr/lib/brand/lx/lx_support verify</verify_cfg> + <verify_adm></verify_adm> + <postclone></postclone> + <postinstall></postinstall> + + <privilege set="default" name="contract_event" /> + <privilege set="default" name="contract_identity" /> + <privilege set="default" name="contract_observer" /> + <privilege set="default" name="file_chown" /> + <privilege set="default" name="file_chown_self" /> + <privilege set="default" name="file_dac_execute" /> + <privilege set="default" name="file_dac_read" /> + <privilege set="default" name="file_dac_search" /> + <privilege set="default" name="file_dac_write" /> + <privilege set="default" name="file_owner" /> + <privilege set="default" name="file_setid" /> + <privilege set="default" name="ipc_dac_read" /> + <privilege set="default" name="ipc_dac_write" /> + <privilege set="default" name="ipc_owner" /> + <privilege set="default" name="net_bindmlp" /> + <privilege set="default" name="net_icmpaccess" /> + <privilege set="default" name="net_mac_aware" /> + <privilege set="default" name="net_privaddr" /> + <privilege set="default" name="proc_chroot" /> + <privilege set="default" name="sys_audit" /> + <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_lock_memory" /> + <privilege set="default" name="proc_owner" /> + <privilege set="default" name="proc_setid" /> + <privilege set="default" name="proc_taskid" /> + <privilege 
set="default" name="sys_acct" /> + <privilege set="default" name="sys_admin" /> + <privilege set="default" name="sys_mount" /> + <privilege set="default" name="sys_nfs" /> + <privilege set="default" name="sys_resource" /> + + <privilege set="prohibited" name="dtrace_kernel" /> + <privilege set="prohibited" name="proc_zone" /> + <privilege set="prohibited" name="sys_config" /> + <privilege set="prohibited" name="sys_devices" /> + <privilege set="prohibited" name="sys_ip_config" /> + <privilege set="prohibited" name="sys_linkdir" /> + <privilege set="prohibited" name="sys_net_config" /> + <privilege set="prohibited" name="sys_res_config" /> + <privilege set="prohibited" name="sys_suser_compat" /> + <privilege set="prohibited" name="xvm_control" /> + <privilege set="prohibited" name="virt_manage" /> + + <privilege set="required" name="proc_exec" /> + <privilege set="required" name="proc_fork" /> + <privilege set="required" name="sys_mount" /> +</brand> diff --git a/usr/src/lib/brand/lx/zone/distros/Makefile b/usr/src/lib/brand/lx/zone/distros/Makefile new file mode 100644 index 0000000000..7b5a600c94 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/Makefile @@ -0,0 +1,50 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +include ../../Makefile.lx + +DISTROS = centos35.distro centos36.distro centos37.distro \ + centos38.distro rhel35.distro rhel36.distro rhel37.distro \ + rhel38.distro rhel_centos_common + +ROOTDISTRODIR= $(ROOTBRANDDIR)/distros +ROOTDISTROS= $(DISTROS:%=$(ROOTDISTRODIR)/%) + +$(ROOTDISTROS) := FILEMODE = 444 + +$(ROOTDISTRODIR): + $(INS.dir) + +$(ROOTDISTRODIR)/%: % + $(INS.file) + +install: $(ROOTDISTROS) + +lint clean all: + +clobber: + -$(RM) $(ROOTDISTROS) + diff --git a/usr/src/lib/brand/lx/zone/distros/centos35.distro b/usr/src/lib/brand/lx/zone/distros/centos35.distro new file mode 100644 index 0000000000..cb5c2add9f --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos35.distro @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the CentOS 3.5 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1118161135.08 +distro_version="3.5" +set -A distro_cdorder 1 2 3 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the CentOS 3.5 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss \ + openoffice.org-style-gnome" +delta_developer_rpms=$delta_desktop_rpms +delta_all_rpms=$delta_developer_rpms + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/centos36.distro b/usr/src/lib/brand/lx/zone/distros/centos36.distro new file mode 100644 index 0000000000..8dbc4307ac --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos36.distro @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the CentOS 3.6 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1130453594.8 +distro_version="3.6" +set -A distro_cdorder 1 2 3 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the CentOS 3.6 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss \ + openoffice.org-style-gnome" +delta_developer_rpms="$delta_desktop_rpms gd-progs" +delta_all_rpms="$delta_developer_rpms emacs-nox" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/centos37.distro b/usr/src/lib/brand/lx/zone/distros/centos37.distro new file mode 100644 index 0000000000..f8ac5e0fb1 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos37.distro @@ -0,0 +1,65 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the CentOS 3.7 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1144177644.47 +distro_version="3.7" +set -A distro_cdorder 1 2 3 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the CentOS 3.7 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss" +delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk" +delta_all_rpms="$delta_developer_rpms emacs-nox" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/centos38.distro b/usr/src/lib/brand/lx/zone/distros/centos38.distro new file mode 100644 index 0000000000..22ae2e43b2 --- /dev/null +++ 
b/usr/src/lib/brand/lx/zone/distros/centos38.distro @@ -0,0 +1,79 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the CentOS 3.8 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1155307611.42 +distro_version="3.8" +set -A distro_cdorder 1 2 3 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the CentOS 3.8 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms \ + expectk \ + seamonkey \ + seamonkey-chat \ + seamonkey-mail \ + seamonkey-nspr \ + seamonkey-nss \ + tcl-html \ + tcllib" +delta_developer_rpms="$delta_desktop_rpms \ + gd-progs \ + freetype-demos \ + freetype-utils \ + glibc-debug \ + irb \ + python-docs \ + ruby-docs \ + ruby-tcltk \ + seamonkey-dom-inspector \ + seamonkey-js-debugger \ + seamonkey-devel \ + seamonkey-nspr-devel \ + seamonkey-nss-devel" +delta_all_rpms="$delta_developer_rpms emacs-nox" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel35.distro b/usr/src/lib/brand/lx/zone/distros/rhel35.distro new file mode 100644 index 0000000000..0b6b23ae52 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel35.distro @@ -0,0 +1,98 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3 Update 5 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1115874580.003298 +distro_version="Update 5" +set -A distro_cdorder 2 3 4 1 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the RHEL 3.5 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss \ + openoffice.org-style-gnome" +delta_developer_rpms=$delta_desktop_rpms +delta_all_rpms="$delta_developer_rpms comps" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" + +# +# List of packages missing from the "WS" personality of this distribution +# as compared to the "AS" personality. 
+# +distro_WS_missing="amanda-server \ + caching-nameserver \ + finger-server \ + freeradius \ + inews \ + inn \ + krb5-server \ + netdump-server \ + openldap-servers \ + pxe \ + quagga \ + radvd \ + redhat-config-bind \ + samba-swat \ + tftp-server \ + tux \ + vsftpd \ + ypserv \ + arptables_jf \ + mtx \ + redhat-config-netboot" + +# +# No packages are missing from the "ES" personality as compared to the "AS" +# personality. +# +unset distro_ES_missing diff --git a/usr/src/lib/brand/lx/zone/distros/rhel36.distro b/usr/src/lib/brand/lx/zone/distros/rhel36.distro new file mode 100644 index 0000000000..51c80832ff --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel36.distro @@ -0,0 +1,97 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3 Update 6 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1127323691.616555 +distro_version="Update 6" +set -A distro_cdorder 2 3 4 1 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the RHEL 3.6 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss \ + openoffice.org-style-gnome" +delta_developer_rpms="$delta_desktop_rpms gd-progs" +delta_all_rpms="$delta_developer_rpms emacs-nox comps" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" + +# +# List of packages missing from the "WS" personality of this distribution +# as compared to the "AS" personality. +# +distro_WS_missing="amanda-server \ + caching-nameserver \ + finger-server \ + freeradius \ + inews \ + inn \ + netdump-server \ + openldap-servers \ + pxe \ + quagga \ + radvd \ + redhat-config-bind \ + samba-swat \ + tftp-server \ + tux \ + vsftpd \ + ypserv \ + arptables_jf \ + mtx \ + redhat-config-netboot" + +# +# No packages are missing from the "ES" personality as compared to the "AS" +# personality. 
+# +unset distro_ES_missing diff --git a/usr/src/lib/brand/lx/zone/distros/rhel37.distro b/usr/src/lib/brand/lx/zone/distros/rhel37.distro new file mode 100644 index 0000000000..2c3b81d82b --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel37.distro @@ -0,0 +1,96 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3 Update 7 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1141679045.364586 +distro_version="Update 7" +set -A distro_cdorder 2 3 4 1 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the RHEL 3.7 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss" +delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk" +delta_all_rpms="$delta_developer_rpms emacs-nox comps" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" + +# +# List of packages missing from the "WS" personality of this distribution +# as compared to the "AS" personality. +# +distro_WS_missing="amanda-server \ + caching-nameserver \ + finger-server \ + freeradius \ + inews \ + inn \ + netdump-server \ + openldap-servers \ + pxe \ + quagga \ + radvd \ + redhat-config-bind \ + samba-swat \ + tftp-server \ + tux \ + vsftpd \ + ypserv \ + arptables_jf \ + mtx \ + redhat-config-netboot" + +# +# No packages are missing from the "ES" personality as compared to the "AS" +# personality. +# +unset distro_ES_missing diff --git a/usr/src/lib/brand/lx/zone/distros/rhel38.distro b/usr/src/lib/brand/lx/zone/distros/rhel38.distro new file mode 100644 index 0000000000..5255ac206a --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel38.distro @@ -0,0 +1,109 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3 Update 8 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Version Name +# + Order CDs holding the distribution must be installed in +# + MB of disk space required to hold a full install of the distribution +# +distro_serial=1152738297.776178 +distro_version="Update 8" +set -A distro_cdorder 2 3 4 1 + +distro_mb_required=500 + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the RHEL 3.8 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms \ + seamonkey \ + seamonkey-chat \ + seamonkey-mail \ + seamonkey-nspr \ + seamonkey-nss" +delta_developer_rpms="$delta_desktop_rpms \ + gd-progs \ + irb \ + ruby-docs \ + ruby-tcltk \ + seamonkey-dom-inspector \ + seamonkey-js-debugger \ + seamonkey-devel \ + seamonkey-nspr-devel \ + seamonkey-nss-devel" +delta_all_rpms="$delta_developer_rpms emacs-nox comps" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" + +# +# List of packages missing from the "WS" personality of this distribution +# as compared to the "AS" personality. +# +distro_WS_missing="amanda-server \ + caching-nameserver \ + finger-server \ + freeradius \ + inews \ + inn \ + netdump-server \ + openldap-servers \ + pxe \ + quagga \ + radvd \ + redhat-config-bind \ + samba-swat \ + tftp-server \ + tux \ + vsftpd \ + ypserv \ + arptables_jf \ + mtx \ + redhat-config-netboot" + +# +# No packages are missing from the "ES" personality as compared to the "AS" +# personality. +# +unset distro_ES_missing + +# +# Identify the packages that need to be set aside for installation after +# all the other packages are installed. 
+# +deferred_rpms="openoffice.org openoffice.org-i18n openoffice.org-libs" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel_centos_common b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common new file mode 100644 index 0000000000..583264b723 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common @@ -0,0 +1,1016 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# This file contains the basic cluster contents shared by all of the +# Linux distros we support. Each distro has its own .distro file that +# expands on the basic cluster lists provided here. +# + +# +# Required packages for the install miniroot, these are the minimum packages a +# system must have installed in order to run rpm (which is then used from +# within the zone to perform the balance of the installation.) 
+# +common_miniroot_rpms="SysVinit \ + basesystem \ + bash \ + beecrypt \ + bzip2-libs \ + coreutils \ + elfutils \ + elfutils-libelf \ + filesystem \ + glibc \ + glibc-common \ + gpm \ + initscripts \ + iptables \ + iptables-ipv6 \ + kernel-utils \ + laus-libs \ + libacl \ + libattr \ + libgcc \ + libtermcap \ + ncurses \ + pam \ + popt \ + rpm \ + rpm-libs \ + setup \ + termcap \ + zlib" + +# +# This starts a listing of RPMs comprising a variety of install package options +# for a distribution. +# +# The supported package clusters are: +# +# + core +# + server +# + desktop +# + developer +# + system +# +# The RPMs needed to install each cluster are listed in the shell variable +# +# distro_<level>_rpms +# +# This file provides "common_<level>_rpms", which are lists of the packages +# in each cluster that are common to all distros. +# +# The package names are listed alphabetically for readability. rpm will +# reorder the list to ensure that each package's dependencies are installed +# before it is. +# +# Note: Since the distro_install script uses a regular expression to expand +# RPM package names to filenames, there may be some tweaking required to +# guarantee a unique match between a package name and a corresponding RPM +# file on the install media. +# +# One such example below is the package "XFree86-4." The official name of +# the package is "XFree86," but the regular expression in the script +# matches that package name to the XFree86-100dpi-fonts and +# XFree86-75dpi-fonts package RPMs in addition to the proper XFree86 RPM. +# Therefore the "XFree86" package name was modified to be "XFree86-4", +# which does result in a unique package name to RPM file match. 
+# +common_core_rpms="GConf2 \ + Glide3 \ + ORBit \ + ORBit2 \ + XFree86-Mesa-libGL \ + XFree86-Mesa-libGLU \ + XFree86-libs \ + XFree86-libs-data \ + Xaw3d \ + ash \ + at \ + atk \ + audiofile \ + autofs \ + bc \ + binutils \ + bonobo-activation \ + bzip2 \ + chkconfig \ + compat-pwdb \ + cpio \ + cpp \ + cracklib \ + cracklib-dicts \ + crontabs \ + cups-libs \ + cyrus-sasl \ + cyrus-sasl-md5 \ + db4 \ + desktop-file-utils \ + dev \ + diffutils \ + diskdumputils \ + e2fsprogs \ + ed \ + ethtool \ + expat \ + file \ + findutils \ + finger \ + fontconfig \ + freetype \ + ftp \ + gail \ + gawk \ + gdbm \ + gdk-pixbuf \ + gettext \ + glib \ + glib2 \ + glibc-headers \ + glibc-kernheaders \ + gmp \ + gnupg \ + grep \ + groff \ + gtk+ \ + gtk2 \ + gzip \ + hesiod \ + hwdata \ + indexhtml \ + info \ + iproute \ + iputils \ + kernel \ + kernel-BOOT \ + krb5-libs \ + krb5-workstation \ + kudzu \ + laus \ + less \ + libaio \ + libart_lgpl \ + libbonobo \ + libcap \ + libgcj \ + libgcj-ssa \ + libglade2 \ + libgnomecanvas \ + libjpeg \ + libmng \ + libogg \ + libpng \ + libpng10 \ + libstdc++ \ + libtiff \ + libtool-libs \ + libungif \ + libusb \ + libuser \ + libvorbis \ + libwnck \ + libxml \ + libxml2 \ + libxml2-python \ + libxslt \ + linc \ + lockdev \ + logrotate \ + losetup \ + lsof \ + lvm \ + lynx \ + m4 \ + mailcap \ + make \ + man \ + man-pages \ + mingetty \ + mkinitrd \ + mkisofs \ + mktemp \ + modutils \ + mount \ + mtools \ + nc \ + net-snmp \ + net-snmp-libs \ + net-tools \ + netdump \ + newt \ + nfs-utils \ + nscd \ + nss_db \ + nss_ldap \ + ntp \ + ntsysv \ + openldap \ + openssh \ + openssh-clients \ + openssh-server \ + openssl \ + pango \ + passwd \ + patch \ + pax \ + pcre \ + pdksh \ + perl \ + perl-CGI \ + perl-DateManip \ + perl-Filter \ + perl-HTML-Parser \ + perl-HTML-Tagset \ + perl-Parse-Yapp \ + perl-URI \ + perl-XML-Dumper \ + perl-XML-Encoding \ + perl-XML-Grove \ + perl-XML-Parser \ + perl-XML-Twig \ + perl-libwww-perl \ + perl-libxml-enno \ 
+ perl-libxml-perl \ + portmap \ + procmail \ + procps \ + psacct \ + psmisc \ + pspell \ + pygtk2 \ + pygtk2-libglade \ + python \ + pyxf86config \ + readline \ + redhat-logos \ + redhat-menus \ + rhpl \ + rpm-python \ + rpmdb-redhat \ + rsh \ + rsync \ + rusers \ + rwho \ + sed \ + setarch \ + sgml-common \ + shadow-utils \ + slang \ + startup-notification \ + sudo \ + sysklogd \ + syslinux \ + tar \ + tcl \ + tcp_wrappers \ + tcsh \ + telnet \ + time \ + traceroute \ + ttmkfdir \ + tzdata \ + units \ + unix2dos \ + unzip \ + usermode \ + utempter \ + util-linux \ + vim-common \ + vim-minimal \ + vixie-cron \ + wget \ + which \ + words \ + xinetd \ + xml-common \ + yp-tools \ + ypbind \ + zip" + +common_server_rpms="$common_core_rpms \ + 4Suite \ + MyODBC \ + MySQL-python \ + Omni \ + Omni-foomatic \ + PyXML \ + VFlib2 \ + XFree86-4 \ + XFree86-base-fonts \ + XFree86-font-utils \ + XFree86-truetype-fonts \ + XFree86-xauth \ + XFree86-xdm \ + XFree86-xfs \ + acl \ + alchemist \ + amanda \ + amanda-server \ + arts \ + aspell \ + aspell-config \ + at-spi \ + authd \ + bcel \ + bind \ + bind-chroot \ + bind-libs \ + bind-utils \ + bitmap-fonts \ + caching-nameserver \ + chkfontpath \ + commons-beanutils \ + commons-collections \ + commons-digester \ + commons-logging \ + commons-modeler \ + compat-db \ + compat-libstdc++ \ + crypto-utils \ + cup-v10k \ + cups \ + curl \ + cyrus-sasl-gssapi \ + cyrus-sasl-plain \ + dhcp \ + distcache \ + distcache-devel \ + esound \ + expect \ + fam \ + finger-server \ + foomatic \ + freeradius \ + gd \ + ghostscript \ + ghostscript-fonts \ + gimp-print \ + gnome-libs \ + gnome-mime-data \ + gnome-python2 \ + gnome-python2-bonobo \ + gnome-python2-canvas \ + gnome-python2-gtkhtml2 \ + gnome-vfs2 \ + gnuplot \ + gtkhtml2 \ + htmlview \ + httpd \ + hwcrypto \ + imap \ + imap-utils \ + imlib \ + inews \ + inn \ + jakarta-regexp \ + krb5-server \ + krbafs \ + libIDL \ + libbonoboui \ + libdbi \ + libdbi-dbd-mysql \ + libgnome \ + 
libgnomeprint22 \ + libgnomeprintui22 \ + libgnomeui \ + libgsf \ + libole2 \ + logwatch \ + mailman \ + mailx \ + mod_auth_mysql \ + mod_auth_pgsql \ + mod_authz_ldap \ + mod_perl \ + mod_python \ + mod_ssl \ + mpage \ + mtr \ + mx \ + mx4j \ + mysql \ + mysql-bench \ + mysql-devel \ + net-snmp-utils \ + netdump-server \ + newt-perl \ + openldap-servers \ + openssl-perl \ + pam_krb5 \ + perl-DBD-MySQL \ + perl-DBD-Pg \ + perl-DBI \ + perl-DB_File \ + perl-Digest-HMAC \ + perl-Digest-SHA1 \ + perl-Net-DNS \ + perl-Time-HiRes \ + php \ + php-imap \ + php-ldap \ + php-mysql \ + php-odbc \ + php-pgsql \ + pnm2ppa \ + postfix \ + postgresql-odbc \ + pxe \ + pyorbit \ + qt \ + qt-MySQL \ + qt-ODBC \ + quagga \ + radvd \ + rdist \ + redhat-config-bind \ + redhat-config-httpd \ + redhat-config-printer \ + redhat-config-printer-gui \ + redhat-config-samba \ + redhat-config-securitylevel \ + redhat-config-securitylevel-tui \ + redhat-config-services \ + redhat-java-rpm-scripts \ + redhat-switch-mail \ + redhat-switch-mail-gnome \ + rh-postgresql \ + rh-postgresql-contrib \ + rh-postgresql-docs \ + rh-postgresql-jdbc \ + rh-postgresql-libs \ + rh-postgresql-python \ + rh-postgresql-server \ + rh-postgresql-tcl \ + rh-postgresql-test \ + rhdb-utils \ + rsh-server \ + rusers-server \ + samba \ + samba-client \ + samba-common \ + samba-swat \ + sendmail \ + sendmail-cf \ + slocate \ + spamassassin \ + squid \ + squirrelmail \ + switchdesk \ + sysreport \ + telnet-server \ + tftp-server \ + tmpwatch \ + tux \ + unixODBC \ + unixODBC-kde \ + urw-fonts \ + usermode-gtk \ + vsftpd \ + webalizer \ + xalan-j \ + xerces-j \ + xinitrc \ + ypserv" + +common_desktop_rpms="$common_server_rpms \ + Canna-libs \ + FreeWnn-libs \ + Gtk-Perl \ + ImageMagick \ + ImageMagick-perl \ + SDL \ + XFree86-100dpi-fonts \ + XFree86-75dpi-fonts \ + XFree86-Xnest \ + XFree86-Xvfb \ + XFree86-doc \ + XFree86-tools \ + XFree86-twm \ + a2ps \ + am-utils \ + amanda-client \ + anacron \ + apel-xemacs \ + aumix 
\ + authconfig \ + authconfig-gtk \ + autorun \ + cdparanoia-alpha9.8 \ + cdparanoia-libs-alpha9.8 \ + cdrecord \ + cipe \ + ckermit \ + comps-extras \ + control-center \ + ctags \ + desktop-backgrounds-basic \ + desktop-printing \ + dialog \ + docbook-dtds \ + docbook-style-dsssl \ + docbook-style-xsl \ + docbook-utils \ + docbook-utils-pdf \ + dtach \ + dvd+rw-tools \ + dvdrecord \ + eel2 \ + elinks \ + enscript \ + eog \ + evolution \ + evolution-connector \ + fetchmail \ + file-roller \ + firstboot \ + fontilus \ + gaim \ + gconf-editor \ + gdm \ + gedit \ + gftp \ + ggv \ + gimp \ + gimp-data-extras \ + gimp-perl \ + gimp-print-cups \ + gimp-print-plugin \ + gimp-print-utils \ + gnome-applets \ + gnome-audio \ + gnome-desktop \ + gnome-games \ + gnome-icon-theme \ + gnome-media \ + gnome-panel \ + gnome-pilot \ + gnome-python2-applet \ + gnome-session \ + gnome-spell \ + gnome-system-monitor \ + gnome-terminal \ + gnome-themes \ + gnome-user-docs \ + gnome-utils \ + gnome-vfs2-extras \ + gnomemeeting \ + gphoto2 \ + gsl \ + gstreamer \ + gstreamer-plugins \ + gstreamer-tools \ + gtk-engines \ + gtk2-engines \ + gtkam \ + gtkam-gimp \ + gtkglarea \ + gtkhtml3 \ + guile \ + hotplug \ + hpijs \ + hpoj \ + htdig \ + hwbrowser \ + intltool \ + itcl \ + jadetex \ + kdeaddons \ + kdeartwork \ + kdebase \ + kdegames \ + kdegraphics \ + kdelibs \ + kdemultimedia \ + kdenetwork \ + kdepim \ + kdeutils \ + lftp \ + libao \ + libf2c \ + libgail-gnome \ + libgal2 \ + libghttp \ + libglade \ + libgtop2 \ + libmrproject \ + libpcap \ + libraw1394 \ + librsvg2 \ + libsoup \ + linuxdoc-tools \ + lm_sensors \ + magicdev \ + metacity \ + mikmod \ + mrproject \ + mrtg \ + mutt \ + nautilus \ + nautilus-cd-burner \ + nautilus-media \ + netpbm \ + netpbm-progs \ + open \ + openh323 \ + openjade \ + openldap-clients \ + openmotif \ + openmotif21 \ + openoffice.org \ + openoffice.org-i18n \ + openoffice.org-libs \ + openssh-askpass \ + openssh-askpass-gnome \ + parted \ + passivetex 
\ + perl-PDL \ + perl-SGMLSpm \ + perl-suidperl \ + pilot-link \ + printman \ + psutils \ + pwlib \ + pyOpenSSL \ + python-optik \ + redhat-artwork \ + redhat-config-date \ + redhat-config-keyboard \ + redhat-config-kickstart \ + redhat-config-language \ + redhat-config-mouse \ + redhat-config-network \ + redhat-config-network-tui \ + redhat-config-nfs \ + redhat-config-packages \ + redhat-config-proc \ + redhat-config-rootpassword \ + redhat-config-soundcard \ + redhat-config-users \ + redhat-config-xfree86 \ + redhat-logviewer \ + rhn-applet \ + rhnlib \ + sane-backends \ + sane-frontends \ + screen \ + scrollkeeper \ + shapecfg \ + sharutils \ + sox \ + star \ + switchdesk-gnome \ + switchdesk-kde \ + sysstat \ + talk \ + tclx \ + tetex \ + tetex-afm \ + tetex-dvips \ + tetex-fonts \ + tetex-latex \ + tetex-xdvi \ + tix \ + tk \ + tkinter \ + transfig \ + ttfprint \ + umb-scheme \ + up2date \ + up2date-gnome \ + usbutils \ + uucp \ + vim-enhanced \ + vlock \ + vnc \ + vnc-server \ + vorbis-tools \ + vte \ + w3c-libwww \ + xchat \ + xdelta \ + xemacs \ + xemacs-el \ + xemacs-info \ + xfig \ + xhtml1-dtds \ + xloadimage \ + xmltex \ + xmlto \ + xmms \ + xpdf \ + xsane \ + xsane-gimp \ + xscreensaver \ + xsri \ + xterm \ + yelp \ + zsh" + +common_developer_rpms="$common_desktop_rpms \ + ElectricFence \ + GConf2-devel \ + ORBit-devel \ + ORBit2-devel \ + SDL-devel \ + XFree86-devel \ + ant \ + ant-libs \ + arts-devel \ + at-spi-devel \ + atk-devel \ + audiofile-devel \ + autoconf \ + autoconf213 \ + automake \ + automake14 \ + automake15 \ + bison \ + blas \ + bonobo-activation-devel \ + bug-buddy \ + byacc \ + cdecl \ + cproto \ + crash \ + cscope \ + cups-devel \ + cvs \ + ddd \ + dejagnu \ + dev86 \ + diffstat \ + doxygen \ + eel2-devel \ + emacs \ + emacs-el \ + emacs-leim \ + esound-devel \ + flex \ + fontconfig-devel \ + freetype-devel \ + gail-devel \ + gcc \ + gcc-c++ \ + gcc-c++-ssa \ + gcc-g77 \ + gcc-g77-ssa \ + gcc-gnat \ + gcc-java \ + gcc-java-ssa \ + 
gcc-objc \ + gcc-objc-ssa \ + gcc-ssa \ + gd-devel \ + gdb \ + gdk-pixbuf-devel \ + gdk-pixbuf-gnome \ + glade2 \ + glib-devel \ + glib2-devel \ + glibc-devel \ + glibc-profile \ + glibc-utils \ + gnome-desktop-devel \ + gnome-libs-devel \ + gnome-vfs2-devel \ + gperf \ + gtk+-devel \ + gtk-doc \ + gtk2-devel \ + gtkhtml2-devel \ + httpd-devel \ + im-sdk \ + imlib-devel \ + indent \ + jaf \ + javamail \ + joe \ + jpackage-utils \ + junit \ + kdebase-devel \ + kdegraphics-devel \ + kdelibs-devel \ + kdenetwork-devel \ + kdepim-devel \ + kdesdk \ + kdesdk-devel \ + kdeutils-devel \ + kdevelop \ + kdoc \ + kernel-doc \ + kernel-source \ + lam \ + lapack \ + lha \ + libIDL-devel \ + libacl-devel \ + libart_lgpl-devel \ + libattr-devel \ + libbonobo-devel \ + libbonoboui-devel \ + libgcc-ssa \ + libgcj-devel \ + libgcj-ssa-devel \ + libglade2-devel \ + libgnat \ + libgnome-devel \ + libgnomecanvas-devel \ + libgnomeprint22-devel \ + libgnomeprintui22-devel \ + libgnomeui-devel \ + libjpeg-devel \ + libmng-devel \ + libmudflap \ + libmudflap-devel \ + libobjc \ + libole2-devel \ + libpng-devel \ + librsvg2-devel \ + libstdc++-devel \ + libstdc++-ssa \ + libstdc++-ssa-devel \ + libtiff-devel \ + libtool \ + libungif-devel \ + libxml2-devel \ + libxslt-devel \ + linc-devel \ + ltrace \ + memprof \ + nasm \ + ncurses-devel \ + nedit \ + netpbm-devel \ + openmotif-devel \ + oprofile \ + pango-devel \ + patchutils \ + pcre-devel \ + perl-CPAN \ + perl-Crypt-SSLeay \ + pilot-link-devel \ + pkgconfig \ + pstack \ + pygtk2-devel \ + python-devel \ + python-tools \ + qt-designer \ + qt-devel \ + rcs \ + redhat-rpm-config \ + rpm-build \ + ruby \ + ruby-libs \ + ruby-mode \ + sane-backends-devel \ + sip \ + sip-devel \ + splint \ + startup-notification-devel \ + strace \ + swig \ + texinfo \ + tora \ + vim-X11 \ + vte-devel \ + zlib-devel" + +common_all_rpms="$common_developer_rpms \ + Canna + FreeWnn \ + ImageMagick-c++-5.5.6 \ + Wnn6-SDK \ + ami \ + amtu \ + anaconda \ + 
anaconda-help \ + anaconda-images \ + anaconda-product \ + anaconda-runtime \ + apmd \ + arptables_jf \ + attr \ + bg5ps \ + bitmap-fonts-cjk \ + bogl \ + bogl-bterm \ + bootparamd \ + booty \ + bridge-utils \ + busybox \ + busybox-anaconda \ + compat-gcc \ + compat-gcc-c++ \ + compat-glibc-7.x \ + compat-libstdc++-devel \ + compat-slang \ + db4-java \ + db4-utils \ + dbskkd-cdb \ + desktop-backgrounds-extra \ + devlabel \ + dhclient \ + dietlibc \ + dos2unix \ + dosfstools \ + dump \ + eject \ + emacspeak \ + ethereal \ + ethereal-gnome \ + fbset \ + festival \ + grub \ + h2ps \ + hdparm \ + ipsec-tools \ + irda-utils \ + iscsi-initiator-utils \ + isdn4k-utils \ + jfsutils \ + jisksp14 \ + jisksp16 \ + jwhois \ + kappa20 \ + kbd \ + kernel-pcmcia-cs \ + knm_new \ + kon2 \ + kon2-fonts \ + libtabe \ + libwvstreams \ + lilo \ + linuxwacom \ + lslk \ + mdadm \ + mgetty \ + minicom \ + mkbootdisk \ + mt-st \ + mtx \ + nano \ + ncompress \ + net-snmp-perl \ + netconfig \ + nhpf \ + nmap \ + octave \ + openssl096b \ + pam_passwdqc \ + pam_smb \ + pinfo \ + ppp \ + prelink \ + psgml \ + pvm \ + quota \ + rdate \ + rdesktop \ + redhat-config-netboot \ + rhgb \ + rmt \ + rootfiles \ + rp-pppoe \ + schedutils \ + setserial \ + setuptool \ + sg3_utils \ + skkdic \ + skkinput + specspo \ + stunnel \ + tcpdump \ + tftp \ + tn5250 \ + tsclient \ + vconfig \ + wireless-tools \ + wvdial \ + x3270 \ + x3270-text \ + x3270-x11 \ + xcin" diff --git a/usr/src/lib/brand/lx/zone/lx_distro_install.ksh b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh new file mode 100644 index 0000000000..7c4e89dd25 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh @@ -0,0 +1,2772 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#

#
# This script is called from /usr/lib/brand/lx/lx_install.
#
# options passed down from lx_install:
#	-z $ZONENAME
#	-r $LINUX_ROOT
#
# options passed down from zoneadm -z <zone-name> install
#	-d <Linux-archives-dir>
#	[core | server | desktop | development | all]
#
# The desktop cluster will be installed by default.
#

# Only run executables found in /bin, /usr/bin and /usr/sbin
export PATH=/bin:/usr/bin:/usr/sbin

# Message catalogue used by the gettext(1) calls below
export TEXTDOMAIN="SUNW_OST_OSCMD"

#
# log <text...>
#
# Echo the arguments to file descriptor 2, but only when the shell variable
# "logfile" is set.  Returns non-zero when nothing was written.
#
log()
{
	[[ -z $logfile ]] && return 1
	echo "$@" >&2
}

#
# screenlog <printf format> [<printf arguments...>]
#
# Format the arguments with printf(1) and write the result, followed by a
# newline, to standard output; when "logfile" is set, write the same text to
# file descriptor 2 as well.
#
screenlog()
{
	typeset fmt="$1"
	shift

	printf "$fmt\n" "$@"

	[[ -z $logfile ]] && return 1
	printf "$fmt\n" "$@" >&2
}

#
# verbose <text...>
#
# Echo the arguments to standard output when "verbose_mode" is set, and also
# to file descriptor 2 when both "verbose_mode" and "logfile" are set.
#
verbose()
{
	if [[ -n $verbose_mode ]]; then
		echo "$@"
	fi

	[[ -n $logfile && -n $verbose_mode ]] && echo "$@" >&2
}

#
# verboselog <text...>
#
# Echo the arguments to standard output only when "verbose_mode" is set, but
# send them to file descriptor 2 whenever "logfile" is set.
#
verboselog()
{
	if [[ -n $verbose_mode ]]; then
		echo "$@"
	fi

	[[ -z $logfile ]] && return 1
	echo "$@" >&2
}

#
# Localized message templates.  Each is a printf(1) format string handed to
# screenlog() (or printf) by the functions in this script.
#
bad_rpmdir=$(gettext "'%s' is not a valid RPM directory!")

mb_req=$(gettext "(%s MB required, %s MB available)")
no_space=$(gettext "Not enough free space available in '%s'")

inst_clust=$(gettext "Installing cluster '%s'")
unknown_clust=$(gettext "ERROR: Unknown cluster name: '%s'")

unknown_media=$(gettext "Unknown or unreadable media loaded in %s")

eject_fail=$(gettext "Attempt to eject '%s' failed.")

lofi_failed=$(gettext "Attempt to add '%s' as lofi device FAILED.")
lofs_failed=$(gettext "Attempt to lofs mount '%s' on '%s' FAILED.")

media_spec=$(gettext "the provided media (%s)")

distro_mediafail=$(gettext \
    "Attempt to determine Linux distribution from\n %s FAILED.")

mini_bootfail=$(gettext "Attempt to boot miniroot for zone '%s' FAILED.")
mini_copyfail=$(gettext "Attempt to copy miniroot for zone '%s' FAILED.")
mini_initfail=$(gettext "Attempt to initialize miniroot for zone '%s' FAILED.")
mini_instfail=$(gettext "Attempt to install RPM '%s' to miniroot FAILED.")
mini_mediafail=$(gettext "Install of zone '%s' miniroot from\n %s FAILED.")
mini_setfail=$(gettext "Attempt to setup miniroot for zone '%s' FAILED.")

mini_mntfsfail=$(gettext \
    "Attempt to mount miniroot filesystems for zone '%s' FAILED.")

rpm_initfail=$(gettext \
    "Attempt to initialize RPM database for zone '%s' FAILED.")

symlink_failed=$(gettext "Attempt to symbolically link '%s' to '%s' FAILED.")

discinfo_nofile=$(gettext "ERROR: Discinfo file '%s' not found!")
discinfo_notreadable=$(gettext "ERROR: Discinfo file '%s': not readable!")
discinfo_wrongarch=$(gettext \
    "ERROR: '%s': disc architecture is '%s'; install requires 'i386'!")

wrong_serial=$(gettext "Incorrect serial number found on provided %s.")
wrong_ser_expect=$(gettext " (found #%s, expected #%s)")

wrong_cd=$(gettext "Incorrect CD inserted (found %s, wanted %s)")

zone_initrootfail=$(gettext \
    "Attempt to initialize root filesystem for zone '%s' FAILED.")

zone_haltfail=$(gettext "Unable to halt zone '%s'!")
zone_instfail=$(gettext "Install of zone '%s' from '%s' FAILED '%s'.")
zone_mediafail=$(gettext "Install of zone '%s' from\n %s FAILED.")

zone_rootfail=$(gettext \
    "ERROR: The specified zone root directory '%s' could not be created.")
zone_rootsub=$(gettext \
    "ERROR: The specified zone root subdirectory '%s' does not exist.")

mk_mntfail=$(gettext "Could not create the mount directory '%s'")
mountfail=$(gettext "Mount of '%s' on '%s' FAILED.")

insert_discmsg=$(gettext \
    "Please insert %s, or a\n %s DVD in the removable media")

mount_proper_iso1=$(gettext "Please mount the ISO for %s or a")
mount_proper_iso2=$(gettext "%s DVD on device '%s'")

silent_nodisc=$(gettext "ERROR: Cannot install from CDs in silent mode.")
silent_nolofi=$(gettext \
    "ERROR: Cannot install from lofi-based CD ISOs in silent mode.")

install_msg=$(gettext "Installing zone '%s' from\n %s.")
install_ndiscs=$(gettext \
    "You will need CDs 1 - %s (or the equivalent DVD) to")
install_nisos=$(gettext \
    "You will need ISO images representing CDs 1 - %s (or the equivalent")

locate_npkgs=$(gettext "Attempting to locate %s packages...")

install_one_rpm=$(gettext "Installing 1 %spackage.")
install_nrpms_few=$(gettext \
    "Installing %s %spackages; this may take a few minutes...")
install_nrpms_several=$(gettext \
    "Installing %s %spackages; this may take several minutes...")

install_longwait=$(gettext \
    "NOTE: There may be a long delay before you see further output.")

install_defmkfail=$(gettext "Could not create the temporary directory '%s'")
install_defcpfail=$(gettext "Could not make a local copy of deferred RPM '%s'")
install_dist=$(gettext "Installing distribution '%s'...")
install_zonefail=$(gettext "Attempt to install zone '%s' FAILED.")

no_distropath=$(gettext "ERROR: Distribution path '%s' doesn't exist.")

install_done=$(gettext "Installation of %s to zone\n '%s' completed %s.")
install_failed=$(gettext "Installation of %s to zone\n '%s' FAILED %s.")

eject_final_msg=$(gettext \
    "Would you like the system to eject the %sinstall %s when")
eject_final_prompt=$(gettext "installation of '%s' is complete? (%s)")
eject_final_status=$(gettext "The %sinstall %s %s be ejected.")

#
# get_mountdev <mounted directory>
#
# Look up the device backing the file system that contains the given
# directory and leave its name in the shell variable "mount_dev".
#
# Returns 0 on success.  On failure returns 1 and leaves "mount_dev" unset.
#
get_mountdev()
{
	typeset mount_dir="$1"
	typeset device
	unset mount_dev

	# Keep only the df(1M) line whose first column is an absolute path.
	device=$( { df -k "$mount_dir" | egrep "^/" ; } 2>/dev/null ) ||
	    return 1

	# The device name is the first column of that line.
	mount_dev=$(echo $device | awk -e '{print $1}' 2>/dev/null)

	# Anything that is not an absolute path is treated as a failure.
	if [[ $mount_dev = /* ]]; then
		return 0
	fi

	unset mount_dev
	return 1
}

#
# get_mountdir <block device>
#
# Look up the directory the given block device is mounted on and leave it in
# the shell variable "mount_dir".
#
# Returns 0 on success.  On failure (including when the argument is not a
# block device) returns 1 and leaves "mount_dir" unset.
#
get_mountdir()
{
	typeset mount_dev="$1"
	typeset dir
	unset mount_dir

	if [[ ! -b "$mount_dev" ]]; then
		return 1
	fi

	# Keep only the df(1M) line whose first column is an absolute path.
	dir=$( { df -k "$mount_dev" | egrep "^/" ; } 2>/dev/null ) ||
	    return 1

	# The mount point is the sixth column of that line.
	mount_dir=$(echo $dir | awk -e '{print $6}' 2>/dev/null)

	if [[ $mount_dir = /* ]]; then
		return 0
	fi

	unset mount_dir
	return 1
}

#
# Check the free disk space of the passed filesystem against the passed
# argument.
#
# Returns 0 on success, 1 on failure.
#
# Arguments:
#
#	Argument 1: Directory whose containing file system is checked
#	Argument 2: Required free space, in MB
#
# On failure a "not enough space" message is printed via screenlog().
#
check_mbfree()
{
	typeset dir="$1"
	typeset mb_required=$2
	typeset mbfree

	#
	# Fetch the df(1M) data line for the file system containing "$dir".
	#
	# The assignment is kept separate from the typeset above so that the
	# "|| return 1" tests the exit status of the pipeline rather than the
	# exit status of typeset.
	#
	mbfree=$( { LC_ALL=C df -k "$dir" | \
	    egrep -v Filesystem ; } 2>/dev/null ) || return 1

	# The fourth column is the available space in KB; convert it to MB.
	mbfree=$(echo $mbfree | awk -e '{print $4}' 2>/dev/null)

	((mbfree /= 1024))
	if ((mbfree < mb_required)); then
		# Report the directory that was checked and what was found.
		screenlog "$no_space" "$dir"
		screenlog "$mb_req" "$mb_required" "$mbfree"
		return 1
	fi
	return 0
}

#
# Find packages by attempting to expand passed RPM names to their full filenames
# in the passed RPM directory.
#
# Arguments:
#
#	Argument 1: Path to mounted install media
#	Arguments [2 - n]: RPM names to process
#
# The expanded filenames are returned in the shell array "rpms_found"; names
# for which no file could be found are returned in the shell array
# "rpms_left".
#
# For example:
#
#	find_packages /mnt/iso dev kernel tetex redhat-menus
#
# would return something like:
#
#	rpms_found[0]: dev-3.3.12.3-1.centos.0.i386.rpm
#	rpms_found[1]: kernel-2.4.21-32.EL.i586.rpm
#	rpms_found[2]: tetex-1.0.7-67.7.i386.rpm
#	rpms_found[3]: redhat-menus-0.39-1.noarch.rpm
#
# The shell variable "archs" is filled in as a side effect if it was empty.
#
# The routine returns 0 on success, 1 on an error.
#
find_packages()
{
	typeset found=0
	typeset left=0

	typeset rpmdir="$1/$rd_rpmdir"
	typeset curdir=${PWD:=$(pwd)}

	typeset arch
	typeset procinfo
	typeset rpmglob
	typeset rpmfile

	unset rpms_found
	unset rpms_left

	shift

	#
	# If we can't enter the RPM directory, bail out now; otherwise the
	# globbing below would silently run against whatever directory we
	# happen to be in.
	#
	if ! cd "$rpmdir"; then
		screenlog "$bad_rpmdir" "$rpmdir"
		return 1
	fi

	typeset rpmcheck="$(echo *.rpm)"

	# An unexpanded glob means the directory contains no RPMs at all.
	if [[ "$rpmcheck" = "*.rpm" ]]; then
		screenlog "$bad_rpmdir" "$rpmdir"
		cd "$curdir"
		return 1
	fi

	#
	# If the miniroot is booted, and the archs list isn't already set,
	# ask the zone's rpm command for the list of compatible architectures.
	#
	if [[ -n $miniroot_booted && -z $archs ]]; then
		procinfo=$(zlogin "$zonename" /bin/rpm --showrc | \
		    grep "^compatible archs")

		[[ $? -eq 0 ]] &&
		    archs=$(echo $procinfo | sed 's/^compatible archs : //')

		[[ -n $archs ]] &&
		    log "RPM-reported compatible architectures: $archs"
	fi

	#
	# Either the miniroot isn't booted or asking rpm for the information
	# failed for some reason, so make some reasonable assumptions.
	#
	if [[ -z $archs ]]; then
		procinfo=$(LC_ALL=C psrinfo -vp | grep family)

		#
		# Check for additional processor capabilities
		#
		if [[ "$procinfo" = *" family 6 "* ||
		    "$procinfo" = *" family 15 "* ||
		    "$procinfo" = *" family 16 "* ||
		    "$procinfo" = *" family 17 "* ]]; then
			if [[ "$procinfo" = *AuthenticAMD* ]]; then
				#
				# Linux gives "athlon" packages precedence
				# over "i686" packages, so duplicate that
				# here.
				#
				archs="athlon i686"
			else
				archs="i686"
			fi
		fi

		archs="$archs i586 i486 i386 noarch"

		log "Derived compatible architectures: $archs"
	fi

	verboselog "RPM source directory:\n \"$rpmdir\"\n"

	if [[ $# -eq 1 ]]; then
		msg=$(gettext "Attempting to locate 1 package...")
		screenlog "$msg"
	else
		screenlog "$locate_npkgs" "$#"
	fi

	for rpm in "$@"; do
		#
		# Search for the appropriate RPM, using the compatible
		# architecture list contained in "archs" to look for the best
		# match.
		#
		# For example, if "archs" is "i686 i586 i486 i386 noarch" and
		# the rpm is "glibc", the script will look for the files (in
		# order):
		#
		#	glibc[.-][0-9]*.i686.rpm
		#	glibc[.-][0-9]*.i586.rpm
		#	glibc[.-][0-9]*.i486.rpm
		#	glibc[.-][0-9]*.i386.rpm
		#	glibc[.-][0-9]*.noarch.rpm
		#
		# and will stop when it finds the first match.
		#
		# TODO: Once the miniroot is booted, we should verify that
		#	the rpm name has been expanded to "$rpmfile" properly
		#	by comparing "$rpm" and the output of:
		#
		#	zlogin -z <zone> /bin/rpm --qf '%{NAME}' -qp $rpmfile
		#
		for arch in $archs; do
			#
			# Use the filename globbing functionality of ksh's
			# echo command to search for the file we want.
			#
			# If no matching file is found, echo will simply
			# return the passed string.
			#
			rpmglob="$rpm[.-][0-9]*.$arch.rpm"
			rpmfile="$(echo $rpmglob)"

			[[ "$rpmfile" != "$rpmglob" ]] && break

			unset rpmfile
		done

		if [[ -z $rpmfile ]]; then
			rpms_left[$left]="$rpm"
			((left += 1))
		else
			rpms_found[$found]="$rpmfile"
			((found += 1))
		fi
	done

	cd "$curdir"
	log "\"$rpmdir\": matched $found of $# packages."
	log "\"$rpmdir\": $left RPMs remaining."
	return 0
}

#
# Build the rpm list used to install a machine.
#
# The single, optional, argument is the name of the metacluster to install
# (core, desktop, server, developer or all; the "desk", "serv" and "dev"
# prefixes are accepted).  With no argument the desktop cluster is selected.
# An unknown cluster name, or more than one cluster name, is a usage error
# and causes the script to exit with $ZONE_SUBPROC_USAGE.
#
# The shell variable "distro_rpms" is built from "distro_miniroot_rpms" and
# the "distro_<cluster>_rpms" list for the selected cluster; these lists are
# expected to have been read in from a distribution definition file.
#
build_rpm_list()
{
	# Default to a desktop installation
	typeset cluster=desktop
	typeset cnt=0
	typeset pkgs

	for clust in "$@"; do
		((cnt += 1))
		case $clust in
		core)	cluster=core ;;
		desk*)	cluster=desktop ;;
		serv*)	cluster=server ;;
		dev*)	cluster=developer ;;
		all)	cluster=all
			break;;
		*)	screenlog "$unknown_clust" "$clust"
			exit $ZONE_SUBPROC_USAGE ;;
		esac
	done

	if [ $cnt -gt 1 ]; then
		msg=$(gettext "Too many install clusters specified")
		screenlog "$msg"
		exit $ZONE_SUBPROC_USAGE
	fi

	screenlog "$inst_clust" $cluster

	case $cluster in
	core)		distro_rpms=$distro_core_rpms ;;
	desktop)	distro_rpms=$distro_desktop_rpms ;;
	server)		distro_rpms=$distro_server_rpms ;;
	developer)	distro_rpms=$distro_developer_rpms ;;
	all)		distro_rpms=$distro_all_rpms ;;
	esac

	# The RPMs in the miniroot must all be installed properly as well
	distro_rpms="$distro_miniroot_rpms $distro_rpms"
}

#
# Install the "miniroot" minimal Linux environment that is booted single-user
# to complete the install.
#
# This works by feeding the RPM list needed for the installation one
# by one to rpm2cpio(1).
#
# Usage:
#	install_miniroot <mounted media dir> <names of RPMS to install>
#
# Returns 0 on success, 1 as soon as any RPM fails to unpack.
#
install_miniroot()
{
	typeset mediadir="$1"
	typeset rpm

	shift

	#
	# There's a quirk in our version of ksh that sometimes resets the
	# trap handler for the shell.  Since RPM operations will be the
	# longest part of any given install, make sure that an interrupt while
	# the command is running will bring the miniroot down and clean up
	# the interrupted install.
	#
	trap trap_cleanup INT

	if [[ $# -eq 1 ]]; then
		msg=$(gettext "Installing %s miniroot package...")
	else
		msg=$(gettext "Installing %s miniroot packages...")
	fi

	screenlog "\n$msg" "$#"

	for rpm in "$@"; do
		verboselog "\nInstalling \"$rpm\" to miniroot at\n" \
		    " \"$zoneroot\"..."

		# Unpack the RPM payload directly into "$rootdir".
		rpm2cpio "$mediadir/$rd_rpmdir/$rpm" | \
		    ( cd "$rootdir" && cpio -idu ) 1>&2

		if [[ $? -ne 0 ]]; then
			screenlog "$mini_instfail" "$rpm"
			return 1
		fi
	done

	screenlog ""
	return 0
}

#
# Install the zone from the mounted disc image by feeding a list of RPMs to
# install from this image to RPM running on the zone via zlogin(1).
#
# Usage:
#	install_zone <path to mounted install media> [<names of RPMS to install>]
#
# If the caller doesn't supply a list of RPMs to install, we install any
# we previously stashed away in the deferred RPMs directory.
#
# RPMs named in the shell variable "deferred_rpms" are copied into the
# deferred RPMs directory and recorded in the array "deferred_saved" instead
# of being installed on this pass.
#
# Returns 0 on success, 1 on failure.
#
install_zone()
{
	#
	# Convert the passed install media pathname to a zone-relative path
	# by stripping $rootdir from the head of the path.
	#
	typeset zonerpmdir="${1##$rootdir}/$rd_rpmdir"

	typeset defdir="$rootdir/var/lx_install/deferred_rpms"
	typeset mounted_root="$1"
	typeset rpmopts="-i"

	typeset defer
	typeset deferred_found
	typeset install_rpms
	typeset nrpms
	typeset rpm
	typeset rpmerr

	shift

	#
	# If the caller provided a list of RPMs, determine which of them
	# should be installed now, and which should be deferred until
	# later.
	#
	if [[ $# -gt 0 ]]; then
		if [[ -n $deferred_rpms ]]; then
			if [[ ! -d $defdir ]] && ! mkdir -p "$defdir"; then
				screenlog "$install_defmkfail" "$defdir"
				return 1
			fi

			msg=$(gettext "Checking for deferred packages...")
			screenlog "$msg"

			find_packages "$mounted_root" $deferred_rpms
			deferred_found="${rpms_found[@]}"
			numdeferred=${#rpms_found[@]}
		else
			deferred_found=""
		fi

		install_rpms="$@"
		nrpms=$#

		#
		# If this distro has any deferred RPMs, we want to simply
		# copy them into the zone instead of installing them.  We
		# then remove them from the list of RPMs to be installed on
		# this pass.
		#
		# NOTE(review): "$rpm" is used below as a regular expression
		# by both egrep and sed, so characters such as '.' and '+' in
		# a file name are not matched literally -- confirm whether
		# that matters for the deferred package names in use.
		#
		for rpm in $deferred_found; do
			if echo "$install_rpms" | egrep -s "$rpm"; then
				verboselog "Deferring installation of \"$rpm\""

				#
				# Remove the RPM from the install_rpms list
				# and append it to the deferred_saved array.
				#
				# The list is padded with a space on BOTH
				# ends so that the first and last entries
				# can be matched as well.
				#
				install_rpms=$(echo " $install_rpms " |
				    sed "s/ $rpm / /g")

				# remove leading and trailing spaces, if any
				install_rpms=${install_rpms##+( )}
				install_rpms=${install_rpms%%+( )}

				deferred_saved[${#deferred_saved[@]}]="$rpm"

				if ! cp "$mounted_root/$rd_rpmdir/$rpm" \
				    "$defdir"; then
					screenlog "$install_defcpfail" "$rpm"
					return 1
				fi
			fi

			#
			# If we've deferred the installation of EVERYTHING,
			# simply return success
			#
			[[ -z $install_rpms ]] && return 0
		done

		# Separate the deferral messages from what follows.
		[[ -n $deferred_found ]] && verbose ""
	elif [[ -z $deferred_saved ]]; then
		# There are no deferred RPMs to install, so we're done.
		return 0
	else
		# Install the RPMs listed in the deferred_saved array
		install_rpms=${deferred_saved[@]}
		nrpms=${#deferred_saved[@]}
		zonerpmdir=/var/lx_install/deferred_rpms
		defer="deferred "
	fi

	#
	# There's a quirk in our version of ksh that sometimes resets the
	# trap handler for the shell.  Since RPM operations will be the
	# longest part of any given install, make sure that an interrupt while
	# the command is running will bring the miniroot down and clean up
	# the interrupted install.
	#
	trap trap_cleanup INT

	#
	# Print a message depending on how many RPMS we have to install.
	#
	# 25 RPMS seems like a reasonable boundary between when an install may
	# take a "few" or "several" minutes; this may be tuned if needed.
	#
	screenlog ""

	if [[ $nrpms -eq 1 ]]; then
		screenlog "$install_one_rpm" "$defer"
	elif [[ $nrpms -lt 25 ]]; then
		screenlog "$install_nrpms_few" "$nrpms" "$defer"
	else
		screenlog "$install_nrpms_several" "$nrpms" "$defer"

		#
		# For installs of over 600 packages or so, it can take rpm a
		# really, REALLY long time to output anything, even when
		# running in verbose mode.
		#
		# For example, when doing an "all" install from a DVD or DVD
		# ISO, depending on the speed of the optical drive and the
		# speed of the machine's CPU(s), it may be up to TEN MINUTES or
		# MORE before rpm prints out its "Processing..." message even
		# though it is, in fact, processing the entire package list,
		# checking for dependencies (something it is unfortunately
		# entirely silent about.)
		#
		# Since the user might otherwise think the install was hung
		# when running in verbose mode, warn them that it could be
		# quite a while before they see any further output from the
		# installer.
		#
		[[ $nrpms -gt 600 ]] && verbose "$install_longwait"
	fi

	log ""
	log "Installing: $install_rpms"
	log ""
	log "NOTE: Any messages appearing below prefixed with \"warning:\""
	log " and/or that do not cause the installer to abort the"
	log " installation process may safely be ignored."
	log ""

	echo

	# If verbose mode is selected, run rpm in verbose mode as well.
	[[ -n $verbose_mode ]] && rpmopts="-ivh"

	#
	# LX_INSTALL must be defined when running this command in order to
	# enable switches built into various emulated system calls to allow
	# the dev package (which may not actually write to /dev) to function.
	#
	zlogin "$zonename" "( cd "$zonerpmdir" ; LX_INSTALL=1 \
	    /bin/rpm $rpmopts --force --aid --nosignature --root /a \
	    $install_rpms )"

	rpmerr=$?

	if [[ $rpmerr -ne 0 ]]; then
		log ""
		log "Zone rpm install command exited abnormally, code $rpmerr"
		log ""

		screenlog "$zone_instfail" "$zonename" "$zonerpmdir" "$rpmerr"
		return 1
	fi

	log ""
	log "$nrpms package(s) installed."

	return 0
}

#
# Attempt to unmount all file systems passed on the command line
#
# The mount points that could not be unmounted are left, space separated, in
# the shell variable "umount_failures".
#
# Returns 0 if all umounts succeeded, otherwise the number of umount failures
#
umount_list()
{
	typeset failures=0
	typeset mounted

	unset umount_failures

	for mounted in "$@"; do
		if ! umount "$mounted"; then
			umount_failures="$umount_failures $mounted"
			((failures += 1))
		fi
	done

	return $failures
}

#
#
# Set up the lofs mounts required for chroot(1M) to work on a new root
# directory located in /a within a zone.
#
# The mount points created are recorded, space separated, in the shell
# variable "newroot_mounted".  On failure everything mounted so far is
# unmounted again and "newroot_mounted" is left unset.
#
# Returns 0 on success, 1 on failure.
#
newroot_lofimnt()
{
	typeset dev
	typeset mounted
	typeset target

	unset newroot_mounted

	#
	# /usr and /lib get lofs mounted in the zone on /native read-only
	#
	# $zoneroot/dev gets lofs mounted on /native/dev read/write to allow
	# the use of native devices.
	#
	mount -F lofs -r /lib "$rootdir/a/native/lib" || return 1
	newroot_mounted="$rootdir/a/native/lib"

	if ! mount -F lofs -r /usr "$rootdir/a/native/usr"; then
		umount "$rootdir/a/native/lib"
		unset newroot_mounted
		return 1
	fi

	newroot_mounted="$newroot_mounted $rootdir/a/native/usr"

	if ! mount -F lofs "$zoneroot/root/native/dev" \
	    "$rootdir/a/native/dev"; then
		umount_list $newroot_mounted
		unset newroot_mounted
		return 1
	fi

	newroot_mounted="$newroot_mounted $rootdir/a/native/dev"

	#
	# This is a bit ugly; to provide device access within the chrooted
	# environment RPM will use for its install, we will create the same
	# symlinks "$rootdir/dev" contains in the new dev directory, and will
	# lofs mount the balance of "$rootdir/dev" into the same locations in
	# /dev in the new filesystem we're installing to.
	#
	for dev in "$zoneroot"/root/dev/*
	do
		# An unexpanded glob means the directory is empty.
		if [[ "$dev" = "$zoneroot/root/dev/*" ]]; then
			log "ERROR: No files found in $zoneroot/root/dev"
			umount_list $newroot_mounted
			#
			# As in the other failure paths, forget the list so
			# the caller doesn't try to unmount it a second time.
			#
			unset newroot_mounted
			return 1
		fi

		target="$rootdir/a/dev/$(basename "$dev")"

		#
		# If the device file is a symbolic link, create a new link
		# in the target directory with the same source.
		#
		# If the device file is any other file or directory, lofs
		# mount it from the device directory into the target directory.
		#
		if [[ -h $dev ]]; then
			typeset source=$(LC_ALL=C file -h "$dev")

			#
			# Remove extraneous text from the output of file(1) so
			# we're left only with the target path of the symbolic
			# link.
			#
			source="${source##*link to }"

			[[ -a "$target" ]] && /bin/rm -f "$target"

			if ! ln -s "$source" "$target"; then
				screenlog "$symlink_failed" "$source" "$target"
				umount_list $newroot_mounted
				unset newroot_mounted
				return 1
			fi
		else
			# Make sure there is something to mount on top of.
			[[ ! -a "$target" ]] && touch "$target"

			if ! mount -F lofs "$dev" "$target"; then
				screenlog "$lofs_failed" "$dev" "$target"
				umount_list $newroot_mounted
				unset newroot_mounted
				return 1
			fi

			newroot_mounted="$newroot_mounted $target"
		fi

	done

	return 0
}

#
# Replace the root directory of a zone with the duplicate previously created
# in the zone's /a directory.
#
# Returns 0 on success, 1 on failure.
#
replace_miniroot()
{
	#
	# Halting the zone automatically unmounts every file system that was
	# lofs mounted in it, which saves us from unmounting each one by hand.
	#
	if ! zoneadm -z "$zonename" halt; then
		screenlog "$zone_haltfail" "$zonename"
		return 1
	fi

	unset miniroot_booted
	unset newroot_mounted

	[[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
	[[ -d "$zoneroot/oldroot" ]] && rm -rf "$zoneroot/oldroot"

	#
	# Copy the logfile into the new root directory or we'll lose all
	# details of the install.  The destination is the current logfile's
	# pathname with the leading "$rootdir" replaced by "$rootdir/a".
	#
	[[ -n $logfile && -f "$logfile" ]] &&
	    cp "$logfile" "$rootdir/a${logfile##$rootdir}"

	# Swap the new root into place, parking the old one in "oldroot".
	mv -f "$rootdir/a" "$zoneroot/a" || return 1
	mv -f "$rootdir" "$zoneroot/oldroot" || return 1
	mv -f "$zoneroot/a" "$rootdir" || return 1

	#
	# The copy of the logfile now sits where the logfile we WERE writing
	# to used to be.  Reopen it, or the shell keeps writing to the old
	# logfile's inode and everything logged from here on is lost.
	#
	[[ -n $logfile ]] && exec 2>>"$logfile"

	rm -rf "$zoneroot/oldroot"

	#
	# Remove the contents of the /dev directory created by the install.
	#
	# We don't technically need to do this, but the zone infrastructure
	# will mount $zoneroot/dev atop $rootdir/dev anyway, hiding its
	# contents, so we may as well clean up after ourselves.
	#
	# The extra checks are basic paranoia given how dangerous this
	# command is; they are not intended to catch all malicious cases.
	#
	[[ "$rootdir" != "" && "$rootdir" != "/" ]] && rm -rf "$rootdir"/dev/*

	return 0
}

#
# Initialize, duplicate and boot the miniroot, mount the file systems needed
# to install the new root under /a, and initialize its RPM database.
#
# Returns 0 on success, 1 on failure.
#
setup_miniroot()
{
	unset miniroot_booted

	if ! "$cwd/lx_init_zone" "$rootdir" mini; then
		screenlog "$mini_initfail" "$zonename"
		return 1
	fi

	if ! copy_miniroot; then
		screenlog "$mini_copyfail" "$zonename"
		return 1
	fi

	#
	# zoneadm gets upset if the zone root directory is group or world
	# readable or executable, so make sure it isn't before proceeding.
	#
	chmod 0700 "$zoneroot"

	msg=$(gettext "Booting zone miniroot...")
	screenlog "$msg"

	if ! zoneadm -z "$zonename" boot -f; then
		screenlog "$mini_bootfail" "$zonename"
		return 1
	fi

	miniroot_booted=1

	#
	# Now that the miniroot is booted, unset the compatible architecture
	# list that find_packages was using for the miniroot so that it will
	# get the list from rpm for the full install.
	#
	unset archs

	#
	# Mount all the filesystems needed to install the new root
	# directory.
	#
	if ! newroot_lofimnt; then
		screenlog "$mini_mntfsfail" "$zonename"

		if [[ -n $newroot_mounted ]]; then
			umount_list $newroot_mounted
			unset newroot_mounted
		fi
		return 1
	fi

	#
	# Attempt to initialize the RPM database for the new zone
	#
	if ! zlogin "$zonename" /bin/rpm --initdb --root /a; then
		screenlog "$rpm_initfail" "$zonename"
		return 1
	fi

	msg=$(gettext "Miniroot zone setup complete.")
	screenlog "$msg"
	return 0
}

#
# Perform the last cleanup tasks on the newly installed zone, swap the new
# root into place and run the final zone initialization.
#
# Returns 0 on success, 1 on failure.
#
finish_install()
{
	#
	# Note that the zlogin commands aren't checked for errors, as the
	# newly installed zone will still boot even if the commands fail.
	#
	typeset file

	typeset defdir=$rootdir/var/lx_install/deferred_rpms

	msg=$(gettext "Completing installation; this may take a few minutes.")
	screenlog "$msg"

	if [[ -d $defdir ]]; then
		rm -f "$defdir"/*.rpm
		rmdir "$defdir"
	fi

	# Run ldconfig in the new root
	zlogin "$zonename" /usr/sbin/chroot /a \
	    /sbin/ldconfig -f /etc/ld.so.conf

	#
	# Create the /etc/shadow and /etc/gshadow files if they don't already
	# exist
	#
	[[ -a "$rootdir/a/etc/shadow" ]] ||
	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/pwconv

	[[ -a "$rootdir/a/etc/gshadow" ]] ||
	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/grpconv

	#
	# Make sure all init.d and rc[0-6].d links are set up properly.
	#
	for file in $(ls "$rootdir/a/etc/init.d"); do
		zlogin "$zonename" /usr/sbin/chroot /a \
		    /sbin/chkconfig --del "$file" > /dev/null 2>&1

		zlogin "$zonename" /usr/sbin/chroot /a \
		    /sbin/chkconfig --add "$file" > /dev/null 2>&1
	done

	#
	# If the new root could not be swapped into place, "$rootdir" still
	# holds the miniroot; initializing it as the final zone root would
	# report success for a zone that was never installed.
	#
	replace_miniroot || return 1

	rmdir -ps "$media_mntdir"

	if ! "$cwd/lx_init_zone" "$rootdir"; then
		screenlog "$zone_initrootfail" "$zonename"
		return 1
	fi

	return 0
}

#
# Duplicate the installed "miniroot" image in a subdirectory of the base
# directory of the zone.
#
# This is done so that a new root directory can be created that will be used
# as the root of a chrooted directory that RPM running on the zone will install
# into.
#
# Returns 0 on success, 1 on failure.
#
copy_miniroot()
{
	#
	# Create the directory $zoneroot/a if it doesn't already exist
	#
	[[ -d "$zoneroot/a" ]] ||
	    { mkdir -p "$zoneroot/a" || return 1 ; }

	msg=$(gettext "Duplicating miniroot; this may take a few minutes...")
	screenlog "$msg"

	#
	# Duplicate the miniroot to /a, but don't copy over any /etc/rc.d or
	# lxsave_ files.
	#
	# Fail if we cannot enter "$rootdir"; otherwise the find would walk
	# whatever directory we happen to be in.  The exit status of the
	# copy pipeline itself is still not treated as fatal, as before.
	#
	(
		cd "$rootdir" || exit 1
		find . -print | egrep -v "/etc/rc\.d|lxsave_" |
		    cpio -pdm ../a
		exit 0
	) || return 1

	[[ -d "$rootdir/a" ]] && rm -rf "$rootdir/a" 2>/dev/null
	mv -f "$zoneroot/a" "$rootdir/a" || return 1

	return 0
}

#
# Read the first six lines of the .discinfo file from the root of the passed
# disc directory (which should either be a mounted disc or ISO file.)
#
# The read lines will be used to set appropriate shell variables on success:
#
#	rd_line[0]:  Disc Set Serial Number (sets rd_serial)
#	rd_line[1]:  Distribution Release Name (sets rd_release)
#	rd_line[2]:  Distribution Architecture (sets rd_arch)
#	rd_line[3]:  Disc Number$[s] in Distribution (sets rd_cdnum)
#	rd_line[4]:  "base" directory for disc (currently unused)
#	rd_line[5]:  RPM directory for disc (sets rd_rpmdir)
#
# rd_disctype is set to "CD" or "DVD", and rd_pers to the Red Hat personality
# (e.g. "WS") when one can be determined from disc 1.
#
# Returns 0 on success, 1 on failure.
#
read_discinfo()
{
	typeset rd_file="$1/.discinfo"

	unset rd_arch
	unset rd_cdnum
	unset rd_disctype
	unset rd_pers
	unset rd_release
	unset rd_rpmdir
	unset rd_serial

	#
	# If more than one argument was passed to read_discinfo, the second
	# is a flag meaning that we should NOT print a warning message if
	# we don't find a .discinfo file, as this is just a test to see if
	# a distribution ISO is already mounted on the passed mount point.
	#
	if [[ ! -f "$rd_file" ]]; then
		[[ $# -eq 1 ]] &&
		    screenlog "$discinfo_nofile" "$rd_file"
		return 1
	fi

	verbose "Attempting to read \"$rd_file\"..."

	if [[ ! -r "$rd_file" ]]; then
		screenlog "$discinfo_notreadable" "$rd_file"
		return 1
	fi

	typeset rd_line
	typeset linenum=0

	while read -r rd_line[$linenum]; do
		#
		# If .discinfo architecture isn't "i386," fail here as
		# we only support i386 distros at this time.
		#
		if [[ $linenum = 2 && "${rd_line[2]}" != "i386" ]]; then
			screenlog "$discinfo_wrongarch" "$rd_file" \
			    "${rd_line[2]}"
			return 1
		fi

		#
		# We've successfully read the first six lines of .discinfo
		# into $rd_line, so do the appropriate shell variable munging.
		#
		if ((linenum == 5)); then
			rd_serial=${rd_line[0]}
			rd_release=${rd_line[1]}
			rd_arch=${rd_line[2]}

			# CentOS names their releases "final"
			[[ "$rd_release" = "final" ]] && rd_release="CentOS"

			#
			# Line four of the .discinfo file contains either a
			# single disc number for a CD or a comma delimited list
			# representing the CDs contained on a particular DVD.
			#
			rd_cdnum=${rd_line[3]}

			if [[ "$rd_cdnum" = *,* ]]; then
				rd_disctype="DVD"
			else
				rd_disctype="CD"
			fi

			rd_rpmdir=${rd_line[5]}

			#
			# If the specified RPM directory doesn't exist, this is
			# not a valid binary RPM disc (it's most likely a
			# source RPM disc), so don't add it to the list of
			# valid ISO files.
			#
			[[ ! -d "$1/$rd_rpmdir" ]] && return 1

			if [[ "$rd_cdnum" = "1" &&
			    "$rd_release" = "Red Hat"* ]]; then
				typeset rh_glob

				#
				# If this is a Red Hat release, get its
				# personality name from the name of the
				# redhat-release RPM package.
				#
				# Start by looking for the file
				# "redhat-release-*.rpm" in the directory
				# RedHat/RPMS of the ISO we're examining by
				# using ksh's "echo" command to handle
				# filename globbing.
				#
				# If no matching file is found, echo will
				# simply return the passed string.
				#
				rh_glob="$1/RedHat/RPMS/redhat-release-*.rpm"
				rd_pers="$(echo $rh_glob)"

				if [[ "$rd_pers" != "$rh_glob" ]]; then
					#
					# An appropriate file was found, so
					# extract the personality type from the
					# filename.
					#
					# For example, the presence of the file:
					#
					#   redhat-release-3WS-13.5.1.i386.rpm
					#
					# would indicate the ISO either
					# represents a "WS" personality CD or
					# a "WS" installation DVD.
					#
					# Start the extraction by deleting the
					# pathname up to the personality type.
					#
					rh_glob="*/redhat-release-[0-9]"
					rd_pers="${rd_pers##$rh_glob}"

					#
					# Now remove the trailing portion of the
					# pathname to leave only the personality
					# type, such as "WS" or "ES."
					#
					rd_pers="${rd_pers%%-*\.rpm}"
				else
					unset rd_pers
				fi
			fi

			return 0
		fi

		((linenum += 1))
	done < "$rd_file"

	#
	# The file didn't have at least six lines, so indicate that parsing
	# failed.
	#
	return 1
}

#
# Mount install media within the zone.
#
# The media will be mounted at $zoneroot/root/media, either via a loopback
# mount (if it's a managed removable disc) or directly (if the media is an ISO
# file or if the specified filename is a block device.)
#
# On success "zone_mounted" is set to the mount point; "removable" is set
# when the media came from a removable disc device.
#
# Returns 0 on success, 1 on failure, 2 if no disc was available
#
mount_install_media()
{
	typeset device="$1"
	typeset mount_err

	unset removable
	unset zone_mounted

	[[ -z $mntdir ]] && return 1

	[[ -d $mntdir ]] || if ! mkdir -p "$mntdir"; then
		screenlog "$mk_mntfail" "$mntdir"
		unset mntdir
		return 1
	fi

	if [[ "$install_media" = "disc" && "$managed_removable" = "1" ]]; then
		#
		# The removable disc device is an automatically managed one,
		# so just wait for the device mounter to notice a disc has been
		# inserted into the drive and for the disc to appear at the
		# mount point.
		#
		typeset mount_interval=2
		typeset mount_timeout=10
		typeset mount_timer=0

		typeset nickname=$(basename "$device")

		eject -q "$nickname" > /dev/null 2>&1 || return 2
		removable="$nickname"

		#
		# Double check that the device was mounted.  If it wasn't, that
		# usually means the disc in the drive isn't in a format we can
		# read or the physical disc is unreadable in some way.
		#
		# The mount_timer loop is needed because the "eject -q" above
		# may report a disc is available before the mounter associated
		# with the drive actually gets around to mounting the device,
		# so we need to give it a chance to do so.  The mount_interval
		# allows us to short-circuit the timer loop as soon as the
		# device is mounted.
		#
		while ((mount_timer < mount_timeout)); do
			[[ -d "$device" ]] && break

			sleep $mount_interval
			((mount_timer += mount_interval))
		done

		if [[ ! -d "$device" ]]; then
			screenlog "\n$unknown_media" "$device"
			return 2
		fi

		mount -F lofs -r "$device" "$mntdir"
		mount_err=$?
	else
		#
		# Attempt to mount the media manually.
		#
		# First, make sure the passed device name really IS a device.
		#
		[[ -b "$device" ]] || return 2

		#
		# Now check to see if the device is already mounted and lofs
		# mount the existing mount point into the zone if it is.
		#
		if get_mountdir "$device"; then
			mount -F lofs -r "$mount_dir" "$mntdir"
			mount_err=$?
		else
			[[ "$install_media" = "disc" ]] && removable="$device"

			# It wasn't mounted, so go ahead and try to do so.
			mount -F hsfs -r "$device" "$mntdir"
			mount_err=$?
		fi

		# A mount_err of 33 means no suitable media was found
		((mount_err == 33)) && return 2
	fi

	if ((mount_err != 0)); then
		screenlog "$mountfail" "$device" "$mntdir"
		unset mntdir
		return 1
	fi

	zone_mounted="$mntdir"
	verbose "Mount of \"$device\" on \"$mntdir\" succeeded."
	return 0
}

#
# Eject the removable disc named by "$removable", unmounting it from the
# zone first if "$zone_mounted" says it is still mounted there.
#
eject_removable_disc()
{
	screenlog ""
	verbose " (Attempting to eject '$removable'... \c"

	if [[ -n $zone_mounted ]]; then
		umount "$zone_mounted"
		unset zone_mounted
	fi

	if ! eject "$removable"; then
		verbose "failed.)\n"
		screenlog "$eject_fail" "$removable"

		msg=$(gettext "Please eject the disc manually.")
		screenlog "$msg"
	else
		verbose "done.)\n"
	fi

	unset removable
}

#
# Ask for the user to provide a disc or ISO.
#
# If an argument is passed it is the number of the CD being requested;
# otherwise any disc of the release is asked for.
#
# Returns 0 on success, 1 on failure.
#
prompt_for_media()
{
	# No prompting is allowed in silent mode.
	if [[ -n $silent_mode ]]; then
		log "$silent_err_msg"
		return 1
	fi

	if [[ "$1" != "" ]]; then
		msg="$release_name, CD $1"
	else
		typeset disc=$(gettext "disc")

		msg=$(gettext "any")
		msg="$msg $release_name $disc"
	fi

	if [[ "$install_media" = "disc" ]]; then
		screenlog "$insert_discmsg" "$msg" "$release_name"

		msg=$(gettext "drive and press <RETURN>.")
		screenlog " $msg"

		[[ -n $removable ]] && eject_removable_disc
	else
		# Unmount what was actually recorded as mounted in the zone.
		if [[ -n $zone_mounted ]]; then
			umount "$zone_mounted"
			unset zone_mounted
		fi

		#
		# This is only printed in the case of a user
		# specifying a device name as an install medium.
		# This is handy for testing the installer or if the user
		# has ISOs stored in some strange way that somehow
		# breaks the "install from ISO" mechanism, as ISOs
		# can be manually added using lofiadm(1M) command and
		# the resulting lofi device name passed to the
		# installer.
		#
		screenlog "$mount_proper_iso1" "$msg"
		screenlog " $mount_proper_iso2" "$release_name" "$mntdev"

		msg=$(gettext "and press <RETURN>.")
		screenlog " $msg"
	fi

	# An EOF on stdin is treated as a refusal to supply media.
	read && return 0

	return 1
}

#
# Get a particular CD of a multi-disc set.
#
# This basically works by doing the following:
#
#	1) Mount the disc
#	2) Read the disc's .discinfo file to see which CD it is or represents
#	3) If it doesn't contain the desired CD, ask the user for a disc
#	   containing the CD we wanted.
#
# Returns 0 on success, 1 on failure.
#
# Arguments:
#	$1 - device to mount the media from
#	$2 - (optional) number of the CD wanted; if omitted any disc of the
#	     release is accepted
#
get_cd()
{
	typeset mntdev="$1"

	typeset cdnum
	typeset discname
	typeset mount_err
	typeset prompted

	if [[ $# -eq 2 ]]; then
		# Caller specified a particular CD to look for
		cdnum="$2"
		discname="$release_name, CD $cdnum"
	else
		# Caller wanted any disc
		discname="a $release_name disc"
	fi

	verboselog "\nChecking for $discname on device"
	verboselog " \"$mntdev\"\n"

	while :; do
		# Check to see if a distro disc is already mounted
		mntdir="$media_mntdir"

		unset rd_disctype
		if ! read_discinfo "$mntdir" "test"; then
			mount_install_media "$mntdev"
			mount_err=$?

			#
			# If the mount succeeded, continue on in the main
			# script
			#
			if ((mount_err == 0)); then
				read_discinfo "$mntdir"
			elif ((mount_err == 2)); then
				# No medium was found, so prompt for one.
				prompt_for_media "$cdnum" && prompted=1 &&
				    continue

				unset mntdir
				return 1
			else
				# mount failed
				unset mntdir
				return 1
			fi
		fi

		if [[ -n $distro_serial &&
		    "$rd_serial" != "$distro_serial" ]]; then
			screenlog "$wrong_serial" "$install_disctype"
			screenlog " $wrong_ser_expect" "$rd_serial" \
			    "$distro_serial"

			#
			# If we're installing from ISOs, don't prompt the user
			# if the wrong serial number is present, as there's
			# nothing they can do about it.
			#
			[[ "$install_media" = "ISO" ]] && return 1

			prompt_for_media "$cdnum" && continue

			#
			# The media is no longer mounted in the zone, so clear
			# the record of it to keep later cleanup from trying
			# to unmount it a second time.
			#
			umount "$mntdir"
			unset zone_mounted
			return 1
		fi

		#
		# Make sure that the mounted media is CD $cdnum.
		#
		# If it is, return to the caller, otherwise eject the
		# disc and try again.
		#
		if [[ "$rd_disctype" = "CD" ]]; then
			verboselog "Found CD #$rd_cdnum," \
			    "Serial #$rd_serial"
			verboselog "Release Name \"$rd_release\""

			[[ -n $rd_pers ]] &&
			    verboselog "Detected RedHat Personality" \
			    "\"$rd_pers\""

			verboselog ""

			# If we didn't care which CD it was, return success
			[[ "$cdnum" = "" ]] && return 0

			# Return if the CD number read is a match
			[[ "$rd_cdnum" = "$cdnum" ]] && return 0
		else
			verboselog "\nFound DVD (representing CDs" \
			    "$rd_cdnum), Serial #$rd_serial"
			verboselog "Release Name \"$rd_release\"\n"

			[[ -n $rd_pers ]] &&
			    verboselog "Detected RedHat Personality" \
			    "\"$rd_pers\""

			verboselog ""

			# If we didn't care which CD it was, return success
			[[ "$cdnum" = "" ]] && return 0

			#
			# Since a DVD represents multiple CDs, make sure the
			# DVD inserted represents the CD we want.  Delimit
			# both sides with commas so that CD "1" is not
			# mistaken for a member of the list "11,12".
			#
			[[ ",$rd_cdnum," = *",$cdnum,"* ]] && return 0
		fi

		if [[ -n $prompted ]]; then
			if [[ "$rd_disctype" = "CD" ]]; then
				screenlog "$wrong_cd" "$rd_cdnum" "$cdnum"
			else
				msg=$(gettext "Incorrect DVD inserted.")
				screenlog "$msg"

				log "(DVD represented CDs $rd_cdnum," \
				    " wanted CD $cdnum)"
			fi
		fi

		#
		# If we're installing from ISOs, don't prompt the user if the
		# wrong CD is mounted, as there's nothing they can do about it.
		#
		[[ "$install_media" = "ISO" ]] && return 1

		prompt_for_media "$cdnum" && prompted=1 && continue

		umount "$mntdir"
		unset zone_mounted
		return 1
	done
}

#
# Find out which distro the mounted disc belongs to by comparing the
# mounted disc's serial number against those contained in the various
# distro files.
#
# When a match is found, the shell variable "distro_file" will be set to
# the name of the matching file.  Since that will have been the last file
# sourced by the shell, there's no need for the caller to do it again; the
# variable is only set in case it's of some use later.
#
# Returns 0 on success, 1 on failure.
#
get_disc_distro()
{
	typeset distro
	typeset distro_files="$(echo $distro_dir/*.distro)"

	unset distro_file

	# An unexpanded glob means there are no distro files at all.
	[[ "$distro_files" = "$distro_dir/*.distro" ]] && return 1

	for distro in $distro_files; do
		[[ ! -f "$distro" ]] && continue

		verbose "Checking for disc distro \"$distro\"..."

		. "$distro" > /dev/null

		[[ "$rd_serial" != "$distro_serial" ]] && continue

		distro_file="$distro"
		release_name="$rd_release $distro_version"
		distro_ncds=${#distro_cdorder[@]}

		return 0
	done

	return 1
}

#
# Iterate through the install media to install the miniroot and full zone
#
# The install media may be physical discs, a lofi mounted ISO file, or
# iso files located in a directory specified by the user.
#
# All installations, regardless of media type, use a CD as their basic media
# unit.  DVDs or ISOs representing DVDs actually contain multiple "CDs" of
# installation packages.
#
# The variable "distro_ncds," as set elsewhere, represents the number
# of CDs required to install the distribution.  Whether the installation
# actually requires multiple physical discs or ISOs depends upon their content.
#
# Arguments:
#	$1     - install type, "miniroot" or anything else for a full install
#	$2...  - device(s) to install from
#
# Returns 0 on success, 1 on failure.
#
iterate_media()
{
	typeset cdnum=1
	typeset cds
	typeset disc_rpms
	typeset err_media
	typeset err_msg
	typeset install_type="$1"
	typeset ldevs
	typeset mountdev
	typeset rh_pers

	shift

	if [[ "$install_type" = "miniroot" ]]; then
		typeset i

		disc_rpms=$distro_miniroot_rpms
		err_msg="$mini_mediafail"

		# For miniroot installs, ask for CDs in numerical order
		cds[0]="zero_pad"

		for i in ${distro_cdorder[@]}; do
			cds[$cdnum]=$cdnum
			((cdnum += 1))
		done

		cdnum=1
	else
		disc_rpms=$distro_rpms
		err_msg="$zone_mediafail"

		#
		# For full zone installs, ask for CDs in the order RPM needs
		# to find the packages.
		#
		set -A cds "zero_pad" ${distro_cdorder[@]}
	fi

	if [[ "$install_media" = "ISO" ]]; then
		set -A ldevs "zero_pad" "$@"
	else
		mountdev="$1"
		err_media="$release_name, CD ${cds[$cdnum]} (or DVD)"
	fi

	unset rpms_left_save

	while ((cdnum <= distro_ncds)); do
		# Skip CDs that have already been installed from.
		[[ -z ${cds[$cdnum]} ]] && ((cdnum += 1)) && continue

		if [[ "$install_media" = "ISO" ]]; then
			typeset isonum="${cds[$cdnum]}"

			#
			# If this routine was called with a single ISO device
			# name, it must be a DVD, so refer to that one lofi
			# device (and associated ISO pathname)
			#
			[[ $# -eq 1 ]] && isonum=1

			err_media="ISO \"${iso_pathnames[$isonum]}\""
			mountdev="${ldevs[$isonum]}"
		fi

		#
		# If the disc needed in the install order isn't the one in
		# the drive, ask for the correct one.
		#
		if ! get_cd "$mountdev" "${cds[$cdnum]}"; then
			screenlog "$err_msg" "$zonename" "$err_media"
			return 1
		fi

		# set the RedHat personality type, if applicable
		[[ -n $rd_pers && -z $rh_pers ]] && rh_pers=$rd_pers

		#
		# We now know the actual type of media being used, so
		# modify the "err_media" string accordingly.
		#
		if [[ "$install_media" = "disc" ]]; then
			if [[ "$rd_disctype" = "DVD" ]]; then
				err_media="$release_name DVD"
			else
				err_media="$release_name, CD ${cds[$cdnum]}"
			fi
		fi

		find_packages "$mntdir" $disc_rpms

		#
		# Save a copy of $rpms_left.  Other functions clobber it.
		#
		rpms_left_save="${rpms_left[@]}"

		if [[ -n $rpms_found ]]; then
			if [[ "$install_type" = "miniroot" ]]; then
				verboselog "\nInstalling miniroot from"
				verboselog " $err_media...\n"

				if ! install_miniroot "$mntdir" \
				    "${rpms_found[@]}"; then
					screenlog "$err_msg" "$zonename" \
					    "$err_media"
					return 1
				fi
			else
				screenlog "\n$install_msg\n" "$zonename" \
				    "$err_media"

				if ! install_zone "$mntdir" \
				    ${rpms_found[@]}; then
					screenlog "$err_msg" "$zonename" \
					    "$err_media"
					return 1
				fi
			fi

			#
			# Mark installation from this CD (or ISO representing
			# this CD) as completed.
			#
			if [[ "$rd_disctype" = "CD" ]]; then
				unset cds[$cdnum]
			fi
		fi

		# A DVD install takes a single disc, so stop iterating
		[[ "$rd_disctype" = "DVD" ]] && break

		# If there are no RPMs left, we're done.
		[[ -z $rpms_left_save ]] && break

		disc_rpms="$rpms_left_save"
		((cdnum += 1))

		if [[ "$install_media" != "ISO" ]]; then
			#
			# modify the err_media variable to reflect the next
			# CD in the sequence
			#
			err_media="$release_name, CD ${cds[$cdnum]}"
		else
			# Unmount the last used ISO if appropriate
			if [[ -n $zone_mounted ]]; then
				umount "$zone_mounted"
				unset zone_mounted
			fi
		fi
	done

	if [[ -n $zone_mounted ]]; then
		umount "$zone_mounted"
		unset zone_mounted
	fi

	if [[ -n $rpms_left_save ]]; then
		#
		# Uh oh - there were RPMS we couldn't locate.  This COULD
		# indicate a failed installation, but we need to check for
		# a RedHat personality "missing" list first.
		#
		if [[ -n $rh_pers && "$rh_pers" != "AS" ]]; then
			typeset missing
			typeset pkg
			typeset remaining

			if [[ $rh_pers = "WS" ]]; then
				missing="$distro_WS_missing"
			elif [[ $rh_pers = "ES" ]]; then
				missing="$distro_ES_missing"
			fi

			#
			# Drop every package that this personality is expected
			# to be missing from the list of packages we could not
			# find.  Whole names are compared, so an expected
			# missing "foo" does not hide a missing "libfoo".
			#
			if [[ -n $missing ]]; then
				# Normalize the list to " pkg1 pkg2 ... "
				missing=" $(echo $missing) "

				for pkg in $rpms_left_save; do
					[[ "$missing" = *" $pkg "* ]] ||
					    remaining="$remaining $pkg"
				done

				rpms_left_save="${remaining# }"

				#
				# If all of the packages we could not find
				# were expected to be missing, the
				# installation completed successfully.
				#
				[[ -z $rpms_left_save ]] && return 0
			fi
		fi

		log "\nERROR: Unable to locate some needed packages:\n" \
		    " ${rpms_left_save%%+( )}\n"
		screenlog "$err_msg" "$zonename"
		return 1
	fi

	return 0
}

#
# Install a zone from installation media
#
# The arguments are the install device(s), passed through unchanged to
# iterate_media.
#
# Returns 0 on success, 1 on failure
#
install_from_media()
{
	msg=$(gettext "Installing miniroot for zone '%s'.")
	screenlog "$msg" "$zonename"

	#
	# Pass the devices on as "$@" so that each one stays a single
	# argument; iterate_media counts them to detect a DVD.
	#
	iterate_media "miniroot" "$@" || return 1

	if ! setup_miniroot; then
		screenlog "$mini_setfail" "$zonename"
		return 1
	fi

	msg=$(gettext "Performing full install for zone '%s'.")

	screenlog "\n$msg" "$zonename"

	iterate_media "full" "$@" || return 1

	#
	# Attempt to install deferred RPMS, if any
	#
	if [[ -n $deferred_rpms ]]; then
		if ! install_zone ""; then
			return 1
		fi
	fi

	finish_install
	return $?
}

#
# Add an entry to the valid distro list.
#
# The passed argument is the ISO type ("CD Set" or "DVD")
#
# The entry is built from the caller's "distro", "release_name",
# "redhat_pers" and "iso_names" variables and appended to the parallel
# arrays distro_file, select_name, release and iso_set.
#
add_to_distro_list()
{
	typeset name

	distro_file[${#distro_file[@]}]="$distro"

	name="$release_name"
	[[ -n $redhat_pers ]] && name="$name $redhat_pers"

	select_name[${#select_name[@]}]="$name ($1)"
	release[${#release[@]}]="$release_name"
	iso_set[${#iso_set[@]}]="${iso_names[@]}"
	verboselog "Distro \"$name\" ($1) found."
}

#
# Find out which distros we have ISO files to support
#
# Do this by cycling through the distro directory and reading each distro
# file in turn looking for:
#
#	1) The number of discs in a distribution
#	2) The serial number of the distribution
#	3) The name of the distribution
#
# Based on this, we can determine based on the ISO files available which
# distributions, if any, we have a complete set of files to support.
#
# The function returns the supported isos in the array "iso_set."
#
validate_iso_distros()
{
	typeset cd
	typeset disctype
	typeset index
	typeset iso
	typeset ncds
	typeset pers
	typeset pers_cd
	typeset pers_index
	typeset serial

	typeset distro_files="$(echo $distro_dir/*.distro)"
	typeset nisos=${#iso_filename[@]}

	unset distro_file
	unset iso_set
	unset release
	unset select_name

	# An unexpanded glob means there are no distro files at all.
	if [[ "$distro_files" = "$distro_dir/*.distro" ]]; then
		msg=$(gettext "Unable to find any distro files!")
		screenlog "$msg"
		return
	fi

	for distro in $distro_files; do
		#
		# We're done if we've already processed all available ISO files
		# or if there were none in the first place.
		#
		((${#iso_filename[@]} == 0)) && break

		[[ ! -f $distro ]] && continue

		. "$distro" > /dev/null
		ncds=${#distro_cdorder[@]}

		unset iso_names
		unset pers
		unset pers_cd

		verbose "\nChecking ISOs against distro file \"$distro\"..."

		index=0

		while ((index < nisos)); do
			#
			# If the filename has been nulled out, it's already
			# been found as part of a distro, so continue to the
			# next one.
			#
			if [[ -z ${iso_filename[$index]} ]]; then
				((index += 1))
				continue
			fi

			iso="${iso_filename[$index]}"
			serial="${iso_serial[$index]}"

			verbose " ISO \"$iso\":"

			#
			# If the serial number doesn't match that for
			# this distro, check other ISOs
			#
			if [[ "$serial" != "$distro_serial" ]]; then
				((index += 1))
				continue
			fi

			#
			# Only take the release name and personality from an
			# ISO that belongs to this distro, so that an
			# unrelated ISO later in the list cannot change the
			# name the distro is registered under.
			#
			release_name="${iso_release[$index]}"
			redhat_pers="${iso_pers[$index]}"

			verbose " Serial #$serial"
			verbose " Release Name \"$release_name\""

			[[ -n ${iso_pers[$index]} ]] &&
			    verbose " RedHat Personality \"$redhat_pers\""

			if [[ "${iso_disctype[$index]}" = "CD" ]]; then
				disctype="CD #"
				cd="${iso_cdnum[$index]}"
			else
				# DVDs are kept in slot 0 of iso_names
				disctype="DVD, representing CDs #"
				cd=0
			fi

			verbose " ${disctype}${iso_cdnum[$index]}\n"

			#
			# Once we've matched a particular distro, don't check
			# this ISO to see if it's part of any other.
			#
			unset iso_filename[$index]

			iso_names[$cd]="$iso"

			#
			# A DVD-based distro consists of one and ONLY one disc,
			# so process it now.
			#
			if [[ "${iso_disctype[$index]}" = "DVD" ]]; then
				typeset dvd_discs=",${iso_cdnum[$index]}"

				# Strike each required CD off the DVD's list
				cd=1
				while ((cd <= ncds)); do
					dvd_discs=$(echo "$dvd_discs" |
					    sed "s/,$cd//")
					((cd += 1))
				done

				#
				# If no CDs are left in $dvd_discs, the DVD
				# was a complete distribution, so add it to
				# the valid distro list.
				#
				[[ -z $dvd_discs ]] && add_to_distro_list "DVD"

				#
				# Either way the DVD has been dealt with;
				# remove it from slot 0 so that it is not
				# counted as a member of the CD set below.
				#
				unset iso_names[0]
			elif [[ -n ${iso_pers[$index]} ]]; then
				#
				# If this is a RedHat personality CD, save off
				# some extra information about it so we can
				# discern between multiple personality discs
				# later, if needed.
				#
				pers[${#pers[@]}]=${iso_pers[$index]}
				pers_cd[${#pers_cd[@]}]="$iso"
			fi

			((index += 1))
		done

		#
		# Check to see if we have ISOs representing a full CD set.
		# If we don't, don't mark this as an available distro.
		#
		(( ${#iso_names[@]} != $ncds )) && continue

		release_name="$release_name $distro_version"

		if ((${#pers[@]} == 0)); then
			#
			# If there were no personality discs, just add this
			# ISO set to the distro list.
			#
			unset redhat_pers
			add_to_distro_list "CD Set"
		else
			#
			# If a valid CD-based distro was found and there are
			# RedHat personality discs for that distro present,
			# create entries for each personality in the available
			# distro list.
			#
			pers_index=0

			while ((pers_index < ${#pers[@]})); do
				redhat_pers=${pers[$pers_index]}

				if [[ -n ${pers_cd[$pers_index]} ]]; then
					#
					# RedHat personality discs are always
					# disc 1 of a CD set, so if we found a
					# valid personality disc for this set,
					# set the disc 1 entry for this distro
					# to the ISO for the proper personality
					# disc.
					#
					iso_names[1]="${pers_cd[$pers_index]}"
					add_to_distro_list "CD Set"
				fi

				((pers_index += 1))
			done
		fi
	done
}

#
# Do a lofi add for the passed filename and set lofi_dev to the lofi
# device name lofiadm created for it (e.g. "/dev/lofi/1".)
#
# If the passed filename already has a lofi device name, simply set lofi_dev
# to the existing device name.
#
# Returns 0 on success, 1 on failure.
#
lofi_add()
{
	typeset filename="$1"

	# Reuse an existing association before trying to create a new one.
	lofi_dev=$(lofiadm "$filename" 2>/dev/null) && return 0
	lofi_dev=$(lofiadm -a "$filename") && return 0

	screenlog "$lofi_failed" "$filename"
	return 1
}

#
# Delete the lofi device name passed in.
#
# Returns 0 on success, 1 on failure.
#
lofi_del()
{
	typeset dev="$1"

	# Refuse to touch anything that is not a lofi device.
	[[ "$dev" != /dev/lofi/* ]] && return 1

	if lofiadm -d "$dev" 2>/dev/null; then
		[[ -n $lofi_dev ]] && unset lofi_dev
		return 0
	fi

	return 1
}

#
# Mount the lofi device name passed in.
#
# Set the variable mntdir to the directory on which the lofi device is
# mounted.  If the mount point had to be created, its name is recorded in
# the variable lofi_created.
#
# Returns 0 on success, 1 on failure.
#
lofi_mount()
{
	typeset lofidev="$1"
	typeset mntpoint="$2"

	#
	# Check to see if the lofi device is already mounted and return
	# the existing mount point if it is.
	#
	get_mountdir "$lofidev" && { mntdir="$mount_dir" ; return 0 ; }

	unset mntdir
	if [[ ! -d "$mntpoint" ]]; then
		if ! mkdir -p "$mntpoint"; then
			log "Could not create mountpoint \"$mntpoint\"!\n"
			return 1
		fi
		lofi_created="$mntpoint"
	fi

	verbose "Attempting mount of device \"$lofidev\""
	verbose " on directory \"$mntpoint\"... \c"

	if ! mount -F hsfs -r "$lofidev" "$mntpoint" 2>/dev/null; then
		verbose "FAILED."
		[[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
		    unset lofi_created
		return 1
	fi

	mntdir="$mntpoint"
	verbose "succeeded."
	return 0
}

#
# Unmount the lofi device name passed in, and remove the device mount point
# after unmounting the device.
#
# Returns 0 on success, 1 on failure.
#
lofi_umount()
{
	typeset mntdev="$1"

	#
	# Nothing to do (and nothing to report) when the passed name is not
	# currently mounted; that counts as success.
	#
	if ! get_mountdir "$mntdev"; then
		return 0
	fi

	verbose "Unmounting device \"$mntdev\"... \c"

	if umount "$mntdev" ; then
		verbose "succeeded."
		return 0
	fi

	verbose "FAILED."
	return 1
}

#
# Scan the passed list of ISOs.
#
# Each file is attached as a lofi device and mounted on /tmp/lxiso so that
# its .discinfo can be read.  For every ISO with a usable .discinfo, one
# entry is recorded at the same index in each of the arrays iso_release,
# iso_serial, iso_cdnum, iso_disctype, iso_filename and (when a RedHat
# personality was detected) iso_pers.
#
scan_isos()
{
	typeset iso
	typeset index=0

	unset iso_serial
	unset iso_release
	unset iso_cdnum
	unset iso_disctype
	unset iso_filename
	unset iso_pers

	for iso in "$@"; do
		verbose "Checking possible ISO\n \"$iso\"..."

		if ! lofi_add "$iso"; then
			verbose " not a valid ISO image."
			continue
		fi

		verbose " added as lofi device \"$lofi_dev\""

		if ! lofi_mount "$lofi_dev" "/tmp/lxiso"; then
			verbose " not a usable ISO image."
			log "Unable to mount \"$lofi_dev\" (\"$iso\")"
			lofi_del "$lofi_dev"
			continue
		fi

		# Record the disc only if its .discinfo parsed cleanly.
		if read_discinfo "$mntdir"; then
			iso_release[$index]="$rd_release"
			iso_serial[$index]="$rd_serial"
			iso_cdnum[$index]="$rd_cdnum"
			iso_disctype[$index]="$rd_disctype"

			[[ -n $rd_pers ]] &&
			    iso_pers[$index]="$rd_pers"

			iso_filename[$index]="$iso"
			((index += 1))
		fi

		# Always release the mount and the lofi device again.
		lofi_umount "$lofi_dev"
		lofi_del "$lofi_dev"
	done
}

#
# Prompt the user with the first argument, then make a menu selection
# from the balance.
#
# This is effectively similar to the ksh "select" function, except it
# outputs to stdout.
#
# Shell variables set:
#	choice    - set to the menu number selected
#	selection - set to the menu text selected
#
# If an EOF is read instead of an answer, the function returns 1 with
# "choice" left unset; otherwise it returns once a valid choice was made.
#
pick_one()
{
	typeset menu_items
	typeset menu_index
	typeset reply
	typeset f

	typeset prompt="$1"
	shift

	unset choice

	set -A menu_items "$@"

	until [[ -n $choice ]]; do
		menu_index=1

		echo "\n$prompt\n"

		for f in "${menu_items[@]}"; do
			echo "$menu_index) $f"
			((menu_index += 1))
		done

		echo "\n$(gettext "Please select") (1-$#): " "\c"

		#
		# Stop asking once input is exhausted; re-reading after an
		# EOF would just redisplay the menu forever.
		#
		if ! read reply; then
			echo
			return 1
		fi
		echo

		[[ -z $reply ]] && echo && continue

		#
		# Reprint menu selections if the answer was not a number in
		# range of the menu items available
		#
		[[ $reply != +([0-9]) ]] && continue
		((reply < 1)) || ((reply > $#)) && continue

		choice=$reply
		selection=${menu_items[((choice - 1))]}
	done
}

#
# Select a distribution to install from the arguments passed and set
# "ndistro" to the value chosen - 1 (so it may be used as an array index.)
#
# The routine will automatically return with ndistro set to 0 if only one
# argument is passed.
#
# Returns 0 on success, 1 if several distributions are available but silent
# mode prevents asking which one to install.
#
select_distro()
{
	unset choice
	unset ndistro

	if (($# > 1)); then
		if [[ -n $silent_mode ]]; then
			typeset dist

			log "ERROR: multiple distrubutions present in ISO" \
			    "directory but silent install"
			log " mode specified. Distros available:"
			for dist in "$@"; do
				log " \"$dist\""
			done
			return 1
		fi

		pick_one \
		    "$(gettext "Which distro would you like to install?")" \
		    "$@"
	fi

	#
	# Covers both the cases of when only one distro name is passed
	# to the routine as well as when an EOF is sent to the distribution
	# selection prompt.
	#
	if [[ -z $choice ]]; then
		screenlog "$install_dist" "$1"
		ndistro=0
	else
		screenlog "$install_dist" "$selection"
		ndistro=$((choice - 1))
	fi

	return 0
}

#
# Install a zone from discs or manually lofi-mounted ISOs.
+#
+# Return 0 on success, 1 on failure
+#
+# The single argument is the disc or lofi device path to install from.  The
+# status returned after a completed media install is that of
+# install_from_media.
+#
+do_disc_install()
+{
+	typeset path="$1"
+
+	typeset eject_final="N"
+	typeset install_status
+
+	#
+	# Get a disc, it doesn't matter which one.
+	#
+	# We don't know which distro this may be yet, so we can't yet
+	# ask for the first disc in the install order.
+	#
+	if ! get_cd "$path"; then
+		if [[ -z $silent_mode ]]; then
+			#
+			# Declare and assign separately.  A backslash
+			# continuation followed by an indented command
+			# substitution would leave the variable empty and
+			# hand the message words to typeset as extra names.
+			#
+			typeset distro_disc
+			distro_disc=$(gettext "a supported Linux distribution disc")
+
+			screenlog "\n$distro_mediafail" "$distro_disc ($path)"
+		fi
+		return 1
+	fi
+
+	if [[ -n $silent_mode && "$rd_disctype" = "CD" ]]; then
+		log "$silent_err_msg"
+		return 1
+	fi
+
+	if ! get_disc_distro "$mntdir"; then
+		msg=$(gettext "Unable to find a supported Linux release on")
+		screenlog "$msg"
+		screenlog " $media_spec" "$path"
+		umount "$mntdir" > /dev/null 2>&1
+		return 1
+	fi
+
+	check_mbfree $zoneroot $distro_mb_required || return 1
+	build_rpm_list $install_packages
+
+	echo
+
+	if [[ "$install_media" = "disc" ]]; then
+		#
+		# If we're in interactive mode, ask the user if they want the
+		# disc ejected when the installation is complete.
+		#
+		# Silent mode installs will require the user to manually run
+		# eject(1).
+		#
+		if [[ -n $removable && -z $silent_mode ]]; then
+			typeset ans
+			typeset disc
+			typeset status
+			typeset which=""
+
+			disc="$rd_disctype"
+			[[ "$disc" = "CD" ]] && which=$(gettext "final ")
+
+			#
+			# Ask the user if they want the install disc ejected
+			# when the installation is complete.  Any answer but
+			# "n" or "N" is taken to mean yes, eject it.
+			#
+			eject_final="Y"
+			status=$(gettext "WILL")
+
+			screenlog "$eject_final_msg" "$which" "$disc"
+			screenlog " $eject_final_prompt" "$zonename" "[y]/n"
+
+			read ans && [[ "$ans" = [Nn]* ]] && eject_final="N" &&
+			    status=$(gettext "will NOT")
+
+			screenlog "\n$eject_final_status\n" "$which" "$disc" \
+			    "$status"
+		fi
+
+		screenlog "$install_ndiscs" "$distro_ncds"
+
+		msg=$(gettext "install %s.")
+		screenlog "$msg" "$release_name"
+	else
+		screenlog "$install_nisos" "$distro_ncds"
+
+		msg=$(gettext "DVD) to install %s.")
+		screenlog "$msg" "$release_name"
+	fi
+
+	install_from_media "$path"
+	install_status=$?
+
+	[[ "$eject_final" = "Y" ]] && eject_removable_disc
+
+	return $install_status
+}
+
+#
+# Install a zone using the list of ISO files passed as arguments to this
+# function.
+#
+# Return 0 on success, 1 on failure.
+#
+do_iso_install()
+{
+	typeset install_status
+	typeset iso_path
+	typeset ldev
+
+	msg=$(gettext "Checking for valid Linux distribution ISO images...")
+	screenlog "\n$msg"
+
+	scan_isos "$@"
+
+	if [[ -z ${iso_filename[@]} ]]; then
+		msg=$(gettext "No valid ISO images available or mountable.")
+		screenlog "\n$msg"
+		return 1
+	fi
+
+	validate_iso_distros
+
+	if [[ -z ${release[@]} ]]; then
+		msg=$(gettext "No supported Linux distributions found.")
+		screenlog "\n$msg"
+		return 1
+	fi
+
+	select_distro "${select_name[@]}" || return 1
+	unset select_name
+
+	# Source the chosen distro description; its output is discarded.
+	. ${distro_file[$ndistro]} > /dev/null
+	distro_ncds=${#distro_cdorder[@]}
+
+	check_mbfree $zoneroot $distro_mb_required || return 1
+	build_rpm_list $install_packages
+
+	unset lofi_devs
+
+	verboselog ""
+	for iso_path in ${iso_set[$ndistro]}; do
+		if ! lofi_add "$iso_path"; then
+			#
+			# Release whatever was attached so far, and forget
+			# the list so the interrupt handler does not try to
+			# delete the same devices a second time.
+			#
+			for ldev in $lofi_devs; do
+				lofi_del "$ldev"
+			done
+			unset lofi_devs
+			return 1
+		fi
+
+		verboselog "Added \"$iso_path\""
+		verboselog " as \"$lofi_dev\""
+		lofi_devs="$lofi_devs $lofi_dev"
+	done
+
+	release_name="${release[$ndistro]}"
+
+	# Slot 0 holds a placeholder, so the first ISO path is element 1.
+	set -A iso_pathnames "zero_pad" ${iso_set[$ndistro]}
+	install_from_media $lofi_devs
+	install_status=$?
+
+	for ldev in $lofi_devs; do
+		lofi_del "$ldev"
+	done
+
+	unset lofi_devs
+	return $install_status
+}
+
+# Clean up on interrupt
+trap_cleanup()
+{
+	cd "$cwd"
+
+	msg=$(gettext "Interrupt received, cleaning up partial install...")
+	screenlog "$msg"
+
+	[[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+	    unset miniroot_booted && unset newroot_mounted
+
+	#
+	# OK, why a sync here?  Because certain commands may have written data
+	# to mounted file systems before the interrupt, and given just the right
+	# timing there may be buffered data not yet sent to the disk or the
+	# system may still be writing data to the disk.  Either way, the umount
+	# will then fail because the system will still see the mounted
+	# filesystems as busy.
+	#
+	sync
+
+	if [[ -n $newroot_mounted ]]; then
+		umount_list $newroot_mounted
+		unset newroot_mounted
+	fi
+
+	if [[ -n $zone_mounted ]]; then
+		umount "$zone_mounted"
+		unset zone_mounted
+	fi
+
+	#
+	# Normally, this isn't needed but there is a window where mntdir is set
+	# before zone_mounted, so account for that case.
+	#
+	if [[ -n $mntdir ]]; then
+		umount "$mntdir"
+		unset mntdir
+	fi
+
+	[[ -n $lofi_dev ]] && lofi_del "$lofi_dev"
+
+	if [[ -n $lofi_devs ]]; then
+		typeset ldev
+
+		for ldev in $lofi_devs
+		do
+			lofi_del "$ldev"
+		done
+
+		unset lofi_devs
+	fi
+
+	[[ -n $lofi_created ]] && rmdir -ps "$lofi_created" &&
+	    unset lofi_created
+
+	msg=$(gettext "Installation aborted.")
+	screenlog "$msg"
+	exit $ZONE_SUBPROC_FATAL
+}
+
+#
+# Start of main script
+#
+cwd=$(dirname "$0")
+distro_dir="$cwd/distros"
+
+unset deferred_saved
+unset distro_path
+unset logfile
+unset msg
+unset newroot_mounted
+unset silent_err_msg
+unset silent_mode
+unset verbose_mode
+unset zone_mounted
+unset zoneroot
+unset zonename
+
+#
+# Exit values used by the script, as #defined in <sys/zone.h>
+#
+#	ZONE_SUBPROC_OK
+#	===============
+#	Installation was successful
+#
+#	ZONE_SUBPROC_USAGE
+#	==================
+#	Improper arguments were passed, so print a usage message before exiting
+#
+#	ZONE_SUBPROC_NOTCOMPLETE
+#	========================
+#	Installation did not complete, but another installation attempt can be
+#	made without an uninstall
+#
+#	ZONE_SUBPROC_FATAL
+#	==================
+#	Installation failed and an uninstall will be required before another
+#	install can be attempted
+#
+ZONE_SUBPROC_OK=0
+ZONE_SUBPROC_USAGE=253
+ZONE_SUBPROC_NOTCOMPLETE=254
+ZONE_SUBPROC_FATAL=255
+
+#
+# Process and set up various global option variables:
+#
+#	distro_path - Path containing files that make up the distribution
+#	    (e.g. 
a directory containing ISO files or a disc device)
+#	logfile - Name (if any) of the install log file
+#	zoneroot - Root directory for the zone to install
+#	zonename - Name of the zone to install
+#
+# -s and -v are mutually exclusive: whichever appears last wins, because
+# each one unsets the other's flag.  -x turns on shell tracing.
+#
+while getopts 'svxd:l:r:z:' opt; do
+	case $opt in
+		s) silent_mode=1; unset verbose_mode;;
+		v) verbose_mode=1; unset silent_mode;;
+		x) set -x;;
+		d) distro_path="$OPTARG";;
+		l) logfile="$OPTARG";;
+		r) zoneroot="$OPTARG";;
+		z) zonename="$OPTARG";;
+	esac
+done
+shift OPTIND-1
+
+# Fall back to the default removable media path when -d was not given.
+distro_path=${distro_path:=/cdrom/cdrom0}
+
+# Whatever remains on the command line is kept as the package list.
+install_packages="$@"
+
+# Silent mode: discard everything written to stdout from here on.
+[[ -n $silent_mode ]] && exec 1>/dev/null
+
+if [[ -z $zonename ]]; then
+	msg=$(gettext "ERROR: Cannot install - no zone name was specified")
+	screenlog "$msg"
+	echo
+	exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+if [[ -z $zoneroot ]]; then
+	msg=$(gettext "ERROR: Cannot install - no zone root directory was")
+	screenlog "$msg"
+
+	msg=$(gettext "specified.")
+	screenlog " $msg"
+	echo
+	exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+# Make sure the specified zone root directory exists
+[[ -d "$zoneroot" ]] || mkdir -m 0700 -p "$zoneroot"
+
+if [[ ! -d "$zoneroot" ]]; then
+	screenlog "$zone_rootfail" "$zoneroot"
+	echo
+	exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+rootdir="$zoneroot/root"
+
+# Make sure the specified zone root subdirectory exists
+[[ -d "$rootdir" ]] || mkdir -p "$rootdir"
+
+if [[ ! -d "$rootdir" ]]; then
+	screenlog "$zone_rootsub" "$rootdir"
+	echo
+	exit $ZONE_SUBPROC_NOTCOMPLETE
+fi
+
+media_mntdir="$rootdir/media"
+
+if [[ -n $logfile ]]; then
+	# If a log file was specified, log information regarding the install
+	log "\nInstallation started `date`"
+	log "Installing from path \"$distro_path\""
+else
+	# Redirect stderr to /dev/null if silent mode is specified.
+	[[ -n $silent_mode ]] && exec 2>/dev/null
+fi
+
+#
+# NOTE(review): distro_path has already been defaulted above, so this
+# assignment can only take effect if it is somehow empty again here;
+# default_distro_path is not set anywhere in this part of the script.
+#
+distro_path=${distro_path:=$default_distro_path}
+
+# From this point on, call trap_cleanup() on interrupt (^C)
+trap trap_cleanup INT
+
+verbose "Installing zone \"$zonename\" at root \"$zoneroot\""
+release_name="supported Linux distribution"
+
+#
+# Based on the pathname, attempt to determine whether this will be a disc or
+# lofi-based install or one using ISOs.
+#
+if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ||
+    "$distro_path" = /dev/dsk/* || "$distro_path" = /dev/lofi/* ]]; then
+	if [[ "$distro_path" = /dev/lofi/* ]]; then
+		silent_err_msg="$silent_nolofi"
+		install_media="lofi"
+	else
+		silent_err_msg="$silent_nodisc"
+		install_media="disc"
+	fi
+
+	# Only paths under /cdrom and /media are flagged as managed media.
+	if [[ "$distro_path" = /cdrom/* || "$distro_path" = /media/* ]]; then
+		managed_removable=1
+	else
+		managed_removable=0
+	fi
+
+	log "Installing zone \"$zonename\" at root \"$zoneroot\""
+	verboselog " Attempting ${install_media}-based install via:"
+	verboselog " \"$distro_path\""
+
+	do_disc_install "$distro_path"
+else
+	typeset dir_start
+	typeset dir_file
+
+	# First character of the directory part; "/" means an absolute path.
+	dir_start=$(dirname "$distro_path" | cut -c 1)
+
+	[[ "$dir_start" != "/" ]] && distro_path="${PWD:=$(pwd)}/$distro_path"
+
+	if [[ ! -d "$distro_path" ]]; then
+		screenlog "$no_distropath" "$distro_path"
+		echo
+		exit $ZONE_SUBPROC_NOTCOMPLETE
+	fi
+
+	log "Installing zone \"$zonename\" at root \"$zoneroot\""
+	verboselog " Attempting ISO-based install from directory:"
+	verboselog " \"$distro_path\""
+
+	unset iso_files
+
+	for dir_file in $distro_path/*; do
+		#
+		# Skip this file if it's not a regular file or isn't readable
+		#
+		[[ ! -f $dir_file || ! -r $dir_file ]] && continue
+
+		#
+		# If it's an hsfs file, it's an ISO, so add it to the possible
+		# distro ISO list
+		#
+		filetype=$(LC_ALL=C fstyp $dir_file 2>/dev/null) &&
+		    [[ "$filetype" = "hsfs" ]] &&
+		    iso_files="$iso_files $dir_file"
+	done
+
+	install_media="ISO"
+	do_iso_install $iso_files
+fi
+
+#
+# $? is the status of the if statement above, i.e. of do_disc_install or
+# do_iso_install, whichever ran last.
+#
+if [[ $? -ne 0 ]]; then
+	cd "$cwd"
+
+	[[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt &&
+	    unset miniroot_booted && unset newroot_mounted
+
+	if [[ -n $zone_mounted ]]; then
+		umount "$zone_mounted"
+		unset zone_mounted
+	fi
+
+	if [[ -n $newroot_mounted ]]; then
+		umount_list $newroot_mounted
+		unset newroot_mounted
+	fi
+
+	screenlog "\n$install_failed\n" "$release_name" "$zonename" "`date`"
+
+	msg=$(gettext "Cleaning up after failed install...")
+	screenlog "$msg"
+
+	#
+	# The extra checks are some basic paranoia due to the potentially
+	# dangerous nature of these commands but are not intended to catch all
+	# malicious cases.
+	#
+	[[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
+
+	exit $ZONE_SUBPROC_FATAL
+fi
+
+screenlog "$install_done" "$release_name" "$zonename" "`date`"
+
+exit $ZONE_SUBPROC_OK
diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
new file mode 100644
index 0000000000..fe2a7ec047
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh
@@ -0,0 +1,686 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+#
+# This script contains various routines used to post-process a zone for use
+# with BrandZ after it has been installed from RPM media or a tar image.
+#
+# Briefly, there are three main jobs we need to do:
+#
+# 1) Create any needed directories and symlinks BrandZ needs but that the
+#    Linux install may not create
+#
+# 2) Modify rc scripts to shut off services that don't apply to a zone
+#    or that wish to access hardware directly
+#
+# 3) Modify various Linux system files for use within a zone environment
+#
+
+#
+# Restrict executables to /bin and /usr/bin
+#
+PATH=/bin:/usr/bin
+export PATH
+
+#
+# Sends output to a log file via redirection of stderr.
+#
+# This script assumes its caller has already performed the redirection to the
+# logfile.
+#
+log()
+{
+	echo "$@" >&2
+}
+
+#
+# Setup i18n output
+#
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+cmd_failed=$(gettext "%s failed! Aborting installation...")
+cmd2_failed=$(gettext "%s of '%s' to '%s' failed!")
+create_failed=$(gettext "Could not create new file '%s'!")
+disable_failed=$(gettext "Attempt to disable entries in '%s' failed!")
+install_aborted=$(gettext "Aborting installation...")
+install_noroot=$(gettext "Installation root directory '%s' does not exist.")
+ln_fail=$(gettext "Unable to symlink '%s' to '%s'!")
+mkdir_fail=$(gettext "Unable to create the directory '%s'")
+mod_failed=$(gettext -n "Attempt to modify entries in '%s' failed!")
+
+usage=$(gettext "usage: %s <install_root> [mini]")
+
+#
+# Output an internationalized string followed by a carriage return
+#
+# The first argument is used as the printf format; the rest fill it in.
+#
+i18n_echo()
+{
+	typeset fmt="$1"
+	shift
+
+	printf "$fmt\n" "$@"
+}
+
+#
+# Routine to make a full path out of a supplied path
+#
+# A path that does not begin with "/" is prefixed with the current working
+# directory; the result is written to stdout.
+#
+fullpath()
+{
+	typeset path="$1"
+
+	echo $path | egrep -s "^/" || path="${PWD:=$(pwd)}/$path"
+	echo $path
+}
+
+#
+# Routine to create directories and handle errors
+#
+# Usage: makedir <directory> [mode]
+#
+# Does nothing if the directory already exists.  A failed mkdir is fatal:
+# the error is reported and the script exits with status 1.
+#
+makedir()
+{
+	typeset dirname=$(fullpath "$1")
+	typeset mode=""
+
+	[[ $# -eq 2 ]] && mode="-m $2"
+
+	[[ -d "$dirname" ]] && return
+
+	if ! mkdir $mode -p "$dirname"; then
+		log "Unable to create the directory \"$dirname\"!"
+		i18n_echo "$mkdir_fail" "$dirname"
+		echo $(gettext "Aborting installation...")
+		exit 1
+	fi
+}
+
+#
+# Routine to create initial symlinks and handle errors
+#
+# Usage: symlink <link contents> <link name>
+#
+# Anything already present at the link name is removed first.  A failed
+# ln is fatal: the error is reported and the script exits with status 1.
+#
+symlink()
+{
+	typeset src="$1"
+	typeset dst=$(fullpath "$2")
+
+	[[ -e "$dst" || -h "$dst" ]] && rm -f "$dst"
+
+	if ! ln -s "$src" "$dst"; then
+		log "Unable to symlink \"$src\" to \"$dst\"!"
+		i18n_echo "$ln_fail" "$src" "$dst"
+		echo $(gettext "Aborting installation...")
+		exit 1
+	fi
+}
+
+#
+# Install a file using "ln -s"
+#
+# Any existing target is first moved aside to "<target>.$tag".
+#
+# Returns 0 on success, 1 on failure.
+#
+install_ln()
+{
+	typeset source="$1"
+	typeset target=$(fullpath "$2")
+
+	log " Installing \"$target\""
+
+	mv -f "$target" "$target.$tag" 2>/dev/null
+
+	if ! ln -s "$source" "$target"; then
+		log ""
+		log "Attempt to install $target FAILED."
+		return 1
+	fi
+
+	return 0
+}
+
+
+#
+# Enable NFS servers and the NFS lock daemon for a particular zone.
+#
+# Returns 1 if the mount points or the rpc.lockd/rpc.statd links could not
+# be set up; otherwise returns the status of the final nfslock rewrite.
+#
+enable_nfs_services()
+{
+	log "Non-miniroot install; enabing NFS servers and NFS lock daemon"
+
+	#
+	# Setup files required for NFS:
+	#
+	#	/native/etc/netconfig
+	#	/native/etc/default/nfs
+	#
+	# These two files are treated as read-only in lx branded zones.
+	# To enforce this restriction we will read-only lofs mount them
+	# into the zone from the global zone.  For these lofs mounts to
+	# work we'll need to create empty directories now that will serve
+	# as mount points later.
+	#
+	#	/sbin/rpc.statd
+	#	/sbin/rpc.lockd
+	#
+	# These files are symlinks to scripts supplied by the lx brand
+	# that will start up the solaris nfs daemons.
+	#
+	# Each abort path returns 1 explicitly: a bare "return" would hand
+	# back the (successful) status of the preceding log call and hide
+	# the failure from the caller.
+	#
+	if { ! makedir native/etc/netconfig ||
+	    ! makedir native/etc/default/nfs ; }; then
+		log "Aborting NFS setup..."
+		log ""
+		return 1
+	fi
+
+	if { ! install_ln ../native/usr/lib/brand/lx/lx_lockd sbin/rpc.lockd ||
+	    ! install_ln ../native/usr/lib/brand/lx/lx_statd \
+	    sbin/rpc.statd ; }; then
+		log "Aborting NFS setup..."
+		log ""
+		return 1
+	fi
+
+	#
+	# update /etc/services for NFS
+	#
+	log ""
+	log "Adding lockd entry to \"$install_root/etc/services\"..."
+
+	cp -p $install_root/etc/services $install_root/etc/services.$tag
+
+	#
+	# Brackets in the sed script below contain a space followed by a tab
+	#
+	cat $install_root/etc/services.$tag |
+	    sed 's:\(111\/..p[ 	][ 	]*\):\1rpcbind :' |
+	    cat > $install_root/etc/services
+
+	cat >> $install_root/etc/services <<-EOF
+	lockd 4045/udp # NFS lock daemon/manager
+	lockd 4045/tcp # NFS lock daemon/manager
+	EOF
+
+	#
+	# Modify /etc/init.d/nfslock to enable the USERLAND_LOCKD option and to
+	# find some commands in alternate locations.
+	#
+	log ""
+	log "Modifying \"$install_root/etc/init.d/nfslock\"..."
+	cp -p etc/init.d/nfslock etc/init.d/nfslock.$tag
+	cat etc/init.d/nfslock.$tag |
+	    sed '
+		s/USERLAND_LOCKD=$/USERLAND_LOCKD="yes"/
+		s/killproc rpc.statd/killproc statd/
+		s/status rpc.statd/status statd/
+		s/pidof rpc.statd/pidof statd/
+	    ' |
+	    cat > etc/init.d/nfslock
+}
+
+#
+# The main script starts here.
+#
+# The syntax is:
+#
+#	lx_init_zone <rootdir> [mini]
+#
+# Where:
+#	<rootdir> is the root of the zone directory to be modified
+#
+#	[mini] is an optional second argument that signifies whether this is
+#	to be a miniroot install; if it is, NFS services are not enabled
+#	in the processed zone
+#
+unset is_miniroot
+unset install_root
+
+install_root="$1"
+
+tag="lxsave_$(date +%m.%d.%Y@%T)"
+
+if (($# < 1 || $# > 2)); then
+	i18n_echo "$usage" "$0"
+	exit 1
+fi
+
+(($# == 2)) && is_miniroot=1
+
+if [[ ! -d "$install_root" ]]; then
+	i18n_echo "$install_noroot" "$install_root"
+	echo $(gettext "** Installation aborted **")
+	exit 1
+fi
+
+#
+# The rest of the script runs with the install root as its working
+# directory, yet several steps still name files as "$install_root/...".
+# Anchor a relative install root to the starting directory so that those
+# references keep resolving after the cd below.
+#
+[[ "$install_root" = /* ]] || install_root="${PWD:=$(pwd)}/$install_root"
+
+#
+# Every later edit uses paths relative to the install root, so continuing
+# after a failed cd would modify files under whatever directory we were
+# started in.
+#
+if ! cd "$install_root"; then
+	i18n_echo "$cmd_failed" "cd"
+	exit 1
+fi
+
+log ""
+log "Initial lx_brand environment modification started `date`"
+log "Making needed directories in \"$install_root\"."
+echo $(gettext "Setting up the initial lx brand environment.")
+
+#
+# Make various directories in /native that are needed to boot an lx branded
+# zone.
+#
+makedir native/dev
+makedir native/etc/default
+makedir native/etc/svc/volatile
+makedir native/lib
+makedir native/proc
+makedir native/tmp 1777
+makedir native/usr
+makedir native/var
+
+#
+# Make various other directories needed for the lx brand
+#
+makedir mnt
+makedir opt
+makedir usr/local/bin
+makedir usr/local/include
+makedir usr/local/lib
+makedir usr/local/sbin
+makedir usr/local/share
+makedir usr/local/src
+
+makedir dev 0755
+makedir tmp 1777
+makedir proc 0555
+makedir boot 0755
+
+#
+# zlogin requires that these utilities live in places other than their
+# Linux defaults, so create appropriate links for them here.
+#
+# XX - The need for these links may go away in the future if zlogin is
+#	appropriately modified
+#
+symlink /bin/sh sbin/sh
+symlink /bin/su usr/bin/su
+symlink /native/usr/lib/ld.so.1 usr/lib/ld.so.1
+
+# Resolve the versioned PAM library names present in this install.
+libpam_so="$(echo lib/libpam.so.0.*)"
+libpam_misc="$(echo lib/libpam_misc.so.0.*)"
+libpamc_so="$(echo lib/libpamc.so.0.*)"
+
+symlink "/$libpam_so" lib/libpam.so.0
+symlink "/$libpam_misc" lib/libpam_misc.so.0
+symlink "/$libpamc_so" lib/libpamc.so.0
+
+log ""
+log "Modifying system configuration in \"$install_root\""
+
+#
+# Create a /var/ld/ld.config that will point to /native/lib for our Solaris
+# libraries.
+#
+log "Creating \"$install_root/var/ld/ld.config\"..."
+
+makedir var/ld
+
+if ! crle -c var/ld/ld.config -l /native/lib:/native/usr/lib \
+    -s /native/lib/secure:/native/usr/lib/secure; then
+	log "\tCreation of \"$install_root/var/ld/ld.config\" failed!"
+	i18n_echo "$cmd_failed" "crle"
+	exit 1
+fi
+
+log ""
+log "Modifying \"$install_root/etc/fstab\"..."
+
+mv -f etc/fstab etc/fstab.$tag 2>/dev/null
+
+cat > etc/fstab <<- EOF
+	none / ufs defaults 1 1
+	none /proc proc defaults 0 0
+EOF
+
+if [[ $? -ne 0 ]]; then
+	log "Could not create new \"$install_root/etc/fstab\"!"
+	i18n_echo "$create_failed" "$install_root/etc/fstab"
+	exit 1
+fi
+
+#
+# The default /etc/inittab spawns mingetty on each of the virtual consoles
+# as well as xdm on the X console.  Since we don't have virtual consoles nor
+# an X console, spawn a single mingetty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/inittab; then
+	log "Modifying: \"$install_root/etc/inittab\"..."
+
+	tmpfile=/tmp/inittab.$$
+
+	sed 's/^[1-6]:/# Disabled by lx brand: &/
+	    s/^id:5:initdefault:/id:3:initdefault: # Modified by lx brand: &/' \
+	    etc/inittab > $tmpfile
+
+	#
+	# Don't bother with further alterations if the sed above failed...
+	#
+	if [[ $? -eq 0 ]]; then
+		egrep -s "console login for lx brand" etc/inittab
+		if [[ $? -ne 0 ]]; then
+			cat >> $tmpfile <<- EOF
+
+			#
+			# console login for lx brand
+			#
+			1:2345:respawn:/sbin/mingetty console
+			EOF
+
+			#
+			# Only install the new inittab if the append
+			# above succeeded.
+			#
+			if [[ $? -eq 0 ]]; then
+				#
+				# Attempt to save off the original inittab
+				# before moving over the modified version.
+				#
+				mv -f etc/inittab etc/inittab.$tag 2>/dev/null
+
+				mv -f $tmpfile etc/inittab
+
+				if [[ $? -ne 0 ]]; then
+					log "mv of \"$tmpfile\" to" \
+					    "\"$install_root/etc/inittab\"" \
+					    "failed!"
+					i18n_echo "$cmd2_failed" "mv" \
+					    "$tmpfile" \
+					    "$install_root/etc/inittab"
+					i18n_echo "$install_aborted"
+					exit 1
+				else
+					chmod 644 etc/inittab
+				fi
+			fi
+		fi
+
+	else
+		log "Attempt to disable entries in" \
+		    "\"$install_root/etc/inittab\" failed!"
+		i18n_echo "$disable_failed" "$install_root/etc/inittab"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+fi
+
+if [[ ! -e "$install_root/etc/hosts" ]]; then
+	log ""
+	log "Creating: \"$install_root/etc/hosts\"..."
+
+	cat > "$install_root/etc/hosts" <<-_EOF_
+		127.0.0.1 localhost
+	_EOF_
+fi
+
+#
+# User must configure various brand-specific items to enable networking, so
+# boot the system non-networked.
+#
+log ""
+log "Modifying: \"$install_root/etc/sysconfig/network\"..."
+
+mv -f etc/sysconfig/network etc/sysconfig/network.$tag 2>/dev/null
+
+cat > etc/sysconfig/network <<- EOF
+	NETWORKING="no"
+	#
+	# To enable networking, change the "no" above to "yes" and
+	# uncomment and fill in the following parameters.
+	#
+	# If you are specifying a hostname by name rather than by IP address,
+	# be sure the system can resolve the name properly via the use of a
+	# name service and/or the proper name files, as specified by
+	# nsswitch.conf.  See nsswitch.conf(5) for further details.
+	#
+	# HOSTNAME=your_hostname_here
+	#
+EOF
+
+if [[ $? -ne 0 ]]; then
+	log "Could not create new \"$install_root/etc/sysconfig/network\"!"
+	i18n_echo "$create_failed" "$install_root/etc/sysconfig/network"
+	i18n_echo "$install_aborted"
+	exit 1
+fi
+
+if [[ -a etc/sysconfig/syslog ]]; then
+	#
+	# By default, syslogd will attempt to create a socket in /dev/log, but
+	# /dev is not be writable.  Instead, modify /etc/sysconfig/syslog to
+	# tell it to use /var/run/syslog instead, and make /dev/log a symlink
+	# to /var/run/syslog.
+	#
+	log ""
+	log "Modifying: \"$install_root/etc/sysconfig/syslog\"..."
+
+	tmpfile=/tmp/lx_sc.syslog.$$
+
+	sed 's@\(SYSLOGD_OPTIONS="-m 0\)"@\1 -p /var/run/syslog"@' \
+	    etc/sysconfig/syslog > $tmpfile
+
+	#
+	# Only install the new sysconfig/syslog if the edit above succeeded.
+	#
+	if [[ $? -eq 0 ]]; then
+		#
+		# Attempt to save off the original syslog before moving over
+		# the modified version.
+		#
+		mv -f etc/sysconfig/syslog etc/sysconfig/syslog.$tag 2>/dev/null
+
+		if ! mv -f $tmpfile etc/sysconfig/syslog; then
+			log "mv of \"$tmpfile\" to" \
+			    "\"$install_root/etc/sysconfig/syslog\" failed!"
+			i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+			    "$install_root/etc/sysconfig/syslog"
+			i18n_echo "$install_aborted"
+			exit 1
+		else
+			chmod 755 etc/sysconfig/syslog
+		fi
+	else
+		log "Attempt to modify entries in" \
+		    "\"$install_root/etc/sysconfig/syslog\" failed!"
+		i18n_echo "$mod_failed" "$install_root/etc/sysconfig/syslog"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+fi
+
+#
+# NOTE(review): $? here is the status of the if statement above, which in
+# practice is that of the chmod (or 0 when the file did not exist), so the
+# "could not create" wording below does not match what is being tested.
+#
+if [[ $? -ne 0 ]]; then
+	log "Could not create new \"$install_root/etc/sysconfig/syslog\"!"
+	i18n_echo "$create_failed" "$install_root/etc/sysconfig/syslog"
+	i18n_echo "$install_aborted"
+	exit 1
+fi
+
+#
+# /etc/rc.d/init.d/keytable tries to load a physical keyboard map, which won't
+# work in a zone.  If we remove etc/sysconfig/keyboard, it won't try this at all.
+#
+mv -f etc/sysconfig/keyboard etc/sysconfig/keyboard.$tag 2>/dev/null
+
+#
+# /etc/rc.d/init.d/gpm tries to configure the console mouse for cut-and-paste
+# text operations, which we don't support.  Removing this file disables the
+# mouse configuration.
+#
+mv -f etc/sysconfig/mouse etc/sysconfig/mouse.$tag 2>/dev/null
+
+#
+# The following scripts attempt to start services or otherwise configure
+# the system in ways incompatible with zones, so don't execute them at boot
+# time.
+#
+log ""
+log "Modifying \"$install_root/etc/rc.d/init.d\" to disable any"
+log " services not supported by BrandZ:"
+unsupported_services="
+	kudzu
+	microcode_ctl
+	network
+	random
+	pcmcia
+	isdn
+	iptables
+	ip6tables
+	iscsi
+	psacct
+	gpm
+	irda
+	smartd
+	rawdevices
+	netdump
+	hpoj
+	mdmonitor
+	mdmpd
+	irqbalance
+"
+
+for file in $unsupported_services; do
+	# Move the init script itself aside, keeping it under the save tag.
+	if [[ -a "etc/rc.d/init.d/$file" ]] &&
+	    mv -f "etc/rc.d/init.d/$file" "etc/rc.d/init.d/$file.$tag"; then
+		log " + Moved script \"etc/rc.d/init.d/$file\" to"
+		log " \"etc/rc.d/init.d/$file.$tag\""
+	fi
+
+	rc_files="$(echo etc/rc.d/rc[0-6].d/[SK]+([0-9])$file)"
+
+	# If the pattern came back unexpanded, no runlevel entries matched.
+	[[ "$rc_files" = "etc/rc.d/rc[0-6].d/[SK]+([0-9])$file" ]] && continue
+
+	# Drop every start/kill entry for the service, noting what it was.
+	for file in $rc_files; do
+		if [[ -h "$file" ]]; then
+			if rm -f "$file"; then
+				log " + Removed symbolic link \"$file\""
+			fi
+		elif rm -f "$file"; then
+			log " + Removed script \"$file\""
+		fi
+	done
+done
+
+#
+# The standard halt and reboot script does a good deal of work that has no
+# place inside a zone.  That work sits in one contiguous region of the file,
+# so it is commented out wholesale.
+#
+# The file is left alone when it already carries our marker.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/init.d/halt; then
+	log ""
+	log "Modifying \"$install_root/etc/rc.d/init.d/halt\" for operation"
+	log " within a zone..."
+
+	if ! awk 'BEGIN {skip = ""}
+	    /^# Save mixer/ {skip = "# Disabled by lx brand: "}
+	    /halt.local/ {skip = ""}
+	    /./ {print skip $0}' etc/rc.d/init.d/halt > /tmp/halt.$$; then
+		log "Attempt to modify \"$install_root/etc/rc.d/init.d/halt\"" \
+		    "FAILED"
+		log "Continuing with balance of zone setup..."
+	else
+		mv -f etc/rc.d/init.d/halt etc/rc.d/init.d/halt.$tag 2>/dev/null
+		mv -f /tmp/halt.$$ etc/rc.d/init.d/halt
+		chmod 755 etc/rc.d/init.d/halt
+	fi
+fi
+
+#
+# Fix up /etc/rc.d/rc.sysinit:
+#
+# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support.
+#    Since the hardware clock cannot be set from within a zone, we comment out
+#    the line.
+#
+# 2) Disable dmesg commands, since we don't implement klogctl
+#
+# 3) Disable initlog and the mount of /dev/pts
+#
+# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't
+#    work from within a zone.
+#
+# 5) Don't try to check the root filesystem (/) as there is no associated
+#    physical device, and any attempt to run fsck will fail.
+#
+# Don't modify the rc.sysinit file if it looks like we already did.
+#
+if ! egrep -s "Disabled by lx brand" etc/rc.d/rc.sysinit; then
+	log ""
+	log "Modifying: \"$install_root/etc/rc.d/rc.sysinit\"..."
+	log ""
+
+	tmpfile=/tmp/lx_rc.sysinit.$$
+
+	sed 's@^/sbin/hwclock@# Disabled by lx brand: &@
+	    s@^HOSTTYPE=@HOSTTYPE=\"s390\" # Spoofed for lx brand: &@
+	    s@/bin/dmesg -n@: # Disabled by lx brand: &@
+	    s@^dmesg -s@# Disabled by lx brand: &@
+	    s@initlog -c \"fsck@: # Disabled by lx brand: &@
+	    s@^.*mount .* /dev/pts$@# Disabled by lx brand: &@' \
+	    etc/rc.d/rc.sysinit > $tmpfile
+
+	#
+	# Only install the new rc.sysinit if the edit above succeeded.
+	#
+	if [[ $? -eq 0 ]]; then
+		#
+		# Attempt to save off the original rc.sysinit
+		# before moving over the modified version.
+		#
+		mv -f etc/rc.d/rc.sysinit etc/rc.d/rc.sysinit.$tag 2>/dev/null
+
+		if ! mv -f $tmpfile etc/rc.d/rc.sysinit; then
+			log "mv of \"$tmpfile\" to" \
+			    "\"$install_root/etc/rc.d/rc.sysinit\" failed!"
+			i18n_echo "$cmd2_failed" "mv" "$tmpfile" \
+			    "$install_root/etc/rc.d/rc.sysinit"
+			i18n_echo "$install_aborted"
+			exit 1
+		else
+			chmod 755 etc/rc.d/rc.sysinit
+		fi
+	else
+		log "Attempt to modify entries in" \
+		    "\"$install_root/etc/rc.d/rc.sysinit\" failed!"
+		i18n_echo "$mod_failed" "$install_root/etc/rc.d/rc.sysinit"
+		i18n_echo "$install_aborted"
+		exit 1
+	fi
+fi
+
+# NFS services are only set up for full (non-miniroot) installs.
+if [[ -z $is_miniroot ]]; then
+	enable_nfs_services || log "NFS services were not properly enabled."
+fi
+
+log ""
+log "System configuration modifications complete `date`"
+log ""
+i18n_echo "System configuration modifications complete."
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/lx_install.ksh b/usr/src/lib/brand/lx/zone/lx_install.ksh
new file mode 100644
index 0000000000..d2ad32fc42
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_install.ksh
@@ -0,0 +1,579 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Restrict executables to /bin, /usr/bin, /usr/sbin and /usr/sfw/bin
+PATH=/bin:/usr/bin:/usr/sbin:/usr/sfw/bin
+
+export PATH
+
+# Setup i18n output
+TEXTDOMAIN="SUNW_OST_OSCMD"
+export TEXTDOMAIN
+
+# Log passed arguments to file descriptor 2
+log()
+{
+	[[ -n $logfile ]] && echo "$@" >&2
+}
+
+#
+# Send the provided printf()-style arguments to the screen and to the
+# logfile.
#
# The first argument is the format string (a newline is appended to it); the
# remaining arguments are its operands.  The logfile copy is written to file
# descriptor 2 and is only emitted once "logfile" has been set.
#
screenlog()
{
	typeset fmt="$1"
	shift

	# Screen copy first, then the log copy.
	printf "$fmt\n" "$@"
	[[ -n $logfile ]] && printf "$fmt\n" "$@" >&2
}

#
# Echo the arguments to the screen when "verbose_mode" is set, and repeat
# them on file descriptor 2 when "logfile" is set as well.
#
verbose()
{
	[[ -n $verbose_mode ]] && echo "$@"
	[[ -n $logfile && -n $verbose_mode ]] && echo "$@" >&2
}

#
# Localized message catalogue.  Each string is a printf()-style format that
# is later handed to screenlog() or printf.
#
unsupported_cpu=\
$(gettext "ERROR: Cannot install branded zone: processor must be %s-compatible")

cmd_not_found=$(gettext "Required command '%s' cannot be found!")
cmd_not_exec=$(gettext "Required command '%s' not executable!")
zone_initfail=$(gettext "Attempt to initialize zone '%s' FAILED.")
path_abs=$(gettext "Pathname specified to -d '%s' must be absolute.")

cmd_h=$(gettext "%s -z <zone name> %s -h")
cmd_full=\
$(gettext "%s -z <zone name> %s [-v | -s] [-d <dir>|<device>] [<cluster> ... ]")

both_modes=$(gettext "%s: error: cannot select both silent and verbose modes")

not_found=$(gettext "%s: error: file or directory not found.")

wrong_type=\
$(gettext "%s: error: must be a gzip, bzip2, .Z or uncompressed tar archive.")

not_readable=$(gettext "Cannot read file '%s'")

no_install=$(gettext "Could not create install directory '%s'")
no_log=$(gettext "Could not create log directory '%s'")
no_logfile=$(gettext "Could not create log file '%s'")

root_full=$(gettext "Zonepath root %s exists and contains data; remove or move aside prior to install.")

install_zone=$(gettext "Installing zone '%s' at root directory '%s'")
install_from=$(gettext "from archive '%s'")

install_fail=$(gettext "Installation of zone '%s' FAILED.")
see_log=$(gettext "See the log file:\n '%s'\nfor details.")

install_abort=$(gettext "Installation of zone '%s' aborted.")
install_good=$(gettext "Installation of zone '%s' completed successfully.")

#
# Verify that every path passed in names a regular file that is executable.
# On the first path that fails either test, report it and abort the install
# with ZONE_SUBPROC_NOTCOMPLETE.
#
check_cmd()
{
	for cmd in "$@"; do
		# Nothing to report for a usable command.
		[[ -f $cmd && -x $cmd ]] && continue

		if [[ ! -f $cmd ]]; then
			screenlog "$cmd_not_found" "$cmd"
		else
			screenlog "$cmd_not_exec" "$cmd"
		fi
		screenlog "$install_abort" "$zonename"
		exit $ZONE_SUBPROC_NOTCOMPLETE
	done
}

#
# Post process a tarball-installed zone for use by BrandZ by running
# lx_init_zone against the zone root passed as the first argument.
# Returns 1 (after reporting the failure) if lx_init_zone fails.
#
init_tarzone()
{
	typeset rootdir="$1"

	$branddir/lx_init_zone "$rootdir" && return 0

	screenlog "$zone_initfail" "$zonename"
	return 1
}

# Interrupt handler: report the cancellation and exit with $int_code.
trap_cleanup()
{
	screenlog "$(gettext "Installation cancelled due to interrupt.")"
	exit $int_code
}

#
# Output the usage message and exit with ZONE_SUBPROC_USAGE.
#
# This is done this way due to limitations in the way gettext strings are
# extracted from shell scripts and processed.  Use of this somewhat awkward
# syntax allows us to produce longer lines of text than otherwise would be
# possible without wrapping lines across more than one line of code.
#
usage()
{
	int_code=$ZONE_SUBPROC_USAGE

	echo $(gettext "Usage:")
	printf " $cmd_h\n" "zoneadm" "install"
	printf " $cmd_full\n" "zoneadm" "install"

	echo

	echo $(gettext "The installer will attempt to use the default system") \
	    $(gettext "removable disc device if <archive dir> is not") \
	    $(gettext "specified.") | fmt -80

	echo

	echo $(gettext "<cluster> specifies which package cluster you wish") \
	    $(gettext "to install.") | fmt -80

	echo
	echo $(gettext "The 'desktop' cluster will be installed by default.")
	echo
	echo $(gettext "The available clusters are:")
	echo " + core"
	echo " + server"
	echo " + desktop"
	echo " + development"
	echo " + all"
	echo

	echo $(gettext "Each cluster includes all of the clusters preceding") \
	    $(gettext "it, so the 'server' cluster includes the 'core'") \
	    $(gettext "cluster, the 'desktop' cluster includes the 'core'") \
	    $(gettext "and 'server' clusters, and so on.") | fmt -80

	echo
	echo $(gettext "Examples")
	echo "========"

	echo $(gettext "Example 1: Install a base Linux system from CDs or a") \
	    $(gettext "DVD using the system default removable disc device:") |
	    fmt -80

	echo
	echo " # zoneadm -z myzone install"
	echo

	echo $(gettext "Example 2: Install the 'server' cluster from CDs or") \
	    $(gettext "a DVD via an alternative removable disc device:") |
	    fmt -80

	echo
	echo " # zoneadm -z myzone install -d /cdrom/cdrom1 server"
	echo

	echo $(gettext "Example 3: Install the desktop Linux environment") \
	    $(gettext "from an ISO image made available as '/dev/lofi/1' by") \
	    $(gettext "use of lofiadm(1M):") | fmt -80

	echo
	echo " # zoneadm -z myzone install -d /dev/lofi/1 desktop"
	echo

	echo $(gettext "Example 4: Install the entire Linux environment from") \
	    $(gettext "ISO images located in the directory") \
	    "'/export/centos_3.8/isos':" | fmt -80

	echo
	echo " # zoneadm -z myzone install -d /export/centos_3.8/isos all"
	echo

	echo $(gettext "Example 5: Install from a compressed tar archive of") \
	    $(gettext "an existing Linux installation (a tar ball) with") \
	    $(gettext "verbose output regarding the progress of the") \
	    $(gettext "installation:") | fmt -80

	echo
	echo " # zoneadm -z myzone install -v -d /tmp/linux_full.tar.gz"
	echo

	echo $(gettext "Example 6: Install from a compressed tar archive of") \
	    $(gettext "an existing Linux installation (a tar ball) with NO") \
	    $(gettext "output regarding the progress of the installation") \
	    $(gettext "(silent mode.)") | fmt -80

	echo

	echo $(gettext "NOTE: Silent mode is only recommended for use by") \
	    $(gettext "shell scripts and other non-interactive programs:") |
	    fmt -80

	echo
	echo " # zoneadm -z myzone install -d /tmp/linux_full.tar.gz -s"
	echo

	exit $int_code
}

#
# The main body of the script starts here.
#
# This script should never be called directly by a user but rather should
# only be called by zoneadm to install a BrandZ Linux zone.
#

#
# Exit values used by the script, as #defined in <sys/zone.h>
#
#	ZONE_SUBPROC_OK
#	===============
#	Installation was successful
#
#	ZONE_SUBPROC_USAGE
#	==================
#	Improper arguments were passed, so print a usage message before exiting
#
#	ZONE_SUBPROC_NOTCOMPLETE
#	========================
#	Installation did not complete, but another installation attempt can be
#	made without an uninstall
#
#	ZONE_SUBPROC_FATAL
#	==================
#	Installation failed and an uninstall will be required before another
#	install can be attempted
#
ZONE_SUBPROC_OK=0
ZONE_SUBPROC_USAGE=253
ZONE_SUBPROC_NOTCOMPLETE=254
ZONE_SUBPROC_FATAL=255

#
# An unspecified exit or interrupt should exit with ZONE_SUBPROC_NOTCOMPLETE,
# meaning a user will not need to do an uninstall before attempting another
# install.
#
int_code=$ZONE_SUBPROC_NOTCOMPLETE

trap trap_cleanup INT

# If we weren't passed at least two arguments, exit now.
[[ $# -lt 2 ]] && usage

#
# This script is always started with a full path so we can extract the
# brand directory name here.
#
branddir=$(dirname "$0")
zonename="$1"
zoneroot="$2"

install_root="$zoneroot/root"
logdir="$install_root/var/log"

shift; shift	# remove zonename and zoneroot from arguments array

unset gtaropts
unset install_opts
unset install_src
unset msg
unset silent_mode
unset verbose_mode

while getopts "d:hsvX" opt
do
	case "$opt" in
		h)	usage;;
		s)	silent_mode=1;;
		v)	verbose_mode=1;;
		d)	install_src="$OPTARG" ;;
		X)	install_opts="$install_opts -x" ;;
		*)	usage;;
	esac
done
# Drop the options that getopts consumed; only free-form arguments remain.
shift $((OPTIND - 1))

# Providing more than one passed argument generates a usage message
if [[ $# -gt 1 ]]; then
	msg=$(gettext "ERROR: Too many arguments provided:")

	screenlog "$msg"
	screenlog " \"%s\"" "$@"
	screenlog ""
	usage
fi

# Validate any free-form arguments
if [[ $# -eq 1 && "$1" != "core" && "$1" != "server" && "$1" != "desktop" &&
    "$1" != "development" && "$1" != "all" ]]; then
	msg=$(gettext "ERROR: Unknown cluster name specified: %s")

	screenlog "$msg" "\"$1\""
	screenlog ""
	usage
fi

# The install can't be both verbose AND silent...
if [[ -n $silent_mode && -n $verbose_mode ]]; then
	screenlog "$both_modes" "zoneadm install"
	screenlog ""
	usage
fi

#
# Validate that we're running on a i686-compatible CPU; abort the zone
# installation now if we're not.
#
procinfo=$(LC_ALL=C psrinfo -vp | grep family)

#
# All x86 processors in CPUID families 6, 15, 16 or 17 should be
# i686-compatible, assuming third party processor vendors follow AMD and
# Intel's lead.
#
if [[ "$procinfo" != *" x86 "* ]] ||
    [[ "$procinfo" != *" family 6 "* && "$procinfo" != *" family 15 "* &&
    "$procinfo" != *" family 16 "* && "$procinfo" != *" family 17 "* ]] ; then
	screenlog "$unsupported_cpu" "i686"
	exit $int_code
fi

if [[ -n $install_src ]]; then
	#
	# Validate $install_src.
	#
	# If install_src is a directory, assume it contains ISO images to
	# install from, otherwise treat the argument as if it points to a tar
	# ball file.
	#
	# The pattern match replaces an unquoted "echo | cut" pipeline, which
	# mangled paths containing whitespace or glob characters.
	#
	if [[ "$install_src" != /* ]]; then
		screenlog "$path_abs" "$install_src"
		exit $int_code
	fi

	if [[ ! -a "$install_src" ]]; then
		screenlog "$not_found" "$install_src"
		screenlog "$install_abort" "$zonename"
		exit $int_code
	fi

	if [[ ! -r "$install_src" ]]; then
		screenlog "$not_readable" "$install_src"
		screenlog "$install_abort" "$zonename"
		exit $int_code
	fi

	#
	# If install_src is a block device, a directory, a possible device
	# created via lofiadm(1M), or the directory used by a standard volume
	# management daemon, pass it on to the secondary install script.
	#
	# Otherwise, validate the passed filename to prepare for a tar ball
	# install.
	#
	if [[ ! -b "$install_src" && ! -d "$install_src" &&
	    "$install_src" != /dev/lofi/* && "$install_src" != /cdrom/* &&
	    "$install_src" != /media/* ]]; then
		if [[ ! -f "$install_src" ]]; then
			screenlog "$wrong_type" "$install_src"
			screenlog "$install_abort" "$zonename"
			exit $int_code
		fi

		#
		# The archive type is taken from the second field of the
		# file(1) output.
		# NOTE(review): a path containing whitespace still shifts
		# that field; quoting only stops the shell splitting it.
		#
		filetype=$({ LC_ALL=C file "$install_src" |
		    awk '{print $2}' ; } 2>/dev/null)

		if [[ "$filetype" = "gzip" ]]; then
			verbose "\"$install_src\": \"gzip\" archive"
			gtaropts="-xz"
		elif [[ "$filetype" = "bzip2" ]]; then
			verbose "\"$install_src\": \"bzip2\" archive"
			gtaropts="-xj"
		elif [[ "$filetype" = "compressed" ]]; then
			verbose "\"$install_src\": Lempel-Ziv" \
			    "compressed (\".Z\") archive."
			gtaropts="-xZ"
		elif [[ "$filetype" = "USTAR" ]]; then
			verbose "\"$install_src\":" \
			    "uncompressed (\"tar\") archive."
			gtaropts="-x"
		else
			screenlog "$wrong_type" "$install_src"
			screenlog "$install_abort" "$zonename"
			exit $int_code
		fi
	fi
fi

#
# Start silent operation and prepare to pass the flag to the ISO installer,
# if needed.
#
if [[ -n $silent_mode ]]
then
	exec 1>/dev/null
	install_opts="$install_opts -s"
fi

#
# If verbose mode was specified, pass the verbose flag to lx_distro_install
# for ISO or disc installations and to gtar for tarball-based installs.
#
if [[ -n $verbose_mode ]]
then
	echo $(gettext "Verbose output mode enabled.")
	install_opts="$install_opts -v"
	[[ -n $gtaropts ]] && gtaropts="${gtaropts}v"
fi

# The archive name follows the option string, so "f" must come last.
[[ -n $gtaropts ]] && gtaropts="${gtaropts}f"

if [[ ! -d "$install_root" ]]
then
	if ! mkdir -p "$install_root" 2>/dev/null; then
		screenlog "$no_install" "$install_root"
		exit $int_code
	fi
fi

#
# Check for a non-empty root.
#
cnt=`ls "$install_root" | wc -l`
if [ $cnt -ne 0 ]; then
	screenlog "$root_full" "$install_root"
	exit $int_code
fi

if [[ ! -d "$logdir" ]]
then
	if ! mkdir -p "$logdir" 2>/dev/null; then
		screenlog "$no_log" "$logdir"
		exit $int_code
	fi
fi

logfile="${logdir}/$zonename.install.$$.log"

if ! > "$logfile"; then
	screenlog "$no_logfile" "$logfile"
	exit $int_code
fi

# Redirect stderr to the log file to automatically log any error messages
exec 2>>"$logfile"

#
# From here on out, an unspecified exit or interrupt should exit with
# ZONE_SUBPROC_FATAL, meaning a user will need to do an uninstall before
# attempting another install, as we've modified the directories we were going
# to install to in some way.
#
int_code=$ZONE_SUBPROC_FATAL

log "Installation started for zone \"$zonename\" `/usr/bin/date`"

if [[ -n $gtaropts ]]; then
	check_cmd /usr/sfw/bin/gtar "$branddir/lx_init_zone"

	screenlog "$install_zone" "$zonename" "$zoneroot"
	screenlog "$install_from" "$install_src"
	echo
	echo $(gettext "This process may take several minutes.")
	echo

	if ! ( cd "$install_root" && gtar "$gtaropts" "$install_src" ) ; then
		log "Error: extraction from tar archive failed."
	else
		if ! [[ -d "${install_root}/bin" &&
		    -d "${install_root}/sbin" ]]; then
			log "Error: improper or incomplete tar archive."
		else
			#
			# NOTE(review): init_tarzone also runs lx_init_zone,
			# so the zone is initialized twice here; confirm
			# that this is intended.
			#
			"$branddir/lx_init_zone" "$install_root" &&
			    init_tarzone "$install_root"

			#
			# Emit the same code from here whether we're
			# interrupted or exiting normally.
			#
			int_code=$?
		fi
	fi

	if [[ $int_code -eq $ZONE_SUBPROC_OK ]]; then
		log "Tar install completed for zone '$zonename' `date`."
	else
		log "Tar install failed for zone \"$zonename\" `date`."

	fi
else
	check_cmd "$branddir/lx_distro_install"

	# $install_opts is deliberately unquoted so it splits into flags.
	"$branddir/lx_distro_install" -z "$zonename" -r "$zoneroot" \
	    -d "$install_src" -l "$logfile" $install_opts "$@"

	#
	# Emit the same code from here whether we're interrupted or exiting
	# normally.
	#
	int_code=$?

	[[ $int_code -eq $ZONE_SUBPROC_USAGE ]] && usage
fi

if [[ $int_code -ne $ZONE_SUBPROC_OK ]]; then
	screenlog ""
	screenlog "$install_fail" "$zonename"
	screenlog ""

	#
	# Only make a reference to the log file if one will exist after
	# zoneadm exits.
	#
	[[ $int_code -ne $ZONE_SUBPROC_NOTCOMPLETE ]] &&
	    screenlog "$see_log" "$logfile"

	exit $int_code
fi

#
# After the install completes, we've likely moved a new copy of the logfile into
# place atop the logfile we WERE writing to, so if we don't reopen the logfile
# here the shell will continue writing to the old logfile's inode, meaning we
# would lose all log information from this point on.
+# +exec 2>>"$logfile" + +screenlog "" +screenlog "$install_good" "$zonename" +screenlog "" + +echo $(gettext "Details saved to log file:") +echo " \"$logfile\"" +echo + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml new file mode 100644 index 0000000000..a53f0ee509 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/platform.xml @@ -0,0 +1,85 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2007 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. 
+--> + +<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" + "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> + +<platform name="lx" allow-exclusive-ip="false"> + <!-- Global filesystems to mount when booting the zone --> + <global_mount special="/dev" directory="/native/dev" type="dev" + opt="attrdir=%R/dev" /> + <global_mount special="/lib" directory="/native/lib" + opt="ro" type="lofs" /> + <global_mount special="/usr/lib" directory="/native/usr/lib" + opt="ro" type="lofs" /> + <global_mount special="/usr/lib/brand/lx/etc_default_nfs" + directory="/native/etc/default/nfs" type="lofs" opt="ro" /> + <global_mount special="/usr/lib/brand/lx/etc_netconfig" + directory="/native/etc/netconfig" type="lofs" opt="ro" /> + + <!-- Local filesystems to mount when booting the zone --> + <mount special="/native/dev" directory="/dev" type="lofs" /> + <mount special="proc" directory="/native/proc" type="proc" /> + <mount special="swap" directory="/native/etc/svc/volatile" + type="tmpfs" /> + <mount special="swap" directory="/native/tmp" type="tmpfs" /> + + <!-- Devices to create under /dev --> + <device match="null" /> + <device match="pts/*" /> + <device match="random" /> + <device match="tcp" /> + <device match="tcp6" /> + <device match="tty" /> + <device match="udp" /> + <device match="udp6" /> + <device match="urandom" /> + <device match="zero" /> + + <!-- Renamed devices to create under /dev --> + <device match="brand/lx/ptmx" name="ptmx" /> + <device match="zcons/%z/zoneconsole" name="console" /> + + <!-- Audio devices to create under /dev --> + <device match="brand/lx/dsp" name="dsp" /> + <device match="brand/lx/mixer" name="mixer" /> + + <!-- Symlinks to create under /dev --> + <symlink source="fd" target="../proc/self/fd" /> + <symlink source="log" target="/var/run/syslog" /> + <symlink source="stderr" target="../proc/self/fd/2" /> + <symlink source="stdin" target="../proc/self/fd/0" /> + <symlink source="stdout" target="../proc/self/fd/1" /> + 
 <symlink source="systty" target="console" /> + + <!-- Create a mount point for the /dev/initctl fifo --> + <device match="null" name="initctl" /> + +</platform> diff --git a/usr/src/pkg/manifests/SUNWlx.mf b/usr/src/pkg/manifests/SUNWlx.mf index 0d5d285fe5..b7da7af793 100644 --- a/usr/src/pkg/manifests/SUNWlx.mf +++ b/usr/src/pkg/manifests/SUNWlx.mf @@ -20,11 +20,12 @@ # # -# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # -# Was renamed to system/zones/brand/lx, both now obsolete. - -set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.143 -set name=pkg.obsolete value=true +set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.133 +set name=pkg.renamed value=true set name=variant.arch value=i386 +set name=variant.opensolaris.zone value=global value=nonglobal +depend fmri=pkg:/system/zones/brand/lx@0.5.11,5.11-0.133 type=require diff --git a/usr/src/pkg/manifests/system-zones-brand-lx.mf b/usr/src/pkg/manifests/system-zones-brand-lx.mf index ca3a8cc541..6af5d31c4a 100644 --- a/usr/src/pkg/manifests/system-zones-brand-lx.mf +++ b/usr/src/pkg/manifests/system-zones-brand-lx.mf @@ -20,9 +20,108 @@ # # -# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # -set name=pkg.fmri value=pkg:/system/zones/brand/lx@0.5.11,5.11-0.143 -set name=pkg.obsolete value=true +# +# This package will install successfully into any zone, global or +# non-global. The files, directories, links, and hardlinks, however, +# will only be installed into the global zone.
+# +<include global_zone_only_component> +set name=pkg.fmri value=pkg:/system/zones/brand/lx@$(PKGVERS) +set name=pkg.description value="Support for the 'lx' Brand" +set name=pkg.summary value="lx Brand" +set name=info.classification \ + value="org.opensolaris.category.2008:Applications/System Utilities" set name=variant.arch value=i386 +dir path=etc group=sys +dir path=etc/zones group=sys +dir path=usr group=sys +dir path=usr/kernel group=sys +dir path=usr/kernel/brand group=sys +dir path=usr/kernel/brand/$(ARCH64) group=sys +dir path=usr/kernel/drv group=sys +dir path=usr/kernel/drv/$(ARCH64) group=sys +dir path=usr/kernel/dtrace group=sys +dir path=usr/kernel/dtrace/$(ARCH64) group=sys +dir path=usr/kernel/fs group=sys +dir path=usr/kernel/fs/$(ARCH64) group=sys +dir path=usr/kernel/strmod group=sys +dir path=usr/kernel/strmod/$(ARCH64) group=sys +dir path=usr/lib +dir path=usr/lib/brand +dir path=usr/lib/brand/lx +dir path=usr/lib/brand/lx/$(ARCH64) +dir path=usr/lib/brand/lx/distros +dir path=usr/lib/devfsadm group=sys +dir path=usr/lib/devfsadm/linkmod group=sys +driver name=lx_audio +driver name=lx_ptm perms="lx_ptmajor 0666 root sys" +driver name=lx_systrace perms="* 0644 root sys" +file path=etc/zones/SUNWlx.xml mode=0444 +file path=etc/zones/SUNWlx26.xml mode=0444 +file path=usr/kernel/brand/$(ARCH64)/lx_brand group=sys mode=0755 +file path=usr/kernel/brand/lx_brand group=sys mode=0755 +file path=usr/kernel/drv/$(ARCH64)/lx_audio group=sys +file path=usr/kernel/drv/$(ARCH64)/lx_ptm group=sys +file path=usr/kernel/drv/$(ARCH64)/lx_systrace group=sys +file path=usr/kernel/drv/lx_audio group=sys +file path=usr/kernel/drv/lx_audio.conf group=sys +file path=usr/kernel/drv/lx_ptm group=sys +file path=usr/kernel/drv/lx_ptm.conf group=sys +file path=usr/kernel/drv/lx_systrace group=sys +file path=usr/kernel/drv/lx_systrace.conf group=sys +file path=usr/kernel/fs/$(ARCH64)/lx_afs group=sys mode=0755 +file path=usr/kernel/fs/$(ARCH64)/lx_proc group=sys mode=0755 
+file path=usr/kernel/fs/lx_afs group=sys mode=0755 +file path=usr/kernel/fs/lx_proc group=sys mode=0755 +file path=usr/kernel/strmod/$(ARCH64)/ldlinux group=sys mode=0755 +file path=usr/kernel/strmod/ldlinux group=sys mode=0755 +file path=usr/lib/brand/lx/$(ARCH64)/lx_librtld_db.so.1 +file path=usr/lib/brand/lx/$(ARCH64)/lx_nametoaddr.so.1 +file path=usr/lib/brand/lx/$(ARCH64)/lx_thunk.so.1 +file path=usr/lib/brand/lx/config.xml mode=0444 +file path=usr/lib/brand/lx/distros/centos35.distro mode=0444 +file path=usr/lib/brand/lx/distros/centos36.distro mode=0444 +file path=usr/lib/brand/lx/distros/centos37.distro mode=0444 +file path=usr/lib/brand/lx/distros/centos38.distro mode=0444 +file path=usr/lib/brand/lx/distros/rhel35.distro mode=0444 +file path=usr/lib/brand/lx/distros/rhel36.distro mode=0444 +file path=usr/lib/brand/lx/distros/rhel37.distro mode=0444 +file path=usr/lib/brand/lx/distros/rhel38.distro mode=0444 +file path=usr/lib/brand/lx/distros/rhel_centos_common mode=0444 +file path=usr/lib/brand/lx/etc_default_nfs group=sys mode=0444 +file path=usr/lib/brand/lx/etc_netconfig group=sys mode=0444 +file path=usr/lib/brand/lx/lx_distro_install mode=0755 +file path=usr/lib/brand/lx/lx_init_zone mode=0755 +file path=usr/lib/brand/lx/lx_install mode=0755 +file path=usr/lib/brand/lx/lx_librtld_db.so.1 +file path=usr/lib/brand/lx/lx_lockd mode=0755 +file path=usr/lib/brand/lx/lx_nametoaddr.so.1 +file path=usr/lib/brand/lx/lx_native mode=0755 +file path=usr/lib/brand/lx/lx_statd mode=0755 +file path=usr/lib/brand/lx/lx_support mode=0755 +file path=usr/lib/brand/lx/lx_thunk mode=0755 +file path=usr/lib/brand/lx/lx_thunk.so.1 +file path=usr/lib/brand/lx/platform.xml mode=0444 +file path=usr/lib/devfsadm/linkmod/SUNW_lx_link_$(ARCH).so group=sys +file path=usr/lib/lx_brand.so.1 +hardlink path=usr/kernel/dtrace/$(ARCH64)/lx_systrace \ + target=../../../kernel/drv/$(ARCH64)/lx_systrace +hardlink path=usr/kernel/dtrace/lx_systrace \ + target=../../kernel/drv/lx_systrace 
+legacy pkg=SUNWlxr arch=$(ARCH) category=system \ + desc="Support for the 'lx' Brand" \ + hotline="Please contact your local service provider" \ + name="lx Brand (Root)" vendor="Sun Microsystems, Inc." \ + version=11.11,REV=2009.11.11 +legacy pkg=SUNWlxu arch=$(ARCH) category=system \ + desc="Support for the 'lx' Brand" \ + hotline="Please contact your local service provider" \ + name="lx Brand (Usr)" vendor="Sun Microsystems, Inc." \ + version=11.11,REV=2009.11.11 +license cr_Sun license=cr_Sun +license lic_CDDL license=lic_CDDL +link path=usr/lib/brand/lx/64 target=$(ARCH64) diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 09550a587c..bd0a9d465a 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -427,6 +427,8 @@ PROFILE_OBJS += profile.o SYSTRACE_OBJS += systrace.o +LX_SYSTRACE_OBJS += lx_systrace.o + LOCKSTAT_OBJS += lockstat.o FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o @@ -491,6 +493,10 @@ PTSL_OBJS += tty_pts.o PTM_OBJS += ptm.o +LX_PTM_OBJS += lx_ptm.o + +LX_AUDIO_OBJS += lx_audio.o + MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \ mii_marvell.o mii_realtek.o mii_other.o diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c new file mode 100644 index 0000000000..d2bb03c118 --- /dev/null +++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c @@ -0,0 +1,1569 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <fs/fs_subr.h> +#include <sys/atomic.h> +#include <sys/cmn_err.h> +#include <sys/dirent.h> +#include <sys/fs/fifonode.h> +#include <sys/modctl.h> +#include <sys/mount.h> +#include <sys/policy.h> +#include <sys/sunddi.h> + +#include <sys/sysmacros.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> + +#include <sys/lx_autofs_impl.h> + +/* + * External functions + */ +extern uintptr_t space_fetch(char *key); +extern int space_store(char *key, uintptr_t ptr); + +/* + * Globals + */ +static vfsops_t *lx_autofs_vfsops; +static vnodeops_t *lx_autofs_vn_ops = NULL; +static int lx_autofs_fstype; +static major_t lx_autofs_major; +static minor_t lx_autofs_minor = 0; + +/* + * Support functions + */ +static void +i_strfree(char *str) +{ + kmem_free(str, strlen(str) + 1); +} + +static char * +i_strdup(char *str) +{ + int n = strlen(str); + char *ptr = kmem_alloc(n + 1, KM_SLEEP); + bcopy(str, ptr, n + 1); + return (ptr); +} + +static int +i_str_to_int(char *str, int *val) +{ + long res; + + if (str == NULL) + return (-1); + + if ((ddi_strtol(str, NULL, 10, &res) != 0) || + (res < INT_MIN) || (res > INT_MAX)) + return (-1); + + *val = res; + return (0); +} + +static void +i_stack_init(list_t *lp) +{ + list_create(lp, + sizeof (stack_elem_t), offsetof(stack_elem_t, se_list)); +} + +static void +i_stack_fini(list_t *lp) +{ + ASSERT(list_head(lp) == NULL); + list_destroy(lp); +} + +static void +i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, 
caddr_t ptr3) +{ + stack_elem_t *se; + + se = kmem_alloc(sizeof (*se), KM_SLEEP); + se->se_ptr1 = ptr1; + se->se_ptr2 = ptr2; + se->se_ptr3 = ptr3; + list_insert_head(lp, se); +} + +static int +i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3) +{ + stack_elem_t *se; + + if ((se = list_head(lp)) == NULL) + return (-1); + list_remove(lp, se); + if (ptr1 != NULL) + *ptr1 = se->se_ptr1; + if (ptr2 != NULL) + *ptr2 = se->se_ptr2; + if (ptr3 != NULL) + *ptr3 = se->se_ptr3; + kmem_free(se, sizeof (*se)); + return (0); +} + +static vnode_t * +fifo_peer_vp(vnode_t *vp) +{ + fifonode_t *fnp = VTOF(vp); + fifonode_t *fn_dest = fnp->fn_dest; + return (FTOV(fn_dest)); +} + +static vnode_t * +i_vn_alloc(vfs_t *vfsp, vnode_t *uvp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *vp, *vp_old; + + /* Allocate a new vnode structure in case we need it. */ + vp = vn_alloc(KM_SLEEP); + vn_setops(vp, lx_autofs_vn_ops); + VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev); + vp->v_data = uvp; + ASSERT(vp->v_count == 1); + + /* + * Take a hold on the vfs structure. This is how unmount will + * determine if there are any active vnodes in the file system. + */ + VFS_HOLD(vfsp); + + /* + * Check if we already have a vnode allocated for this underlying + * vnode_t. + */ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_vn_hash, + (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) { + + /* + * Didn't find an existing node. + * Add this node to the hash and return. + */ + VERIFY(mod_hash_insert(data->lav_vn_hash, + (mod_hash_key_t)uvp, + (mod_hash_val_t)vp) == 0); + mutex_exit(&data->lav_lock); + return (vp); + } + + /* Get a hold on the existing vnode and free up the one we allocated. */ + VN_HOLD(vp_old); + mutex_exit(&data->lav_lock); + + /* Free up the new vnode we allocated. 
*/ + VN_RELE(uvp); + VFS_RELE(vfsp); + vn_invalid(vp); + vn_free(vp); + + return (vp_old); +} + +static void +i_vn_free(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *uvp = vp->v_data; + vnode_t *vp_tmp; + + ASSERT(MUTEX_HELD((&data->lav_lock))); + ASSERT(MUTEX_HELD((&vp->v_lock))); + + ASSERT(vp->v_count == 0); + + /* We're about to free this vnode so take it out of the hash. */ + (void) mod_hash_remove(data->lav_vn_hash, + (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp); + + /* + * No one else can lookup this vnode any more so there's no need + * to hold locks. + */ + mutex_exit(&data->lav_lock); + mutex_exit(&vp->v_lock); + + /* Release the underlying vnode. */ + VN_RELE(uvp); + VFS_RELE(vfsp); + vn_invalid(vp); + vn_free(vp); +} + +static lx_autofs_lookup_req_t * +i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm) +{ + lx_autofs_lookup_req_t *lalr, *lalr_dup; + + /* Pre-allocate a new automounter request before grabbing locks. */ + lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP); + mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL); + lalr->lalr_ref = 1; + lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION; + + /* Assign a unique id for this request. */ + lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids); + + /* + * The token expected by the linux automount is the name of + * the directory entry to look up. (And not the entire + * path that is being accessed.) + */ + lalr->lalr_pkt.lap_name_len = strlen(nm); + if (lalr->lalr_pkt.lap_name_len > + (sizeof (lalr->lalr_pkt.lap_name) - 1)) { + zcmn_err(getzoneid(), CE_NOTE, + "invalid autofs lookup: \"%s\"", nm); + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); + return (NULL); + } + (void) strlcpy(lalr->lalr_pkt.lap_name, nm, + sizeof (lalr->lalr_pkt.lap_name)); + + /* Check for an outstanding request for this path. 
*/ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_path_hash, + (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) { + /* + * There's already an outstanding request for this + * path so we don't need a new one. + */ + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); + lalr = lalr_dup; + + /* Bump the ref count on the old request. */ + atomic_add_int(&lalr->lalr_ref, 1); + + *dup_request = 1; + } else { + /* Add it to the hashes. */ + VERIFY(mod_hash_insert(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)lalr) == 0); + VERIFY(mod_hash_insert(data->lav_path_hash, + (mod_hash_key_t)i_strdup(nm), + (mod_hash_val_t)lalr) == 0); + + *dup_request = 0; + } + mutex_exit(&data->lav_lock); + + return (lalr); +} + +static lx_autofs_lookup_req_t * +i_lalr_find(lx_autofs_vfs_t *data, int id) +{ + lx_autofs_lookup_req_t *lalr; + + /* Check for an outstanding request for this id. */ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_id_hash, (mod_hash_key_t)(uintptr_t)id, + (mod_hash_val_t *)&lalr) != 0) { + mutex_exit(&data->lav_lock); + return (NULL); + } + atomic_add_int(&lalr->lalr_ref, 1); + mutex_exit(&data->lav_lock); + return (lalr); +} + +static void +i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + lx_autofs_lookup_req_t *lalr_tmp; + + /* Remove this request from the hashes so no one can look it up. */ + mutex_enter(&data->lav_lock); + (void) mod_hash_remove(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)&lalr_tmp); + (void) mod_hash_remove(data->lav_path_hash, + (mod_hash_key_t)lalr->lalr_pkt.lap_name, + (mod_hash_val_t)&lalr_tmp); + mutex_exit(&data->lav_lock); + + /* Mark this requst as complete and wakeup anyone waiting on it. 
*/ + mutex_enter(&lalr->lalr_lock); + lalr->lalr_complete = 1; + cv_broadcast(&lalr->lalr_cv); + mutex_exit(&lalr->lalr_lock); +} + +static void +i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + ASSERT(!MUTEX_HELD(&lalr->lalr_lock)); + if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) + return; + ASSERT(lalr->lalr_ref == 0); + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); +} + +static void +i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + lx_autofs_lookup_req_t *lalr_tmp; + + /* + * This is a little tricky. We're aborting the wait for this + * request. So if anyone else is waiting for this request we + * can't free it, but if no one else is waiting for the request + * we should free it. + */ + mutex_enter(&data->lav_lock); + if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) { + mutex_exit(&data->lav_lock); + return; + } + ASSERT(lalr->lalr_ref == 0); + + /* Remove this request from the hashes so no one can look it up. */ + (void) mod_hash_remove(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)&lalr_tmp); + (void) mod_hash_remove(data->lav_path_hash, + (mod_hash_key_t)lalr->lalr_pkt.lap_name, + (mod_hash_val_t)&lalr_tmp); + mutex_exit(&data->lav_lock); + + /* It's ok to free this now because the ref count was zero. */ + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); +} + +static int +i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd) +{ + proc_t *prp; + uf_info_t *fip; + uf_entry_t *ufp_wr, *ufp_rd = NULL; + file_t *fp_wr, *fp_rd = NULL; + vnode_t *vp_wr, *vp_rd; + int i; + + /* + * sprlock() is zone aware, so assuming this mount call was + * initiated by a process in a zone, if it tries to specify + * a pgrp outside of it's zone this call will fail. 
+ * + * Also, we want to grab hold of the main automounter process + * and its going to be the group leader for pgrp, so its + * pid will be equal to pgrp. + */ + prp = sprlock(pgrp); + if (prp == NULL) + return (-1); + mutex_exit(&prp->p_lock); + + /* Now we want to access the processes open file descriptors. */ + fip = P_FINFO(prp); + mutex_enter(&fip->fi_lock); + + /* Sanity check fifo write fd. */ + if (fd >= fip->fi_nfiles) { + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* Get a pointer to the write fifo. */ + UF_ENTER(ufp_wr, fip, fd); + if (((fp_wr = ufp_wr->uf_file) == NULL) || + ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) { + /* Invalid fifo fd. */ + UF_EXIT(ufp_wr); + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * Now we need to find the read end of the fifo (for reasons + * explained below.) We assume that the read end of the fifo + * is in the same process as the write end. + */ + vp_rd = fifo_peer_vp(fp_wr->f_vnode); + for (i = 0; i < fip->fi_nfiles; i++) { + UF_ENTER(ufp_rd, fip, i); + if (((fp_rd = ufp_rd->uf_file) != NULL) && + (fp_rd->f_vnode == vp_rd)) + break; + UF_EXIT(ufp_rd); + } + if (i == fip->fi_nfiles) { + /* Didn't find it. */ + UF_EXIT(ufp_wr); + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * We need to drop fi_lock before we can try to acquire f_tlock + * the good news is that the file pointers are protected because + * we're still holding uf_lock. + */ + mutex_exit(&fip->fi_lock); + + /* + * Here we bump the open counts on the fifos. The reason + * that we do this is because when we go to write to the + * fifo we want to ensure that they are actually open (and + * not in the process of being closed) without having to + * stop the automounter. (If the write end of the fifo + * were closed and we tried to write to it we would panic. 
+ * If the read end of the fifo was closed and we tried to + * write to the other end, the process that invoked the + * lookup operation would get an unexpected SIGPIPE.) + */ + mutex_enter(&fp_wr->f_tlock); + fp_wr->f_count++; + ASSERT(fp_wr->f_count >= 2); + mutex_exit(&fp_wr->f_tlock); + + mutex_enter(&fp_rd->f_tlock); + fp_rd->f_count++; + ASSERT(fp_rd->f_count >= 2); + mutex_exit(&fp_rd->f_tlock); + + /* Release all our locks. */ + UF_EXIT(ufp_wr); + UF_EXIT(ufp_rd); + mutex_enter(&prp->p_lock); + sprunlock(prp); + + /* Return the file pointers. */ + *fpp_rd = fp_rd; + *fpp_wr = fp_wr; + return (0); +} + +static uint_t +/*ARGSUSED*/ +i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + int *id = (int *)arg; + /* Return the key and terminate the walk. */ + *id = (uintptr_t)key; + return (MH_WALK_TERMINATE); +} + +static void +i_fifo_close(lx_autofs_vfs_t *data) +{ + /* + * Close the fifo to prevent any future requests from + * getting sent to the automounter. + */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr != NULL) { + (void) closef(data->lav_fifo_wr); + data->lav_fifo_wr = NULL; + } + if (data->lav_fifo_rd != NULL) { + (void) closef(data->lav_fifo_rd); + data->lav_fifo_rd = NULL; + } + mutex_exit(&data->lav_lock); + + /* + * Wakeup any threads currently waiting for the automounter + * note that it's possible for multiple threads to have entered + * this function and to be doing the work below simultaneously. + */ + for (;;) { + lx_autofs_lookup_req_t *lalr; + int id; + + /* Lookup the first entry in the hash. */ + id = -1; + mod_hash_walk(data->lav_id_hash, + i_fifo_close_cb, &id); + if (id == -1) { + /* No more id's in the hash. */ + break; + } + if ((lalr = i_lalr_find(data, id)) == NULL) { + /* Someone else beat us to it. */ + continue; + } + + /* Mark the request as compleate and release it. 
*/ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + } +} + +static int +i_fifo_verify_rd(lx_autofs_vfs_t *data) +{ + proc_t *prp; + uf_info_t *fip; + uf_entry_t *ufp_rd = NULL; + file_t *fp_rd = NULL; + vnode_t *vp_rd; + int i; + + ASSERT(MUTEX_HELD((&data->lav_lock))); + + /* Check if we've already been shut down. */ + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + return (-1); + } + vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode); + + /* + * sprlock() is zone aware, so assuming this mount call was + * initiated by a process in a zone, if it tries to specify + * a pgrp outside of it's zone this call will fail. + * + * Also, we want to grab hold of the main automounter process + * and its going to be the group leader for pgrp, so its + * pid will be equal to pgrp. + */ + prp = sprlock(data->lav_pgrp); + if (prp == NULL) + return (-1); + mutex_exit(&prp->p_lock); + + /* Now we want to access the processes open file descriptors. */ + fip = P_FINFO(prp); + mutex_enter(&fip->fi_lock); + + /* + * Now we need to find the read end of the fifo (for reasons + * explained below.) We assume that the read end of the fifo + * is in the same process as the write end. + */ + for (i = 0; i < fip->fi_nfiles; i++) { + UF_ENTER(ufp_rd, fip, i); + if (((fp_rd = ufp_rd->uf_file) != NULL) && + (fp_rd->f_vnode == vp_rd)) + break; + UF_EXIT(ufp_rd); + } + if (i == fip->fi_nfiles) { + /* Didn't find it. */ + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * Seems the automounter still has the read end of the fifo + * open, we're done here. Release all our locks and exit. 
+ */ + mutex_exit(&fip->fi_lock); + UF_EXIT(ufp_rd); + mutex_enter(&prp->p_lock); + sprunlock(prp); + + return (0); +} + +static int +i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap) +{ + struct uio uio; + struct iovec iov; + file_t *fp_wr, *fp_rd; + int error; + + /* + * The catch here is we need to make sure _we_ don't close + * the the fifo while writing to it. (Another thread could come + * along and realize the automounter process is gone and close + * the fifo. To do this we bump the open count before we + * write to the fifo. + */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + mutex_exit(&data->lav_lock); + return (ENOENT); + } + fp_wr = data->lav_fifo_wr; + fp_rd = data->lav_fifo_rd; + + /* Bump the open count on the write fifo. */ + mutex_enter(&fp_wr->f_tlock); + fp_wr->f_count++; + mutex_exit(&fp_wr->f_tlock); + + /* Bump the open count on the read fifo. */ + mutex_enter(&fp_rd->f_tlock); + fp_rd->f_count++; + mutex_exit(&fp_rd->f_tlock); + + mutex_exit(&data->lav_lock); + + iov.iov_base = (caddr_t)lap; + iov.iov_len = sizeof (*lap); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_loffset = 0; + uio.uio_segflg = (short)UIO_SYSSPACE; + uio.uio_resid = sizeof (*lap); + uio.uio_llimit = 0; + uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK; + + error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL); + (void) closef(fp_wr); + (void) closef(fp_rd); + + /* + * After every write we verify that the automounter still has + * these files open. + */ + mutex_enter(&data->lav_lock); + if (i_fifo_verify_rd(data) != 0) { + /* + * Something happened to the automounter. + * Close down the communication pipe we setup. 
+ */ + mutex_exit(&data->lav_lock); + i_fifo_close(data); + if (error != 0) + return (error); + return (ENOENT); + } + mutex_exit(&data->lav_lock); + + return (error); +} + +static int +i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack) +{ + struct iovec iov; + struct uio uio; + dirent64_t *dp, *dbuf; + vnode_t *vp; + size_t dlen, dbuflen; + int eof, error, ndirents = 64; + char *nm; + + dlen = ndirents * (sizeof (*dbuf)); + dbuf = kmem_alloc(dlen, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_llimit = MAXOFFSET_T; + + eof = 0; + error = 0; + while (!error && !eof) { + uio.uio_resid = dlen; + iov.iov_base = (char *)dbuf; + iov.iov_len = dlen; + + (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); + if (VOP_READDIR(dvp, &uio, kcred, &eof, NULL, 0) != 0) { + VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); + kmem_free(dbuf, dlen); + return (-1); + } + VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); + + if ((dbuflen = dlen - uio.uio_resid) == 0) { + /* We're done. 
*/ + break; + } + + for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); + dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { + + nm = dp->d_name; + + if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) + continue; + + if (VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, kcred, + NULL, NULL, NULL) != 0) { + kmem_free(dbuf, dlen); + return (-1); + } + if (vp->v_type == VDIR) { + if (dir_stack != NULL) { + i_stack_push(dir_stack, (caddr_t)dvp, + (caddr_t)vp, i_strdup(nm)); + } else { + VN_RELE(vp); + } + } else { + if (file_stack != NULL) { + i_stack_push(file_stack, (caddr_t)dvp, + (caddr_t)vp, i_strdup(nm)); + } else { + VN_RELE(vp); + } + } + } + } + kmem_free(dbuf, dlen); + return (0); +} + +static void +i_bs_destroy(vnode_t *dvp, char *path) +{ + list_t search_stack; + list_t dir_stack; + list_t file_stack; + vnode_t *pdvp, *vp; + char *dpath, *fpath; + int ret; + + if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred, + NULL, NULL, NULL) != 0) { + /* A directory entry with this name doesn't actually exist. */ + return; + } + + if ((vp->v_type & VDIR) == 0) { + /* Easy, the directory entry is a file so delete it. */ + VN_RELE(vp); + (void) VOP_REMOVE(dvp, path, kcred, NULL, 0); + return; + } + + /* + * The directory entry is a subdirectory, now we have a bit more + * work to do. (We'll have to recurse into the sub directory.) + * It would have been much easier to do this recursively but kernel + * stacks are notoriously small. + */ + i_stack_init(&search_stack); + i_stack_init(&dir_stack); + i_stack_init(&file_stack); + + /* Save our newfound subdirectory into a list. */ + i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path)); + + /* Do a recursive depth first search into the subdirectories. */ + while (i_stack_pop(&search_stack, + (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) { + + /* Get a list of the subdirectories in this directory. 
*/ + if (i_bs_readdir(dvp, &search_stack, NULL) != 0) + goto exit; + + /* Save the current directory a separate stack. */ + i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp, dpath); + } + + /* + * Now dir_stack contains a list of directories, the deepest paths + * are at the top of the list. So let's go through and process them. + */ + while (i_stack_pop(&dir_stack, + (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) { + + /* Get a list of the files in this directory. */ + if (i_bs_readdir(dvp, NULL, &file_stack) != 0) { + VN_RELE(dvp); + i_strfree(dpath); + goto exit; + } + + /* Delete all the files in this directory. */ + while (i_stack_pop(&file_stack, + NULL, (caddr_t *)&vp, &fpath) == 0) { + VN_RELE(vp) + ret = VOP_REMOVE(dvp, fpath, kcred, NULL, 0); + i_strfree(fpath); + if (ret != 0) { + i_strfree(dpath); + goto exit; + } + } + + /* Delete this directory. */ + VN_RELE(dvp); + ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred, NULL, 0); + i_strfree(dpath); + if (ret != 0) + goto exit; + } + +exit: + while ( + (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &path) == 0) || + (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &path) == 0) || + (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &path) == 0)) { + VN_RELE(vp); + i_strfree(path); + } + i_stack_fini(&search_stack); + i_stack_fini(&dir_stack); + i_stack_fini(&file_stack); +} + +static vnode_t * +i_bs_create(vnode_t *dvp, char *bs_name) +{ + vnode_t *vp; + vattr_t vattr; + + /* + * After looking at the mkdir syscall path it seems we don't need + * to initialize all of the vattr_t structure. 
+ */ + bzero(&vattr, sizeof (vattr)); + vattr.va_type = VDIR; + vattr.va_mode = 0755; /* u+rwx,og=rx */ + vattr.va_mask = AT_TYPE|AT_MODE; + + if (VOP_MKDIR(dvp, bs_name, &vattr, &vp, kcred, NULL, 0, NULL) != 0) + return (NULL); + return (vp); +} + +static int +i_automounter_call(vnode_t *dvp, char *nm) +{ + lx_autofs_lookup_req_t *lalr; + lx_autofs_vfs_t *data; + int error, dup_request; + + /* Get a pointer to the vfs mount data. */ + data = dvp->v_vfsp->vfs_data; + + /* The automounter only support queries in the root directory. */ + if (dvp != data->lav_root) + return (ENOENT); + + /* + * Check if the current process is in the automounters process + * group. (If it is, the current process is either the autmounter + * itself or one of it's forked child processes.) If so, don't + * redirect this lookup back into the automounter because we'll + * hang. + */ + mutex_enter(&pidlock); + if (data->lav_pgrp == curproc->p_pgrp) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + /* Verify that the automount process pipe still exists. */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + mutex_exit(&data->lav_lock); + return (ENOENT); + } + mutex_exit(&data->lav_lock); + + /* Allocate an automounter request structure. */ + if ((lalr = i_lalr_alloc(data, &dup_request, nm)) == NULL) + return (ENOENT); + + /* + * If we were the first one to allocate this request then we + * need to send it to the automounter. + */ + if ((!dup_request) && + ((error = i_fifo_write(data, &lalr->lalr_pkt)) != 0)) { + /* + * Unable to send the request to the automounter. + * Unblock any other threads waiting on the request + * and release the request. + */ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + return (error); + } + + /* Wait for someone to signal us that this request has compleated. 
*/ + mutex_enter(&lalr->lalr_lock); + while (!lalr->lalr_complete) { + if (cv_wait_sig(&lalr->lalr_cv, &lalr->lalr_lock) == 0) { + /* We got a signal, abort this lookup. */ + mutex_exit(&lalr->lalr_lock); + i_lalr_abort(data, lalr); + return (EINTR); + } + } + mutex_exit(&lalr->lalr_lock); + i_lalr_release(data, lalr); + + return (0); +} + +static int +i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg) +{ + lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data; + + /* + * Be strict. + * We only accept ioctls from the automounter process group. + */ + mutex_enter(&pidlock); + if (data->lav_pgrp != curproc->p_pgrp) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + if ((cmd == LX_AUTOFS_IOC_READY) || (cmd == LX_AUTOFS_IOC_FAIL)) { + lx_autofs_lookup_req_t *lalr; + int id = arg; + + /* + * We don't actually care if the request failed or succeeded. + * We do the same thing either way. + */ + if ((lalr = i_lalr_find(data, id)) == NULL) + return (ENXIO); + + /* Mark the request as compleate and release it. */ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + return (0); + } + if (cmd == LX_AUTOFS_IOC_CATATONIC) { + /* The automounter is shutting down. */ + i_fifo_close(data); + return (0); + } + return (ENOTSUP); +} + +static int +i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data) +{ + char *fd_str, *pgrp_str, *minproto_str, *maxproto_str; + int fd, pgrp, minproto, maxproto; + file_t *fp_wr, *fp_rd; + + /* Require all options to be present. */ + if ((vfs_optionisset(vfsp, LX_MNTOPT_FD, &fd_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &pgrp_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &minproto_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &maxproto_str) != 1)) + return (EINVAL); + + /* Get the values for each parameter. 
*/ + if ((i_str_to_int(fd_str, &fd) != 0) || + (i_str_to_int(pgrp_str, &pgrp) != 0) || + (i_str_to_int(minproto_str, &minproto) != 0) || + (i_str_to_int(maxproto_str, &maxproto) != 0)) + return (EINVAL); + + /* + * We support v2 of the linux kernel automounter protocol. + * Make sure the mount request we got indicates support + * for this version of the protocol. + */ + if ((minproto > 2) || (maxproto < 2)) + return (EINVAL); + + /* + * Now we need to lookup the fifos we'll be using + * to talk to the userland automounter process. + */ + if (i_fifo_lookup(pgrp, fd, &fp_wr, &fp_rd) != 0) + return (EINVAL); + + /* Save the mount options and fifo pointers. */ + data->lav_fd = fd; + data->lav_pgrp = pgrp; + data->lav_fifo_rd = fp_rd; + data->lav_fifo_wr = fp_wr; + return (0); +} + +/* + * VFS entry points + */ +static int +lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) +{ + lx_autofs_vfs_t *data; + dev_t dev; + char name[40]; + int error; + + if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) + return (EPERM); + + if (mvp->v_type != VDIR) + return (ENOTDIR); + + if ((uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count > 1 || (mvp->v_flag & VROOT))) + return (EBUSY); + + /* We don't support mountes in the global zone. */ + if (getzoneid() == GLOBAL_ZONEID) + return (EPERM); + + /* We don't support mounting on top of ourselves. */ + if (vn_matchops(mvp, lx_autofs_vn_ops)) + return (EPERM); + + /* Allocate a vfs struct. */ + data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP); + + /* Parse mount options. */ + if ((error = i_parse_mntopt(vfsp, data)) != 0) { + kmem_free(data, sizeof (lx_autofs_vfs_t)); + return (error); + } + + /* Initialize the backing store. */ + i_bs_destroy(mvp, LX_AUTOFS_BS_DIR); + if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) { + kmem_free(data, sizeof (lx_autofs_vfs_t)); + return (EBUSY); + } + data->lav_bs_name = LX_AUTOFS_BS_DIR; + + /* We have to hold the underlying vnode we're mounted on. 
*/ + data->lav_mvp = mvp; + VN_HOLD(mvp); + + /* Initialize vfs fields */ + vfsp->vfs_bsize = DEV_BSIZE; + vfsp->vfs_fstype = lx_autofs_fstype; + vfsp->vfs_data = data; + + /* Invent a dev_t (sigh) */ + do { + dev = makedevice(lx_autofs_major, + atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32); + } while (vfs_devismounted(dev)); + vfsp->vfs_dev = dev; + vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype); + + /* Create an id space arena for automounter requests. */ + (void) snprintf(name, sizeof (name), "lx_autofs_id_%d", + getminor(vfsp->vfs_dev)); + data->lav_ids = id_space_create(name, 1, INT_MAX); + + /* Create hashes to keep track of automounter requests. */ + mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL); + (void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_path_hash = mod_hash_create_strhash(name, + LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor); + (void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_id_hash = mod_hash_create_idhash(name, + LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor); + + /* Create a hash to keep track of vnodes. */ + (void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_vn_hash = mod_hash_create_ptrhash(name, + LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor, + sizeof (vnode_t)); + + /* Create root vnode */ + data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp); + data->lav_root->v_flag |= + VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT; + + return (0); +} + +static int +lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr) +{ + lx_autofs_vfs_t *data; + + if (secpolicy_fs_unmount(cr, vfsp) != 0) + return (EPERM); + + /* We do not currently support forced unmounts. */ + if (flag & MS_FORCE) + return (ENOTSUP); + + /* + * We should never have a reference count of less than 2: one for the + * caller, one for the root vnode. 
+ */ + ASSERT(vfsp->vfs_count >= 2); + + /* If there are any outstanding vnodes, we can't unmount. */ + if (vfsp->vfs_count > 2) + return (EBUSY); + + /* Check for any remaining holds on the root vnode. */ + data = vfsp->vfs_data; + ASSERT(data->lav_root->v_vfsp == vfsp); + if (data->lav_root->v_count > 1) + return (EBUSY); + + /* Close the fifo to the automount process. */ + if (data->lav_fifo_wr != NULL) + (void) closef(data->lav_fifo_wr); + if (data->lav_fifo_rd != NULL) + (void) closef(data->lav_fifo_rd); + + /* + * We have to release our hold on our root vnode before we can + * delete the backing store. (Since the root vnode is linked + * to the backing store.) + */ + VN_RELE(data->lav_root); + + /* Cleanup the backing store. */ + i_bs_destroy(data->lav_mvp, data->lav_bs_name); + VN_RELE(data->lav_mvp); + + /* Cleanup out remaining data structures. */ + mod_hash_destroy_strhash(data->lav_path_hash); + mod_hash_destroy_idhash(data->lav_id_hash); + mod_hash_destroy_ptrhash(data->lav_vn_hash); + id_space_destroy(data->lav_ids); + kmem_free(data, sizeof (lx_autofs_vfs_t)); + + return (0); +} + +static int +lx_autofs_root(vfs_t *vfsp, vnode_t **vpp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + + *vpp = data->lav_root; + VN_HOLD(*vpp); + + return (0); +} + +static int +lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *urvp = data->lav_root->v_data; + dev32_t d32; + int error; + + if ((error = VFS_STATVFS(urvp->v_vfsp, sp)) != 0) + return (error); + + /* Update some of values before returning. 
*/ + (void) cmpldev(&d32, vfsp->vfs_dev); + sp->f_fsid = d32; + (void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name, + sizeof (sp->f_basetype)); + sp->f_flag = vf_to_stf(vfsp->vfs_flag); + bzero(sp->f_fstr, sizeof (sp->f_fstr)); + return (0); +} + +static const fs_operation_def_t lx_autofs_vfstops[] = { + { VFSNAME_MOUNT, { .vfs_mount = lx_autofs_mount } }, + { VFSNAME_UNMOUNT, { .vfs_unmount = lx_autofs_unmount } }, + { VFSNAME_ROOT, { .vfs_root = lx_autofs_root } }, + { VFSNAME_STATVFS, { .vfs_statvfs = lx_autofs_statvfs } }, + { NULL, NULL } +}; + +/* + * VOP entry points - simple passthrough + * + * For most VOP entry points we can simply pass the request on to + * the underlying filesystem we're mounted on. + */ +static int +lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, + caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + return (VOP_CLOSE(uvp, flag, count, offset, cr, ctp)); +} + +static int +lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, + caller_context_t *ctp, int flags) +{ + vnode_t *uvp = vp->v_data; + return (VOP_READDIR(uvp, uiop, cr, eofp, ctp, flags)); +} + +static int +lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr, + caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + return (VOP_ACCESS(uvp, mode, flags, cr, ctp)); +} + +static int +lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + return (VOP_RWLOCK(uvp, write_lock, ctp)); +} + +static void +lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + VOP_RWUNLOCK(uvp, write_lock, ctp); +} + +/*ARGSUSED*/ +static int +lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, + caller_context_t *ctp, int flags) +{ + vnode_t *udvp = dvp->v_data; + + /* + * cdir is the calling processes current directory. + * If cdir is lx_autofs vnode then get its real underlying + * vnode ptr. 
(It seems like the only thing cdir is + * ever used for is to make sure the user doesn't delete + * their current directory.) + */ + if (vn_matchops(cdir, lx_autofs_vn_ops)) { + vnode_t *ucdir = cdir->v_data; + return (VOP_RMDIR(udvp, nm, ucdir, cr, ctp, flags)); + } + + return (VOP_RMDIR(udvp, nm, cdir, cr, ctp, flags)); +} + +/* + * VOP entry points - special passthrough + * + * For some VOP entry points we will first pass the request on to + * the underlying filesystem we're mounted on. If there's an error + * then we immediately return the error, but if the request succeeds + * we have to do some extra work before returning. + */ +static int +lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ctp) +{ + vnode_t *ovp = *vpp; + vnode_t *uvp = ovp->v_data; + int error; + + if ((error = VOP_OPEN(&uvp, flag, cr, ctp)) != 0) + return (error); + + /* Check for clone opens. */ + if (uvp == ovp->v_data) + return (0); + + /* Deal with clone opens by returning a new vnode. */ + *vpp = i_vn_alloc(ovp->v_vfsp, uvp); + VN_RELE(ovp); + return (0); +} + +static int +lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, + caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + int error; + + if ((error = VOP_GETATTR(uvp, vap, flags, cr, ctp)) != 0) + return (error); + + /* Update the attributes with our filesystem id. */ + vap->va_fsid = vp->v_vfsp->vfs_dev; + return (0); +} + +static int +lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp, + cred_t *cr, caller_context_t *ctp, int flags, vsecattr_t *vsecp) +{ + vnode_t *udvp = dvp->v_data; + vnode_t *uvp = NULL; + int error; + + if ((error = VOP_MKDIR(udvp, nm, vap, &uvp, cr, + ctp, flags, vsecp)) != 0) + return (error); + + /* Update the attributes with our filesystem id. */ + vap->va_fsid = dvp->v_vfsp->vfs_dev; + + /* Allocate a new vnode. 
*/ + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); +} + +/* + * VOP entry points - custom + */ +/*ARGSUSED*/ +static void +lx_autofs_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ctp) +{ + lx_autofs_vfs_t *data = vp->v_vfsp->vfs_data; + + /* + * We need to hold the vfs lock because if we're going to free + * this vnode we have to prevent anyone from looking it up + * in the vnode hash. + */ + mutex_enter(&data->lav_lock); + mutex_enter(&vp->v_lock); + + if (vp->v_count < 1) { + panic("lx_autofs_inactive: bad v_count"); + /*NOTREACHED*/ + } + + /* Drop the temporary hold by vn_rele now. */ + if (--vp->v_count > 0) { + mutex_exit(&vp->v_lock); + mutex_exit(&data->lav_lock); + return; + } + + /* + * No one should have been blocked on this lock because we're + * about to free this vnode. + */ + i_vn_free(vp); +} + +static int +lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, + int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ctp, + int *direntflags, pathname_t *realpnp) +{ + vnode_t *udvp = dvp->v_data; + vnode_t *uvp = NULL; + int error; + + /* First try to lookup if this path component already exitst. */ + if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr, ctp, + direntflags, realpnp)) == 0) { + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); + } + + /* Only query the automounter if the path does not exist. */ + if (error != ENOENT) + return (error); + + /* Refer the lookup to the automounter. */ + if ((error = i_automounter_call(dvp, nm)) != 0) + return (error); + + /* Retry the lookup operation. */ + if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr, ctp, + direntflags, realpnp)) == 0) { + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); + } + return (error); +} + +/*ARGSUSED*/ +static int +lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr, + int *rvalp, caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + + /* Intercept certain ioctls. 
*/ + switch ((uint_t)cmd) { + case LX_AUTOFS_IOC_READY: + case LX_AUTOFS_IOC_FAIL: + case LX_AUTOFS_IOC_CATATONIC: + case LX_AUTOFS_IOC_EXPIRE: + case LX_AUTOFS_IOC_PROTOVER: + case LX_AUTOFS_IOC_SETTIMEOUT: + return (i_automounter_ioctl(vp, cmd, arg)); + } + + /* Pass any remaining ioctl on. */ + return (VOP_IOCTL(uvp, cmd, arg, mode, cr, rvalp, ctp)); +} + +/* + * VOP entry points definitions + */ +static const fs_operation_def_t lx_autofs_tops_root[] = { + { VOPNAME_OPEN, { .vop_open = lx_autofs_open } }, + { VOPNAME_CLOSE, { .vop_close = lx_autofs_close } }, + { VOPNAME_IOCTL, { .vop_ioctl = lx_autofs_ioctl } }, + { VOPNAME_RWLOCK, { .vop_rwlock = lx_autofs_rwlock } }, + { VOPNAME_RWUNLOCK, { .vop_rwunlock = lx_autofs_rwunlock } }, + { VOPNAME_GETATTR, { .vop_getattr = lx_autofs_getattr } }, + { VOPNAME_ACCESS, { .vop_access = lx_autofs_access } }, + { VOPNAME_READDIR, { .vop_readdir = lx_autofs_readdir } }, + { VOPNAME_LOOKUP, { .vop_lookup = lx_autofs_lookup } }, + { VOPNAME_INACTIVE, { .vop_inactive = lx_autofs_inactive } }, + { VOPNAME_MKDIR, { .vop_mkdir = lx_autofs_mkdir } }, + { VOPNAME_RMDIR, { .vop_rmdir = lx_autofs_rmdir } }, + { NULL } +}; + +/* + * lx_autofs_init() gets invoked via the mod_install() call in + * this modules _init() routine. Therefor, the code that cleans + * up the structures we allocate below is actually found in + * our _fini() routine. 
+ */ +/* ARGSUSED */ +static int +lx_autofs_init(int fstype, char *name) +{ + int error; + + if ((lx_autofs_major = + (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV)) == 0) { + + if ((lx_autofs_major = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "lx_autofs_init: " + "can't get unique device number"); + return (EAGAIN); + } + + if (space_store(LX_AUTOFS_SPACE_KEY_UDEV, + (uintptr_t)lx_autofs_major) != 0) { + cmn_err(CE_WARN, "lx_autofs_init: " + "can't save unique device number"); + return (EAGAIN); + } + } + + lx_autofs_fstype = fstype; + if ((error = vfs_setfsops( + fstype, lx_autofs_vfstops, &lx_autofs_vfsops)) != 0) { + cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template"); + return (error); + } + + if ((error = vn_make_ops("lx_autofs vnode ops", + lx_autofs_tops_root, &lx_autofs_vn_ops)) != 0) { + VERIFY(vfs_freevfsops_by_type(fstype) == 0); + lx_autofs_vn_ops = NULL; + return (error); + } + + return (0); +} + + +/* + * Module linkage + */ +static mntopt_t lx_autofs_mntopt[] = { + { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE } +}; + +static mntopts_t lx_autofs_mntopts = { + sizeof (lx_autofs_mntopt) / sizeof (mntopt_t), + lx_autofs_mntopt +}; + +static vfsdef_t vfw = { + VFSDEF_VERSION, + LX_AUTOFS_NAME, + lx_autofs_init, + VSW_HASPROTO | VSW_VOLATILEDEV, + &lx_autofs_mntopts +}; + +extern struct mod_ops mod_fsops; + +static struct modlfs modlfs = { + &mod_fsops, "linux autofs filesystem", &vfw +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlfs, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int error; + + if ((error = mod_remove(&modlinkage)) != 0) + return (error); + + if (lx_autofs_vn_ops != NULL) { + vn_freevnodeops(lx_autofs_vn_ops); + lx_autofs_vn_ops = NULL; 
+ } + + /* + * In our init routine, if we get an error after calling + * vfs_setfsops() we cleanup by calling vfs_freevfsops_by_type(). + * But we don't need to call vfs_freevfsops_by_type() here + * because the fs framework did this for us as part of the + * mod_remove() call above. + */ + return (0); +} diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c new file mode 100644 index 0000000000..bfeb78330a --- /dev/null +++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c @@ -0,0 +1,396 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#include <sys/modctl.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/stat.h> +#include <sys/conf.h> +#include <sys/frame.h> +#include <sys/dtrace.h> +#include <sys/dtrace_impl.h> + +#include <sys/lx_impl.h> + +#define LX_SYSTRACE_SHIFT 16 +#define LX_SYSTRACE_ISENTRY(x) ((int)(x) >> LX_SYSTRACE_SHIFT) +#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << LX_SYSTRACE_SHIFT) - 1)) +#define LX_SYSTRACE_ENTRY(id) ((1 << LX_SYSTRACE_SHIFT) | (id)) +#define LX_SYSTRACE_RETURN(id) (id) + +#define LX_SYSTRACE_ENTRY_AFRAMES 2 +#define LX_SYSTRACE_RETURN_AFRAMES 4 + +typedef struct lx_systrace_sysent { + const char *lss_name; + dtrace_id_t lss_entry; + dtrace_id_t lss_return; +} lx_systrace_sysent_t; + +static dev_info_t *lx_systrace_devi; +static dtrace_provider_id_t lx_systrace_id; +static kmutex_t lx_systrace_lock; +static uint_t lx_systrace_nenabled; + +static int lx_systrace_nsysent; +static lx_systrace_sysent_t *lx_systrace_sysent; + +/*ARGSUSED*/ +static void +lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2, + ulong_t arg3, ulong_t arg4, ulong_t arg5) +{ + dtrace_id_t id; + + if (sysnum >= lx_systrace_nsysent) + return; + + if ((id = lx_systrace_sysent[sysnum].lss_entry) == DTRACE_IDNONE) + return; + + dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); +} + +/*ARGSUSED*/ +static void +lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2, + ulong_t arg3, ulong_t arg4, ulong_t arg5) +{ + dtrace_id_t id; + + if (sysnum >= lx_systrace_nsysent) + return; + + if ((id = lx_systrace_sysent[sysnum].lss_return) == DTRACE_IDNONE) + return; + + dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); +} + +/*ARGSUSED*/ +static void +lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc) +{ + int i; + + if (desc != NULL) + return; + + for (i = 0; i < lx_systrace_nsysent; i++) { + if (dtrace_probe_lookup(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "entry") != 0) + continue; + + (void) 
dtrace_probe_create(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "entry", + LX_SYSTRACE_ENTRY_AFRAMES, + (void *)((uintptr_t)LX_SYSTRACE_ENTRY(i))); + + (void) dtrace_probe_create(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "return", + LX_SYSTRACE_RETURN_AFRAMES, + (void *)((uintptr_t)LX_SYSTRACE_RETURN(i))); + + lx_systrace_sysent[i].lss_entry = DTRACE_IDNONE; + lx_systrace_sysent[i].lss_return = DTRACE_IDNONE; + } +} + +/*ARGSUSED*/ +static int +lx_systrace_enable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg); + + ASSERT(sysnum < lx_systrace_nsysent); + + mutex_enter(&lx_systrace_lock); + if (lx_systrace_nenabled++ == 0) + lx_brand_systrace_enable(); + mutex_exit(&lx_systrace_lock); + + if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) { + lx_systrace_sysent[sysnum].lss_entry = id; + } else { + lx_systrace_sysent[sysnum].lss_return = id; + } + return (0); +} + +/*ARGSUSED*/ +static void +lx_systrace_disable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg); + + ASSERT(sysnum < lx_systrace_nsysent); + + if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) { + lx_systrace_sysent[sysnum].lss_entry = DTRACE_IDNONE; + } else { + lx_systrace_sysent[sysnum].lss_return = DTRACE_IDNONE; + } + + mutex_enter(&lx_systrace_lock); + if (--lx_systrace_nenabled == 0) + lx_brand_systrace_disable(); + mutex_exit(&lx_systrace_lock); +} + +/*ARGSUSED*/ +static void +lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg) +{ +} + +/*ARGSUSED*/ +static uint64_t +lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + struct frame *fp = (struct frame *)dtrace_getfp(); + uintptr_t *stack; + uint64_t val = 0; + int i; + + if (argno >= 6) + return (0); + + /* + * Walk the four frames down the stack to the entry or return callback. 
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable() + * which invokes this function to get the extended arguments. We get + * the frame pointer in via call to dtrace_getfp() above which makes for + * four frames. + */ + for (i = 0; i < 4; i++) { + fp = (struct frame *)fp->fr_savfp; + } + + stack = (uintptr_t *)&fp[1]; + + /* + * Skip the first argument to the callback -- the system call number. + */ + argno++; + +#ifdef __amd64 + /* + * On amd64, the first 6 arguments are passed in registers while + * subsequent arguments are on the stack. + */ + argno -= 6; +#endif + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[argno]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); +} + + +static const dtrace_pattr_t lx_systrace_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t lx_systrace_pops = { + lx_systrace_provide, + NULL, + lx_systrace_enable, + lx_systrace_disable, + NULL, + NULL, + NULL, + lx_systrace_getarg, + NULL, + lx_systrace_destroy +}; + +static int +lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + int i; + + switch (cmd) { + case DDI_ATTACH: + break; + case DDI_RESUME: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR, + 0, DDI_PSEUDO, NULL) == DDI_FAILURE || + dtrace_register("lx-syscall", &lx_systrace_attr, + DTRACE_PRIV_KERNEL, 0, &lx_systrace_pops, NULL, + &lx_systrace_id) != 0) { + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + lx_systrace_devi = devi; + + /* + * Count up the lx_brand system calls. 
+ */ + for (i = 0; lx_sysent[i].sy_callc != NULL; i++) + continue; + + /* + * Initialize our corresponding table. + */ + lx_systrace_sysent = kmem_zalloc(i * sizeof (lx_systrace_sysent_t), + KM_SLEEP); + lx_systrace_nsysent = i; + + for (i = 0; i < lx_systrace_nsysent; i++) { + lx_systrace_sysent[i].lss_name = lx_sysent[i].sy_name; + lx_systrace_sysent[i].lss_entry = DTRACE_IDNONE; + lx_systrace_sysent[i].lss_return = DTRACE_IDNONE; + } + + /* + * Install probe triggers. + */ + lx_systrace_entry_ptr = lx_systrace_entry; + lx_systrace_return_ptr = lx_systrace_return; + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + case DDI_SUSPEND: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (dtrace_unregister(lx_systrace_id) != 0) + return (DDI_FAILURE); + + /* + * Free table. + */ + kmem_free(lx_systrace_sysent, lx_systrace_nsysent * + sizeof (lx_systrace_sysent_t)); + lx_systrace_sysent = NULL; + lx_systrace_nsysent = 0; + + /* + * Reset probe triggers. 
+ */ + lx_systrace_entry_ptr = NULL; + lx_systrace_return_ptr = NULL; + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +static struct cb_ops lx_systrace_cb_ops = { + lx_systrace_open, /* open */ + nodev, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + nodev, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops lx_systrace_ops = { + DEVO_REV, /* devo_rev */ + 0, /* refcnt */ + ddi_getinfo_1to1, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + lx_systrace_attach, /* attach */ + lx_systrace_detach, /* detach */ + nodev, /* reset */ + &lx_systrace_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "Linux Brand System Call Tracing", /* name of module */ + &lx_systrace_ops /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf new file mode 100644 index 0000000000..e4499c8a5b --- /dev/null +++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +name="lx_systrace" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/brand/lx/io/ldlinux.c b/usr/src/uts/common/brand/lx/io/ldlinux.c new file mode 100644 index 0000000000..76c5e1d255 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/ldlinux.c @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ddi.h> +#include <sys/cmn_err.h> +#include <sys/modctl.h> +#include <sys/ptms.h> +#include <sys/stropts.h> +#include <sys/strsun.h> +#include <sys/sunddi.h> + +#include <sys/ldlinux.h> + + +/* + * ldlinuxopen - open routine gets called when the module gets pushed onto the + * stream. 
+ */ +/* ARGSUSED */ +static int +ldlinuxopen( + queue_t *q, /* pointer to the read side queue */ + dev_t *devp, /* pointer to stream tail's dev */ + int oflag, /* the user open(2) supplied flags */ + int sflag, /* open state flag */ + cred_t *credp) /* credentials */ +{ + struct ldlinux *tp; /* ldlinux entry for this module */ + mblk_t *mop; + struct stroptions *sop; + struct termios *termiosp; + int len; + int rv; /* status of the ttymodes property lookup */ + + if (sflag != MODOPEN) + return (EINVAL); + + if (q->q_ptr != NULL) { + /* It's already attached. */ + return (0); + } + + /* + * Build the M_SETOPTS message before allocating any per-stream + * state so that an allocation failure can simply return ENOSR. + */ + mop = allocb(sizeof (struct stroptions), BPRI_MED); + if (mop == NULL) + return (ENOSR); + mop->b_datap->db_type = M_SETOPTS; + mop->b_wptr += sizeof (struct stroptions); + sop = (struct stroptions *)mop->b_rptr; + sop->so_flags = SO_ISTTY; + + /* + * Allocate state structure. + */ + tp = kmem_alloc(sizeof (*tp), KM_SLEEP); + + /* Stash a pointer to our private data in q_ptr. */ + q->q_ptr = WR(q)->q_ptr = tp; + + /* + * Get termios defaults. These are stored as + * a property in the "options" node. + * + * ddi_getlongprop() hands back an allocated buffer whenever it + * succeeds, so keep its status around: the buffer must be freed + * even when the property has an unexpected length. + */ + rv = ddi_getlongprop(DDI_DEV_T_ANY, ddi_root_node(), 0, "ttymodes", + (caddr_t)&termiosp, &len); + if (rv == DDI_PROP_SUCCESS && len == sizeof (struct termios)) { + if (termiosp->c_lflag & ICANON) { + tp->veof = termiosp->c_cc[VEOF]; + tp->veol = termiosp->c_cc[VEOL]; + tp->vmin = 1; + tp->vtime = 0; + } else { + tp->veof = 0; + tp->veol = 0; + tp->vmin = termiosp->c_cc[VMIN]; + tp->vtime = termiosp->c_cc[VTIME]; + } + } else { + /* + * winge winge winge... + */ + cmn_err(CE_WARN, + "ldlinuxopen: Couldn't get ttymodes property!"); + bzero(tp, sizeof (*tp)); + } + if (rv == DDI_PROP_SUCCESS) + kmem_free(termiosp, len); + + tp->state = 0; + + /* + * Commit to the open and send the M_SETOPTS off to the stream head. + */ + qprocson(q); + putnext(q, mop); + + return (0); +} + + +/* + * ldlinuxclose - This routine gets called when the module gets + * popped off of the stream. 
+ */ +/* ARGSUSED */ +static int +ldlinuxclose(queue_t *q, int flag, cred_t *credp) +{ + struct ldlinux *tp; + + qprocsoff(q); + tp = q->q_ptr; + kmem_free(tp, sizeof (*tp)); + q->q_ptr = WR(q)->q_ptr = NULL; + return (0); +} + + +static void +do_ioctl(queue_t *q, mblk_t *mp) +{ + struct ldlinux *tp = q->q_ptr; + struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + struct lx_cc *cb; + mblk_t *tmp; + int error; + + switch (iocp->ioc_cmd) { + case TIOCSETLD: + /* prepare caller supplied data for access */ + error = miocpullup(mp, sizeof (struct lx_cc)); + if (error != 0) { + miocnak(q, mp, 0, error); + return; + } + + /* get a pointer to the caller supplied data */ + cb = (struct lx_cc *)mp->b_cont->b_rptr; + + /* save caller supplied data in our per-stream cache */ + tp->veof = cb->veof; + tp->veol = cb->veol; + tp->vmin = cb->vmin; + tp->vtime = cb->vtime; + + /* initialize and send a reply indicating that we're done */ + miocack(q, mp, 0, 0); + return; + + case TIOCGETLD: + /* allocate a reply message */ + if ((tmp = allocb(sizeof (struct lx_cc), BPRI_MED)) == NULL) { + miocnak(q, mp, 0, ENOSR); + return; + } + + /* initialize the reply message */ + mioc2ack(mp, tmp, sizeof (struct lx_cc), 0); + + /* get a pointer to the reply data */ + cb = (struct lx_cc *)mp->b_cont->b_rptr; + + /* copy data from our per-stream cache into the reply data */ + cb->veof = tp->veof; + cb->veol = tp->veol; + cb->vmin = tp->vmin; + cb->vtime = tp->vtime; + + /* send the reply indicating that we're done */ + qreply(q, mp); + return; + + case PTSSTTY: + tp->state |= ISPTSTTY; + break; + + default: + break; + } + + putnext(q, mp); +} + + +/* + * ldlinuxput - Module read and write queue put procedure. + */ +static void +ldlinuxput(queue_t *q, mblk_t *mp) +{ + struct ldlinux *tp = q->q_ptr; + + switch (DB_TYPE(mp)) { + default: + break; + case M_IOCTL: + if ((q->q_flag & QREADR) == 0) { + do_ioctl(q, mp); + return; + } + break; + + case M_FLUSH: + /* + * Handle read and write flushes. 
+ */ + if ((((q->q_flag & QREADR) != 0) && (*mp->b_rptr & FLUSHR)) || + (((q->q_flag & QREADR) == 0) && (*mp->b_rptr & FLUSHW))) { + if ((tp->state & ISPTSTTY) && (*mp->b_rptr & FLUSHBAND)) + flushband(q, *(mp->b_rptr + 1), FLUSHDATA); + else + flushq(q, FLUSHDATA); + } + break; + } + putnext(q, mp); +} + + +static struct module_info ldlinux_info = { + LDLINUX_MODID, + LDLINUX_MOD, + 0, + INFPSZ, + 0, + 0 +}; + +static struct qinit ldlinuxinit = { + (int (*)()) ldlinuxput, + NULL, + ldlinuxopen, + ldlinuxclose, + NULL, + &ldlinux_info +}; + +static struct streamtab ldlinuxinfo = { + &ldlinuxinit, + &ldlinuxinit +}; + +/* + * Module linkage information for the kernel. + */ +static struct fmodsw fsw = { + LDLINUX_MOD, + &ldlinuxinfo, + D_MTQPAIR | D_MP +}; + +static struct modlstrmod modlstrmod = { + &mod_strmodops, "termios extensions for lx brand", &fsw +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modlstrmod, NULL +}; + +int +_init() +{ + return (mod_install(&modlinkage)); +} + +int +_fini() +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c new file mode 100644 index 0000000000..e8c6234d92 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_audio.c @@ -0,0 +1,1996 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include <sys/audio.h> +#include <sys/conf.h> +#include <sys/debug.h> +#include <sys/disp.h> +#include <sys/ddi.h> +#include <sys/file.h> +#include <sys/id_space.h> +#include <sys/kmem.h> +#include <sys/lx_audio.h> +#include <sys/mixer.h> +#include <sys/modhash.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/sysmacros.h> +#include <sys/stropts.h> +#include <sys/types.h> +#include <sys/zone.h> + +/* Properties used by the lx_audio driver */ +#define LXA_PROP_INPUTDEV "inputdev" +#define LXA_PROP_OUTPUTDEV "outputdev" + +/* default device paths used by this driver */ +#define LXA_DEV_DEFAULT "/dev/audio" +#define LXA_DEV_CUSTOM_DIR "/dev/sound/" + +/* maximum possible number of concurrent opens of this driver */ +#define LX_AUDIO_MAX_OPENS 1024 + +/* + * these are default fragment size and fragment count values. + * these values were chosen to make quake work well on my + * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400. 
+ * + * for reference: + * - 1 sec of stereo output at 44Khz is about 171 Kb of data + * - 1 sec of mono output at 8Khz is about 8Kb of data + */ +#define LXA_OSS_FRAG_SIZE (1024) /* 1/8 sec at 8Khz mono */ +#define LXA_OSS_FRAG_CNT (1024 * 2) + +/* maximum ammount of fragment memory we'll allow a process to mmap */ +#define LXA_OSS_FRAG_MEM (1024 * 1024 * 2) /* 2Mb */ + +/* forward declarations */ +typedef struct lxa_state lxa_state_t; +typedef struct lxa_zstate lxa_zstate_t; + +/* + * Structure and enum declarations + */ +typedef enum { + LXA_TYPE_INVALID = 0, + LXA_TYPE_AUDIO = 1, /* audio device */ + LXA_TYPE_AUDIOCTL = 2 /* audio control/mixer device */ +} lxa_dev_type_t; + +struct lxa_zstate { + char *lxa_zs_zonename; + + /* + * we could store the input/output audio device setting here, + * but instead we're keeing them as device node properties + * so that a user can easily see the audio configuration for + * a zone via prtconf. + */ + + /* + * OSS doesn't support multiple opens of the audio device. + * (multiple opens of the mixer device are supported.) + * so here we'll keep a pointer to any open input/output + * streams. (OSS does support two opens if one is for input + * and the other is for output.) + */ + lxa_state_t *lxa_zs_istate; + lxa_state_t *lxa_zs_ostate; + + /* + * we need to cache channel gain and balance. channel gain and + * balance map to PCM volume in OSS, which are supposedly a property + * of the underlying hardware. but in solaris, channels are + * implemented in software and only exist when an audio device + * is actually open. (each open returns a unique channel.) OSS + * apps will expect consistent PCM volume set/get operations to + * work even if no audio device is open. hence, if no underlying + * device is open we need to cache the gain and balance setting. 
+ */ + lxa_mixer_levels_t lxa_zs_pcm_levels; +}; + +struct lxa_state { + lxa_zstate_t *lxas_zs; /* zone state pointer */ + + dev_t lxas_dev_old; /* dev_t used to open the device */ + dev_t lxas_dev_new; /* new dev_t assigned to an open */ + int lxas_flags; /* original flags passed to open */ + lxa_dev_type_t lxas_type; /* type of device that was opened */ + + int lxas_devs_same; /* input and output device the same? */ + + /* input device variables */ + ldi_handle_t lxas_idev_lh; /* ldi handle for access */ + int lxas_idev_flags; /* flags used for open */ + + /* output device variables */ + ldi_handle_t lxas_odev_lh; /* ldi handle for access */ + int lxas_odev_flags; /* flags used for open */ + + /* + * since we support multiplexing of devices we need to remember + * certain parameters about the devices + */ + uint_t lxas_hw_features; + uint_t lxas_sw_features; + + uint_t lxas_frag_size; + uint_t lxas_frag_cnt; + + /* + * members needed to support mmap device access. note that to + * simplifly things we only support one mmap access per open. 
+ */ + ddi_umem_cookie_t lxas_umem_cookie; + char *lxas_umem_ptr; + size_t lxas_umem_len; + kthread_t *lxas_mmap_thread; + int lxas_mmap_thread_running; + int lxas_mmap_thread_exit; + int lxas_mmap_thread_frag; +}; + +/* + * Global variables + */ +dev_info_t *lxa_dip = NULL; +kmutex_t lxa_lock; +id_space_t *lxa_minor_id = NULL; +mod_hash_t *lxa_state_hash = NULL; +mod_hash_t *lxa_zstate_hash = NULL; +size_t lxa_state_hash_size = 15; +size_t lxa_zstate_hash_size = 15; +size_t lxa_registered_zones = 0; + +/* + * function declarations + */ +static void lxa_mmap_output_disable(lxa_state_t *); + +/* + * functions + */ +static void +lxa_state_close(lxa_state_t *lxa_state) +{ + lxa_zstate_t *lxa_zs = lxa_state->lxas_zs; + minor_t minor = getminor(lxa_state->lxas_dev_new); + + /* disable any mmap output that might still be going on */ + lxa_mmap_output_disable(lxa_state); + + /* + * if this was the active input/output device, unlink it from + * the global zone state so that other opens of the audio device + * can now succeed. 
+ */ + mutex_enter(&lxa_lock); + if (lxa_zs->lxa_zs_istate == lxa_state) + lxa_zs->lxa_zs_istate = NULL; + if (lxa_zs->lxa_zs_ostate == lxa_state) { + lxa_zs->lxa_zs_ostate = NULL; + } + mutex_exit(&lxa_lock); + + /* remove this state structure from the hash (if it's there) */ + (void) mod_hash_remove(lxa_state_hash, + (mod_hash_key_t)(uintptr_t)minor, (mod_hash_val_t *)&lxa_state); + + /* close any audio device that we have open */ + if (lxa_state->lxas_idev_lh != NULL) + (void) ldi_close(lxa_state->lxas_idev_lh, + lxa_state->lxas_idev_flags, kcred); + if (lxa_state->lxas_odev_lh != NULL) + (void) ldi_close(lxa_state->lxas_odev_lh, + lxa_state->lxas_odev_flags, kcred); + + /* free up any memory allocated by mmaps */ + if (lxa_state->lxas_umem_cookie != NULL) + ddi_umem_free(lxa_state->lxas_umem_cookie); + + /* release the id associated with this state structure */ + id_free(lxa_minor_id, minor); + + kmem_free(lxa_state, sizeof (*lxa_state)); +} + +static char * +getzonename(void) +{ + return (curproc->p_zone->zone_name); +} + +static char * +lxa_devprop_name(char *zname, char *pname) +{ + char *zpname; + int n; + + ASSERT((pname != NULL) && (zname != NULL)); + + /* prepend the zone name to the property name */ + n = snprintf(NULL, 0, "%s_%s", zname, pname) + 1; + zpname = kmem_alloc(n, KM_SLEEP); + (void) snprintf(zpname, n, "%s_%s", zname, pname); + + return (zpname); +} + +/* + * Check that a device property value is either one of the keywords + * "default" / "none" or consists solely of decimal digits. + * Returns 0 if the value is acceptable and -1 otherwise. + */ +static int +lxa_devprop_verify(char *pval) +{ + int n; + + ASSERT(pval != NULL); + + /* the keywords understood by lxa_devprop_lookup() are always valid */ + if ((strcmp(pval, "default") == 0) || (strcmp(pval, "none") == 0)) + return (0); + + /* + * make sure the value is an integer: a character is rejected when + * it lies below '0' OR above '9' (it can never be both at once) + */ + for (n = 0; pval[n] != '\0'; n++) { + if ((pval[n] < '0') || (pval[n] > '9')) { + return (-1); + } + } + + return (0); +} + +static char * +lxa_devprop_lookup(char *zname, char *pname, lxa_dev_type_t lxa_type) +{ + char *zprop_name, *pval; + char *dev_path; + int n, rv; + + ASSERT((pname != NULL) && (zname != NULL)); + ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL)); + + zprop_name = 
lxa_devprop_name(zname, pname); + + /* attempt to lookup the property */ + rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval); + strfree(zprop_name); + + if (rv != DDI_PROP_SUCCESS) + return (NULL); + + /* from here on pval is owned by us and must be released on every path */ + if (lxa_devprop_verify(pval) != 0) { + ddi_prop_free(pval); + return (NULL); + } + + if (strcmp(pval, "none") == 0) { + /* there is no audio device specified */ + ddi_prop_free(pval); + return (NULL); + } else if (strcmp(pval, "default") == 0) { + /* use the default audio device on the system */ + dev_path = strdup(LXA_DEV_DEFAULT); + } else { + /* a custom audio device was specified, generate a path */ + n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1; + dev_path = kmem_alloc(n, KM_SLEEP); + (void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval); + } + ddi_prop_free(pval); + + /* + * if this is an audio control device we need to append + * "ctl" to the path + */ + if (lxa_type == LXA_TYPE_AUDIOCTL) { + char *tmp; + n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1; + tmp = kmem_alloc(n, KM_SLEEP); + (void) snprintf(tmp, n, "%s%s", dev_path, "ctl"); + strfree(dev_path); + dev_path = tmp; + } + + /* the caller owns the returned path and releases it with strfree() */ + return (dev_path); +} + +static int +lxa_dev_getfeatures(lxa_state_t *lxa_state) +{ + audio_info_t ai_idev, ai_odev; + int n, rv; + + /* set a default fragment size */ + lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE; + lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT; + + /* get info for the currently open audio devices */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, + AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0)) + return (rv); + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_GETINFO, (intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0)) + return (rv); + + /* if we're only open for reading or writing then it's easy */ + if (lxa_state->lxas_idev_lh == NULL) { + lxa_state->lxas_sw_features = ai_odev.sw_features; + 
lxa_state->lxas_hw_features = ai_odev.hw_features; + return (0); + } else if (lxa_state->lxas_odev_lh == NULL) { + lxa_state->lxas_sw_features = ai_idev.sw_features; + lxa_state->lxas_hw_features = ai_idev.hw_features; + return (0); + } + + /* + * well if we're open for reading and writing but the underlying + * device is the same then it's also pretty easy + */ + if (lxa_state->lxas_devs_same) { + if ((ai_odev.sw_features != ai_idev.sw_features) || + (ai_odev.hw_features != ai_idev.hw_features)) { + zcmn_err(getzoneid(), CE_WARN, "lx_audio error: " + "audio device reported inconsistent features"); + return (EIO); + } + lxa_state->lxas_sw_features = ai_odev.sw_features; + lxa_state->lxas_hw_features = ai_odev.hw_features; + return (0); + } + + /* + * figure out which software features we're going to support. + * we will report a feature as supported if both the input + * and output device support it. + */ + lxa_state->lxas_sw_features = 0; + n = ai_idev.sw_features & ai_odev.sw_features; + if (n & AUDIO_SWFEATURE_MIXER) + lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER; + + /* + * figure out which hardware features we're going to support. + * for a first pass we will report a feature as supported if + * both the input and output device support it. + */ + lxa_state->lxas_hw_features = 0; + n = ai_idev.hw_features & ai_odev.hw_features; + if (n & AUDIO_HWFEATURE_MSCODEC) + lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC; + + /* + * if we made it here then we have different audio input and output + * devices. this will allow us to report support for additional + * hardware features that may not supported by just the input or + * output device alone. 
+ */ + + /* + * always report that we support both playback and recording. + * this is OR-ed in so the features computed above are kept. + */ + lxa_state->lxas_hw_features |= + AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD; + + /* always report full duplex support (again in addition to the above) */ + lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_DUPLEX; + + /* never report that we have input to output loopback support */ + ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0); + return (0); +} + +static int +lxa_dev_open(lxa_state_t *lxa_state) +{ + char *idev, *odev; + int flags, rv; + ldi_handle_t lh; + ldi_ident_t li = NULL; + + ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) || + (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL)); + + /* + * check if we have configuration properties for this zone. + * if we don't then audio isn't supported in this zone. + */ + idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV, + lxa_state->lxas_type); + odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV, + lxa_state->lxas_type); + + /* make sure there is at least one device to read from or write to */ + if ((idev == NULL) && (odev == NULL)) + return (ENODEV); + + /* see if the input and output devices are actually the same device */ + if (((idev != NULL) && (odev != NULL)) && + (strcmp(idev, odev) == 0)) + lxa_state->lxas_devs_same = 1; + + /* we don't respect FEXCL */ + flags = lxa_state->lxas_flags & ~FEXCL; + if (lxa_state->lxas_type == LXA_TYPE_AUDIO) { + /* + * if we're opening audio devices then we need to muck + * with the FREAD/FWRITE flags. + * + * certain audio device may only support input or output + * (but not both.) so if we're multiplexing input/output + * to different devices we need to make sure we don't try + * and open the output device for reading and the input + * device for writing. + * + * if we're using the same device for input/output we still + * need to do this because some audio devices won't let + * themselves be opened multiple times for read access. 
+ */ + lxa_state->lxas_idev_flags = flags & ~FWRITE; + lxa_state->lxas_odev_flags = flags & ~FREAD; + + /* make sure we have devices to read from and write to */ + if (((flags & FREAD) && (idev == NULL)) || + ((flags & FWRITE) && (odev == NULL))) { + rv = ENODEV; + goto out; + } + } else { + lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags; + } + + /* get an ident to open the devices */ + if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) { + rv = ENODEV; + goto out; + } + + /* open the input device */ + lxa_state->lxas_idev_lh = NULL; + if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) || + (lxa_state->lxas_idev_flags & FREAD)) && + (idev != NULL)) { + rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags, + kcred, &lh, li); + if (rv != 0) { + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "unable to open audio device: %s", idev); + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "possible zone audio configuration error"); + goto out; + } + lxa_state->lxas_idev_lh = lh; + } + + /* open the output device */ + lxa_state->lxas_odev_lh = NULL; + if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) || + (lxa_state->lxas_odev_flags & FWRITE)) && + (odev != NULL)) { + rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags, + kcred, &lh, li); + if (rv != 0) { + /* + * If this open failed and we previously opened an + * input device, it is the responsibility of the + * caller to close that device after we return + * failure here. 
+ */ + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "unable to open audio device: %s", odev); + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "possible zone audio configuration error"); + goto out; + } + lxa_state->lxas_odev_lh = lh; + } + + /* free up stuff */ +out: + if (li != NULL) + ldi_ident_release(li); + if (idev != NULL) + strfree(idev); + if (odev != NULL) + strfree(odev); + + return (rv); +} + +void +lxa_mmap_thread_exit(lxa_state_t *lxa_state) +{ + mutex_enter(&lxa_lock); + lxa_state->lxas_mmap_thread = NULL; + lxa_state->lxas_mmap_thread_frag = 0; + lxa_state->lxas_mmap_thread_running = 0; + lxa_state->lxas_mmap_thread_exit = 0; + mutex_exit(&lxa_lock); + thread_exit(); + /*NOTREACHED*/ +} + +void +lxa_mmap_thread(lxa_state_t *lxa_state) +{ + struct uio uio, uio_null; + iovec_t iovec, iovec_null; + uint_t bytes_per_sec, usec_per_frag, ticks_per_frag; + int rv, junk, eof, retry; + audio_info_t ai; + + /* we better be setup for writing to the output device */ + ASSERT((lxa_state->lxas_flags & FWRITE) != 0); + ASSERT(lxa_state->lxas_odev_lh != NULL); + + /* setup a uio to output one fragment */ + uio.uio_iov = &iovec; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = 0; + uio.uio_llimit = MAXOFFSET_T; + + /* setup a uio to output a eof (a fragment with a length of 0) */ + uio_null.uio_iov = &iovec_null; + uio_null.uio_iov->iov_len = 0; + uio_null.uio_iov->iov_base = NULL; + uio_null.uio_iovcnt = 1; + uio_null.uio_offset = 0; + uio_null.uio_segflg = UIO_SYSSPACE; + uio_null.uio_fmode = 0; + uio_null.uio_extflg = 0; + uio_null.uio_llimit = MAXOFFSET_T; + uio_null.uio_resid = 0; + +lxa_mmap_thread_top: + ASSERT(!MUTEX_HELD(&lxa_lock)); + + /* first drain any pending audio output */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_DRAIN failed, aborting audio output"); + 
lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* + * we depend on the ai.play.eof value to keep track of + * audio output progress so reset it here. + */ + AUDIO_INITINFO(&ai); + ai.play.eof = 0; + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_SETINFO failed, aborting audio output"); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* + * we're going to need to know the sampling rate and number + * of output channels to estimate how long we can sleep between + * requests. + */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO, + (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_GETINFO failed, aborting audio output"); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* estimate how many ticks it takes to output a fragment of data */ + bytes_per_sec = (ai.play.sample_rate * ai.play.channels * + ai.play.precision) / 8; + usec_per_frag = MICROSEC * lxa_state->lxas_frag_size / bytes_per_sec; + ticks_per_frag = drv_usectohz(usec_per_frag); + + /* queue up three fragments of of data into the output stream */ + eof = 3; + + /* sanity check the eof value */ + ASSERT(ai.play.eof == 0); + ai.play.eof = 0; + + /* we always start audio output at fragment 0 */ + mutex_enter(&lxa_lock); + lxa_state->lxas_mmap_thread_frag = 0; + + /* + * we shouldn't have allowed the mapping if it isn't a multiple + * of the fragment size + */ + ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0); + + while (!lxa_state->lxas_mmap_thread_exit) { + size_t start, end; + + /* + * calculate the start and ending offsets of the next + * fragment to output + */ + start = lxa_state->lxas_mmap_thread_frag * + lxa_state->lxas_frag_size; + end = start + lxa_state->lxas_frag_size; + + ASSERT(start < lxa_state->lxas_umem_len); + ASSERT(end <= lxa_state->lxas_umem_len); + + /* setup the uio to 
output one fragment of audio */ + uio.uio_resid = end - start; + uio.uio_iov->iov_len = end - start; + uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start]; + + /* increment the current fragment index */ + lxa_state->lxas_mmap_thread_frag = + (lxa_state->lxas_mmap_thread_frag + 1) % + (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size); + + /* drop the audio lock before actually outputting data */ + mutex_exit(&lxa_lock); + + /* + * write the fragment of audio data to the device stream + * then write a eof to the stream to tell the device to + * increment ai.play.eof when it's done processing the + * fragment we just wrote + */ + if ((rv = ldi_write(lxa_state->lxas_odev_lh, + &uio, kcred)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "ldi_write() failed (%d), " + "resetting audio output", rv); + goto lxa_mmap_thread_top; + } + if ((rv = ldi_write(lxa_state->lxas_odev_lh, + &uio_null, kcred)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "ldi_write(eof) failed (%d), " + "resetting audio output", rv); + goto lxa_mmap_thread_top; + } + + /* + * we want to avoid buffer underrun so ensure that + * there is always at least one fragment of data in the + * output stream. + */ + mutex_enter(&lxa_lock); + if (--eof > 0) { + continue; + } + + /* + * now we wait until the audio device has finished outputting + * at least one fragment of data. 
+ */ + retry = 0; + while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) { + uint_t ai_eof_old = ai.play.eof; + + mutex_exit(&lxa_lock); + + /* + * delay for the number of ticks it takes + * to output one fragment of data + */ + if (ticks_per_frag > 0) + delay(ticks_per_frag); + + /* check if we've managed to output any fragments */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_GETINFO, (intptr_t)&ai, + FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_GETINFO failed (%d), " + "resetting audio output", rv); + /* re-start mmap audio output */ + goto lxa_mmap_thread_top; + } + + if (ai_eof_old == ai.play.eof) { + /* institute a random retry limit */ + if (retry++ < 100) { + mutex_enter(&lxa_lock); + continue; + } + cmn_err(CE_WARN, "lxa_mmap_thread: " + "output stalled, " + "resetting audio output"); + /* re-start mmap audio output */ + goto lxa_mmap_thread_top; + } + + if (ai.play.eof > ai_eof_old) { + eof = ai.play.eof - ai_eof_old; + } else { + /* eof counter wrapped around */ + ASSERT(ai_eof_old < ai.play.eof); + eof = ai.play.eof + (ai_eof_old - UINTMAX_MAX); + } + /* we're done with this loop so re-aquire the lock */ + ASSERT(eof != 0); + mutex_enter(&lxa_lock); + } + } + mutex_exit(&lxa_lock); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ +} + +static void +lxa_mmap_output_disable(lxa_state_t *lxa_state) +{ + kt_did_t tid; + + mutex_enter(&lxa_lock); + + /* if the output thread isn't running there's nothing to do */ + if (lxa_state->lxas_mmap_thread_running == 0) { + mutex_exit(&lxa_lock); + return; + } + + /* tell the pcm mmap output thread to exit */ + lxa_state->lxas_mmap_thread_exit = 1; + + /* wait for the mmap output thread to exit */ + tid = lxa_state->lxas_mmap_thread->t_did; + mutex_exit(&lxa_lock); + thread_join(tid); +} + +static void +lxa_mmap_output_enable(lxa_state_t *lxa_state) +{ + mutex_enter(&lxa_lock); + + /* if the output thread is already running there's nothing to do */ + if 
(lxa_state->lxas_mmap_thread_running != 0) { + mutex_exit(&lxa_lock); + return; + } + + /* setup output state */ + lxa_state->lxas_mmap_thread_running = 1; + lxa_state->lxas_mmap_thread_exit = 0; + lxa_state->lxas_mmap_thread_frag = 0; + + /* kick off a thread to do the mmap pcm output */ + lxa_state->lxas_mmap_thread = thread_create(NULL, 0, + (void (*)())lxa_mmap_thread, lxa_state, + 0, &p0, TS_RUN, minclsyspri); + ASSERT(lxa_state->lxas_mmap_thread != NULL); + + mutex_exit(&lxa_lock); +} + +static int +lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + uint_t trigger; + + /* we only support output via mmap */ + if ((lxa_state->lxas_flags & FWRITE) == 0) + return (EINVAL); + + /* if the user hasn't mmap the device then there's nothing to do */ + if (lxa_state->lxas_umem_cookie == NULL) + return (EINVAL); + + /* copy in the request */ + if (ddi_copyin((void *)arg, &trigger, sizeof (trigger), mode) != 0) + return (EFAULT); + + /* a zero value disables output */ + if (trigger == 0) { + lxa_mmap_output_disable(lxa_state); + return (0); + } + + /* a non-zero value enables output */ + lxa_mmap_output_enable(lxa_state); + return (0); +} + +static int +lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + int ptr; + + /* we only support output via mmap */ + if ((lxa_state->lxas_flags & FWRITE) == 0) + return (EINVAL); + + /* if the user hasn't mmap the device then there's nothing to do */ + if (lxa_state->lxas_umem_cookie == NULL) + return (EINVAL); + + /* if the output thread isn't running then there's nothing to do */ + if (lxa_state->lxas_mmap_thread_running == 0) + return (EINVAL); + + mutex_enter(&lxa_lock); + ptr = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size; + mutex_exit(&lxa_lock); + + if (ddi_copyout(&ptr, (void *)arg, sizeof (ptr), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_frag_info_t fi; + + fi.lxa_fi_size 
= lxa_state->lxas_frag_size; + fi.lxa_fi_cnt = lxa_state->lxas_frag_cnt; + + if (ddi_copyout(&fi, (void *)arg, sizeof (fi), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_frag_info_t fi; + + /* if the device is mmaped we can't change the fragment settings */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EINVAL); + + /* copy in the request */ + if (ddi_copyin((void *)arg, &fi, sizeof (fi), mode) != 0) + return (EFAULT); + + /* do basic bounds checking */ + if ((fi.lxa_fi_cnt == 0) || (fi.lxa_fi_size < 16)) + return (EINVAL); + + /* don't accept size values less than 16 */ + + lxa_state->lxas_frag_size = fi.lxa_fi_size; + lxa_state->lxas_frag_cnt = fi.lxa_fi_cnt; + + return (0); +} + +static int +lxa_audio_drain(lxa_state_t *lxa_state) +{ + int junk; + + /* only applies to output buffers */ + if (lxa_state->lxas_odev_lh == NULL) + return (EINVAL); + + /* can't fail so ignore the return value */ + (void) ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_DRAIN, NULL, + FKIOCTL, kcred, &junk); + return (0); +} + +/* + * lxa_audio_info_merge() usage notes: + * + * - it's important to make sure NOT to get the ai_idev and ai_odev + * parameters mixed up when calling lxa_audio_info_merge(). + * + * - it's important for the caller to make sure that AUDIO_GETINFO + * was called for the input device BEFORE the output device. (see + * the comments for merging the monitor_gain setting to see why.) 
+ */ +static void +lxa_audio_info_merge(lxa_state_t *lxa_state, + audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged) +{ + /* if we're not setup for output return the intput device info */ + if (lxa_state->lxas_odev_lh == NULL) { + *ai_merged = *ai_idev; + return; + } + + /* if we're not setup for input return the output device info */ + if (lxa_state->lxas_idev_lh == NULL) { + *ai_merged = *ai_odev; + return; + } + + /* get record values from the input device */ + ai_merged->record = ai_idev->record; + + /* get play values from the output device */ + ai_merged->play = ai_odev->play; + + /* muting status only matters for the output device */ + ai_merged->output_muted = ai_odev->output_muted; + + /* we don't support device reference counts, always return 1 */ + ai_merged->ref_cnt = 1; + + /* + * for supported hw/sw features report the combined feature + * set we calcuated out earlier. + */ + ai_merged->hw_features = lxa_state->lxas_hw_features; + ai_merged->sw_features = lxa_state->lxas_sw_features; + + if (!lxa_state->lxas_devs_same) { + /* + * if the input and output devices are different + * physical devices then we don't support input to + * output loopback so we always report the input + * to output loopback gain to be zero. + */ + ai_merged->monitor_gain = 0; + } else { + /* + * the intput and output devices are actually the + * same physical device. hence it probably supports + * intput to output loopback. regardless we should + * pass back the intput to output gain reported by + * the device. when we pick a value to passback we + * use the output device value since that was + * the most recently queried. (we base this + * decision on the assumption that io gain is + * actually hardware setting in the device and + * hence if it is changed on one open instance of + * the device the change will be visable to all + * other instances of the device.) 
+ */ + ai_merged->monitor_gain = ai_odev->monitor_gain; + } + + /* + * for currently enabled software features always return the + * merger of the two. (of course the enabled software features + * for the input and output devices should alway be the same, + * so if it isn't complain.) + */ + if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled) + zcmn_err(getzoneid(), CE_WARN, "lx_audio: " + "unexpected sofware feature state"); + ai_merged->sw_features_enabled = + ai_idev->sw_features_enabled & ai_odev->sw_features_enabled; +} + +static int +lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg, + int mode) +{ + audio_info_t ai, ai_null, ai_idev, ai_odev; + int rv, junk; + + /* copy in the request */ + if (ddi_copyin((void *)arg, &ai, sizeof (ai), mode) != 0) + return (EFAULT); + + /* + * if the caller is attempting to enable a software feature that + * we didn't report as supported the return an error + */ + if ((ai.sw_features_enabled != -1) && + (ai.sw_features_enabled & ~lxa_state->lxas_sw_features)) + return (EINVAL); + + /* + * if a process has mmaped this device then we don't allow + * changes to the play.eof field (since mmap output depends + * on this field. 
+ */ + if ((lxa_state->lxas_umem_cookie != NULL) && + (ai.play.eof != -1)) + return (EIO); + + /* initialize the new requests */ + AUDIO_INITINFO(&ai_null); + ai_idev = ai_odev = ai; + + /* remove audio input settings from the output device request */ + ai_odev.record = ai_null.record; + + /* remove audio output settings from the input device request */ + ai_idev.play = ai_null.play; + ai_idev.output_muted = ai_null.output_muted; + + /* apply settings to the intput device */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd, + (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* apply settings to the output device */ + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd, + (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* + * a AUDIO_SETINFO call performs an implicit AUDIO_GETINFO to + * return values (see the coments in audioio.h.) so we need + * to combine the values returned from the input and output + * device back into the users buffer. + */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai); + + /* copyout the results */ + if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + audio_info_t ai, ai_idev, ai_odev; + int rv, junk; + + /* get the settings from the input device */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO, + (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* get the settings from the output device */ + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO, + (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* + * we need to combine the values returned from the input + * and output device back into a single user buffer. 
+ */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai); + + /* copyout the results */ + if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai) +{ + int rv, junk; + + ASSERT((lh != NULL) && (ai != NULL)); + + /* get the device state and channel state */ + rv = ldi_ioctl(lh, AUDIO_GETINFO, (intptr_t)ai, FKIOCTL, kcred, &junk); + + return (rv); +} + +static int +lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai) +{ + audio_info_t ai_idev, ai_odev; + int rv; + + /* if there is no input device, query the output device */ + if (lxa_state->lxas_idev_lh == NULL) + return (lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, ai)); + + /* if there is no ouput device, query the intput device */ + if (lxa_state->lxas_odev_lh == NULL) + return (lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, ai)); + + /* + * now get the audio_info and channel information for the + * underlying output device. 
+ */ + if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, + &ai_idev)) != 0) + return (rv); + if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, + &ai_odev)) != 0) + return (rv); + + /* now merge the audio_info structures */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, ai); + return (0); +} + +static int +lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + audio_info_t ai; + int rv; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0) + return (rv); + + switch (cmd) { + case LXA_IOC_MIXER_GET_VOL: + lxa_ml.lxa_ml_gain = ai.play.gain; + lxa_ml.lxa_ml_balance = ai.play.balance; + break; + case LXA_IOC_MIXER_GET_MIC: + lxa_ml.lxa_ml_gain = ai.record.gain; + lxa_ml.lxa_ml_balance = ai.record.balance; + break; + } + + if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + return (0); +} + +static int +lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + audio_info_t ai; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* get the new mixer settings */ + if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + + /* sanity check the mixer settings */ + if (!LXA_MIXER_LEVELS_OK(&lxa_ml)) + return (EINVAL); + + /* initialize an audio_info struct with the new settings */ + AUDIO_INITINFO(&ai); + switch (cmd) { + case LXA_IOC_MIXER_SET_VOL: + ai.play.gain = lxa_ml.lxa_ml_gain; + ai.play.balance = lxa_ml.lxa_ml_balance; + break; + case LXA_IOC_MIXER_SET_MIC: + ai.record.gain = lxa_ml.lxa_ml_gain; + ai.record.balance = lxa_ml.lxa_ml_balance; + break; + } + + return (lxa_audio_setinfo(lxa_state, AUDIO_SETINFO, (intptr_t)&ai, + FKIOCTL)); +} + +static int +lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* simply return the cached pcm mixer 
settings */ + mutex_enter(&lxa_lock); + if (ddi_copyout(&lxa_state->lxas_zs->lxa_zs_pcm_levels, (void *)arg, + sizeof (lxa_state->lxas_zs->lxa_zs_pcm_levels), mode) != 0) { + mutex_exit(&lxa_lock); + return (EFAULT); + } + mutex_exit(&lxa_lock); + return (0); +} + +static int +lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + int rv; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* get the new mixer settings */ + if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + + /* sanity check the mixer settings */ + if (!LXA_MIXER_LEVELS_OK(&lxa_ml)) + return (EINVAL); + + mutex_enter(&lxa_lock); + + /* if there is an active output channel, update it */ + if (lxa_state->lxas_zs->lxa_zs_ostate != NULL) { + audio_info_t ai; + + /* initialize an audio_info struct with the new settings */ + AUDIO_INITINFO(&ai); + ai.play.gain = lxa_ml.lxa_ml_gain; + ai.play.balance = lxa_ml.lxa_ml_balance; + + if ((rv = lxa_audio_setinfo(lxa_state->lxas_zs->lxa_zs_ostate, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) { + mutex_exit(&lxa_lock); + return (rv); + } + } + + /* update the cached mixer settings */ + lxa_state->lxas_zs->lxa_zs_pcm_levels = lxa_ml; + + mutex_exit(&lxa_lock); + return (0); +} + +static int +lxa_zone_reg(intptr_t arg, int mode) +{ + lxa_zone_reg_t lxa_zr; + lxa_zstate_t *lxa_zs = NULL; + char *idev_name = NULL, *odev_name = NULL, *pval = NULL; + int i, junk; + + if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0) + return (EFAULT); + + /* make sure that zone_name is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++) + if (lxa_zr.lxa_zr_zone_name[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_zone_name)) + return (EINVAL); + + /* make sure that inputdev is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++) + if (lxa_zr.lxa_zr_inputdev[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_inputdev)) + return 
(EINVAL); + + /* make sure it's a valid inputdev property value */ + if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0) + return (EINVAL); + + /* make sure that outputdev is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++) + if (lxa_zr.lxa_zr_outputdev[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_outputdev)) + return (EINVAL); + + /* make sure it's a valid outputdev property value */ + if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0) + return (EINVAL); + + /* get the property names */ + idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_INPUTDEV); + odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_OUTPUTDEV); + + /* + * allocate and initialize a zone state structure + * since the audio device can't possibly be opened yet + * (since we're setting it up now and the zone isn't booted + * yet) assign some some resonable default pcm channel settings. + * also, default to one mixer channel. + */ + lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP); + lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name); + lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN; + lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE; + + mutex_enter(&lxa_lock); + + /* + * make sure this zone isn't already registered + * a zone is registered with properties for that zone exist + * or there is a zone state structure for that zone + */ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + idev_name, &pval) == DDI_PROP_SUCCESS) { + goto err_unlock; + } + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + odev_name, &pval) == DDI_PROP_SUCCESS) { + goto err_unlock; + } + if (mod_hash_find(lxa_zstate_hash, + (mod_hash_key_t)lxa_zs->lxa_zs_zonename, + (mod_hash_val_t *)&junk) == 0) + goto err_unlock; + + /* + * create the new properties and insert the zone state structure + * into the global hash + */ + if 
(ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip, + idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS) + goto err_prop_remove; + if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip, + odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS) + goto err_prop_remove; + if (mod_hash_insert(lxa_zstate_hash, + (mod_hash_key_t)lxa_zs->lxa_zs_zonename, + (mod_hash_val_t)lxa_zs) != 0) + goto err_prop_remove; + + /* success! */ + lxa_registered_zones++; + mutex_exit(&lxa_lock); + + /* cleanup */ + strfree(idev_name); + strfree(odev_name); + return (0); + +err_prop_remove: + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name); + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name); + +err_unlock: + mutex_exit(&lxa_lock); + + if (lxa_zs != NULL) { + strfree(lxa_zs->lxa_zs_zonename); + kmem_free(lxa_zs, sizeof (*lxa_zs)); + } + if (pval != NULL) + ddi_prop_free(pval); + if (idev_name != NULL) + strfree(idev_name); + if (odev_name != NULL) + strfree(odev_name); + return (EIO); +} + +static int +lxa_zone_unreg(intptr_t arg, int mode) +{ + lxa_zone_reg_t lxa_zr; + lxa_zstate_t *lxa_zs = NULL; + char *idev_name = NULL, *odev_name = NULL, *pval = NULL; + int rv, i; + + if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0) + return (EFAULT); + + /* make sure that zone_name is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++) + if (lxa_zr.lxa_zr_zone_name[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_zone_name)) + return (EINVAL); + + /* get the property names */ + idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_INPUTDEV); + odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_OUTPUTDEV); + + mutex_enter(&lxa_lock); + + if (lxa_registered_zones <= 0) { + rv = ENOENT; + goto err_unlock; + } + + /* make sure this zone is actually registered */ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + idev_name, &pval) != DDI_PROP_SUCCESS) { + rv = ENOENT; + goto 
err_unlock; + } + ddi_prop_free(pval); + pval = NULL; + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + odev_name, &pval) != DDI_PROP_SUCCESS) { + rv = ENOENT; + goto err_unlock; + } + ddi_prop_free(pval); + pval = NULL; + if (mod_hash_find(lxa_zstate_hash, + (mod_hash_key_t)lxa_zr.lxa_zr_zone_name, + (mod_hash_val_t *)&lxa_zs) != 0) { + rv = ENOENT; + goto err_unlock; + } + ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0); + + /* + * if the audio device is currently in use then refuse to + * unregister the zone + */ + if ((lxa_zs->lxa_zs_ostate != NULL) || + (lxa_zs->lxa_zs_ostate != NULL)) { + rv = EBUSY; + goto err_unlock; + } + + /* success! cleanup zone config state */ + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name); + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name); + + /* + * note, the action of removing the zone state structure from the + * hash will automatically free lxa_zs->lxa_zs_zonename. + * + * the reason for this is that we used lxa_zs->lxa_zs_zonename + * as the hash key and by default mod_hash_create_strhash() uses + * mod_hash_strkey_dtor() as a the hash key destructor. (which + * free's the key for us. 
+ */ + (void) mod_hash_remove(lxa_zstate_hash, + (mod_hash_key_t)lxa_zr.lxa_zr_zone_name, + (mod_hash_val_t *)&lxa_zs); + lxa_registered_zones--; + mutex_exit(&lxa_lock); + + /* cleanup */ + kmem_free(lxa_zs, sizeof (*lxa_zs)); + strfree(idev_name); + strfree(odev_name); + return (0); + +err_unlock: + mutex_exit(&lxa_lock); + + if (pval != NULL) + ddi_prop_free(pval); + if (idev_name != NULL) + strfree(idev_name); + if (odev_name != NULL) + strfree(odev_name); + return (rv); +} + +static int +lxa_ioctl_devctl(int cmd, intptr_t arg, int mode) +{ + /* devctl ioctls are only allowed from the global zone */ + ASSERT(getzoneid() == 0); + if (getzoneid() != 0) + return (EINVAL); + + switch (cmd) { + case LXA_IOC_ZONE_REG: + return (lxa_zone_reg(arg, mode)); + case LXA_IOC_ZONE_UNREG: + return (lxa_zone_unreg(arg, mode)); + } + + return (EINVAL); +} + +static int +/*ARGSUSED*/ +lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp) +{ + lxa_dev_type_t open_type = LXA_TYPE_INVALID; + lxa_zstate_t *lxa_zs; + lxa_state_t *lxa_state; + minor_t minor; + int rv; + + if (getminor(*devp) == LXA_MINORNUM_DEVCTL) { + /* + * this is a devctl node, it exists to administer this + * pseudo driver so it doesn't actually need access to + * any underlying audio devices. hence there is nothing + * really to do here. course, this driver should + * only be administered from the global zone. 
+ */ + ASSERT(getzoneid() == 0); + if (getzoneid() != 0) + return (EINVAL); + return (0); + } + + /* lookup the zone state structure */ + if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(), + (mod_hash_val_t *)&lxa_zs) != 0) { + return (EIO); + } + + /* determine what type of device was opened */ + switch (getminor(*devp)) { + case LXA_MINORNUM_DSP: + open_type = LXA_TYPE_AUDIO; + break; + case LXA_MINORNUM_MIXER: + open_type = LXA_TYPE_AUDIOCTL; + break; + default: + return (EINVAL); + } + ASSERT(open_type != LXA_TYPE_INVALID); + + /* all other opens are clone opens so get a new minor node */ + minor = id_alloc(lxa_minor_id); + + /* allocate and initialize the new lxa_state structure */ + lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP); + lxa_state->lxas_zs = lxa_zs; + lxa_state->lxas_dev_old = *devp; + lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor); + lxa_state->lxas_flags = flags; + lxa_state->lxas_type = open_type; + + /* initialize the input and output device */ + if (((rv = lxa_dev_open(lxa_state)) != 0) || + ((rv = lxa_dev_getfeatures(lxa_state)) != 0)) { + lxa_state_close(lxa_state); + return (rv); + } + + /* + * save this audio statue structure into a hash indexed + * by it's minor device number. (this will provide a convient + * way to lookup the state structure on future operations.) 
+ */ + if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t)lxa_state) != 0) { + lxa_state_close(lxa_state); + return (EIO); + } + + mutex_enter(&lxa_lock); + + /* apply the currently cached zone PCM mixer levels */ + if ((lxa_state->lxas_type == LXA_TYPE_AUDIO) && + (lxa_state->lxas_odev_lh != NULL)) { + audio_info_t ai; + + AUDIO_INITINFO(&ai); + ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain; + ai.play.balance = lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance; + + if ((rv = lxa_audio_setinfo(lxa_state, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (rv); + } + } + + /* + * we only allow one active open of the input or output device. + * check here for duplicate opens + */ + if (lxa_state->lxas_type == LXA_TYPE_AUDIO) { + if ((lxa_state->lxas_idev_lh != NULL) && + (lxa_zs->lxa_zs_istate != NULL)) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (EBUSY); + } + if ((lxa_state->lxas_odev_lh != NULL) && + (lxa_zs->lxa_zs_ostate != NULL)) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (EBUSY); + } + + /* not a duplicate open, update the global zone state */ + if (lxa_state->lxas_idev_lh != NULL) + lxa_zs->lxa_zs_istate = lxa_state; + if (lxa_state->lxas_odev_lh != NULL) + lxa_zs->lxa_zs_ostate = lxa_state; + } + mutex_exit(&lxa_lock); + + /* make sure to return our newly allocated dev_t */ + *devp = lxa_state->lxas_dev_new; + return (0); +} + +static int +/*ARGSUSED*/ +lxa_close(dev_t dev, int flags, int otyp, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* handle devctl minor nodes (these nodes don't have a handle */ + if (getminor(dev) == LXA_MINORNUM_DEVCTL) + return (0); + + /* get the handle for this device */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + lxa_state_close(lxa_state); + return (0); +} + +static 
int +/*ARGSUSED*/ +lxa_read(dev_t dev, struct uio *uiop, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + int rv; + + /* get the handle for this device */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + /* + * if a process has mmaped this device then we don't allow + * any more reads or writes to the device + */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EIO); + + /* we can't do a read if there is no input device */ + if (lxa_state->lxas_idev_lh == NULL) + return (EBADF); + + /* pass the request on */ + while (uiop->uio_resid != 0) { + rv = ldi_read(lxa_state->lxas_idev_lh, uiop, kcred); + if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) { + break; + } + } + return (rv); +} + +static int +/*ARGSUSED*/ +lxa_write(dev_t dev, struct uio *uiop, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + int rv; + + /* get the handle for this device */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + /* + * if a process has mmaped this device then we don't allow + * any more reads or writes to the device + */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EIO); + + /* we can't do a write if there is no output device */ + if (lxa_state->lxas_odev_lh == NULL) + return (EBADF); + + /* pass the request on */ + while (uiop->uio_resid != 0) { + rv = ldi_write(lxa_state->lxas_odev_lh, uiop, kcred); + if ((rv != 0) || (uiop->uio_fmode & (FNONBLOCK|FNDELAY))) { + break; + } + } + return (rv); +} + +static int +/*ARGSUSED*/ +lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* handle devctl minor nodes (these nodes don't have a handle */ + if (getminor(dev) == LXA_MINORNUM_DEVCTL) + return (lxa_ioctl_devctl(cmd, arg, mode)); + + /* get the handle 
for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+	/* ioctls accepted regardless of the node type */
+	switch (cmd) {
+	case LXA_IOC_GETMINORNUM:
+		{
+			/* report the minor of lxas_dev_old to the caller */
+			int minornum = getminor(lxa_state->lxas_dev_old);
+			if (ddi_copyout(&minornum, (void *)arg,
+			    sizeof (minornum), mode) != 0)
+				return (EFAULT);
+		}
+		return (0);
+	}
+
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/* deal with native ioctl */
+		switch (cmd) {
+		case LXA_IOC_MMAP_OUTPUT:
+			return (lxa_ioc_mmap_output(lxa_state, arg, mode));
+		case LXA_IOC_MMAP_PTR:
+			return (lxa_ioc_mmap_ptr(lxa_state, arg, mode));
+		case LXA_IOC_GET_FRAG_INFO:
+			return (lxa_ioc_get_frag_info(lxa_state, arg, mode));
+		case LXA_IOC_SET_FRAG_INFO:
+			return (lxa_ioc_set_frag_info(lxa_state, arg, mode));
+		}
+
+		/* deal with layered ioctls */
+		switch (cmd) {
+		case AUDIO_DRAIN:
+			return (lxa_audio_drain(lxa_state));
+		case AUDIO_SETINFO:
+			return (lxa_audio_setinfo(lxa_state,
+			    AUDIO_SETINFO, arg, mode));
+		case AUDIO_GETINFO:
+			return (lxa_audio_getinfo(lxa_state, arg, mode));
+		}
+	}
+
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) {
+		/* deal with native ioctl */
+		switch (cmd) {
+		case LXA_IOC_MIXER_GET_VOL:
+			return (lxa_mixer_get_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_SET_VOL:
+			return (lxa_mixer_set_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_GET_MIC:
+			return (lxa_mixer_get_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_SET_MIC:
+			return (lxa_mixer_set_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_GET_PCM:
+			return (lxa_mixer_get_pcm(lxa_state, arg, mode));
+		case LXA_IOC_MIXER_SET_PCM:
+			return (lxa_mixer_set_pcm(lxa_state, arg, mode));
+		}
+
+	}
+	/* anything not handled above is unsupported on this node type */
+	return (EINVAL);
+}
+/*
+ * Devmap entry point (installed as the devmap slot of lxa_cb_ops).
+ *
+ * Allocates `len' bytes of umem and maps them into the caller so that an
+ * audio node opened for writing can be driven through mmap.  Only one
+ * mapping per open is allowed.  On success *maplen is set to `len'.
+ */
+static int
+/*ARGSUSED*/
+lxa_devmap(dev_t dev, devmap_cookie_t dhp,
+    offset_t off, size_t len, size_t *maplen, uint_t model)
+{
+	lxa_state_t
*lxa_state;
+	minor_t		minor = getminor(dev);
+	ddi_umem_cookie_t umem_cookie;
+	void		*umem_ptr;
+	int		rv;
+
+	/* get the handle for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/* we only support mmaping of audio devices */
+	if (lxa_state->lxas_type != LXA_TYPE_AUDIO)
+		return (EINVAL);
+
+	/* we only support output via mmap */
+	if ((lxa_state->lxas_flags & FWRITE) == 0)
+		return (EINVAL);
+
+	/*
+	 * sanity check the amount of memory the user is allocating: it
+	 * must be non-zero, no larger than LXA_OSS_FRAG_MEM, and a whole
+	 * number of fragments.
+	 * NOTE(review): assumes lxas_frag_size is never 0 here - confirm.
+	 */
+	if ((len == 0) ||
+	    (len > LXA_OSS_FRAG_MEM) ||
+	    ((len % lxa_state->lxas_frag_size) != 0))
+		return (EINVAL);
+
+	/* allocate and clear memory to mmap */
+	umem_ptr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &umem_cookie);
+	if (umem_ptr == NULL)
+		return (ENOMEM);
+	bzero(umem_ptr, len);
+
+	/* setup the memory mappings */
+	rv = devmap_umem_setup(dhp, lxa_dip, NULL, umem_cookie, 0, len,
+	    PROT_USER | PROT_READ | PROT_WRITE, 0, NULL);
+	if (rv != 0) {
+		ddi_umem_free(umem_cookie);
+		return (EIO);
+	}
+
+	mutex_enter(&lxa_lock);
+
+	/* we only support one mmap per open */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		ASSERT(lxa_state->lxas_umem_ptr != NULL);
+		mutex_exit(&lxa_lock);
+		ddi_umem_free(umem_cookie);
+		return (EBUSY);
+	}
+	ASSERT(lxa_state->lxas_umem_ptr == NULL);
+	/* publish the mapping in the per-open state */
+	*maplen = len;
+	lxa_state->lxas_umem_len = len;
+	lxa_state->lxas_umem_ptr = umem_ptr;
+	lxa_state->lxas_umem_cookie = umem_cookie;
+	mutex_exit(&lxa_lock);
+	return (0);
+}
+/*
+ * Attach entry point.  Only DDI_ATTACH of instance 0 is supported.
+ * Records the devinfo pointer, initializes lxa_lock, creates the devctl,
+ * dsp and mixer minor nodes and allocates the minor id space and the two
+ * state hashes.
+ */
+static int
+/*ARGSUSED*/
+lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int	instance = ddi_get_instance(dip);
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	ASSERT(instance == 0);
+	if (instance != 0)
+		return (DDI_FAILURE);
+
+	lxa_dip = dip;
+	mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/* create our minor nodes */
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR,
+	    LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS)
+
return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR,
+	    LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR,
+	    LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	/* allocate our data structures */
+	lxa_minor_id = id_space_create("lxa_minor_id",
+	    LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS);
+	lxa_state_hash = mod_hash_create_idhash("lxa_state_hash",
+	    lxa_state_hash_size, mod_hash_null_valdtor);
+	lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash",
+	    lxa_zstate_hash_size, mod_hash_null_valdtor);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Detach entry point.  Only DDI_DETACH is supported, and the detach is
+ * refused while any zone is still registered with the driver.  Tears
+ * down everything lxa_attach() set up: both hashes, the minor id space
+ * and lxa_lock.
+ */
+static int
+/*ARGSUSED*/
+lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+	if (lxa_registered_zones > 0)
+		return (DDI_FAILURE);
+
+	/*
+	 * each hash must be destroyed with the destructor that matches
+	 * the constructor used in lxa_attach(): lxa_state_hash is an id
+	 * hash, lxa_zstate_hash is a string hash.
+	 */
+	mod_hash_destroy_idhash(lxa_state_hash);
+	mod_hash_destroy_strhash(lxa_zstate_hash);
+	id_space_destroy(lxa_minor_id);
+
+	/* don't leave stale pointers behind for a later re-attach */
+	lxa_state_hash = NULL;
+	lxa_zstate_hash = NULL;
+	lxa_dip = NULL;
+
+	/* pairs with the mutex_init() in lxa_attach() */
+	mutex_destroy(&lxa_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Getinfo entry point.  The driver has a single instance, so every dev_t
+ * maps to lxa_dip and to instance number 0.
+ */
+static int
+/*ARGSUSED*/
+lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
+{
+	switch (infocmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		*resultp = lxa_dip;
+		return (DDI_SUCCESS);
+
+	case DDI_INFO_DEVT2INSTANCE:
+		*resultp = (void *)0;
+		return (DDI_SUCCESS);
+	}
+	return (DDI_FAILURE);
+}
+
+/*
+ * Driver flags
+ */
+static struct cb_ops lxa_cb_ops = {
+	lxa_open,		/* open */
+	lxa_close,		/* close */
+	nodev,			/* strategy */
+	nodev,			/* print */
+	nodev,			/* dump */
+	lxa_read,		/* read */
+	lxa_write,		/* write */
+	lxa_ioctl,		/* ioctl */
+	lxa_devmap,		/* devmap */
+	nodev,			/* mmap */
+	ddi_devmap_segmap,	/* segmap */
+	nochpoll,		/* chpoll */
+	ddi_prop_op,		/* prop_op */
+	NULL,			/* cb_str */
+	D_NEW | D_MP | D_DEVMAP,
+	CB_REV,
+	NULL,
+	NULL
+};
+
+static struct dev_ops lxa_ops = {
+	DEVO_REV,
+	0,
+	lxa_getinfo,
+	nulldev,
+	nulldev,
+	lxa_attach,
+
lxa_detach,
+	nodev,
+	&lxa_cb_ops,
+	NULL,
+	NULL,
+	ddi_quiesce_not_needed,		/* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+	&mod_driverops,		/* type of module */
+	"linux audio driver",	/* description of module */
+	&lxa_ops		/* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	&modldrv,
+	NULL
+};
+
+/*
+ * standard module entry points; each one simply forwards to the matching
+ * mod_install()/mod_remove()/mod_info() call on modlinkage.
+ */
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+int
+_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf
new file mode 100644
index 0000000000..2eeb5eb7ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_audio" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..6520ca5597
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * This driver attempts to emulate some of the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ * - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ * - the dev_t minor node value for each open ptm instance corresponds
+ * to its associated slave terminal device number. i.e. the path to
+ * the slave terminal device associated with an open ptm instance
+ * whose dev_t minor node value is 5, is /dev/pts/5.
+ *
+ * - the ptm driver always allocates the lowest numbered slave terminal
+ * device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/ldlinux.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define	LP_PTM_PATH		"/dev/ptmx"
+#define	LP_PTS_PATH		"/dev/pts/"
+#define	LP_PTS_DRV_NAME		"pts"
+#define	LP_PTS_USEC_DELAY	(5 * 1000)	/* 5 ms */
+#define	LP_PTS_USEC_DELAY_MAX	(5 * MILLISEC)	/* 5 ms */
+
+/*
+ * this driver is layered on top of the ptm driver. we'd like to
+ * make this driver's minor name space a mirror of the ptm driver's
+ * namespace, but we can't actually do this. the reason is that the
+ * ptm driver is opened via the clone driver. therefore no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver. therefore we need to have at least one minor node accessible
+ * via the filesystem so that consumers can open it. we use the device
+ * node with a minor number of 0 for this purpose. what this means is
+ * that minor node 0 can't be used to map ptm minor node 0. since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one. i.e. a ptm minor node with a value of 0 will
+ * correspond to our minor node with a value of 1. these mappings are
+ * managed with the following macros.
+ */
+#define	DEVT_TO_INDEX(x)	LX_PTM_DEV_TO_PTS(x)
+#define	INDEX_TO_MINOR(x)	((x) + 1)
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA
+ */
+#define	LP_PTY_INC	128
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device. Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutexes or condition variables into the
+ * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded. Mutexes and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexes or cvs in the array
+ * then the system would tip over. In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures. (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+	int		lpo_rops;	/* reads in progress (0 or 1) */
+	kcondvar_t	lpo_rops_cv;	/* signalled when a read ends */
+	kmutex_t	lpo_rops_lock;	/* protects lpo_rops */
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation. These handles are stored in an array
+ * hanging off the lx_ptm_state structure.
+ */
+typedef struct lx_ptm_handle {
+	/* Device handle to the underlying real /dev/ptmx master terminal.
*/
+	ldi_handle_t		lph_handle;
+
+	/* Flag to indicate if TIOCPKT mode has been enabled. */
+	int			lph_pktio;
+
+	/* Number of EOFs (slave device closes) seen on this master. */
+	int			lph_eofed;
+
+	/* Callback handler in the ptm driver to check if slave is open. */
+	ptmptsopencb_t		lph_ppocb;
+
+	/* Pointer to state for operations on underlying device. */
+	lx_ptm_ops_t		*lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver.
+ */
+typedef struct lx_ptm_state {
+	/* lx_ptm device devinfo pointer */
+	dev_info_t		*lps_dip;
+
+	/* LDI ident used to open underlying real /dev/ptmx master terminals. */
+	ldi_ident_t		lps_li;
+
+	/* pts driver's major number */
+	major_t			lps_pts_major;
+
+	/* rw lock used to manage access and growth of lps_lh_array */
+	krwlock_t		lps_lh_rwlock;
+
+	/* number of elements in lps_lh_array */
+	uint_t			lps_lh_count;
+
+	/* Array of handles to underlying real /dev/ptmx master terminals. */
+	lx_ptm_handle_t		*lps_lh_array;
+} lx_ptm_state_t;
+
+/* The lx_ptm global state structure (a single static instance). */
+static lx_ptm_state_t	lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone.
+ */
+static char *lx_pts_mods[] = {
+	"ptem",
+	"ldterm",
+	"ttcompat",
+	LDLINUX_MOD,
+	NULL
+};
+/*
+ * Grow lps_lh_array so that `index' becomes a valid slot.  The new array
+ * has at least LP_PTY_INC more entries than the current one; existing
+ * entries are copied over and the old array is freed.  Must be called
+ * without lps_lh_rwlock held, since the function takes it as writer itself.
+ */
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+	uint_t new_lh_count, old_lh_count;
+	lx_ptm_handle_t *new_lh_array, *old_lh_array;
+
+	/*
+	 * allocate a new array. we drop the rw lock on the array so that
+	 * readers can still access devices in case our memory allocation
+	 * blocks.
+	 */
+	new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+	new_lh_array =
+	    kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
+
+	/*
+	 * double check that we still actually need to increase the size
+	 * of the array
+	 */
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	if (index < lps.lps_lh_count) {
+		/* someone beat us to it so there's nothing more to do */
+		rw_exit(&lps.lps_lh_rwlock);
+		kmem_free(new_lh_array,
+		    sizeof (lx_ptm_handle_t) * new_lh_count);
+		return;
+	}
+
+	/* copy the existing data into the new array */
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_count != 0) {
+		bcopy(lps.lps_lh_array, new_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+	}
+
+	/* save info on the old array */
+	old_lh_array = lps.lps_lh_array;
+	old_lh_count = lps.lps_lh_count;
+
+	/* install the new array */
+	lps.lps_lh_array = new_lh_array;
+	lps.lps_lh_count = new_lh_count;
+
+	rw_exit(&lps.lps_lh_rwlock);
+
+	/* free the old array (outside the lock; nobody can reach it now) */
+	if (old_lh_array != NULL) {
+		kmem_free(old_lh_array,
+		    sizeof (lx_ptm_handle_t) * old_lh_count);
+	}
+}
+/*
+ * Record the layered handle `lh' in slot `index' of lps_lh_array, growing
+ * the array first if the slot does not exist yet.  A fresh lx_ptm_ops
+ * structure is allocated for the slot.  The slot must currently be empty.
+ */
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+	lx_ptm_ops_t *lpo;
+
+	ASSERT(lh != NULL);
+
+	/* Allocate and initialize the ops structure */
+	lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
+	mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	/* check if we need to grow the size of the layered handle array */
+	if (index >= lps.lps_lh_count) {
+		/* lx_ptm_lh_grow() takes the lock itself, so drop it */
+		rw_exit(&lps.lps_lh_rwlock);
+		lx_ptm_lh_grow(index);
+		rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	}
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
+	ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
+	ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
+
ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
+
+	/* insert the new handle and return */
+	lps.lps_lh_array[index].lph_handle = lh;
+	lps.lps_lh_array[index].lph_pktio = 0;
+	lps.lps_lh_array[index].lph_eofed = 0;
+	lps.lps_lh_array[index].lph_lpo = lpo;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Clear slot `index' of lps_lh_array and return the layered handle that
+ * was stored there.  The slot's lx_ptm_ops structure is torn down and
+ * freed; no read may be in progress on it.
+ */
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+	ldi_handle_t	lh;
+	lx_ptm_ops_t	*lpo;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+	ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+	ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+	/*
+	 * tear down and free the ops structure.  the mutex and cv were
+	 * set up with mutex_init()/cv_init() in lx_ptm_lh_insert() and
+	 * must be destroyed before their memory is released.
+	 */
+	lpo = lps.lps_lh_array[index].lph_lpo;
+	lps.lps_lh_array[index].lph_lpo = NULL;
+	cv_destroy(&lpo->lpo_rops_cv);
+	mutex_destroy(&lpo->lpo_rops_lock);
+	kmem_free(lpo, sizeof (lx_ptm_ops_t));
+
+	/* remove the handle and return it */
+	lh = lps.lps_lh_array[index].lph_handle;
+	lps.lps_lh_array[index].lph_handle = NULL;
+	lps.lps_lh_array[index].lph_pktio = 0;
+	lps.lps_lh_array[index].lph_eofed = 0;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lh);
+}
+
+/*
+ * Copy the "is the slave open" callback stored in slot `index' into
+ * *ppocb.
+ */
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	/* nothing is modified here, so a reader hold is sufficient */
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	*ppocb = lps.lps_lh_array[index].lph_ppocb;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Store a copy of *ppocb as the "is the slave open" callback of slot
+ * `index'.
+ */
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	lps.lps_lh_array[index].lph_ppocb = *ppocb;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the layered handle stored in slot `index'.
+ */
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+	ldi_handle_t lh;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	/* return the handle */
+	lh = lps.lps_lh_array[index].lph_handle;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lh);
+}
+
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+	lx_ptm_ops_t *lpo;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
+
+	/* return the ops structure attached to this slot */
+	lpo = lps.lps_lh_array[index].lph_lpo;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lpo);
+}
+/* Return the TIOCPKT (packet mode) flag of slot `index'. */
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+	int pktio;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	/* return the pktio state */
+	pktio = lps.lps_lh_array[index].lph_pktio;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (pktio);
+}
+/* Set the TIOCPKT (packet mode) flag of slot `index' to `pktio'. */
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	/* set the pktio state */
+	lps.lps_lh_array[index].lph_pktio = pktio;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+/* Return the number of EOFs recorded so far for slot `index'. */
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+	int eofed;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	/* return the eofed state */
+	eofed = lps.lps_lh_array[index].lph_eofed;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (eofed);
+}
+/* Record one more EOF for slot `index' (the count is only ever bumped). */
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	/* bump the eofed count */
+	lps.lps_lh_array[index].lph_eofed++;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+/*
+ * Begin a serialized read on the master terminal identified by `dev'.
+ * Waits until no other read is in progress and then marks one as started.
+ * Returns 0 on success, or -1 if cv_wait_sig() returned 0 while waiting
+ * (the caller, lx_ptm_read(), turns that into EINTR).
+ */
+static int
+lx_ptm_read_start(dev_t dev)
+{
+	lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&lpo->lpo_rops_lock);
+	ASSERT(lpo->lpo_rops >= 0);
+
+	/* Wait for other read operations to finish */
+	while (lpo->lpo_rops != 0) {
+		if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
+			mutex_exit(&lpo->lpo_rops_lock);
+			return (-1);
+		}
+	}
+
+	/* Start a read operation */
+
VERIFY(++lpo->lpo_rops == 1);
+	mutex_exit(&lpo->lpo_rops_lock);
+	return (0);
+}
+/*
+ * End the read started by lx_ptm_read_start() on `dev' and wake one
+ * thread waiting to start its own read.
+ */
+static void
+lx_ptm_read_end(dev_t dev)
+{
+	lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&lpo->lpo_rops_lock);
+	ASSERT(lpo->lpo_rops >= 0);
+
+	/* End a read operation */
+	VERIFY(--lpo->lpo_rops == 0);
+	cv_signal(&lpo->lpo_rops_cv);
+
+	mutex_exit(&lpo->lpo_rops_lock);
+}
+/*
+ * Ask the ptm driver, via the callback saved at open time, whether the
+ * slave side of `dev' is open.  Returns the callback's result unchanged;
+ * callers treat 0 as "slave is closed".
+ */
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+	ptmptsopencb_t ppocb;
+
+	lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
+	return (ppocb.ppocb_func(ppocb.ppocb_arg));
+}
+/*
+ * Consume one EOF message from the head of the stream behind `lh'.
+ * The result of the read is deliberately ignored.
+ */
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+	struct uio	uio;
+	iovec_t		iov;
+	char		junk[1];
+
+	/*
+	 * We can remove any EOF message from the head of the stream by
+	 * doing a zero byte read from the stream.
+	 */
+	iov.iov_len = 0;
+	iov.iov_base = junk;
+	uio.uio_iovcnt = 1;
+	uio.uio_iov = &iov;
+	uio.uio_resid = iov.iov_len;
+	uio.uio_offset = 0;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+	(void) ldi_read(lh, &uio, kcred);
+}
+/*
+ * Drop at most one EOF message from the head of the stream for `dev'.
+ * *rvalp is set to 1 if an EOF was found and dropped, 0 otherwise.
+ * Returns 0 on success or the error from the I_NREAD ioctl.
+ */
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+	ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int err, msg_size, msg_count;
+
+	*rvalp = 0;
+
+	/*
+	 * Check if there is an EOF message (represented by a zero length
+	 * data message) at the head of the stream. Note that the
+	 * I_NREAD ioctl is a streams framework ioctl so it will succeed
+	 * even if there have been previous write errors on this stream.
+	 */
+	if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+	    FKIOCTL, kcred, &msg_count)) != 0)
+		return (err);
+
+	if ((msg_count == 0) || (msg_size != 0)) {
+		/* No EOF message found */
+		return (0);
+	}
+
+	/* Record the fact that the slave device has been closed.
*/
+	lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+
+	/* drop the EOF */
+	lx_ptm_eof_read(lh);
+	*rvalp = 1;
+	return (0);
+}
+
+/*
+ * Drop every EOF message queued at the head of the stream for `dev'.
+ * If rvalp is non-NULL, *rvalp is set to 1 when at least one EOF was
+ * dropped and to 0 otherwise.  Returns 0 on success or the first error
+ * reported by lx_ptm_eof_drop_1().
+ */
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+	int rval, err;
+
+	if (rvalp != NULL)
+		*rvalp = 0;
+	for (;;) {
+		if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
+			return (err);
+		if (rval == 0)
+			return (0);
+		if (rvalp != NULL)
+			*rvalp = 1;
+	}
+}
+
+/*
+ * Check whether anything is waiting to be read on `dev'.
+ *
+ * With ignore_eof set the check is based on the byte count reported by
+ * FIONREAD, so zero length (EOF) messages do not count.  Otherwise it is
+ * based on the message count reported by I_NREAD, so a queued EOF does
+ * count.  *rvalp is set to 1 if something is waiting and 0 if not.
+ * Returns 0 on success or the error from the ioctl.
+ */
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+	ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int err;
+
+	*rvalp = 0;
+	if (ignore_eof) {
+		int size, rval;
+
+		if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
+		    FKIOCTL, kcred, &rval)) != 0)
+			return (err);
+		if (size != 0)
+			*rvalp = 1;
+	} else {
+		int msg_size, msg_count;
+
+		if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+		    FKIOCTL, kcred, &msg_count)) != 0)
+			return (err);
+		if (msg_count != 0)
+			*rvalp = 1;
+	}
+	return (0);
+}
+
+/*
+ * Attach entry point.  Creates the single filesystem-visible minor node,
+ * obtains the LDI ident used for layered opens of /dev/ptmx, and
+ * initializes the global lps state (pts major number, rw lock, empty
+ * handle array).
+ */
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int err;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	err = ldi_ident_from_dip(dip, &lps.lps_li);
+	if (err != 0) {
+		/*
+		 * undo the node created above.  it has to be removed by
+		 * the name it was created with, which is not necessarily
+		 * the devinfo node name.
+		 */
+		ddi_remove_minor_node(dip, LX_PTM_MINOR_NODE);
+		return (DDI_FAILURE);
+	}
+
+	lps.lps_dip = dip;
+	lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+	rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+	lps.lps_lh_count = 0;
+	lps.lps_lh_array = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ldi_ident_release(lps.lps_li);
+	lps.lps_dip = NULL;
+
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_array != NULL) {
+		kmem_free(lps.lps_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+
lps.lps_lh_array = NULL; + lps.lps_lh_count = 0; + } + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp) +{ + struct strioctl iocb; + ptmptsopencb_t ppocb = { NULL, NULL }; + ldi_handle_t lh; + major_t maj, our_major = getmajor(*devp); + minor_t min, lastmin; + uint_t index, anchor = 1; + dev_t ptm_dev; + int err, rval = 0; + + /* + * Don't support the FNDELAY flag and FNONBLOCK until we either + * find a Linux app that opens /dev/ptmx with the O_NDELAY + * or O_NONBLOCK flags explicitly, or until we create test cases + * to determine how reads of master terminal devices opened with + * these flags behave in different situations on Linux. Supporting + * these flags will involve enhancing our read implementation + * and changing the way it deals with EOF notifications. + */ + if (flag & (FNDELAY | FNONBLOCK)) + return (ENOTSUP); + + /* + * we're layered on top of the ptm driver so open that driver + * first. (note that we're opening /dev/ptmx in the global + * zone, not ourselves in the Linux zone.) + */ + err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li); + if (err != 0) + return (err); + + /* get the devt returned by the ptmx open */ + err = ldi_get_dev(lh, &ptm_dev); + if (err != 0) { + (void) ldi_close(lh, flag, credp); + return (err); + } + + /* + * we're a cloning driver so here's well change the devt that we + * return. the ptmx is also a cloning driver so we'll just use + * it's minor number as our minor number (it already manages it's + * minor name space so no reason to duplicate the effort.) + */ + index = getminor(ptm_dev); + *devp = makedevice(our_major, INDEX_TO_MINOR(index)); + + /* Get a callback function to query if the pts device is open. 
*/
+	iocb.ic_cmd = PTMPTSOPENCB;
+	iocb.ic_timout = 0;
+	iocb.ic_len = sizeof (ppocb);
+	iocb.ic_dp = (char *)&ppocb;
+
+	err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
+	if ((err != 0) || (rval != 0)) {
+		(void) ldi_close(lh, flag, credp);
+		return (EIO); /* XXX return something else here? */
+	}
+	ASSERT(ppocb.ppocb_func != NULL);
+
+	/*
+	 * now setup autopush for the terminal slave device. this is
+	 * necessary so that when a Linux program opens the device we
+	 * can push required strmod modules onto the stream. in Solaris
+	 * this is normally done by the application that actually
+	 * allocates the terminal.
+	 */
+	maj = lps.lps_pts_major;
+	min = index;
+	lastmin = 0;
+	err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
+	    &anchor, lx_pts_mods);
+	if (err != 0) {
+		(void) ldi_close(lh, flag, credp);
+		return (EIO); /* XXX return something else here? */
+	}
+
+	/* save off this layered handle for future accesses */
+	lx_ptm_lh_insert(index, lh);
+	lx_ptm_lh_set_ppocb(index, &ppocb);
+	return (0);
+}
+/*
+ * Close entry point.  Forgets the layered handle for this minor, clears
+ * the autopush configuration of the matching slave device (retrying until
+ * that succeeds) and only then closes the underlying ptm device.  Returns
+ * the result of that ldi_close().
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	ldi_handle_t	lh;
+	major_t		maj;
+	minor_t		min, lastmin;
+	uint_t		index;
+	int		err;
+
+	index = DEVT_TO_INDEX(dev);
+
+	/*
+	 * we must cleanup all the state associated with this major/minor
+	 * terminal pair before actually closing the ptm master device.
+	 * this is required because once the close of the ptm device is
+	 * complete the major/minor terminal pair is immediately available for
+	 * re-use in any zone.
+	 */
+
+	/* free up our saved reference for this layered handle */
+	lh = lx_ptm_lh_remove(index);
+
+	/* unconfigure autopush for the associated terminal slave device */
+	maj = lps.lps_pts_major;
+	min = index;
+	lastmin = 0;
+	do {
+		/*
+		 * we loop here because we don't want to release this ptm
+		 * node if autopush can't be disabled on the associated
+		 * slave device because then bad things could happen if
+		 * another brand were to get this terminal allocated
+		 * to them.
+		 *
+		 * XXX should we ever give up?
+		 */
+		err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
+		    0, NULL);
+	} while (err != 0);
+
+	err = ldi_close(lh, flag, credp);
+
+	/*
+	 * note that we don't have to bother with changing the permissions
+	 * on the associated slave device here. the reason is that no one
+	 * can actually open the device until its associated master
+	 * device is re-opened, which will result in the permissions on
+	 * it being reset.
+	 */
+	return (err);
+}
+/*
+ * Perform one read attempt on the master terminal for `dev'.
+ *
+ * Returns non-zero on error (EIO when the slave has been opened and
+ * closed, is currently closed, and nothing is queued).  On success *loop
+ * is 0 if data was transferred, or 1 if the read returned zero bytes
+ * (an EOF), in which case the caller is expected to retry.
+ */
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		err, rval;
+	struct uio	uio = *uiop;	/* snapshot to detect progress */
+
+	*loop = 0;
+
+	/*
+	 * Here's another way that Linux master terminals behave differently
+	 * from Solaris master terminals. If you do a read on a Linux
+	 * master terminal (that was opened without NDELAY and NONBLOCK)
+	 * whose corresponding slave terminal is currently closed and
+	 * has been opened and closed at least once, Linux returns -1 and
+	 * sets errno to EIO whereas Solaris blocks.
+	 */
+	if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
+		/* Slave has been opened and closed at least once. */
+		if (lx_ptm_pts_isopen(dev) == 0) {
+			/*
+			 * Slave is closed. Make sure that data is available
+			 * before attempting a read.
+			 */
+			if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
+				return (err);
+
+			/* If there is no data available then return. */
+			if (rval == 0)
+				return (EIO);
+		}
+	}
+
+	/* Actually do the read operation.
*/
+	if ((err = ldi_read(lh, uiop, credp)) != 0)
+		return (err);
+
+	/* If read returned actual data then return. */
+	if (uio.uio_resid != uiop->uio_resid)
+		return (0);
+
+	/*
+	 * This was a zero byte read (ie, an EOF). This indicates
+	 * that the slave terminal device has been closed. Record
+	 * the fact that the slave device has been closed and retry
+	 * the read operation.
+	 */
+	lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+	*loop = 1;
+	return (0);
+}
+/*
+ * Read entry point.  Reads from the underlying master terminal, retrying
+ * for as long as lx_ptm_read_loop() reports an EOF.  When packet mode
+ * (TIOCPKT) is enabled the first byte of the caller's buffer is reserved
+ * and, once the read has succeeded, filled in with TIOCPKT_DATA.
+ */
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	int		pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+	int		err, loop;
+	struct uio	uio;
+	struct iovec	iovp;
+
+	ASSERT(uiop->uio_iovcnt > 0);
+
+	/*
+	 * If packet mode has been enabled (via TIOCPKT) we need to pad
+	 * all read requests with a leading byte that indicates any
+	 * relevant control status information.
+	 */
+	if (pktio != 0) {
+		/*
+		 * We'd like to write the control information into
+		 * the current buffer but we can't yet. We don't
+		 * want to modify userspace memory here only to have
+		 * the read operation fail later. So instead
+		 * what we'll do here is read one character from the
+		 * beginning of the memory pointed to by the uio
+		 * structure. This will advance the output pointer
+		 * by one. Then when the read completes successfully
+		 * we can update the byte that we passed over. Before
+		 * we do the read make a copy of the current uiop and
+		 * iovec structs so we can write to them later.
+		 */
+		uio = *uiop;
+		iovp = *uiop->uio_iov;
+		uio.uio_iov = &iovp;
+
+		if (uwritec(uiop) == -1)
+			return (EFAULT);
+	}
+
+	do {
+		/*
+		 * Before we actually attempt a read operation we need
+		 * to make sure there's some buffer space to actually
+		 * read in some data. We do this because if we're in
+		 * pktio mode and the caller only requested one byte,
+		 * then we've already used up that one byte and we
+		 * don't want to pass this read request.
Doing a 0 + * byte read (unless there is a problem with the stream + * head) always returns succcess. Normally when a streams + * read returns 0 bytes we interpret that as an EOF on + * the stream (ie, the slave side has been opened and + * closed) and we ignore it and re-try the read operation. + * So if we pass on a 0 byte read here lx_ptm_read_loop() + * will tell us to loop around and we'll end up in an + * infinite loop. + */ + if (uiop->uio_resid == 0) + break; + + /* + * Serialize all reads. We need to do this so that we can + * properly emulate the behavior of master terminals on Linux. + * In reality this serializaion should not pose any kind of + * performance problem since it would be very strange to have + * multiple threads trying to read from the same master + * terminal device concurrently. + */ + if (lx_ptm_read_start(dev) != 0) + return (EINTR); + + err = lx_ptm_read_loop(dev, uiop, credp, &loop); + lx_ptm_read_end(dev); + if (err != 0) + return (err); + } while (loop != 0); + + if (pktio != 0) { + uint8_t pktio_data = TIOCPKT_DATA; + + /* + * Note that the control status information we + * pass back is faked up in the sense that we + * don't actually report any events, we always + * report a status of 0. + */ + if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0) + return (EFAULT); + } + + return (0); +} + +static int +lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err; + + err = ldi_write(lh, uiop, credp); + + return (err); +} + +static int +lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err; + + /* + * here we need to make sure that we never allow the + * I_SETSIG and I_ESETSIG ioctls to pass through. we + * do this because we can't support them. 
+ * + * the native Solaris ptm device supports these ioctls because + * they are streams framework ioctls and all streams devices + * support them by default. these ioctls cause the current + * process to be registered with a stream and receive signals + * when certain stream events occur. + * + * a problem arises with cleanup of these registrations + * for layered drivers. + * + * normally the streams framework is notified whenever a + * process closes any reference to a stream and it goes ahead + * and cleans up these registrations. but actual device drivers + * are not notified when a process performs a close operation + * unless the process is closing the last opened reference to + * the device on the entire system. + * + * so while we could pass these ioctls on and allow processes + * to register for signal delivery, we would never receive + * any notification when those processes exit (or close a + * stream) and we wouldn't be able to unregister them. + * + * luckily these operations are streams specific and Linux + * doesn't support streams devices. so it doesn't actually + * seem like we need to support these ioctls. if it turns + * out that we do need to support them for some reason in + * the future, the current driver model will have to be + * enhanced to better support streams device layering. + */ + if ((cmd == I_SETSIG) || (cmd == I_ESETSIG)) + return (EINVAL); + + /* + * here we fake up support for TIOCPKT. Linux applications expect + * /etc/ptmx to support this ioctl, but on Solaris it doesn't. + * (it is supported on older bsd style ptys.) so we'll fake + * up support for it here. + * + * the reason that this ioctl is emulated here instead of in + * userland is that this ioctl affects the results returned + * from read() operations. if this ioctl was emulated in + * userland the brand library would need to intercept all + * read operations and check to see if pktio was enabled + * for the fd being read from. 
since this ioctl only needs + * to be supported on the ptmx device it makes more sense + * to support it here where we can easily update the results + * returned for read() operations performed on ourselves. + */ + if (cmd == TIOCPKT) { + int pktio; + + if (ddi_copyin((void *)arg, &pktio, sizeof (pktio), + mode) != DDI_SUCCESS) + return (EFAULT); + + if (pktio == 0) + lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0); + else + lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1); + + return (0); + } + + err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp); + + return (err); +} + +static int +lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp, int *loop) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + short reventsp2; + int err, rval; + + *loop = 0; + + /* + * If the slave device has been opened and closed at least + * once and the slave device is currently closed, then poll + * always needs to returns immediatly. + */ + if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) && + (lx_ptm_pts_isopen(dev) == 0)) { + /* In this case always return POLLHUP */ + *reventsp = POLLHUP; + + /* + * Check if there really is data on the stream. + * If so set the correct return flags. + */ + if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) { + /* Something went wrong. */ + return (err); + } + if (rval != 0) + *reventsp |= (events & (POLLIN | POLLRDNORM)); + + /* + * Is the user checking for writability? Note that for ptm + * devices Linux seems to ignore the POLLWRBAND write flag. + */ + if ((events & POLLWRNORM) == 0) + return (0); + + /* + * To check if the stream is writable we have to actually + * call poll, but make sure to set anyyet to 1 to prevent + * the streams framework from setting up callbacks. 
+ */ + if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0) + return (err); + + *reventsp |= (reventsp2 & POLLWRNORM); + } else { + int lockstate; + + /* The slave device is open, do the poll */ + if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0) + return (err); + + /* + * Drop any leading EOFs on the stream. + * + * Note that we have to use pollunlock() here to avoid + * recursive mutex enters in the poll framework. The + * reason is that if there is an EOF message on the stream + * then the act of reading from the queue to remove the + * message can cause the ptm drivers event service + * routine to be invoked, and if there is no open + * slave device then the ptm driver may generate + * error messages and put them on the stream. This + * in turn will generate a poll event and the poll + * framework will try to invoke any poll callbacks + * associated with the stream. In the process of + * doing that the poll framework will try to aquire + * locks that we are already holding. So we need to + * drop those locks here before we do our read. + */ + lockstate = pollunlock(); + err = lx_ptm_eof_drop(dev, &rval); + pollrelock(lockstate); + if (err) + return (err); + + /* If no EOF was dropped then return */ + if (rval == 0) + return (0); + + /* + * An EOF was removed from the stream. Retry the entire + * poll operation from the top because polls on the ptm + * device should behave differently now. + */ + *loop = 1; + } + return (0); +} + +static int +lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp) +{ + int loop, err; + + do { + /* Serialize ourself wrt read operations. 
*/ + if (lx_ptm_read_start(dev) != 0) + return (EINTR); + + err = lx_ptm_poll_loop(dev, + events, anyyet, reventsp, phpp, &loop); + lx_ptm_read_end(dev); + if (err != 0) + return (err); + } while (loop != 0); + return (0); +} + +static struct cb_ops lx_ptm_cb_ops = { + lx_ptm_open, /* open */ + lx_ptm_close, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + lx_ptm_read, /* read */ + lx_ptm_write, /* write */ + lx_ptm_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + lx_ptm_poll, /* chpoll */ + ddi_prop_op, /* prop_op */ + NULL, /* cb_str */ + D_NEW | D_MP, + CB_REV, + NULL, + NULL +}; + +static struct dev_ops lx_ptm_ops = { + DEVO_REV, + 0, + ddi_getinfo_1to1, + nulldev, + nulldev, + lx_ptm_attach, + lx_ptm_detach, + nodev, + &lx_ptm_cb_ops, + NULL, + NULL, + ddi_quiesce_not_needed, /* quiesce */ +}; + +static struct modldrv modldrv = { + &mod_driverops, /* type of module */ + "Linux master terminal driver", /* description of module */ + &lx_ptm_ops /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf new file mode 100644 index 0000000000..481b4e3c74 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +name="lx_ptm" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c new file mode 100644 index 0000000000..15286718b6 --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -0,0 +1,943 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/thread.h> +#include <sys/systm.h> +#include <sys/syscall.h> +#include <sys/proc.h> +#include <sys/modctl.h> +#include <sys/cmn_err.h> +#include <sys/model.h> +#include <sys/exec.h> +#include <sys/lx_impl.h> +#include <sys/machbrand.h> +#include <sys/lx_syscalls.h> +#include <sys/lx_pid.h> +#include <sys/lx_futex.h> +#include <sys/lx_brand.h> +#include <sys/termios.h> +#include <sys/sunddi.h> +#include <sys/ddi.h> +#include <sys/vnode.h> +#include <sys/pathname.h> +#include <sys/auxv.h> +#include <sys/priv.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/archsystm.h> +#include <sys/zone.h> +#include <sys/brand.h> + +int lx_debug = 0; + +void lx_init_brand_data(zone_t *); +void lx_free_brand_data(zone_t *); +void lx_setbrand(proc_t *); +int lx_getattr(zone_t *, int, void *, size_t *); +int lx_setattr(zone_t *, int, void *, size_t); +int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); +int lx_get_kern_version(void); +void lx_set_kern_version(zone_t *, int); +void lx_copy_procdata(proc_t *, proc_t *); + +extern void lx_setrval(klwp_t *, int, int); +extern void lx_proc_exit(proc_t *, klwp_t *); +extern void lx_exec(); +extern int lx_initlwp(klwp_t *); +extern void lx_forklwp(klwp_t *, klwp_t *); +extern void lx_exitlwp(klwp_t *); +extern void lx_freelwp(klwp_t *); +extern greg_t lx_fixsegreg(greg_t, model_t); +extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *); + +int lx_systrace_brand_enabled; + +lx_systrace_f *lx_systrace_entry_ptr; +lx_systrace_f *lx_systrace_return_ptr; + +static int lx_systrace_enabled; + +static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, + struct intpdata *idata, int level, long *execsz, int setid, + caddr_t exec_file, struct cred *cred, int brand_action); + +/* lx brand */ +struct brand_ops lx_brops = { + lx_init_brand_data, + 
lx_free_brand_data, + lx_brandsys, + lx_setbrand, + lx_getattr, + lx_setattr, + lx_copy_procdata, + lx_proc_exit, + lx_exec, + lx_setrval, + lx_initlwp, + lx_forklwp, + lx_freelwp, + lx_exitlwp, + lx_elfexec, + NULL, + NULL, + NSIG, +}; + +struct brand_mach_ops lx_mops = { + NULL, + lx_brand_int80_callback, + NULL, + NULL, + NULL, + lx_fixsegreg, +}; + +struct brand lx_brand = { + BRAND_VER_1, + "lx", + &lx_brops, + &lx_mops +}; + +static struct modlbrand modlbrand = { + &mod_brandops, "lx brand", &lx_brand +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlbrand, NULL +}; + +void +lx_proc_exit(proc_t *p, klwp_t *lwp) +{ + zone_t *z = p->p_zone; + + ASSERT(p->p_brand != NULL); + ASSERT(p->p_brand_data != NULL); + + /* + * If init is dying and we aren't explicitly shutting down the zone + * or the system, then Solaris is about to restart init. The Linux + * init is not designed to handle a restart, which it interprets as + * a reboot. To give it a sane environment in which to run, we + * reboot the zone. + */ + if (p->p_pid == z->zone_proc_initpid) { + if (z->zone_boot_err == 0 && + z->zone_restart_init && + zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && + zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) + (void) zone_kadmin(A_REBOOT, 0, NULL, CRED()); + } + lx_exitlwp(lwp); + kmem_free(p->p_brand_data, sizeof (struct lx_proc_data)); + p->p_brand_data = NULL; +} + +void +lx_setbrand(proc_t *p) +{ + kthread_t *t = p->p_tlist; + int err; + + ASSERT(p->p_brand_data == NULL); + ASSERT(ttolxlwp(curthread) == NULL); + + p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP); + + /* + * This routine can only be called for single-threaded processes. + * Since lx_initlwp() can only fail if we run out of PIDs for + * multithreaded processes, we know that this can never fail. 
+ */ + err = lx_initlwp(t->t_lwp); + ASSERT(err == 0); +} + +/* ARGSUSED */ +int +lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) +{ + boolean_t val; + int num; + + if (attr == LX_ATTR_RESTART_INIT) { + if (bufsize > sizeof (boolean_t)) + return (ERANGE); + if (copyin(buf, &val, sizeof (val)) != 0) + return (EFAULT); + if (val != B_TRUE && val != B_FALSE) + return (EINVAL); + zone->zone_restart_init = val; + return (0); + } else if (attr == LX_KERN_VERSION_NUM) { + if (bufsize > sizeof (int)) + return (ERANGE); + if (copyin(buf, &num, sizeof (num)) != 0) + return (EFAULT); + lx_set_kern_version(zone, num); + return (0); + } + return (EINVAL); +} + +/* ARGSUSED */ +int +lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) +{ + int num; + if (attr == LX_ATTR_RESTART_INIT) { + if (*bufsize < sizeof (boolean_t)) + return (ERANGE); + if (copyout(&zone->zone_restart_init, buf, + sizeof (boolean_t)) != 0) + return (EFAULT); + *bufsize = sizeof (boolean_t); + return (0); + } else if (attr == LX_KERN_VERSION_NUM) { + if (*bufsize < sizeof (int)) + return (ERANGE); + num = lx_get_kern_version(); + if (copyout(&num, buf, sizeof (int)) != 0) + return (EFAULT); + *bufsize = sizeof (int); + return (0); + } + return (-EINVAL); +} + +/* + * Enable ptrace system call tracing for the given LWP. This is done by + * both setting the flag in that LWP's brand data (in the kernel) and setting + * the process-wide trace flag (in the brand library of the traced process). 
/*
 * Enable (set != 0) or disable (set == 0) ptrace system call tracing for
 * LWP `lwpid' of process `pid'.  Enabling sets the flag in that LWP's
 * brand data (in the kernel) and writes the process-wide trace flag (in
 * the brand library of the traced process).
 *
 * Returns 0 on success, ESRCH if the process, the LWP or their brand
 * data cannot be found, EPERM if the caller fails the credential check,
 * and EIO if the trace flag cannot be written into the target process.
 */
static int
lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
{
	proc_t *p;
	kthread_t *t;
	klwp_t *lwp;
	lx_proc_data_t *lpdp;
	lx_lwp_data_t *lldp;
	uintptr_t addr;
	int ret, flag = 1;

	/* Every exit path below must release the process via sprunlock(). */
	if ((p = sprlock(pid)) == NULL)
		return (ESRCH);

	if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
		sprunlock(p);
		return (EPERM);
	}

	if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
		sprunlock(p);
		return (ESRCH);
	}

	/* Both the process and the LWP must carry brand data. */
	if ((lpdp = p->p_brand_data) == NULL ||
	    (lldp = lwp->lwp_brand) == NULL) {
		sprunlock(p);
		return (ESRCH);
	}

	if (set) {
		/*
		 * Enable the ptrace flag for this LWP and this process. Note
		 * that we will turn off the LWP's ptrace flag, but we don't
		 * turn off the process's ptrace flag.
		 */
		lldp->br_ptrace = 1;
		lpdp->l_ptrace = 1;

		/* User address of the brand library's trace flag. */
		addr = lpdp->l_traceflag;

		/* p_lock is dropped around the write and retaken after. */
		mutex_exit(&p->p_lock);

		/*
		 * This can fail only in some rare corner cases where the
		 * process is exiting or we're completely out of memory. In
		 * these cases, it's sufficient to return an error to the ptrace
		 * consumer and leave the process-wide flag set.
		 */
		ret = uwrite(p, &flag, sizeof (flag), addr);

		mutex_enter(&p->p_lock);

		/*
		 * If we couldn't set the trace flag, unset the LWP's ptrace
		 * flag as the ptrace consumer won't expect this LWP to stop.
		 */
		if (ret != 0)
			lldp->br_ptrace = 0;
	} else {
		lldp->br_ptrace = 0;
		ret = 0;
	}

	sprunlock(p);

	/* Any failure from uwrite() is reported to the caller as EIO. */
	if (ret != 0)
		ret = EIO;

	return (ret);
}
+ */ + if (lldp->br_ptrace) { + lldp->br_ptrace = 0; + tsignal(t, SIGTRAP); + } +} + +void +lx_brand_systrace_enable(void) +{ + extern void lx_brand_int80_enable(void); + + ASSERT(!lx_systrace_enabled); + + lx_brand_int80_enable(); + + lx_systrace_enabled = 1; +} + +void +lx_brand_systrace_disable(void) +{ + extern void lx_brand_int80_disable(void); + + ASSERT(lx_systrace_enabled); + + lx_brand_int80_disable(); + + lx_systrace_enabled = 0; +} + +void +lx_init_brand_data(zone_t *zone) +{ + lx_zone_data_t *data; + ASSERT(zone->zone_brand == &lx_brand); + ASSERT(zone->zone_brand_data == NULL); + data = (lx_zone_data_t *)kmem_zalloc(sizeof (lx_zone_data_t), KM_SLEEP); + /* + * Set the default lxzd_kernel_version to LX_KERN_2_4. + * This can be changed by a call to setattr() during zone boot. + */ + data->lxzd_kernel_version = LX_KERN_2_4; + data->lxzd_max_syscall = LX_NSYSCALLS_2_4; + zone->zone_brand_data = data; +} + +void +lx_free_brand_data(zone_t *zone) +{ + kmem_free(zone->zone_brand_data, sizeof (lx_zone_data_t)); +} + +/* + * Get the addresses of the user-space system call handler and attach it to + * the proc structure. Returning 0 indicates success; the value returned + * by the system call is the value stored in rval. Returning a non-zero + * value indicates a failure; the value returned is used to set errno, -1 + * is returned from the syscall and the contents of rval are ignored. To + * set errno and have the syscall return a value other than -1 we can + * manually set errno and rval and return 0. + */ +int +lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + kthread_t *t = curthread; + proc_t *p = ttoproc(t); + lx_proc_data_t *pd; + int linux_call; + struct termios *termios; + uint_t termios_len; + int error; + lx_brand_registration_t reg; + + /* + * There is one operation that is suppored for non-branded + * process. B_EXEC_BRAND. 
This is the equilivant of an + * exec call, but the new process that is created will be + * a branded process. + */ + if (cmd == B_EXEC_BRAND) { + ASSERT(p->p_zone != NULL); + ASSERT(p->p_zone->zone_brand == &lx_brand); + return (exec_common( + (char *)arg1, (const char **)arg2, (const char **)arg3, + EBA_BRAND)); + } + + /* For all other operations this must be a branded process. */ + if (p->p_brand == NULL) + return (set_errno(ENOSYS)); + + ASSERT(p->p_brand == &lx_brand); + ASSERT(p->p_brand_data != NULL); + + switch (cmd) { + case B_REGISTER: + if (p->p_model == DATAMODEL_NATIVE) { + if (copyin((void *)arg1, ®, sizeof (reg)) != 0) { + lx_print("Failed to copyin brand registration " + "at 0x%p\n", (void *)arg1); + return (EFAULT); + } +#ifdef _LP64 + } else { + lx_brand_registration32_t reg32; + + if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) { + lx_print("Failed to copyin brand registration " + "at 0x%p\n", (void *)arg1); + return (EFAULT); + } + + reg.lxbr_version = (uint_t)reg32.lxbr_version; + reg.lxbr_handler = + (void *)(uintptr_t)reg32.lxbr_handler; + reg.lxbr_tracehandler = + (void *)(uintptr_t)reg32.lxbr_tracehandler; + reg.lxbr_traceflag = + (void *)(uintptr_t)reg32.lxbr_traceflag; +#endif + } + + if (reg.lxbr_version != LX_VERSION_1) { + lx_print("Invalid brand library version (%u)\n", + reg.lxbr_version); + return (EINVAL); + } + + lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n", + (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p); + pd = p->p_brand_data; + pd->l_handler = (uintptr_t)reg.lxbr_handler; + pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler; + pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag; + *rval = 0; + return (0); + case B_TTYMODES: + /* This is necessary for emulating TCGETS ioctls. 
*/ + if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(), + DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios, + &termios_len) != DDI_SUCCESS) + return (EIO); + + ASSERT(termios_len == sizeof (*termios)); + + if (copyout(&termios, (void *)arg1, sizeof (termios)) != 0) { + ddi_prop_free(termios); + return (EFAULT); + } + + ddi_prop_free(termios); + *rval = 0; + return (0); + + case B_ELFDATA: + pd = curproc->p_brand_data; + if (copyout(&pd->l_elf_data, (void *)arg1, + sizeof (lx_elf_data_t)) != 0) { + (void) set_errno(EFAULT); + return (*rval = -1); + } + *rval = 0; + return (0); + + case B_EXEC_NATIVE: + error = exec_common( + (char *)arg1, (const char **)arg2, (const char **)arg3, + EBA_NATIVE); + if (error) { + (void) set_errno(error); + return (*rval = -1); + } + return (*rval = 0); + + case B_LPID_TO_SPAIR: + /* + * Given a Linux pid as arg1, return the Solaris pid in arg2 and + * the Solaris LWP in arg3. We also translate pid 1 (which is + * hardcoded in many applications) to the zone's init process. 
+ */ + { + pid_t s_pid; + id_t s_tid; + + if ((pid_t)arg1 == 1) { + s_pid = p->p_zone->zone_proc_initpid; + /* handle the dead/missing init(1M) case */ + if (s_pid == -1) + s_pid = 1; + s_tid = 1; + } else if (lx_lpid_to_spair((pid_t)arg1, &s_pid, + &s_tid) < 0) + return (ESRCH); + + if (copyout(&s_pid, (void *)arg2, + sizeof (s_pid)) != 0 || + copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0) + return (EFAULT); + + *rval = 0; + return (0); + } + + case B_PTRACE_SYSCALL: + *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2, + (int)arg3); + return (0); + + case B_SYSENTRY: + if (lx_systrace_enabled) { + uint32_t args[6]; + + ASSERT(lx_systrace_entry_ptr != NULL); + + if (copyin((void *)arg2, args, sizeof (args)) != 0) + return (EFAULT); + + (*lx_systrace_entry_ptr)(arg1, args[0], args[1], + args[2], args[3], args[4], args[5]); + } + + lx_ptrace_fire(); + + pd = p->p_brand_data; + + /* + * If neither DTrace not ptrace are interested in tracing + * this process any more, turn off the trace flag. + */ + if (!lx_systrace_enabled && !pd->l_ptrace) + (void) suword32((void *)pd->l_traceflag, 0); + + *rval = 0; + return (0); + + case B_SYSRETURN: + if (lx_systrace_enabled) { + ASSERT(lx_systrace_return_ptr != NULL); + + (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0); + } + + lx_ptrace_fire(); + + pd = p->p_brand_data; + + /* + * If neither DTrace not ptrace are interested in tracing + * this process any more, turn off the trace flag. + */ + if (!lx_systrace_enabled && !pd->l_ptrace) + (void) suword32((void *)pd->l_traceflag, 0); + + *rval = 0; + return (0); + + case B_SET_AFFINITY_MASK: + case B_GET_AFFINITY_MASK: + /* + * Retrieve or store the CPU affinity mask for the + * requested linux pid. + * + * arg1 is a linux PID (0 means curthread). + * arg2 is the size of the given mask. + * arg3 is the address of the affinity mask. 
+ */ + return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval)); + + default: + linux_call = cmd - B_EMULATE_SYSCALL; + /* + * Only checking against highest syscall number for all kernel + * versions, since check for specific kernel version is done + * in userland prior to this call, and duplicating logic would + * be redundant. + */ + if (linux_call >= 0 && linux_call < LX_NSYSCALLS) { + *rval = lx_emulate_syscall(linux_call, arg1, arg2, + arg3, arg4, arg5, arg6); + return (0); + } + } + + return (EINVAL); +} + +int +lx_get_zone_kern_version(zone_t *zone) +{ + return (((lx_zone_data_t *)zone->zone_brand_data)->lxzd_kernel_version); +} + +int +lx_get_kern_version() +{ + return (lx_get_zone_kern_version(curzone)); +} + +void +lx_set_kern_version(zone_t *zone, int vers) +{ + lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data; + + lxzd->lxzd_kernel_version = vers; + if (vers == LX_KERN_2_6) + lxzd->lxzd_max_syscall = LX_NSYSCALLS_2_6; +} + +/* + * Copy the per-process brand data from a parent proc to a child. + */ +void +lx_copy_procdata(proc_t *child, proc_t *parent) +{ + lx_proc_data_t *cpd, *ppd; + + ppd = parent->p_brand_data; + + ASSERT(ppd != NULL); + + cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP); + *cpd = *ppd; + + child->p_brand_data = cpd; +} + +/* + * Currently, only 32-bit branded ELF executables are supported. + */ +#if defined(_LP64) +#define elfexec elf32exec +#define mapexec_brand mapexec32_brand +#endif /* _LP64 */ + +/* + * Exec routine called by elfexec() to load 32-bit Linux binaries. 
/*
 * Exec routine called by elfexec() to load 32-bit Linux binaries.
 *
 * The sequence is: exec the lx brand library in place of the requested
 * program, map the Linux executable `vp' (and, if it names an
 * interpreter, the Linux linker) into the address space, record where
 * everything landed in the process's lx_elf_data_t so the brand library
 * can fetch it later, and finally patch the brand aux vector entries.
 *
 * Returns 0 on success or an errno value from the failing step; EFAULT
 * if the brand aux vector entries cannot be copied out.
 */
static int
lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
    struct intpdata *idata, int level, long *execsz, int setid,
    caddr_t exec_file, struct cred *cred, int brand_action)
{
	int		error;
	vnode_t		*nvp;
	auxv32_t	phdr_auxv32[3] = {
	    { AT_SUN_BRAND_LX_PHDR, 0 },
	    { AT_SUN_BRAND_AUX2, 0 },
	    { AT_SUN_BRAND_AUX3, 0 }
	};
	Elf32_Ehdr	ehdr;
	Elf32_Addr	uphdr_vaddr;
	intptr_t	voffset;
	int		interp;
	int		i;
	struct execenv	env;
	struct user	*up = PTOU(ttoproc(curthread));
	lx_elf_data_t	*edp =
	    &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;

	ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
	ASSERT(ttoproc(curthread)->p_brand_data != NULL);

	/*
	 * Set the brandname and library name for the new process so that
	 * elfexec() puts them onto the stack.
	 */
	args->brandname = LX_BRANDNAME;
	args->emulator = LX_LIB_PATH;

	/*
	 * We will exec the brand library, and map in the linux linker and the
	 * linux executable.
	 */
	if ((error = lookupname(LX_LIB_PATH, UIO_SYSSPACE, FOLLOW, NULLVPP,
	    &nvp))) {
		uprintf("%s: not found.", LX_LIB);
		return (error);
	}

	/* The vnode hold from lookupname() is dropped on both paths. */
	if ((error = elfexec(nvp, uap, args, idata, level + 1, execsz, setid,
	    exec_file, cred, brand_action))) {
		VN_RELE(nvp);
		return (error);
	}
	VN_RELE(nvp);

	bzero(&env, sizeof (env));

	/* Map the Linux executable itself; this also fills in brk/bss. */
	if ((error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
	    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
	    &env.ex_brksize, NULL)))
		return (error);

	/*
	 * Save off the important properties of the lx executable. The brand
	 * library will ask us for this data later, when it is ready to set
	 * things up for the lx executable.
	 */
	edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
	    voffset + uphdr_vaddr;
	edp->ed_entry = voffset + ehdr.e_entry;
	edp->ed_phent = ehdr.e_phentsize;
	edp->ed_phnum = ehdr.e_phnum;

	if (interp) {
		if (ehdr.e_type == ET_DYN) {
			/*
			 * This is a shared object executable, so we need to
			 * pick a reasonable place to put the heap. Just don't
			 * use the first page.
			 */
			env.ex_brkbase = (caddr_t)PAGESIZE;
			env.ex_bssbase = (caddr_t)PAGESIZE;
		}

		/*
		 * If the program needs an interpreter (most do), map it in and
		 * store relevant information about it in the aux vector, where
		 * the brand library can find it.
		 */
		if ((error = lookupname(LX_LINKER, UIO_SYSSPACE, FOLLOW, NULLVPP,
		    &nvp))) {
			uprintf("%s: not found.", LX_LINKER);
			return (error);
		}
		/* From here on ehdr and voffset describe the linker. */
		if ((error = mapexec_brand(nvp, args, &ehdr, &uphdr_vaddr,
		    &voffset, exec_file, &interp, NULL, NULL, NULL, NULL))) {
			VN_RELE(nvp);
			return (error);
		}
		VN_RELE(nvp);

		/*
		 * Now that we know the base address of the brand's linker,
		 * place it in the aux vector.
		 */
		edp->ed_base = voffset;
		edp->ed_ldentry = voffset + ehdr.e_entry;
	} else {
		/*
		 * This program has no interpreter. The lx brand library will
		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
		 * so in this case, put the entry point of the main executable
		 * there.
		 */
		if (ehdr.e_type == ET_EXEC) {
			/*
			 * An executable with no interpreter, this must be a
			 * statically linked executable, which means we loaded
			 * it at the address specified in the elf header, in
			 * which case the e_entry field of the elf header is an
			 * absolute address.
			 */
			edp->ed_ldentry = ehdr.e_entry;
			edp->ed_entry = ehdr.e_entry;
		} else {
			/*
			 * A shared object with no interpreter, we use the
			 * calculated address from above.
			 */
			edp->ed_ldentry = edp->ed_entry;

			/*
			 * In all situations except an ET_DYN elf object with no
			 * interpreter, we want to leave the brk and base
			 * values set by mapexec_brand alone. Normally when
			 * running ET_DYN objects on Solaris (most likely
			 * /lib/ld.so.1) the kernel sets brk and base to 0 since
			 * it doesn't know where to put the heap, and later the
			 * linker will call brk() to initialize the heap in:
			 *	usr/src/cmd/sgs/rtld/common/setup.c:setup()
			 * after it has determined where to put it.  (This
			 * decision is made after the linker loads and inspects
			 * elf properties of the target executable being run.)
			 *
			 * So for ET_DYN Linux executables, we also don't know
			 * where the heap should go, so we'll set the brk and
			 * base to 0.  But in this case the Solaris linker will
			 * not initialize the heap, so when the Linux linker
			 * starts running there is no heap allocated.  This
			 * seems to be ok on Linux 2.4 based systems because the
			 * Linux linker/libc fall back to using mmap() to
			 * allocate memory. But on 2.6 systems, running
			 * applications by specifying them as command line
			 * arguments to the linker results in segfaults for an
			 * as yet undetermined reason (which seems to indicate
			 * that a more permanent fix for heap initialization in
			 * these cases may be necessary).
			 */
			if (ehdr.e_type == ET_DYN) {
				env.ex_bssbase = (caddr_t)0;
				env.ex_brkbase = (caddr_t)0;
				env.ex_brksize = 0;
			}
		}

	}

	env.ex_vp = vp;
	setexecenv(&env);

	/*
	 * We don't need to copy this stuff out.  It is only used by our
	 * tools to locate the lx linker's debug section.  But we should at
	 * least try to keep /proc's view of the aux vector consistent with
	 * what's on the process stack.
	 */
	phdr_auxv32[0].a_un.a_val = edp->ed_phdr;

	/*
	 * Linux 2.6 programs such as ps will print an error message if the
	 * following aux entry is missing
	 */
	if (lx_get_kern_version() >= LX_KERN_2_6) {
		phdr_auxv32[1].a_type = AT_CLKTCK;
		phdr_auxv32[1].a_un.a_val = hz;
	}

	if (copyout(&phdr_auxv32, args->auxp_brand,
	    sizeof (phdr_auxv32)) == -1)
		return (EFAULT);

	/*
	 * /proc uses the AT_ENTRY aux vector entry to deduce
	 * the location of the executable in the address space. The user
	 * structure contains a copy of the aux vector that needs to have those
	 * entries patched with the values of the real lx executable (they
	 * currently contain the values from the lx brand library that was
	 * elfexec'd, above).
	 *
	 * For live processes, AT_BASE is used to locate the linker segment,
	 * which /proc and friends will later use to find Solaris symbols
	 * (such as rtld_db_preinit). However, for core files, /proc uses
	 * AT_ENTRY to find the right segment to label as the executable.
	 * So we set AT_ENTRY to be the entry point of the linux executable,
	 * but leave AT_BASE to be the address of the Solaris linker.
	 */
	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
		if (up->u_auxv[i].a_type == AT_ENTRY)
			up->u_auxv[i].a_un.a_val = edp->ed_entry;
		if (up->u_auxv[i].a_type == AT_SUN_BRAND_LX_PHDR)
			up->u_auxv[i].a_un.a_val = edp->ed_phdr;
	}

	return (0);
}
+ */ + lx_pid_fini(); + if (lx_futex_fini()) + panic("lx brand module cannot be loaded or unloaded."); + } + return (err); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int err; + int futex_done = 0; + + /* + * If there are any zones using this brand, we can't allow it to be + * unloaded. + */ + if (brand_zone_count(&lx_brand)) + return (EBUSY); + + lx_pid_fini(); + + if ((err = lx_futex_fini()) != 0) + goto done; + futex_done = 1; + + err = mod_remove(&modlinkage); + +done: + if (err) { + /* + * If we can't unload the module, then we have to get it + * back into a sane state. + */ + lx_pid_init(); + + if (futex_done) + lx_futex_init(); + + } + + return (err); +} diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c new file mode 100644 index 0000000000..dd279eb6e2 --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -0,0 +1,362 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/archsystm.h> +#include <sys/privregs.h> +#include <sys/exec.h> +#include <sys/lwp.h> +#include <sys/sem.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> +#include <sys/lx_futex.h> + +/* Linux specific functions and definitions */ +void lx_setrval(klwp_t *, int, int); +void lx_exec(); +int lx_initlwp(klwp_t *); +void lx_forklwp(klwp_t *, klwp_t *); +void lx_exitlwp(klwp_t *); +void lx_freelwp(klwp_t *); +static void lx_save(klwp_t *); +static void lx_restore(klwp_t *); +extern void lx_ptrace_free(proc_t *); + +/* + * Set the return code for the forked child, always zero + */ +/*ARGSUSED*/ +void +lx_setrval(klwp_t *lwp, int v1, int v2) +{ + lwptoregs(lwp)->r_r0 = 0; +} + +/* + * Reset process state on exec(2) + */ +void +lx_exec() +{ + klwp_t *lwp = ttolwp(curthread); + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + int err; + + /* + * There are two mutually exclusive special cases we need to + * address. First, if this was a native process prior to this + * exec(), then this lwp won't have its brand-specific data + * initialized and it won't be assigned a Linux PID yet. Second, + * if this was a multi-threaded Linux process and this lwp wasn't + * the main lwp, then we need to make its Solaris and Linux PIDS + * match. + */ + if (lwpd == NULL) { + err = lx_initlwp(lwp); + /* + * Only possible failure from this routine should be an + * inability to allocate a new PID. Since single-threaded + * processes don't need a new PID, we should never hit this + * error. 
+ */ + ASSERT(err == 0); + lwpd = lwptolxlwp(lwp); + } else if (curthread->t_tid != 1) { + lx_pid_reassign(curthread); + } + + installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, + NULL); + + /* + * clear out the tls array + */ + bzero(lwpd->br_tls, sizeof (lwpd->br_tls)); + + /* + * reset the tls entries in the gdt + */ + kpreempt_disable(); + lx_restore(lwp); + kpreempt_enable(); +} + +void +lx_exitlwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + proc_t *p; + kthread_t *t; + sigqueue_t *sqp = NULL; + pid_t ppid; + id_t ptid; + + if (lwpd == NULL) + return; /* second time thru' */ + + if (lwpd->br_clear_ctidp != NULL) { + (void) suword32(lwpd->br_clear_ctidp, 0); + (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1, + NULL, NULL, 0); + } + + if (lwpd->br_signal != 0) { + /* + * The first thread in a process doesn't cause a signal to + * be sent when it exits. It was created by a fork(), not + * a clone(), so the parent should get signalled when the + * process exits. + */ + if (lwpd->br_ptid == -1) + goto free; + + sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + /* + * If br_ppid is 0, it means this is a CLONE_PARENT thread, + * so the signal goes to the parent process - not to a + * specific thread in this process. + */ + p = lwptoproc(lwp); + if (lwpd->br_ppid == 0) { + mutex_enter(&p->p_lock); + ppid = p->p_ppid; + t = NULL; + } else { + /* + * If we have been reparented to init or if our + * parent thread is gone, then nobody gets + * signaled. 
+ */ + if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) || + (ptid == -1)) + goto free; + + mutex_enter(&pidlock); + if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + goto free; + } + mutex_enter(&p->p_lock); + mutex_exit(&pidlock); + + if ((t = idtot(p, ptid)) == NULL) { + mutex_exit(&p->p_lock); + goto free; + } + } + + sqp->sq_info.si_signo = lwpd->br_signal; + sqp->sq_info.si_code = lwpd->br_exitwhy; + sqp->sq_info.si_status = lwpd->br_exitwhat; + sqp->sq_info.si_pid = lwpd->br_pid; + sqp->sq_info.si_uid = crgetruid(CRED()); + sigaddqa(p, t, sqp); + mutex_exit(&p->p_lock); + sqp = NULL; + } + +free: + if (sqp) + kmem_free(sqp, sizeof (sigqueue_t)); + + lx_freelwp(lwp); +} + +void +lx_freelwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + + if (lwpd != NULL) { + (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, + NULL, NULL, lx_save, NULL); + if (lwpd->br_pid != 0) + lx_pid_rele(lwptoproc(lwp)->p_pid, + lwptot(lwp)->t_tid); + + lwp->lwp_brand = NULL; + kmem_free(lwpd, sizeof (struct lx_lwp_data)); + } +} + +int +lx_initlwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd; + struct lx_lwp_data *plwpd; + kthread_t *tp = lwptot(lwp); + + lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP); + lwpd->br_exitwhy = CLD_EXITED; + lwpd->br_lwp = lwp; + lwpd->br_clear_ctidp = NULL; + lwpd->br_set_ctidp = NULL; + lwpd->br_signal = 0; + /* + * lwpd->br_affinitymask was zeroed by kmem_zalloc(). + */ + + /* + * The first thread in a process has ppid set to the parent + * process's pid, and ptid set to -1. Subsequent threads in the + * process have their ppid set to the pid of the thread that + * created them, and their ptid to that thread's tid. 
+ */ + if (tp->t_next == tp) { + lwpd->br_ppid = tp->t_procp->p_ppid; + lwpd->br_ptid = -1; + } else if (ttolxlwp(curthread) != NULL) { + plwpd = ttolxlwp(curthread); + bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls)); + lwpd->br_ppid = plwpd->br_pid; + lwpd->br_ptid = curthread->t_tid; + } else { + /* + * Oddball case: the parent thread isn't a Linux process. + */ + lwpd->br_ppid = 0; + lwpd->br_ptid = -1; + } + lwp->lwp_brand = lwpd; + + if (lx_pid_assign(tp)) { + kmem_free(lwpd, sizeof (struct lx_lwp_data)); + lwp->lwp_brand = NULL; + return (-1); + } + lwpd->br_tgid = lwpd->br_pid; + + installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, + lx_save, NULL); + + return (0); +} + +/* + * There is no need to have any locking for either the source or + * destination struct lx_lwp_data structs. This is always run in the + * thread context of the source thread, and the destination thread is + * always newly created and not referred to from anywhere else. + */ +void +lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp) +{ + struct lx_lwp_data *src = srclwp->lwp_brand; + struct lx_lwp_data *dst = dstlwp->lwp_brand; + + dst->br_ppid = src->br_pid; + dst->br_ptid = lwptot(srclwp)->t_tid; + bcopy(src->br_tls, dst->br_tls, sizeof (dst->br_tls)); + + /* + * copy only these flags + */ + dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND; + dst->br_clone_args = NULL; +} + +/* + * When switching a Linux process off the CPU, clear its GDT entries. + */ +/* ARGSUSED */ +static void +lx_save(klwp_t *t) +{ + int i; + +#if defined(__amd64) + reset_sregs(); +#endif + for (i = 0; i < LX_TLSNUM; i++) + gdt_update_usegd(GDT_TLSMIN + i, &null_udesc); +} + +/* + * When switching a Linux process on the CPU, set its GDT entries. 
+ */ +static void +lx_restore(klwp_t *t) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(t); + user_desc_t *tls; + int i; + + ASSERT(lwpd); + + tls = lwpd->br_tls; + for (i = 0; i < LX_TLSNUM; i++) + gdt_update_usegd(GDT_TLSMIN + i, &tls[i]); +} + +void +lx_set_gdt(int entry, user_desc_t *descrp) +{ + + gdt_update_usegd(entry, descrp); +} + +void +lx_clear_gdt(int entry) +{ + gdt_update_usegd(entry, &null_udesc); +} + +longlong_t +lx_nosys() +{ + return (set_errno(ENOSYS)); +} + +longlong_t +lx_opnotsupp() +{ + return (set_errno(EOPNOTSUPP)); +} + +/* + * Brand-specific routine to check if given non-Solaris standard segment + * register values should be modified to other values. + */ +/*ARGSUSED*/ +greg_t +lx_fixsegreg(greg_t sr, model_t datamodel) +{ + ASSERT(sr == (sr & 0xffff)); + + /* + * Force the SR into the LDT in ring 3 for 32-bit processes. + * + * 64-bit processes get the null GDT selector since they are not + * allowed to have a private LDT. + */ +#if defined(__amd64) + return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0); +#elif defined(__i386) + datamodel = datamodel; /* datamodel currently unused for 32-bit */ + return (sr | SEL_TI_LDT | SEL_UPL); +#endif /* __amd64 */ +} diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c new file mode 100644 index 0000000000..0fdde8c20e --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_pid.c @@ -0,0 +1,369 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/bitmap.h> +#include <sys/var.h> +#include <sys/thread.h> +#include <sys/proc.h> +#include <sys/brand.h> +#include <sys/zone.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> + +#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */ +static int hash_len = 4; /* desired average hash chain length */ +static int hash_size; /* no of buckets in the hash table */ + +static struct lx_pid **stol_pid_hash; +static struct lx_pid **ltos_pid_hash; + +#define LTOS_HASH(pid) ((pid) & (hash_size - 1)) +#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1)) + +static kmutex_t hash_lock; + +static void +lx_pid_insert_hash(struct lx_pid *lpidp) +{ + int shash = STOL_HASH(lpidp->s_pid, lpidp->s_tid); + int lhash = LTOS_HASH(lpidp->l_pid); + + ASSERT(MUTEX_HELD(&hash_lock)); + + lpidp->stol_next = stol_pid_hash[shash]; + stol_pid_hash[shash] = lpidp; + + lpidp->ltos_next = ltos_pid_hash[lhash]; + ltos_pid_hash[lhash] = lpidp; +} + +static struct lx_pid * +lx_pid_remove_hash(pid_t pid, id_t tid) +{ + struct lx_pid **hpp; + struct lx_pid *lpidp = NULL; + + ASSERT(MUTEX_HELD(&hash_lock)); + + hpp = &stol_pid_hash[STOL_HASH(pid, tid)]; + while (*hpp) { + if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) { + lpidp = *hpp; + *hpp = (*hpp)->stol_next; + break; + } + hpp = &(*hpp)->stol_next; + } + + /* + * when called during error recovery the pid may already + * be released + */ + if (lpidp 
== NULL) + return (NULL); + + hpp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)]; + while (*hpp) { + if (*hpp == lpidp) { + *hpp = lpidp->ltos_next; + break; + } + hpp = &(*hpp)->ltos_next; + } + + return (lpidp); +} + +struct pid * pid_find(pid_t pid); + +/* + * given a solaris pid/tid pair, create a linux pid + */ +int +lx_pid_assign(kthread_t *t) +{ + proc_t *p = ttoproc(t); + pid_t s_pid = p->p_pid; + id_t s_tid = t->t_tid; + struct pid *pidp; + struct lx_pid *lpidp; + lx_lwp_data_t *lwpd = ttolxlwp(t); + pid_t newpid; + + if (p->p_lwpcnt > 0) { + /* + * Allocate a pid for any thread other than the first + */ + if ((newpid = pid_allocate(p, 0, 0)) < 0) + return (-1); + + pidp = pid_find(newpid); + } else { + pidp = NULL; + newpid = s_pid; + } + + lpidp = kmem_alloc(sizeof (struct lx_pid), KM_SLEEP); + lpidp->l_pid = newpid; + lpidp->s_pid = s_pid; + lpidp->s_tid = s_tid; + lpidp->l_pidp = pidp; + lpidp->l_start = t->t_start; + + /* + * now put the pid into the linux-solaris and solaris-linux + * conversion hash tables + */ + mutex_enter(&hash_lock); + lx_pid_insert_hash(lpidp); + mutex_exit(&hash_lock); + + lwpd->br_pid = newpid; + + return (0); +} + +/* + * If we are exec()ing the process, this thread's tid is about to be reset + * to 1. Make sure the Linux PID bookkeeping reflects that change. + */ +void +lx_pid_reassign(kthread_t *t) +{ + proc_t *p = ttoproc(t); + struct pid *old_pidp; + struct lx_pid *lpidp; + + ASSERT(p->p_lwpcnt == 1); + + mutex_enter(&hash_lock); + + /* + * Clean up all the traces of this thread's 'fake' Linux PID. + */ + lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid); + ASSERT(lpidp != NULL); + old_pidp = lpidp->l_pidp; + lpidp->l_pidp = NULL; + + /* + * Now register this thread as (pid, 1). 
+ */ + lpidp->l_pid = p->p_pid; + lpidp->s_pid = p->p_pid; + lpidp->s_tid = 1; + lx_pid_insert_hash(lpidp); + + mutex_exit(&hash_lock); + + if (old_pidp) + (void) pid_rele(old_pidp); +} + +/* + * release a solaris pid/tid pair + */ +void +lx_pid_rele(pid_t pid, id_t tid) +{ + struct lx_pid *lpidp; + + mutex_enter(&hash_lock); + lpidp = lx_pid_remove_hash(pid, tid); + mutex_exit(&hash_lock); + + if (lpidp) { + if (lpidp->l_pidp) + (void) pid_rele(lpidp->l_pidp); + + kmem_free(lpidp, sizeof (*lpidp)); + } +} + +/* + * given a linux pid, return the solaris pid/tid pair + */ +int +lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid) +{ + struct lx_pid *hp; + + mutex_enter(&hash_lock); + for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) { + if (l_pid == hp->l_pid) { + if (s_pid) + *s_pid = hp->s_pid; + if (s_tid) + *s_tid = hp->s_tid; + break; + } + } + mutex_exit(&hash_lock); + if (hp != NULL) + return (0); + + /* + * We didn't find this pid in our translation table. + * But this still could be the pid of a native process + * running in the current zone so check for that here. + * + * Note that prfind() only searches for processes in the current zone. + */ + mutex_enter(&pidlock); + if (prfind(l_pid) != NULL) { + mutex_exit(&pidlock); + if (s_pid) + *s_pid = l_pid; + if (s_tid) + *s_tid = 0; + return (0); + } + mutex_exit(&pidlock); + + return (-1); +} + +/* + * Given an lwp, return the Linux pid of its parent. If the caller + * wants them, we return the Solaris (pid, tid) as well. + */ +pid_t +lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + proc_t *p = lwptoproc(lwp); + struct lx_pid *hp; + pid_t zoneinit = curproc->p_zone->zone_proc_initpid; + pid_t lppid, ppid; + + /* + * Be sure not to return a parent pid that should be invisible + * within this zone. + */ + ppid = ((p->p_flag & SZONETOP) + ? 
curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * If the parent process's pid is the zone's init process, force it + * to the Linux init pid value of 1. + */ + if (ppid == zoneinit) + ppid = 1; + + /* + * There are two cases in which the Linux definition of a 'parent' + * matches that of Solaris: + * + * - if our tgid is the same as our PID, then we are either the + * first thread in the process or a CLONE_THREAD thread. + * + * - if the brand lwp value for ppid is 0, then we are either the + * child of a differently-branded process or a CLONE_PARENT thread. + */ + if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) { + if (ppidp != NULL) + *ppidp = ppid; + if (ptidp != NULL) + *ptidp = -1; + return (ppid); + } + + /* + * Set the default Linux parent pid to be the pid of the zone's init + * process; this will get converted back to the Linux default of 1 + * later. + */ + lppid = zoneinit; + + /* + * If the process's parent isn't init, try and look up the Linux "pid" + * corresponding to the process's parent. + */ + if (ppid != 1) { + /* + * In all other cases, we are looking for the parent of this + * specific thread, which in Linux refers to the thread that + * clone()d it. We stashed that thread's PID away when this + * thread was created. + */ + mutex_enter(&hash_lock); + for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp; + hp = hp->ltos_next) { + if (lwpd->br_ppid == hp->l_pid) { + /* + * We found the PID we were looking for, but + * since we cached its value in this LWP's brand + * structure, it has exited and been reused by + * another process. 
+ */ + if (hp->l_start > lwptot(lwp)->t_start) + break; + + lppid = lwpd->br_ppid; + if (ppidp != NULL) + *ppidp = hp->s_pid; + if (ptidp != NULL) + *ptidp = hp->s_tid; + + break; + } + } + mutex_exit(&hash_lock); + } + + if (lppid == zoneinit) { + lppid = 1; + + if (ppidp != NULL) + *ppidp = lppid; + if (ptidp != NULL) + *ptidp = -1; + } + + return (lppid); +} + +void +lx_pid_init(void) +{ + hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR)); + + stol_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size, + KM_SLEEP); + ltos_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size, + KM_SLEEP); + + mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +lx_pid_fini(void) +{ + kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size); + kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size); +} diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c new file mode 100644 index 0000000000..33c340d572 --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -0,0 +1,454 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/thread.h> +#include <sys/systm.h> +#include <sys/syscall.h> +#include <sys/proc.h> +#include <sys/modctl.h> +#include <sys/cmn_err.h> +#include <sys/model.h> +#include <sys/brand.h> +#include <sys/machbrand.h> +#include <sys/lx_syscalls.h> +#include <sys/lx_brand.h> +#include <sys/lx_impl.h> + +/* + * Some system calls return either a 32-bit or a 64-bit value, depending + * on the datamodel. + */ +#ifdef _LP64 +#define V_RVAL SE_64RVAL +#else +#define V_RVAL SE_32RVAL1 +#endif + +/* + * Define system calls that return a native 'long' quantity i.e. a 32-bit + * or 64-bit integer - depending on how the kernel is itself compiled + * e.g. read(2) returns 'ssize_t' in the kernel and in userland. + */ +#define LX_CL(name, call, narg) \ + { V_RVAL, (name), (llfcn_t)(call), (narg) } + +/* + * Returns a 32 bit quantity regardless of datamodel + */ +#define LX_CI(name, call, narg) \ + { SE_32RVAL1, (name), (llfcn_t)(call), (narg) } + +extern longlong_t lx_nosys(void); +#define LX_NOSYS(name) \ + {SE_64RVAL, (name), (llfcn_t)lx_nosys, 0} + +lx_sysent_t lx_sysent[] = +{ + LX_NOSYS("lx_nosys"), /* 0 */ + LX_NOSYS("exit"), /* 1 */ + LX_NOSYS("lx_fork"), + LX_NOSYS("read"), + LX_NOSYS("write"), + LX_NOSYS("open"), + LX_NOSYS("close"), + LX_NOSYS("waitpid"), + LX_NOSYS("creat"), + LX_NOSYS("link"), + LX_NOSYS("unlink"), /* 10 */ + LX_NOSYS("exec"), + LX_NOSYS("chdir"), + LX_NOSYS("gtime"), + LX_NOSYS("mknod"), + LX_NOSYS("chmod"), + LX_NOSYS("lchown16"), + LX_NOSYS("break"), + LX_NOSYS("stat"), + LX_NOSYS("lseek"), + LX_CL("getpid", lx_getpid, 0), /* 20 */ + LX_NOSYS("mount"), + LX_NOSYS("umount"), + LX_NOSYS("setuid16"), + LX_NOSYS("getuid16"), + LX_NOSYS("stime"), + LX_NOSYS("ptrace"), + LX_NOSYS("alarm"), + LX_NOSYS("fstat"), + LX_NOSYS("pause"), + LX_NOSYS("utime"), /* 30 */ + LX_NOSYS("stty"), + LX_NOSYS("gtty"), + LX_NOSYS("access"), 
+ LX_NOSYS("nice"), + LX_NOSYS("ftime"), + LX_NOSYS("sync"), + LX_CL("kill", lx_kill, 2), + LX_NOSYS("rename"), + LX_NOSYS("mkdir"), + LX_NOSYS("rmdir"), /* 40 */ + LX_NOSYS("dup"), + LX_NOSYS("pipe"), + LX_NOSYS("times"), + LX_NOSYS("prof"), + LX_CL("brk", lx_brk, 1), + LX_NOSYS("setgid16"), + LX_NOSYS("getgid16"), + LX_NOSYS("signal"), + LX_NOSYS("geteuid16"), + LX_NOSYS("getegid16"), /* 50 */ + LX_NOSYS("sysacct"), + LX_NOSYS("umount2"), + LX_NOSYS("lock"), + LX_NOSYS("ioctl"), + LX_NOSYS("fcntl"), + LX_NOSYS("mpx"), + LX_NOSYS("setpgid"), + LX_NOSYS("ulimit"), + LX_NOSYS("olduname"), + LX_NOSYS("umask"), /* 60 */ + LX_NOSYS("chroot"), + LX_NOSYS("ustat"), + LX_NOSYS("dup2"), + LX_CL("getppid", lx_getppid, 0), + LX_NOSYS("pgrp"), + LX_NOSYS("setsid"), + LX_NOSYS("sigaction"), + LX_NOSYS("sgetmask"), + LX_NOSYS("ssetmask"), + LX_NOSYS("setreuid16"), /* 70 */ + LX_NOSYS("setregid16"), + LX_NOSYS("sigsuspend"), + LX_NOSYS("sigpending"), + LX_NOSYS("sethostname"), + LX_NOSYS("setrlimit"), + LX_NOSYS("old_getrlimit"), + LX_NOSYS("getrusage"), + LX_NOSYS("gettimeofday"), + LX_NOSYS("settimeofday"), + LX_NOSYS("getgroups16"), /* 80 */ + LX_NOSYS("setgroups16"), + LX_NOSYS("old_select"), + LX_NOSYS("symlink"), + LX_NOSYS("oldlstat"), + LX_NOSYS("readlink"), + LX_NOSYS("uselib"), + LX_NOSYS("swapon"), + LX_NOSYS("reboot"), + LX_NOSYS("old_readdir"), + LX_NOSYS("old_mmap"), /* 90 */ + LX_NOSYS("munmap"), + LX_NOSYS("truncate"), + LX_NOSYS("ftruncate"), + LX_NOSYS("fchmod"), + LX_NOSYS("fchown16"), + LX_NOSYS("getpriority"), + LX_NOSYS("setpriority"), + LX_NOSYS("profil"), + LX_NOSYS("statfs"), + LX_NOSYS("fstatfs"), /* 100 */ + LX_NOSYS("ioperm"), + LX_NOSYS("socketcall"), + LX_NOSYS("syslog"), + LX_NOSYS("setitimer"), + LX_NOSYS("getitimer"), + LX_NOSYS("newstat"), + LX_NOSYS("newsltat"), + LX_NOSYS("newsftat"), + LX_NOSYS("uname"), + LX_NOSYS("oldiopl"), /* 110 */ + LX_NOSYS("oldvhangup"), + LX_NOSYS("idle"), + LX_NOSYS("vm86old"), + LX_NOSYS("wait4"), + 
LX_NOSYS("swapoff"), + LX_CL("sysinfo", lx_sysinfo, 1), + LX_NOSYS("ipc"), + LX_NOSYS("fsync"), + LX_NOSYS("sigreturn"), + LX_CL("clone", lx_clone, 5), /* 120 */ + LX_NOSYS("setdomainname"), + LX_NOSYS("newuname"), + LX_CL("modify_ldt", lx_modify_ldt, 3), + LX_NOSYS("adjtimex"), + LX_NOSYS("mprotect"), + LX_NOSYS("sigprocmask"), + LX_NOSYS("create_module"), + LX_NOSYS("init_module"), + LX_NOSYS("delete_module"), + LX_NOSYS("get_kernel_syms"), /* 130 */ + LX_NOSYS("quotactl"), + LX_NOSYS("getpgid"), + LX_NOSYS("fchdir"), + LX_NOSYS("bdflush"), + LX_NOSYS("sysfs"), + LX_NOSYS("personality"), + LX_NOSYS("afs_syscall"), + LX_NOSYS("setfsuid16"), + LX_NOSYS("setfsgid16"), + LX_NOSYS("llseek"), /* 140 */ + LX_NOSYS("getdents"), + LX_NOSYS("select"), + LX_NOSYS("flock"), + LX_NOSYS("msync"), + LX_NOSYS("readv"), + LX_NOSYS("writev"), + LX_NOSYS("getsid"), + LX_NOSYS("fdatasync"), + LX_NOSYS("sysctl"), + LX_NOSYS("mlock"), /* 150 */ + LX_NOSYS("munlock"), + LX_NOSYS("mlockall"), + LX_NOSYS("munlockall"), + LX_CL("sched_setparam", lx_sched_setparam, 2), + LX_CL("sched_getparam", lx_sched_getparam, 2), + LX_NOSYS("sched_setscheduler"), + LX_NOSYS("sched_getscheduler"), + LX_NOSYS("yield"), + LX_NOSYS("sched_get_priority_max"), + LX_NOSYS("sched_get_priority_min"), /* 160 */ + LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2), + LX_NOSYS("nanosleep"), + LX_NOSYS("mremap"), + LX_CL("setresuid16", lx_setresuid16, 3), + LX_NOSYS("getresuid16"), + LX_NOSYS("vm86"), + LX_NOSYS("query_module"), + LX_NOSYS("poll"), + LX_NOSYS("nfsserctl"), + LX_CL("setresgid16", lx_setresgid16, 3), /* 170 */ + LX_NOSYS("getresgid16"), + LX_NOSYS("prctl"), + LX_NOSYS("rt_sigreturn"), + LX_NOSYS("rt_sigaction"), + LX_NOSYS("rt_sigprocmask"), + LX_NOSYS("rt_sigpending"), + LX_NOSYS("rt_sigtimedwait"), + LX_NOSYS("rt_sigqueueinfo"), + LX_NOSYS("rt_sigsuspend"), + LX_NOSYS("pread64"), /* 180 */ + LX_NOSYS("pwrite64"), + LX_NOSYS("chown16"), + LX_NOSYS("getcwd"), + LX_NOSYS("capget"), + 
LX_NOSYS("capset"), + LX_NOSYS("sigaltstack"), + LX_NOSYS("sendfile"), + LX_NOSYS("getpmsg"), + LX_NOSYS("putpmsg"), + LX_NOSYS("vfork"), /* 190 */ + LX_NOSYS("getrlimit"), + LX_NOSYS("mmap2"), + LX_NOSYS("truncate64"), + LX_NOSYS("ftruncate64"), + LX_NOSYS("stat64"), + LX_NOSYS("lstat64"), + LX_NOSYS("fstat64"), + LX_NOSYS("lchown"), + LX_NOSYS("getuid"), + LX_NOSYS("getgid"), /* 200 */ + LX_NOSYS("geteuid"), + LX_NOSYS("getegid"), + LX_NOSYS("setreuid"), + LX_NOSYS("setregid"), + LX_NOSYS("getgroups"), + LX_CL("setgroups", lx_setgroups, 2), + LX_NOSYS("fchown"), + LX_CL("setresuid", lx_setresuid, 3), + LX_NOSYS("getresuid"), + LX_CL("setresgid", lx_setresgid, 3), /* 210 */ + LX_NOSYS("getresgid"), + LX_NOSYS("chown"), + LX_NOSYS("setuid"), + LX_NOSYS("setgid"), + LX_NOSYS("setfsuid"), + LX_NOSYS("setfsgid"), + LX_NOSYS("pivot_root"), + LX_NOSYS("mincore"), + LX_NOSYS("madvise"), + LX_NOSYS("getdents64"), /* 220 */ + LX_NOSYS("fcntl64"), + LX_NOSYS("lx_nosys"), + LX_NOSYS("security"), + LX_CL("gettid", lx_gettid, 0), + LX_NOSYS("readahead"), + LX_NOSYS("setxattr"), + LX_NOSYS("lsetxattr"), + LX_NOSYS("fsetxattr"), + LX_NOSYS("getxattr"), + LX_NOSYS("lgetxattr"), /* 230 */ + LX_NOSYS("fgetxattr"), + LX_NOSYS("listxattr"), + LX_NOSYS("llistxattr"), + LX_NOSYS("flistxattr"), + LX_NOSYS("removexattr"), + LX_NOSYS("lremovexattr"), + LX_NOSYS("fremovexattr"), + LX_CL("tkill", lx_tkill, 2), + LX_NOSYS("sendfile64"), + LX_CL("futex", lx_futex, 6), /* 240 */ + LX_NOSYS("sched_setaffinity"), + LX_NOSYS("sched_getaffinity"), + LX_CL("set_thread_area", lx_set_thread_area, 1), + LX_CL("get_thread_area", lx_get_thread_area, 1), + LX_NOSYS("io_setup"), + LX_NOSYS("io_destroy"), + LX_NOSYS("io_getevents"), + LX_NOSYS("io_submit"), + LX_NOSYS("io_cancel"), + LX_NOSYS("fadvise64"), /* 250 */ + LX_NOSYS("lx_nosys"), + LX_NOSYS("exit_group"), + LX_NOSYS("lookup_dcookie"), + LX_NOSYS("epoll_create"), + LX_NOSYS("epoll_ctl"), + LX_NOSYS("epoll_wait"), + LX_NOSYS("remap_file_pages"), + 
LX_CL("set_tid_address", lx_set_tid_address, 1), + LX_NOSYS("timer_create"), + LX_NOSYS("timer_settime"), /* 260 */ + LX_NOSYS("timer_gettime"), + LX_NOSYS("timer_getoverrun"), + LX_NOSYS("timer_delete"), + LX_NOSYS("clock_settime"), + LX_NOSYS("clock_gettime"), + LX_NOSYS("clock_getres"), + LX_NOSYS("clock_nanosleep"), + LX_NOSYS("statfs64"), + LX_NOSYS("fstatfs64"), + LX_NOSYS("tgkill"), /* 270 */ + /* The following are Linux 2.6 system calls */ + LX_NOSYS("utimes"), + LX_NOSYS("fadvise64_64"), + LX_NOSYS("vserver"), + LX_NOSYS("mbind"), + LX_NOSYS("get_mempolicy"), + LX_NOSYS("set_mempolicy"), + LX_NOSYS("mq_open"), + LX_NOSYS("mq_unlink"), + LX_NOSYS("mq_timedsend"), + LX_NOSYS("mq_timedreceive"), /* 280 */ + LX_NOSYS("mq_notify"), + LX_NOSYS("mq_getsetattr"), + LX_NOSYS("kexec_load"), + LX_NOSYS("waitid"), + LX_NOSYS("sys_setaltroot"), + LX_NOSYS("add_key"), + LX_NOSYS("request_key"), + LX_NOSYS("keyctl"), + LX_NOSYS("ioprio_set"), + LX_NOSYS("ioprio_get"), /* 290 */ + LX_NOSYS("inotify_init"), + LX_NOSYS("inotify_add_watch"), + LX_NOSYS("inotify_rm_watch"), + LX_NOSYS("migrate_pages"), + LX_NOSYS("openat"), + LX_NOSYS("mkdirat"), + LX_NOSYS("mknodat"), + LX_NOSYS("fchownat"), + LX_NOSYS("futimesat"), + LX_NOSYS("fstatat64"), /* 300 */ + LX_NOSYS("unlinkat"), + LX_NOSYS("renameat"), + LX_NOSYS("linkat"), + LX_NOSYS("syslinkat"), + LX_NOSYS("readlinkat"), + LX_NOSYS("fchmodat"), + LX_NOSYS("faccessat"), + LX_NOSYS("pselect6"), + LX_NOSYS("ppoll"), + LX_NOSYS("unshare"), /* 310 */ + LX_NOSYS("set_robust_list"), + LX_NOSYS("get_robust_list"), + LX_NOSYS("splice"), + LX_NOSYS("sync_file_range"), + LX_NOSYS("tee"), + LX_NOSYS("vmsplice"), + LX_NOSYS("move_pages"), + NULL /* NULL-termination is required for lx_systrace */ +}; + +int64_t +lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + struct lx_sysent *jsp; + int64_t rval; + + rval = (int64_t)0; + + jsp = &(lx_sysent[num]); + + switch 
(jsp->sy_narg) { + case 0: { + lx_print("--> %s()\n", jsp->sy_name); + rval = (int64_t)jsp->sy_callc(); + break; + } + case 1: { + lx_print("--> %s(0x%lx)\n", jsp->sy_name, arg1); + rval = (int64_t)jsp->sy_callc(arg1); + break; + } + case 2: { + lx_print("--> %s(0x%lx, 0x%lx)\n", jsp->sy_name, arg1, arg2); + rval = (int64_t)jsp->sy_callc(arg1, arg2); + break; + } + case 3: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3); + break; + } + case 4: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4); + break; + } + case 5: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4, arg5); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5); + break; + } + case 6: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx," + " 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4, arg5, arg6); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5, + arg6); + break; + } + default: + panic("Invalid syscall entry: #%d at 0x%p\n", num, (void *)jsp); + } + lx_print("----------> return (0x%llx)\n", (long long)rval); + return (rval); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h new file mode 100644 index 0000000000..a2bd74a817 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LXPROC_H +#define _LXPROC_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * lxproc.h: declarations, data structures and macros for lxprocfs + */ + + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/policy.h> +#include <sys/debug.h> +#include <sys/dirent.h> +#include <sys/errno.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/pathname.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/var.h> +#include <sys/user.h> +#include <sys/t_lock.h> +#include <sys/sysmacros.h> +#include <sys/cred.h> +#include <sys/priv.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <sys/cmn_err.h> +#include <sys/zone.h> +#include <sys/uio.h> +#include <sys/utsname.h> +#include <sys/dnlc.h> +#include <sys/atomic.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <vm/as.h> +#include <vm/anon.h> + +/* + * Convert a vnode into an lxpr_mnt_t + */ +#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data) + +/* + * convert a vnode into an lxpr_node + */ +#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data) + +/* + * convert a lxprnode into a vnode + */ +#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode) + +/* + * convert a lxpr_node into zone for fs + */ +#define LXPTOZ(lxpnp) \ + (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone) + +#define LXPNSIZ 256 /* max size of lx /proc file name entries */ + +/* + * Pretend that a directory entry takes 16 bytes + */ +#define LXPR_SDSIZE 16 + +/* + * Node/file 
types for lx /proc files + * (directories and files contained therein). + */ +typedef enum lxpr_nodetype { + LXPR_PROCDIR, /* /proc */ + LXPR_PIDDIR, /* /proc/<pid> */ + LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */ + LXPR_PID_CPU, /* /proc/<pid>/cpu */ + LXPR_PID_CURDIR, /* /proc/<pid>/cwd */ + LXPR_PID_ENV, /* /proc/<pid>/environ */ + LXPR_PID_EXE, /* /proc/<pid>/exe */ + LXPR_PID_MAPS, /* /proc/<pid>/maps */ + LXPR_PID_MEM, /* /proc/<pid>/mem */ + LXPR_PID_ROOTDIR, /* /proc/<pid>/root */ + LXPR_PID_STAT, /* /proc/<pid>/stat */ + LXPR_PID_STATM, /* /proc/<pid>/statm */ + LXPR_PID_STATUS, /* /proc/<pid>/status */ + LXPR_PID_FDDIR, /* /proc/<pid>/fd */ + LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */ + LXPR_CMDLINE, /* /proc/cmdline */ + LXPR_CPUINFO, /* /proc/cpuinfo */ + LXPR_DEVICES, /* /proc/devices */ + LXPR_DMA, /* /proc/dma */ + LXPR_FILESYSTEMS, /* /proc/filesystems */ + LXPR_INTERRUPTS, /* /proc/interrupts */ + LXPR_IOPORTS, /* /proc/ioports */ + LXPR_KCORE, /* /proc/kcore */ + LXPR_KMSG, /* /proc/kmsg */ + LXPR_LOADAVG, /* /proc/loadavg */ + LXPR_MEMINFO, /* /proc/meminfo */ + LXPR_MOUNTS, /* /proc/mounts */ + LXPR_NETDIR, /* /proc/net */ + LXPR_NET_ARP, /* /proc/net/arp */ + LXPR_NET_DEV, /* /proc/net/dev */ + LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */ + LXPR_NET_IGMP, /* /proc/net/igmp */ + LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */ + LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */ + LXPR_NET_MCFILTER, /* /proc/net/mcfilter */ + LXPR_NET_NETSTAT, /* /proc/net/netstat */ + LXPR_NET_RAW, /* /proc/net/raw */ + LXPR_NET_ROUTE, /* /proc/net/route */ + LXPR_NET_RPC, /* /proc/net/rpc */ + LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */ + LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */ + LXPR_NET_SNMP, /* /proc/net/snmp */ + LXPR_NET_STAT, /* /proc/net/stat */ + LXPR_NET_TCP, /* /proc/net/tcp */ + LXPR_NET_UDP, /* /proc/net/udp */ + LXPR_NET_UNIX, /* /proc/net/unix */ + LXPR_PARTITIONS, /* /proc/partitions */ + LXPR_SELF, /* /proc/self */ + LXPR_STAT, /* /proc/stat */ + 
LXPR_UPTIME, /* /proc/uptime */ + LXPR_VERSION, /* /proc/version */ + LXPR_NFILES /* number of lx /proc file types */ +} lxpr_nodetype_t; + + +/* + * Number of fds allowed for in the inode number calculation + * per process (if a process has more fds then inode numbers + * may be duplicated) + */ +#define LXPR_FD_PERPROC 2000 + +/* + * external dirent characteristics + */ +#define LXPRMAXNAMELEN 14 +typedef struct { + lxpr_nodetype_t d_type; + char d_name[LXPRMAXNAMELEN]; +} lxpr_dirent_t; + +/* + * This is the lxprocfs private data object + * which is attached to v_data in the vnode structure + */ +typedef struct lxpr_node { + lxpr_nodetype_t lxpr_type; /* type of this node */ + vnode_t *lxpr_vnode; /* vnode for the node */ + vnode_t *lxpr_parent; /* parent directory */ + vnode_t *lxpr_realvp; /* real vnode, file in dirs */ + timestruc_t lxpr_time; /* creation etc time for file */ + mode_t lxpr_mode; /* file mode bits */ + uid_t lxpr_uid; /* file owner */ + gid_t lxpr_gid; /* file group owner */ + pid_t lxpr_pid; /* pid of proc referred to */ + ino_t lxpr_ino; /* node id */ + ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */ +} lxpr_node_t; + +struct zone; /* forward declaration */ + +/* + * This is the lxprocfs private data object + * which is attached to vfs_data in the vfs structure + */ +typedef struct lxpr_mnt { + lxpr_node_t *lxprm_node; /* node at root of proc mount */ + struct zone *lxprm_zone; /* zone for this mount */ + ldi_ident_t lxprm_li; /* ident for ldi */ +} lxpr_mnt_t; + +extern vnodeops_t *lxpr_vnodeops; +extern int nproc_highbit; /* highbit(v.v_nproc) */ + +typedef struct mounta mounta_t; + +extern void lxpr_initnodecache(); +extern void lxpr_fininodecache(); +extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *); +extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int); +extern ino_t lxpr_parentinode(lxpr_node_t *); +extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int); +extern void lxpr_freenode(lxpr_node_t 
*); + +typedef struct lxpr_uiobuf lxpr_uiobuf_t; +extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *); +extern void lxpr_uiobuf_free(lxpr_uiobuf_t *); +extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *); +extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t); +extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t); +extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...); +extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int); + +proc_t *lxpr_lock(pid_t); +void lxpr_unlock(proc_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LXPROC_H */ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c new file mode 100644 index 0000000000..c3ba5024a4 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c @@ -0,0 +1,516 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lxprsubr.c: Various functions for the /lxproc vnodeops. 
+ */ + +#include <sys/varargs.h> + +#include <sys/cpuvar.h> +#include <sys/mman.h> +#include <sys/vmsystm.h> +#include <sys/prsystm.h> + +#include "lx_proc.h" + +#define LXPRCACHE_NAME "lxpr_cache" + +static int lxpr_node_constructor(void*, void*, int); +static void lxpr_node_destructor(void*, void*); + +static kmem_cache_t *lxpr_node_cache; + +struct lxpr_uiobuf { + uio_t *uiop; + char *buffer; + uint32_t buffsize; + char *pos; + size_t beg; + int error; +}; + +#define BUFSIZE 4000 + +struct lxpr_uiobuf * +lxpr_uiobuf_new(uio_t *uiop) +{ + /* Allocate memory for both lxpr_uiobuf and output buffer */ + struct lxpr_uiobuf *uiobuf = + kmem_alloc(sizeof (struct lxpr_uiobuf) + BUFSIZE, KM_SLEEP); + + uiobuf->uiop = uiop; + uiobuf->buffer = (char *)&uiobuf[1]; + uiobuf->buffsize = BUFSIZE; + uiobuf->pos = uiobuf->buffer; + uiobuf->beg = 0; + uiobuf->error = 0; + + return (uiobuf); +} + +void +lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf) +{ + ASSERT(uiobuf != NULL); + ASSERT(uiobuf->pos == uiobuf->buffer); + + kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize); +} + +void +lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset) +{ + uiobuf->uiop->uio_offset = offset; +} + +void +lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err) +{ + ASSERT(uiobuf->error == 0); + + uiobuf->error = err; +} + +int +lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf) +{ + off_t off = uiobuf->uiop->uio_offset; + caddr_t uaddr = uiobuf->buffer; + size_t beg = uiobuf->beg; + + size_t size = uiobuf->pos - uaddr; + + if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) { + ASSERT(off >= beg); + + if (beg+size > off && off >= 0) + uiobuf->error = + uiomove(uaddr+(off-beg), size-(off-beg), + UIO_READ, uiobuf->uiop); + + uiobuf->beg += size; + } + + uiobuf->pos = uaddr; + + return (uiobuf->error); +} + +void +lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size) +{ + /* While we can still carry on */ + while (uiobuf->error == 0 && uiobuf->uiop->uio_resid 
!= 0) { + uint_t remain + = uiobuf->buffsize-(uiobuf->pos-uiobuf->buffer); + + /* Enough space in buffer? */ + if (remain >= size) { + bcopy(buf, uiobuf->pos, size); + uiobuf->pos += size; + return; + } + + /* Not enough space, so copy all we can and try again */ + bcopy(buf, uiobuf->pos, remain); + uiobuf->pos += remain; + (void) lxpr_uiobuf_flush(uiobuf); + buf += remain; + size -= remain; + } +} + +#define TYPBUFFSIZE 256 +void +lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...) +{ + va_list args; + char buff[TYPBUFFSIZE]; + int len; + char *buffer; + + /* Can we still do any output */ + if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0) + return; + + va_start(args, fmt); + + /* Try using stack allocated buffer */ + len = vsnprintf(buff, TYPBUFFSIZE, fmt, args); + if (len < TYPBUFFSIZE) { + va_end(args); + lxpr_uiobuf_write(uiobuf, buff, len); + return; + } + + /* Not enough space in pre-allocated buffer */ + buffer = kmem_alloc(len+1, KM_SLEEP); + + /* + * We know we allocated the correct amount of space + * so no check on the return value + */ + (void) vsnprintf(buffer, len+1, fmt, args); + lxpr_uiobuf_write(uiobuf, buffer, len); + va_end(args); + kmem_free(buffer, len+1); +} + +/* + * lxpr_lock(): + * + * Lookup process from pid and return with p_plock and P_PR_LOCK held. + */ +proc_t * +lxpr_lock(pid_t pid) +{ + proc_t *p; + kmutex_t *mp; + + ASSERT(!MUTEX_HELD(&pidlock)); + + for (;;) { + mutex_enter(&pidlock); + + /* + * If the pid is 1, we really want the zone's init process + */ + p = prfind((pid == 1) ? + curproc->p_zone->zone_proc_initpid : pid); + + if (p == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + return (NULL); + } + /* + * p_lock is persistent, but p itself is not -- it could + * vanish during cv_wait(). Load p->p_lock now so we can + * drop it after cv_wait() without referencing p. 
+ */ + mp = &p->p_lock; + mutex_enter(mp); + + mutex_exit(&pidlock); + + if (!(p->p_proc_flag & P_PR_LOCK)) + break; + + cv_wait(&pr_pid_cv[p->p_slot], mp); + mutex_exit(mp); + } + p->p_proc_flag |= P_PR_LOCK; + THREAD_KPRI_REQUEST(); + return (p); +} + +/* + * lxpr_unlock() + * + * Unlock locked process + */ +void +lxpr_unlock(proc_t *p) +{ + ASSERT(p->p_proc_flag & P_PR_LOCK); + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(!MUTEX_HELD(&pidlock)); + + cv_signal(&pr_pid_cv[p->p_slot]); + p->p_proc_flag &= ~P_PR_LOCK; + mutex_exit(&p->p_lock); + THREAD_KPRI_RELEASE(); +} + +void +lxpr_initnodecache() +{ + lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME, + sizeof (lxpr_node_t), 0, + lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0); +} + +void +lxpr_fininodecache() +{ + kmem_cache_destroy(lxpr_node_cache); +} + +/* ARGSUSED */ +static int +lxpr_node_constructor(void *buf, void *un, int kmflags) +{ + lxpr_node_t *lxpnp = buf; + vnode_t *vp; + + vp = lxpnp->lxpr_vnode = vn_alloc(kmflags); + if (vp == NULL) + return (-1); + + (void) vn_setops(vp, lxpr_vnodeops); + vp->v_data = lxpnp; + + return (0); +} + +/* ARGSUSED */ +static void +lxpr_node_destructor(void *buf, void *un) +{ + lxpr_node_t *lxpnp = buf; + + vn_free(LXPTOV(lxpnp)); +} + +/* + * Calculate an inode number + * + * This takes various bits of info and munges them + * to give the inode number for an lxproc node + */ +ino_t +lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd) +{ + if (pid == 1) + pid = curproc->p_zone->zone_proc_initpid; + + switch (type) { + case LXPR_PIDDIR: + return (pid + 1); + case LXPR_PROCDIR: + return (maxpid + 2); + case LXPR_PID_FD_FD: + return (maxpid + 2 + + (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) + + LXPR_NFILES + fd); + default: + return (maxpid + 2 + + (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) + + type); + } +} + +/* + * Return inode number of parent (directory) + */ +ino_t +lxpr_parentinode(lxpr_node_t *lxpnp) +{ + /* + * If the input node is the root then the 
/*
 * Allocate a new lxproc node
 *
 * The node comes from lxpr_node_cache; its vnode was attached by the cache
 * constructor and is re-initialised here with vn_reinit().
 *
 *	dp	parent directory vnode; a hold is taken on it and it is
 *		recorded as the node's parent.  The new vnode inherits
 *		dp's v_vfsp.
 *	type	kind of node to create; selects vnode type and mode bits.
 *	p	process the node describes, or NULL for nodes that are not
 *		tied to a process.  Several per-process node types ASSERT
 *		that p is non-NULL.
 *	fd	file descriptor number, passed through to lxpr_inode().
 *
 * Returns the initialised node.  It is released with lxpr_freenode().
 */
lxpr_node_t *
lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
{
	lxpr_node_t *lxpnp;
	vnode_t *vp;
	user_t *up;
	timestruc_t now;

	/*
	 * Allocate a new node. It is deallocated in vop_inactive
	 */
	lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);

	/*
	 * Set defaults (may be overridden below)
	 */
	gethrestime(&now);
	lxpnp->lxpr_type = type;
	lxpnp->lxpr_realvp = NULL;
	lxpnp->lxpr_parent = dp;
	VN_HOLD(dp);
	if (p != NULL) {
		/* The zone's init process is always presented as pid 1 */
		lxpnp->lxpr_pid = ((p->p_pid ==
		    curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);

		/* Timestamp and ownership come from the process itself */
		lxpnp->lxpr_time = PTOU(p)->u_start;
		lxpnp->lxpr_uid = crgetruid(p->p_cred);
		lxpnp->lxpr_gid = crgetrgid(p->p_cred);
		lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
	} else {
		/* Pretend files without a proc belong to sched */
		lxpnp->lxpr_pid = 0;
		lxpnp->lxpr_time = now;
		lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
		lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
	}

	/* initialize the vnode data */
	vp = lxpnp->lxpr_vnode;
	vn_reinit(vp);
	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
	vp->v_vfsp = dp->v_vfsp;

	/*
	 * Do node specific stuff: pick the vnode type and mode bits and,
	 * for the symlink-style nodes, hold the vnode they refer to.
	 */
	switch (type) {
	case LXPR_PROCDIR:
		vp->v_flag |= VROOT;
		vp->v_type = VDIR;
		lxpnp->lxpr_mode = 0555;	/* read-search by everyone */
		break;

	case LXPR_PID_CURDIR:
		ASSERT(p != NULL);

		/*
		 * Zombie check.  p_stat is officially protected by pidlock,
		 * but we can't grab pidlock here because we already hold
		 * p_lock.  Luckily if we look at the process exit code
		 * we see that p_stat only transitions from SRUN to SZOMB
		 * while p_lock is held.  Aside from this, the only other
		 * p_stat transition that we need to be aware about is
		 * SIDL to SRUN, but that's not a problem since lxpr_lock()
		 * ignores nodes in the SIDL state so we'll never get a node
		 * that isn't already in the SRUN state.
		 */
		if (p->p_stat == SZOMB) {
			lxpnp->lxpr_realvp = NULL;
		} else {
			/* Link target is the process's current directory */
			up = PTOU(p);
			lxpnp->lxpr_realvp = up->u_cdir;
			ASSERT(lxpnp->lxpr_realvp != NULL);
			VN_HOLD(lxpnp->lxpr_realvp);
		}
		vp->v_type = VLNK;
		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
		break;

	case LXPR_PID_ROOTDIR:
		ASSERT(p != NULL);
		/* Zombie check.  See the locking comment for LXPR_PID_CURDIR */
		if (p->p_stat == SZOMB) {
			lxpnp->lxpr_realvp = NULL;
		} else {
			/* Fall back to the system root if u_rdir is unset */
			up = PTOU(p);
			lxpnp->lxpr_realvp =
			    up->u_rdir != NULL ? up->u_rdir : rootdir;
			ASSERT(lxpnp->lxpr_realvp != NULL);
			VN_HOLD(lxpnp->lxpr_realvp);
		}
		vp->v_type = VLNK;
		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
		break;

	case LXPR_PID_EXE:
		ASSERT(p != NULL);
		/* p_exec may be NULL, in which case there is no target */
		lxpnp->lxpr_realvp = p->p_exec;
		if (lxpnp->lxpr_realvp != NULL) {
			VN_HOLD(lxpnp->lxpr_realvp);
		}
		vp->v_type = VLNK;
		lxpnp->lxpr_mode = 0777;
		break;

	case LXPR_SELF:
		vp->v_type = VLNK;
		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
		break;

	case LXPR_PID_FD_FD:
		ASSERT(p != NULL);
		/* lxpr_realvp is set after we return */
		vp->v_type = VLNK;
		lxpnp->lxpr_mode = 0700;	/* read-write-exe owner only */
		break;

	case LXPR_PID_FDDIR:
		ASSERT(p != NULL);
		vp->v_type = VDIR;
		lxpnp->lxpr_mode = 0500;	/* read-search by owner only */
		break;

	case LXPR_PIDDIR:
		ASSERT(p != NULL);
		vp->v_type = VDIR;
		lxpnp->lxpr_mode = 0511;	/* owner read-search, others search */
		break;

	case LXPR_NETDIR:
		vp->v_type = VDIR;
		lxpnp->lxpr_mode = 0555;	/* read-search by all */
		break;

	case LXPR_PID_ENV:
	case LXPR_PID_MEM:
		ASSERT(p != NULL);
		/*FALLTHRU*/
	case LXPR_KCORE:
		vp->v_type = VREG;
		lxpnp->lxpr_mode = 0400;	/* read-only by owner only */
		break;

	default:
		/* Every remaining node type is a world-readable file */
		vp->v_type = VREG;
		lxpnp->lxpr_mode = 0444;	/* read-only by all */
		break;
	}

	return (lxpnp);
}
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lxprvfsops.c: vfs operations for /lxprocfs. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/cmn_err.h> +#include <sys/cred.h> +#include <sys/debug.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/var.h> +#include <sys/vfs.h> +#include <sys/vfs_opreg.h> +#include <sys/vnode.h> +#include <sys/mode.h> +#include <sys/signal.h> +#include <sys/user.h> +#include <sys/mount.h> +#include <sys/bitmap.h> +#include <sys/kmem.h> +#include <sys/policy.h> +#include <sys/modctl.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/lx_impl.h> + +#include "lx_proc.h" + +/* Module level parameters */ +static int lxprocfstype; +static dev_t lxprocdev; +static kmutex_t lxpr_mount_lock; + +int nproc_highbit; /* highbit(v.v_nproc) */ + +static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *); +static int lxpr_unmount(vfs_t *, int, cred_t *); +static int lxpr_root(vfs_t *, vnode_t **); +static int lxpr_statvfs(vfs_t *, statvfs64_t *); +static int lxpr_init(int, char *); + +static vfsdef_t vfw = { + VFSDEF_VERSION, + "lx_proc", + lxpr_init, + VSW_ZMOUNT, + NULL +}; + +/* + * Module linkage information for the kernel. 
+ */ +extern struct mod_ops mod_fsops; + +static struct modlfs modlfs = { + &mod_fsops, "generic linux procfs", &vfw +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlfs, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int retval; + + /* + * attempt to unload the module + */ + if ((retval = mod_remove(&modlinkage)) != 0) + goto done; + + /* + * destroy lxpr_node cache + */ + lxpr_fininodecache(); + + /* + * clean out the vfsops and vnodeops + */ + (void) vfs_freevfsops_by_type(lxprocfstype); + vn_freevnodeops(lxpr_vnodeops); + + mutex_destroy(&lxpr_mount_lock); +done: + return (retval); +} + +static int +lxpr_init(int fstype, char *name) +{ + static const fs_operation_def_t lxpr_vfsops_template[] = { + VFSNAME_MOUNT, { .vfs_mount = lxpr_mount }, + VFSNAME_UNMOUNT, { .vfs_unmount = lxpr_unmount }, + VFSNAME_ROOT, { .vfs_root = lxpr_root }, + VFSNAME_STATVFS, { .vfs_statvfs = lxpr_statvfs }, + NULL, NULL + }; + extern const fs_operation_def_t lxpr_vnodeops_template[]; + int error; + major_t dev; + + nproc_highbit = highbit(v.v_proc); + lxprocfstype = fstype; + ASSERT(lxprocfstype != 0); + + mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL); + + /* + * Associate VFS ops vector with this fstype. + */ + error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL); + if (error != 0) { + cmn_err(CE_WARN, "lxpr_init: bad vfs ops template"); + return (error); + } + + /* + * Set up vnode ops vector too. + */ + error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops); + if (error != 0) { + (void) vfs_freevfsops_by_type(fstype); + cmn_err(CE_WARN, "lxpr_init: bad vnode ops template"); + return (error); + } + + /* + * Assign a unique "device" number (used by stat(2)). 
+ */ + if ((dev = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "lxpr_init: can't get unique device number"); + dev = 0; + } + + /* + * Make the pseudo device + */ + lxprocdev = makedevice(dev, 0); + + /* + * Initialise cache for lxpr_nodes + */ + lxpr_initnodecache(); + + return (0); +} + +static int +lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr) +{ + lxpr_mnt_t *lxpr_mnt; + zone_t *zone = curproc->p_zone; + ldi_ident_t li; + int err; + + /* + * must be root to mount + */ + if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) + return (EPERM); + + /* + * mount point must be a directory + */ + if (mvp->v_type != VDIR) + return (ENOTDIR); + + if (zone == global_zone) { + zone_t *mntzone; + + mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); + zone_rele(mntzone); + if (zone != mntzone) + return (EBUSY); + } + + /* + * Having the resource be anything but "lxproc" doesn't make sense + */ + vfs_setresource(vfsp, "lxproc", 0); + + lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP); + + if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) { + kmem_free(lxpr_mnt, sizeof (*lxpr_mnt)); + return (err); + } + + lxpr_mnt->lxprm_li = li; + + mutex_enter(&lxpr_mount_lock); + + /* + * Ensure we don't allow overlaying mounts + */ + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + mutex_exit(&lxpr_mount_lock); + kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt))); + return (EBUSY); + } + mutex_exit(&mvp->v_lock); + + /* + * allocate the first vnode + */ + zone_hold(lxpr_mnt->lxprm_zone = zone); + + /* Arbitrarily set the parent vnode to the mounted over directory */ + lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0); + + /* Correctly set the fs for the root node */ + lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp; + + vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype); + vfsp->vfs_bsize = DEV_BSIZE; + vfsp->vfs_fstype = lxprocfstype; + vfsp->vfs_data = 
(caddr_t)lxpr_mnt; + vfsp->vfs_dev = lxprocdev; + + mutex_exit(&lxpr_mount_lock); + + return (0); +} + +static int +lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr) +{ + lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data; + vnode_t *vp; + int count; + + ASSERT(lxpr_mnt != NULL); + vp = LXPTOV(lxpr_mnt->lxprm_node); + + mutex_enter(&lxpr_mount_lock); + + /* + * must be root to unmount + */ + if (secpolicy_fs_unmount(cr, vfsp) != 0) { + mutex_exit(&lxpr_mount_lock); + return (EPERM); + } + + /* + * forced unmount is not supported by this file system + */ + if (flag & MS_FORCE) { + mutex_exit(&lxpr_mount_lock); + return (ENOTSUP); + } + + /* + * Ensure that no vnodes are in use on this mount point. + */ + mutex_enter(&vp->v_lock); + count = vp->v_count; + mutex_exit(&vp->v_lock); + if (count > 1) { + mutex_exit(&lxpr_mount_lock); + return (EBUSY); + } + + + /* + * purge the dnlc cache for vnode entries + * associated with this file system + */ + count = dnlc_purge_vfsp(vfsp, 0); + + /* + * free up the lxprnode + */ + lxpr_freenode(lxpr_mnt->lxprm_node); + zone_rele(lxpr_mnt->lxprm_zone); + kmem_free(lxpr_mnt, sizeof (*lxpr_mnt)); + + mutex_exit(&lxpr_mount_lock); + + return (0); +} + +static int +lxpr_root(vfs_t *vfsp, vnode_t **vpp) +{ + lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node; + vnode_t *vp = LXPTOV(lxpnp); + + VN_HOLD(vp); + *vpp = vp; + return (0); +} + +static int +lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp) +{ + int n; + dev32_t d32; + extern uint_t nproc; + + n = v.v_proc - nproc; + + bzero((caddr_t)sp, sizeof (*sp)); + sp->f_bsize = DEV_BSIZE; + sp->f_frsize = DEV_BSIZE; + sp->f_blocks = (fsblkcnt64_t)0; + sp->f_bfree = (fsblkcnt64_t)0; + sp->f_bavail = (fsblkcnt64_t)0; + sp->f_files = (fsfilcnt64_t)v.v_proc + 2; + sp->f_ffree = (fsfilcnt64_t)n; + sp->f_favail = (fsfilcnt64_t)n; + (void) cmpldev(&d32, vfsp->vfs_dev); + sp->f_fsid = d32; + /* It is guaranteed that vsw_name will fit in f_basetype */ + (void) strcpy(sp->f_basetype, 
vfssw[lxprocfstype].vsw_name); + sp->f_flag = vf_to_stf(vfsp->vfs_flag); + sp->f_namemax = 64; /* quite arbitrary */ + bzero(sp->f_fstr, sizeof (sp->f_fstr)); + + /* We know f_fstr is 32 chars */ + (void) strcpy(sp->f_fstr, "/proc"); + (void) strcpy(&sp->f_fstr[6], "/proc"); + + return (0); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c new file mode 100644 index 0000000000..4e7b0844a4 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -0,0 +1,3030 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * lxpr_vnops.c: Vnode operations for the lx /proc file system + * + * Assumptions and Gotchas: + * + * In order to preserve Solaris' security policy. This file system's + * functionality does not override Solaris' security policies even if + * that means breaking Linux compatibility. + * + * Linux has no concept of lwps so we only implement procs here as in the + * old /proc interface. 
+ */ + +#include <sys/cpupart.h> +#include <sys/cpuvar.h> +#include <sys/session.h> +#include <sys/vmparam.h> +#include <sys/mman.h> +#include <vm/rm.h> +#include <vm/seg_vn.h> +#include <sys/sdt.h> +#include <lx_signum.h> +#include <sys/strlog.h> +#include <sys/stropts.h> +#include <sys/cmn_err.h> +#include <sys/lx_brand.h> +#include <sys/x86_archext.h> +#include <sys/archsystm.h> +#include <sys/fp.h> +#include <sys/pool_pset.h> +#include <sys/pset.h> +#include <sys/zone.h> +#include <sys/pghw.h> +#include <sys/vfs_opreg.h> + +/* Dependent on the Solaris procfs */ +extern kthread_t *prchoose(proc_t *); + +#include "lx_proc.h" + +extern pgcnt_t swapfs_minfree; +extern time_t boot_time; + +/* + * Pointer to the vnode ops vector for this fs. + * This is instantiated in lxprinit() in lxpr_vfsops.c + */ +vnodeops_t *lxpr_vnodeops; + +static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *); +static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *, + caller_context_t *); +static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); +static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *, + caller_context_t *); +static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *); +static int lxpr_lookup(vnode_t *, char *, vnode_t **, + pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *, + pathname_t *); +static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, + caller_context_t *, int); +static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); +static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); +static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); +static int lxpr_sync(void); +static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *); + +static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); +static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); +static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); +static vnode_t *lxpr_lookup_fddir(vnode_t *, char 
*); +static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); + +static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); + +static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); + +static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); + +static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_mcfilter(lxpr_node_t *, 
/*
 * The lx /proc vnode operations vector
 *
 * Template handed to vn_make_ops() by lxpr_init() to build lxpr_vnodeops.
 * Each entry pairs a VOPNAME_* operation name with its handler; the list
 * is terminated by a NULL, NULL pair.
 *
 * NOTE(review): FSYNC and SEEK are both routed to lxpr_sync() through the
 * generic '.error' member rather than a typed vop_* member; lxpr_sync()
 * takes no arguments.  Presumably this makes both operations trivially
 * succeed -- confirm against lxpr_sync()'s definition.
 */
const fs_operation_def_t lxpr_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lxpr_open },
	VOPNAME_CLOSE,		{ .vop_close = lxpr_close },
	VOPNAME_READ,		{ .vop_read = lxpr_read },
	VOPNAME_GETATTR,	{ .vop_getattr = lxpr_getattr },
	VOPNAME_ACCESS,		{ .vop_access = lxpr_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lxpr_lookup },
	VOPNAME_READDIR,	{ .vop_readdir = lxpr_readdir },
	VOPNAME_READLINK,	{ .vop_readlink = lxpr_readlink },
	VOPNAME_FSYNC,		{ .error = lxpr_sync },
	VOPNAME_SEEK,		{ .error = lxpr_sync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lxpr_inactive },
	VOPNAME_CMP,		{ .vop_cmp = lxpr_cmp },
	VOPNAME_REALVP,		{ .vop_realvp = lxpr_realvp },
	NULL,			NULL
};
+ */ +static lxpr_dirent_t lx_procdir[] = { + { LXPR_CMDLINE, "cmdline" }, + { LXPR_CPUINFO, "cpuinfo" }, + { LXPR_DEVICES, "devices" }, + { LXPR_DMA, "dma" }, + { LXPR_FILESYSTEMS, "filesystems" }, + { LXPR_INTERRUPTS, "interrupts" }, + { LXPR_IOPORTS, "ioports" }, + { LXPR_KCORE, "kcore" }, + { LXPR_KMSG, "kmsg" }, + { LXPR_LOADAVG, "loadavg" }, + { LXPR_MEMINFO, "meminfo" }, + { LXPR_MOUNTS, "mounts" }, + { LXPR_NETDIR, "net" }, + { LXPR_PARTITIONS, "partitions" }, + { LXPR_SELF, "self" }, + { LXPR_STAT, "stat" }, + { LXPR_UPTIME, "uptime" }, + { LXPR_VERSION, "version" } +}; + +#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) + +/* + * Contents of an lx /proc/<pid> directory. + */ +static lxpr_dirent_t piddir[] = { + { LXPR_PID_CMDLINE, "cmdline" }, + { LXPR_PID_CPU, "cpu" }, + { LXPR_PID_CURDIR, "cwd" }, + { LXPR_PID_ENV, "environ" }, + { LXPR_PID_EXE, "exe" }, + { LXPR_PID_MAPS, "maps" }, + { LXPR_PID_MEM, "mem" }, + { LXPR_PID_ROOTDIR, "root" }, + { LXPR_PID_STAT, "stat" }, + { LXPR_PID_STATM, "statm" }, + { LXPR_PID_STATUS, "status" }, + { LXPR_PID_FDDIR, "fd" } +}; + +#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0])) + +/* + * contents of lx /proc/net directory + */ +static lxpr_dirent_t netdir[] = { + { LXPR_NET_ARP, "arp" }, + { LXPR_NET_DEV, "dev" }, + { LXPR_NET_DEV_MCAST, "dev_mcast" }, + { LXPR_NET_IGMP, "igmp" }, + { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, + { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, + { LXPR_NET_MCFILTER, "mcfilter" }, + { LXPR_NET_NETSTAT, "netstat" }, + { LXPR_NET_RAW, "raw" }, + { LXPR_NET_ROUTE, "route" }, + { LXPR_NET_RPC, "rpc" }, + { LXPR_NET_RT_CACHE, "rt_cache" }, + { LXPR_NET_SOCKSTAT, "sockstat" }, + { LXPR_NET_SNMP, "snmp" }, + { LXPR_NET_STAT, "stat" }, + { LXPR_NET_TCP, "tcp" }, + { LXPR_NET_UDP, "udp" }, + { LXPR_NET_UNIX, "unix" } +}; + +#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0])) + +/* + * lxpr_open(): Vnode operation for VOP_OPEN() + */ +static int +lxpr_open(vnode_t **vpp, int 
flag, cred_t *cr, caller_context_t *ct) +{ + vnode_t *vp = *vpp; + lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + vnode_t *rvp; + int error = 0; + + /* + * We only allow reading in this file systrem + */ + if (flag & FWRITE) + return (EROFS); + + /* + * If we are opening an underlying file only allow regular files + * reject the open for anything but a regular file. + * Just do it if we are opening the current or root directory. + */ + if (lxpnp->lxpr_realvp != NULL) { + rvp = lxpnp->lxpr_realvp; + + if (type == LXPR_PID_FD_FD && rvp->v_type != VREG) + error = EACCES; + else { + /* + * Need to hold rvp since VOP_OPEN() may release it. + */ + VN_HOLD(rvp); + error = VOP_OPEN(&rvp, flag, cr, ct); + if (error) { + VN_RELE(rvp); + } else { + *vpp = rvp; + VN_RELE(vp); + } + } + } + + if (type == LXPR_KMSG) { + ldi_ident_t li = VTOLXPM(vp)->lxprm_li; + struct strioctl str; + int rv; + + /* + * Open the zone's console device using the layered driver + * interface. + */ + if ((error = ldi_open_by_name("/dev/log", FREAD, cr, + &lxpnp->lxpr_cons_ldih, li)) != 0) + return (error); + + /* + * Send an ioctl to the underlying console device, letting it + * know we're interested in getting console messages. 
+ */ + str.ic_cmd = I_CONSLOG; + str.ic_timout = 0; + str.ic_len = 0; + str.ic_dp = NULL; + if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR, + (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) + return (error); + } + + return (error); +} + + +/* + * lxpr_close(): Vnode operation for VOP_CLOSE() + */ +/* ARGSUSED */ +static int +lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, + caller_context_t *ct) +{ + lxpr_node_t *lxpr = VTOLXP(vp); + lxpr_nodetype_t type = lxpr->lxpr_type; + int err; + + /* + * we should never get here because the close is done on the realvp + * for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR && + type != LXPR_PID_EXE); + + if (type == LXPR_KMSG) { + if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0) + return (err); + } + + return (0); +} + +static void (*lxpr_read_function[LXPR_NFILES])() = { + lxpr_read_isdir, /* /proc */ + lxpr_read_isdir, /* /proc/<pid> */ + lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */ + lxpr_read_empty, /* /proc/<pid>/cpu */ + lxpr_read_invalid, /* /proc/<pid>/cwd */ + lxpr_read_empty, /* /proc/<pid>/environ */ + lxpr_read_invalid, /* /proc/<pid>/exe */ + lxpr_read_pid_maps, /* /proc/<pid>/maps */ + lxpr_read_empty, /* /proc/<pid>/mem */ + lxpr_read_invalid, /* /proc/<pid>/root */ + lxpr_read_pid_stat, /* /proc/<pid>/stat */ + lxpr_read_pid_statm, /* /proc/<pid>/statm */ + lxpr_read_pid_status, /* /proc/<pid>/status */ + lxpr_read_isdir, /* /proc/<pid>/fd */ + lxpr_read_fd, /* /proc/<pid>/fd/nn */ + lxpr_read_empty, /* /proc/cmdline */ + lxpr_read_cpuinfo, /* /proc/cpuinfo */ + lxpr_read_empty, /* /proc/devices */ + lxpr_read_empty, /* /proc/dma */ + lxpr_read_empty, /* /proc/filesystems */ + lxpr_read_empty, /* /proc/interrupts */ + lxpr_read_empty, /* /proc/ioports */ + lxpr_read_empty, /* /proc/kcore */ + lxpr_read_kmsg, /* /proc/kmsg */ + lxpr_read_loadavg, /* /proc/loadavg */ + lxpr_read_meminfo, /* /proc/meminfo */ + 
lxpr_read_mounts, /* /proc/mounts */ + lxpr_read_isdir, /* /proc/net */ + lxpr_read_net_arp, /* /proc/net/arp */ + lxpr_read_net_dev, /* /proc/net/dev */ + lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */ + lxpr_read_net_igmp, /* /proc/net/igmp */ + lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */ + lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */ + lxpr_read_net_mcfilter, /* /proc/net/mcfilter */ + lxpr_read_net_netstat, /* /proc/net/netstat */ + lxpr_read_net_raw, /* /proc/net/raw */ + lxpr_read_net_route, /* /proc/net/route */ + lxpr_read_net_rpc, /* /proc/net/rpc */ + lxpr_read_net_rt_cache, /* /proc/net/rt_cache */ + lxpr_read_net_sockstat, /* /proc/net/sockstat */ + lxpr_read_net_snmp, /* /proc/net/snmp */ + lxpr_read_net_stat, /* /proc/net/stat */ + lxpr_read_net_tcp, /* /proc/net/tcp */ + lxpr_read_net_udp, /* /proc/net/udp */ + lxpr_read_net_unix, /* /proc/net/unix */ + lxpr_read_partitions, /* /proc/partitions */ + lxpr_read_invalid, /* /proc/self */ + lxpr_read_stat, /* /proc/stat */ + lxpr_read_uptime, /* /proc/uptime */ + lxpr_read_version, /* /proc/version */ +}; + +/* + * Array of lookup functions, indexed by lx /proc file type. 
+ */ +static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = { + lxpr_lookup_procdir, /* /proc */ + lxpr_lookup_piddir, /* /proc/<pid> */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/root */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/status */ + lxpr_lookup_fddir, /* /proc/<pid>/fd */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */ + lxpr_lookup_not_a_dir, /* /proc/cmdline */ + lxpr_lookup_not_a_dir, /* /proc/cpuinfo */ + lxpr_lookup_not_a_dir, /* /proc/devices */ + lxpr_lookup_not_a_dir, /* /proc/dma */ + lxpr_lookup_not_a_dir, /* /proc/filesystems */ + lxpr_lookup_not_a_dir, /* /proc/interrupts */ + lxpr_lookup_not_a_dir, /* /proc/ioports */ + lxpr_lookup_not_a_dir, /* /proc/kcore */ + lxpr_lookup_not_a_dir, /* /proc/kmsg */ + lxpr_lookup_not_a_dir, /* /proc/loadavg */ + lxpr_lookup_not_a_dir, /* /proc/meminfo */ + lxpr_lookup_not_a_dir, /* /proc/mounts */ + lxpr_lookup_netdir, /* /proc/net */ + lxpr_lookup_not_a_dir, /* /proc/net/arp */ + lxpr_lookup_not_a_dir, /* /proc/net/dev */ + lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */ + lxpr_lookup_not_a_dir, /* /proc/net/igmp */ + lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */ + lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */ + lxpr_lookup_not_a_dir, /* /proc/net/netstat */ + lxpr_lookup_not_a_dir, /* /proc/net/raw */ + lxpr_lookup_not_a_dir, /* /proc/net/route */ + lxpr_lookup_not_a_dir, /* /proc/net/rpc */ + lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */ + lxpr_lookup_not_a_dir, /* /proc/net/sockstat */ + lxpr_lookup_not_a_dir, /* 
/proc/net/snmp */ + lxpr_lookup_not_a_dir, /* /proc/net/stat */ + lxpr_lookup_not_a_dir, /* /proc/net/tcp */ + lxpr_lookup_not_a_dir, /* /proc/net/udp */ + lxpr_lookup_not_a_dir, /* /proc/net/unix */ + lxpr_lookup_not_a_dir, /* /proc/partitions */ + lxpr_lookup_not_a_dir, /* /proc/self */ + lxpr_lookup_not_a_dir, /* /proc/stat */ + lxpr_lookup_not_a_dir, /* /proc/uptime */ + lxpr_lookup_not_a_dir, /* /proc/version */ +}; + +/* + * Array of readdir functions, indexed by /proc file type. + */ +static int (*lxpr_readdir_function[LXPR_NFILES])() = { + lxpr_readdir_procdir, /* /proc */ + lxpr_readdir_piddir, /* /proc/<pid> */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/root */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/status */ + lxpr_readdir_fddir, /* /proc/<pid>/fd */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */ + lxpr_readdir_not_a_dir, /* /proc/cmdline */ + lxpr_readdir_not_a_dir, /* /proc/cpuinfo */ + lxpr_readdir_not_a_dir, /* /proc/devices */ + lxpr_readdir_not_a_dir, /* /proc/dma */ + lxpr_readdir_not_a_dir, /* /proc/filesystems */ + lxpr_readdir_not_a_dir, /* /proc/interrupts */ + lxpr_readdir_not_a_dir, /* /proc/ioports */ + lxpr_readdir_not_a_dir, /* /proc/kcore */ + lxpr_readdir_not_a_dir, /* /proc/kmsg */ + lxpr_readdir_not_a_dir, /* /proc/loadavg */ + lxpr_readdir_not_a_dir, /* /proc/meminfo */ + lxpr_readdir_not_a_dir, /* /proc/mounts */ + lxpr_readdir_netdir, /* /proc/net */ + lxpr_readdir_not_a_dir, /* /proc/net/arp */ + lxpr_readdir_not_a_dir, /* /proc/net/dev */ + lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */ + 
lxpr_readdir_not_a_dir, /* /proc/net/igmp */ + lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ + lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ + lxpr_readdir_not_a_dir, /* /proc/net/netstat */ + lxpr_readdir_not_a_dir, /* /proc/net/raw */ + lxpr_readdir_not_a_dir, /* /proc/net/route */ + lxpr_readdir_not_a_dir, /* /proc/net/rpc */ + lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */ + lxpr_readdir_not_a_dir, /* /proc/net/sockstat */ + lxpr_readdir_not_a_dir, /* /proc/net/snmp */ + lxpr_readdir_not_a_dir, /* /proc/net/stat */ + lxpr_readdir_not_a_dir, /* /proc/net/tcp */ + lxpr_readdir_not_a_dir, /* /proc/net/udp */ + lxpr_readdir_not_a_dir, /* /proc/net/unix */ + lxpr_readdir_not_a_dir, /* /proc/partitions */ + lxpr_readdir_not_a_dir, /* /proc/self */ + lxpr_readdir_not_a_dir, /* /proc/stat */ + lxpr_readdir_not_a_dir, /* /proc/uptime */ + lxpr_readdir_not_a_dir, /* /proc/version */ +}; + + +/* + * lxpr_read(): Vnode operation for VOP_READ() + * + * As the format of all the files that can be read in the lx procfs is human + * readable and not binary structures there do not have to be different + * read variants depending on whether the reading process model is 32 or 64 bits + * (at least in general, and certainly the difference is unlikely to be enough + * to justify have different routines for 32 and 64 bit reads + */ +/* ARGSUSED */ +static int +lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, + caller_context_t *ct) +{ + lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); + int error; + + ASSERT(type < LXPR_NFILES); + + lxpr_read_function[type](lxpnp, uiobuf); + + error = lxpr_uiobuf_flush(uiobuf); + lxpr_uiobuf_free(uiobuf); + + return (error); +} + + +/* + * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() + * + * Various special case reads: + * - trying to read a directory + * - invalid file (used to mean a file 
that should be implemented, + * but isn't yet) + * - empty file + * - wait to be able to read a file that will never have anything to read + */ +/* ARGSUSED */ +static void +lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_seterr(uiobuf, EISDIR); +} + +/* ARGSUSED */ +static void +lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_seterr(uiobuf, EINVAL); +} + +/* ARGSUSED */ +static void +lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* + * lxpr_read_pid_cmdline(): + * + * This is not precisely compatible with linux: + * + * The linux cmdline returns argv with the correct separation + * using \0 between the arguments, we cannot do that without + * copying the real argv from the correct process context. + * This is too difficult to attempt so we pretend that the + * entire cmdline is just argv[0]. This is good enough for + * ps to display correctly, but might cause some other things + * not to work correctly. + */ +static void +lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + if (PTOU(p)->u_argv != 0) { + char *buff = PTOU(p)->u_psargs; + int len = strlen(buff); + lxpr_unlock(p); + lxpr_uiobuf_write(uiobuf, buff, len+1); + } else { + lxpr_unlock(p); + } +} + + +/* + * lxpr_read_pid_maps(): memory map file + */ +static void +lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + struct as *as; + struct seg *seg; + char *buf; + int buflen = MAXPATHLEN; + struct print_data { + caddr_t saddr; + caddr_t eaddr; + int type; + char prot[5]; + uint32_t offset; + vnode_t *vp; + struct print_data *next; + } *print_head = NULL; + struct print_data **print_tail = &print_head; + struct print_data *pbuf; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS); + + p = lxpr_lock(lxpnp->lxpr_pid); + if 
(p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + as = p->p_as; + + if (as == &kas) { + lxpr_unlock(p); + return; + } + + mutex_exit(&p->p_lock); + + /* Iterate over all segments in the address space */ + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { + vnode_t *vp; + uint_t protbits; + + pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP); + + pbuf->saddr = seg->s_base; + pbuf->eaddr = seg->s_base+seg->s_size; + pbuf->type = SEGOP_GETTYPE(seg, seg->s_base); + + /* + * Cheat and only use the protection bits of the first page + * in the segment + */ + (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot)); + (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits); + + if (protbits & PROT_READ) pbuf->prot[0] = 'r'; + if (protbits & PROT_WRITE) pbuf->prot[1] = 'w'; + if (protbits & PROT_EXEC) pbuf->prot[2] = 'x'; + if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's'; + else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p'; + + if (seg->s_ops == &segvn_ops && + SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && + vp != NULL && vp->v_type == VREG) { + VN_HOLD(vp); + pbuf->vp = vp; + } else { + pbuf->vp = NULL; + } + + pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr); + + pbuf->next = NULL; + *print_tail = pbuf; + print_tail = &pbuf->next; + } + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + lxpr_unlock(p); + + buf = kmem_alloc(buflen, KM_SLEEP); + + /* print the data we've extracted */ + pbuf = print_head; + while (pbuf != NULL) { + struct print_data *pbuf_next; + vattr_t vattr; + + int maj = 0; + int min = 0; + int inode = 0; + + *buf = '\0'; + if (pbuf->vp != NULL) { + vattr.va_mask = AT_FSID | AT_NODEID; + if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(), + NULL) == 0) { + maj = getmajor(vattr.va_fsid); + min = getminor(vattr.va_fsid); + inode = vattr.va_nodeid; + } + (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED()); + VN_RELE(pbuf->vp); + } + + if (*buf != '\0') { + 
lxpr_uiobuf_printf(uiobuf, + "%08x-%08x %s %08x %02d:%03d %d %s\n", + pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, + maj, min, inode, buf); + } else { + lxpr_uiobuf_printf(uiobuf, + "%08x-%08x %s %08x %02d:%03d %d\n", + pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, + maj, min, inode); + } + + pbuf_next = pbuf->next; + kmem_free(pbuf, sizeof (*pbuf)); + pbuf = pbuf_next; + } + + kmem_free(buf, buflen); +} + +/* + * lxpr_read_pid_statm(): memory status file + */ +static void +lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + struct as *as; + size_t vsize; + size_t rss; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + as = p->p_as; + + mutex_exit(&p->p_lock); + + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = btopr(as->a_resvsize); + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + + mutex_enter(&p->p_lock); + lxpr_unlock(p); + + lxpr_uiobuf_printf(uiobuf, + "%lu %lu %lu %lu %lu %lu %lu\n", + vsize, rss, 0l, rss, 0l, 0l, 0l); +} + +/* + * lxpr_read_pid_status(): status file + */ +static void +lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + kthread_t *t; + user_t *up; + cred_t *cr; + const gid_t *groups; + int ngroups; + struct as *as; + char *status; + pid_t pid, ppid; + size_t vsize; + size_t rss; + k_sigset_t current, ignore, handle; + int i, lx_sig; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + pid = p->p_pid; + + /* + * Convert pid to the Linux default of 1 if we're the zone's init + * process + */ + if (pid == curproc->p_zone->zone_proc_initpid) { + pid = 1; + ppid = 0; /* parent pid for init is 0 */ + } else { + /* + * Make sure not to reference parent PIDs that reside outside + * the zone + */ + ppid = ((p->p_flag & SZONETOP) + ? 
curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * Convert ppid to the Linux default of 1 if our parent is the + * zone's init process + */ + if (ppid == curproc->p_zone->zone_proc_initpid) + ppid = 1; + } + + t = prchoose(p); + if (t != NULL) { + switch (t->t_state) { + case TS_SLEEP: + status = "S (sleeping)"; + break; + case TS_RUN: + case TS_ONPROC: + status = "R (running)"; + break; + case TS_ZOMB: + status = "Z (zombie)"; + break; + case TS_STOPPED: + status = "T (stopped)"; + break; + default: + status = "! (unknown)"; + break; + } + thread_unlock(t); + } else { + /* + * there is a hole in the exit code, where a proc can have + * no threads but it is yet to be flagged SZOMB. We will + * assume we are about to become a zombie + */ + status = "Z (zombie)"; + } + + up = PTOU(p); + mutex_enter(&p->p_crlock); + crhold(cr = p->p_cred); + mutex_exit(&p->p_crlock); + + lxpr_uiobuf_printf(uiobuf, + "Name:\t%s\n" + "State:\t%s\n" + "Tgid:\t%d\n" + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" + "Uid:\t%u\t%u\t%u\t%u\n" + "Gid:\t%u\t%u\t%u\t%u\n" + "FDSize:\t%d\n" + "Groups:\t", + up->u_comm, + status, + pid, /* thread group id - same as pid until we map lwps to procs */ + pid, + ppid, + 0, + crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr), + crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr), + p->p_fno_ctl); + + ngroups = crgetngroups(cr); + groups = crgetgroups(cr); + for (i = 0; i < ngroups; i++) { + lxpr_uiobuf_printf(uiobuf, + "%u ", + groups[i]); + } + crfree(cr); + + as = p->p_as; + if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) { + mutex_exit(&p->p_lock); + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = as->a_resvsize; + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + + lxpr_uiobuf_printf(uiobuf, + "\n" + "VmSize:\t%8lu kB\n" + "VmLck:\t%8lu kB\n" + "VmRSS:\t%8lu kB\n" + "VmData:\t%8lu kB\n" + "VmStk:\t%8lu kB\n" + "VmExe:\t%8lu kB\n" + "VmLib:\t%8lu kB", + btok(vsize), + 
0l, + ptok(rss), + 0l, + btok(p->p_stksize), + ptok(rss), + 0l); + } + + sigemptyset(¤t); + sigemptyset(&ignore); + sigemptyset(&handle); + + for (i = 1; i < NSIG; i++) { + lx_sig = stol_signo[i]; + + if ((lx_sig > 0) && (lx_sig < LX_NSIG)) { + if (sigismember(&p->p_sig, i)) + sigaddset(¤t, lx_sig); + + if (up->u_signal[i - 1] == SIG_IGN) + sigaddset(&ignore, lx_sig); + else if (up->u_signal[i - 1] != SIG_DFL) + sigaddset(&handle, lx_sig); + } + } + + lxpr_uiobuf_printf(uiobuf, + "\n" + "SigPnd:\t%08x%08x\n" + "SigBlk:\t%08x%08x\n" + "SigIgn:\t%08x%08x\n" + "SigCgt:\t%08x%08x\n" + "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + current.__sigbits[1], current.__sigbits[0], + 0, 0, /* signals blocked on per thread basis */ + ignore.__sigbits[1], ignore.__sigbits[0], + handle.__sigbits[1], handle.__sigbits[0], + /* Can't do anything with linux capabilities */ + 0, + 0, + 0); + + lxpr_unlock(p); +} + + +/* + * lxpr_read_pid_stat(): pid stat file + */ +static void +lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + kthread_t *t; + struct as *as; + char stat; + pid_t pid, ppid, pgpid, spid; + gid_t psgid; + dev_t psdev; + size_t rss, vsize; + int nice, pri; + caddr_t wchan; + processorid_t cpu; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + pid = p->p_pid; + + /* + * Set Linux defaults if we're the zone's init process + */ + if (pid == curproc->p_zone->zone_proc_initpid) { + pid = 1; /* PID for init */ + ppid = 0; /* parent PID for init is 0 */ + pgpid = 0; /* process group for init is 0 */ + psgid = (gid_t)-1; /* credential GID for init is -1 */ + spid = 0; /* session id for init is 0 */ + psdev = 0; /* session device for init is 0 */ + } else { + /* + * Make sure not to reference parent PIDs that reside outside + * the zone + */ + ppid = ((p->p_flag & SZONETOP) + ? 
curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * Convert ppid to the Linux default of 1 if our parent is the + * zone's init process + */ + if (ppid == curproc->p_zone->zone_proc_initpid) + ppid = 1; + + pgpid = p->p_pgrp; + + mutex_enter(&p->p_splock); + mutex_enter(&p->p_sessp->s_lock); + spid = p->p_sessp->s_sid; + /* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */ + psdev = p->p_sessp->s_dev; + if (p->p_sessp->s_cred) + psgid = crgetgid(p->p_sessp->s_cred); + else + psgid = crgetgid(p->p_cred); + + mutex_exit(&p->p_sessp->s_lock); + mutex_exit(&p->p_splock); + } + + t = prchoose(p); + if (t != NULL) { + switch (t->t_state) { + case TS_SLEEP: + stat = 'S'; break; + case TS_RUN: + case TS_ONPROC: + stat = 'R'; break; + case TS_ZOMB: + stat = 'Z'; break; + case TS_STOPPED: + stat = 'T'; break; + default: + stat = '!'; break; + } + + if (CL_DONICE(t, NULL, 0, &nice) != 0) + nice = 0; + + pri = v.v_maxsyspri - t->t_pri; + wchan = t->t_wchan; + cpu = t->t_cpu->cpu_seqid; + thread_unlock(t); + } else { + /* Only zombies have no threads */ + stat = 'Z'; + nice = 0; + pri = 0; + wchan = 0; + cpu = 0; + } + as = p->p_as; + mutex_exit(&p->p_lock); + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = as->a_resvsize; + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + + lxpr_uiobuf_printf(uiobuf, + "%d (%s) %c %d %d %d %d %d " + "%lu %lu %lu %lu %lu " + "%lu %lu %ld %ld " + "%d %d " + "0 " + "%ld %lu " + "%lu %ld %llu " + "%lu %lu %u " + "%lu %lu " + "%lu %lu %lu %lu " + "%lu " + "%lu %lu " + "%d " + "%d" + "\n", + pid, + PTOU(p)->u_comm, + stat, + ppid, pgpid, + spid, psdev, psgid, + 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */ + p->p_utime, p->p_stime, p->p_cutime, p->p_cstime, + pri, nice, + 0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */ + vsize, rss, p->p_vmem_ctl, + 0l, 0l, USRSTACK, /* startcode, endcode, startstack */ + 0l, 0l, /* kstkesp, kstkeip */ + 0l, 0l, 0l, 0l, /* 
signal, blocked, sigignore, sigcatch */ + wchan, + 0l, 0l, /* nswap, cnswap */ + 0, /* exit_signal */ + cpu); + + lxpr_unlock(p); +} + +/* ARGSUSED */ +static void +lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_printf(uiobuf, "Inter-| Receive " + " | Transmit\n"); + lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo" + " frame compressed multicast|bytes packets errs drop fifo" + " colls carrier compressed\n"); + + /* + * XXX: data about each interface should go here, but we'll wait to + * see if anybody wants to use it. + */ +} + +/* ARGSUSED */ +static void +lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t 
*uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* + * lxpr_read_kmsg(): read the contents of the kernel message queue. We + * translate this into the reception of console messages for this lx zone; each + * read copies out a single zone console message, or blocks until the next one + * is produced. + */ + +#define LX_KMSG_PRI "<0>" + +static void +lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf) +{ + ldi_handle_t lh = lxpnp->lxpr_cons_ldih; + mblk_t *mp; + + if (ldi_getmsg(lh, &mp, NULL) == 0) { + /* + * lx procfs doesn't like successive reads to the same file + * descriptor unless we do an explicit rewind each time. + */ + lxpr_uiobuf_seek(uiobuf, 0); + + lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, + mp->b_cont->b_rptr); + + freemsg(mp); + } +} + +/* + * lxpr_read_loadavg(): read the contents of the "loadavg" file. + * + * Just enough for uptime to work + */ +extern int nthread; + +static void +lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + ulong_t avenrun1; + ulong_t avenrun5; + ulong_t avenrun15; + ulong_t avenrun1_cs; + ulong_t avenrun5_cs; + ulong_t avenrun15_cs; + int loadavg[3]; + int *loadbuf; + cpupart_t *cp; + + uint_t nrunnable = 0; + rctl_qty_t nlwps; + + ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG); + + mutex_enter(&cpu_lock); + + /* + * Need to add up values over all CPU partitions. If pools are active, + * only report the values of the zone's partition, which by definition + * includes the current CPU. 
+ */ + if (pool_pset_enabled()) { + psetid_t psetid = zone_pset_get(curproc->p_zone); + + ASSERT(curproc->p_zone != &zone0); + cp = CPU->cpu_part; + + nrunnable = cp->cp_nrunning + cp->cp_nrunnable; + (void) cpupart_get_loadavg(psetid, &loadavg[0], 3); + loadbuf = &loadavg[0]; + + /* + * We'll report the total number of lwps in the zone for the + * "nproc" parameter of /proc/loadavg; good enough for lx. + */ + nlwps = curproc->p_zone->zone_nlwps; + } else { + cp = cp_list_head; + do { + nrunnable += cp->cp_nrunning + cp->cp_nrunnable; + } while ((cp = cp->cp_next) != cp_list_head); + + loadbuf = &avenrun[0]; + + /* + * This will report kernel threads as well as user lwps, but it + * should be good enough for lx consumers. + */ + nlwps = nthread; + } + + mutex_exit(&cpu_lock); + + avenrun1 = loadbuf[0] >> FSHIFT; + avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT; + avenrun5 = loadbuf[1] >> FSHIFT; + avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT; + avenrun15 = loadbuf[2] >> FSHIFT; + avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT; + + lxpr_uiobuf_printf(uiobuf, + "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n", + avenrun1, avenrun1_cs, + avenrun5, avenrun5_cs, + avenrun15, avenrun15_cs, + nrunnable, nlwps, 0); +} + +/* + * lxpr_read_meminfo(): read the contents of the "meminfo" file. 
+ */ +static void +lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + long total_mem = physmem * PAGESIZE; + long free_mem = freemem * PAGESIZE; + long total_swap = k_anoninfo.ani_max * PAGESIZE; + long used_swap = k_anoninfo.ani_phys_resv * PAGESIZE; + + ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO); + + lxpr_uiobuf_printf(uiobuf, + " total: used: free: shared: buffers: cached:\n" + "Mem: %8lu %8lu %8lu %8u %8u %8u\n" + "Swap: %8lu %8lu %8lu\n" + "MemTotal: %8lu kB\n" + "MemFree: %8lu kB\n" + "MemShared: %8u kB\n" + "Buffers: %8u kB\n" + "Cached: %8u kB\n" + "SwapCached:%8u kB\n" + "Active: %8u kB\n" + "Inactive: %8u kB\n" + "HighTotal: %8u kB\n" + "HighFree: %8u kB\n" + "LowTotal: %8u kB\n" + "LowFree: %8u kB\n" + "SwapTotal: %8lu kB\n" + "SwapFree: %8lu kB\n", + total_mem, total_mem - free_mem, free_mem, 0, 0, 0, + total_swap, used_swap, total_swap - used_swap, + btok(total_mem), /* MemTotal */ + btok(free_mem), /* MemFree */ + 0, /* MemShared */ + 0, /* Buffers */ + 0, /* Cached */ + 0, /* SwapCached */ + 0, /* Active */ + 0, /* Inactive */ + 0, /* HighTotal */ + 0, /* HighFree */ + btok(total_mem), /* LowTotal */ + btok(free_mem), /* LowFree */ + btok(total_swap), /* SwapTotal */ + btok(total_swap - used_swap)); /* SwapFree */ +} + +/* + * lxpr_read_mounts(): + */ +/* ARGSUSED */ +static void +lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + struct vfs *vfsp; + struct vfs *vfslist; + zone_t *zone = LXPTOZ(lxpnp); + struct print_data { + refstr_t *vfs_mntpt; + refstr_t *vfs_resource; + uint_t vfs_flag; + int vfs_fstype; + struct print_data *next; + } *print_head = NULL; + struct print_data **print_tail = &print_head; + struct print_data *printp; + + vfs_list_read_lock(); + + if (zone == global_zone) { + vfsp = vfslist = rootvfs; + } else { + vfsp = vfslist = zone->zone_vfslist; + /* + * If the zone has a root entry, it will be the first in + * the list. If it doesn't, we conjure one up. 
+ */ + if (vfslist == NULL || + strcmp(refstr_value(vfsp->vfs_mntpt), + zone->zone_rootpath) != 0) { + struct vfs *tvfsp; + /* + * The root of the zone is not a mount point. The vfs + * we want to report is that of the zone's root vnode. + */ + tvfsp = zone->zone_rootvp->v_vfsp; + + lxpr_uiobuf_printf(uiobuf, + "/ / %s %s 0 0\n", + vfssw[tvfsp->vfs_fstype].vsw_name, + tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); + + } + if (vfslist == NULL) { + vfs_list_unlock(); + return; + } + } + + /* + * Later on we have to do a lookupname, which can end up causing + * another vfs_list_read_lock() to be called. Which can lead to a + * deadlock. To avoid this, we extract the data we need into a local + * list, then we can run this list without holding vfs_list_read_lock() + * We keep the list in the same order as the vfs_list + */ + do { + /* Skip mounts we shouldn't show */ + if (vfsp->vfs_flag & VFS_NOMNTTAB) { + goto nextfs; + } + + printp = kmem_alloc(sizeof (*printp), KM_SLEEP); + refstr_hold(vfsp->vfs_mntpt); + printp->vfs_mntpt = vfsp->vfs_mntpt; + refstr_hold(vfsp->vfs_resource); + printp->vfs_resource = vfsp->vfs_resource; + printp->vfs_flag = vfsp->vfs_flag; + printp->vfs_fstype = vfsp->vfs_fstype; + printp->next = NULL; + + *print_tail = printp; + print_tail = &printp->next; + +nextfs: + vfsp = (zone == global_zone) ? 
+ vfsp->vfs_next : vfsp->vfs_zone_next; + + } while (vfsp != vfslist); + + vfs_list_unlock(); + + /* + * now we can run through what we've extracted without holding + * vfs_list_read_lock() + */ + printp = print_head; + while (printp != NULL) { + struct print_data *printp_next; + const char *resource; + char *mntpt; + struct vnode *vp; + int error; + + mntpt = (char *)refstr_value(printp->vfs_mntpt); + resource = refstr_value(printp->vfs_resource); + + if (mntpt != NULL && mntpt[0] != '\0') + mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); + else + mntpt = "-"; + + error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); + + if (error != 0) + goto nextp; + + if (!(vp->v_flag & VROOT)) { + VN_RELE(vp); + goto nextp; + } + VN_RELE(vp); + + if (resource != NULL && resource[0] != '\0') { + if (resource[0] == '/') { + resource = ZONE_PATH_VISIBLE(resource, zone) ? + ZONE_PATH_TRANSLATE(resource, zone) : + mntpt; + } + } else { + resource = "-"; + } + + lxpr_uiobuf_printf(uiobuf, + "%s %s %s %s 0 0\n", + resource, mntpt, vfssw[printp->vfs_fstype].vsw_name, + printp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); + +nextp: + printp_next = printp->next; + refstr_rele(printp->vfs_mntpt); + refstr_rele(printp->vfs_resource); + kmem_free(printp, sizeof (*printp)); + printp = printp_next; + + } +} + +/* + * lxpr_read_partitions(): + * + * We don't support partitions in a local zone because it requires access to + * physical devices. But we need to fake up enough of the file to show that we + * have no partitions. + */ +/* ARGSUSED */ +static void +lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_printf(uiobuf, + "major minor #blocks name rio rmerge rsect ruse " + "wio wmerge wsect wuse running use aveq\n\n"); +} + +/* + * lxpr_read_version(): read the contents of the "version" file. 
+ */ +/* ARGSUSED */ +static void +lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + char *vers; + if (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4) + vers = LX_UNAME_RELEASE_2_4; + else + vers = LX_UNAME_RELEASE_2_6; + + lxpr_uiobuf_printf(uiobuf, + "%s version %s (%s version %d.%d.%d) " + "#%s SMP %s\n", + LX_UNAME_SYSNAME, vers, +#if defined(__GNUC__) + "gcc", + __GNUC__, + __GNUC_MINOR__, + __GNUC_PATCHLEVEL__, +#else + "Sun C", + __SUNPRO_C / 0x100, + (__SUNPRO_C & 0xff) / 0x10, + __SUNPRO_C & 0xf, +#endif + LX_UNAME_VERSION, + "00:00:00 00/00/00"); +} + + +/* + * lxpr_read_stat(): read the contents of the "stat" file. + * + */ +/* ARGSUSED */ + +static void +lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + cpu_t *cp, *cpstart; + int pools_enabled; + ulong_t idle_cum = 0; + ulong_t sys_cum = 0; + ulong_t user_cum = 0; + ulong_t irq_cum = 0; + uint_t cpu_nrunnable_cum = 0; + uint_t w_io_cum = 0; + + ulong_t pgpgin_cum = 0; + ulong_t pgpgout_cum = 0; + ulong_t pgswapout_cum = 0; + ulong_t pgswapin_cum = 0; + ulong_t intr_cum = 0; + ulong_t pswitch_cum = 0; + ulong_t forks_cum = 0; + hrtime_t msnsecs[NCMSTATES]; + int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp)); + /* temporary variable since scalehrtime modifies data in place */ + hrtime_t tmptime; + + ASSERT(lxpnp->lxpr_type == LXPR_STAT); + + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + /* Calculate cumulative stats */ + cp = cpstart = CPU; + do { + int i; + + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. 
+ */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + get_cpu_mstate(cp, msnsecs); + + idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]); + sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); + user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]); + + pgpgin_cum += CPU_STATS(cp, vm.pgpgin); + pgpgout_cum += CPU_STATS(cp, vm.pgpgout); + pgswapin_cum += CPU_STATS(cp, vm.pgswapin); + pgswapout_cum += CPU_STATS(cp, vm.pgswapout); + + if (lx_kern_version >= LX_KERN_2_6) { + cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable; + w_io_cum += CPU_STATS(cp, sys.iowait); + for (i = 0; i < NCMSTATES; i++) { + tmptime = cp->cpu_intracct[i]; + scalehrtime(&tmptime); + irq_cum += NSEC_TO_TICK(tmptime); + } + } + + for (i = 0; i < PIL_MAX; i++) + intr_cum += CPU_STATS(cp, sys.intr[i]); + + pswitch_cum += CPU_STATS(cp, sys.pswitch); + forks_cum += CPU_STATS(cp, sys.sysfork); + forks_cum += CPU_STATS(cp, sys.sysvfork); + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + if (lx_kern_version >= LX_KERN_2_6) { + lxpr_uiobuf_printf(uiobuf, + "cpu %ld %ld %ld %ld %ld %ld %ld\n", + user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0); + } else { + lxpr_uiobuf_printf(uiobuf, + "cpu %ld %ld %ld %ld\n", + user_cum, 0, sys_cum, idle_cum); + } + + /* Do per processor stats */ + do { + int i; + + ulong_t idle_ticks; + ulong_t sys_ticks; + ulong_t user_ticks; + ulong_t irq_ticks = 0; + + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. 
+ */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + get_cpu_mstate(cp, msnsecs); + + idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]); + sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); + user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]); + + if (lx_kern_version >= LX_KERN_2_6) { + for (i = 0; i < NCMSTATES; i++) { + tmptime = cp->cpu_intracct[i]; + scalehrtime(&tmptime); + irq_ticks += NSEC_TO_TICK(tmptime); + } + + lxpr_uiobuf_printf(uiobuf, + "cpu%d %ld %ld %ld %ld %ld %ld %ld\n", + cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks, + 0, irq_ticks, 0); + } else { + lxpr_uiobuf_printf(uiobuf, + "cpu%d %ld %ld %ld %ld\n", + cp->cpu_id, + user_ticks, 0, sys_ticks, idle_ticks); + } + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + mutex_exit(&cpu_lock); + + if (lx_kern_version >= LX_KERN_2_6) { + lxpr_uiobuf_printf(uiobuf, + "page %lu %lu\n" + "swap %lu %lu\n" + "intr %lu\n" + "ctxt %lu\n" + "btime %lu\n" + "processes %lu\n" + "procs_running %lu\n" + "procs_blocked %lu\n", + pgpgin_cum, pgpgout_cum, + pgswapin_cum, pgswapout_cum, + intr_cum, + pswitch_cum, + boot_time, + forks_cum, + cpu_nrunnable_cum, + w_io_cum); + } else { + lxpr_uiobuf_printf(uiobuf, + "page %lu %lu\n" + "swap %lu %lu\n" + "intr %lu\n" + "ctxt %lu\n" + "btime %lu\n" + "processes %lu\n", + pgpgin_cum, pgpgout_cum, + pgswapin_cum, pgswapout_cum, + intr_cum, + pswitch_cum, + boot_time, + forks_cum); + } +} + + +/* + * lxpr_read_uptime(): read the contents of the "uptime" file. 
+ * + * format is: "%.2lf, %.2lf",uptime_secs, idle_secs + * Use fixed point arithmetic to get 2 decimal places + */ +/* ARGSUSED */ +static void +lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + cpu_t *cp, *cpstart; + int pools_enabled; + ulong_t idle_cum = 0; + ulong_t cpu_count = 0; + ulong_t idle_s; + ulong_t idle_cs; + ulong_t up_s; + ulong_t up_cs; + hrtime_t birthtime; + hrtime_t centi_sec = 10000000; /* 10^7 */ + + ASSERT(lxpnp->lxpr_type == LXPR_UPTIME); + + /* Calculate cumulative stats */ + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + cp = cpstart = CPU; + do { + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. + */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); + idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); + cpu_count += 1; + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + mutex_exit(&cpu_lock); + + /* Getting the Zone zsched process startup time */ + birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart; + up_cs = (gethrtime() - birthtime) / centi_sec; + up_s = up_cs / 100; + up_cs %= 100; + + ASSERT(cpu_count > 0); + idle_cum /= cpu_count; + idle_s = idle_cum / hz; + idle_cs = idle_cum % hz; + idle_cs *= 100; + idle_cs /= hz; + + lxpr_uiobuf_printf(uiobuf, + "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); +} + +static const char *amd_x_edx[] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", + "nx", NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, + NULL, "lm", "3dnowext", "3dnow" +}; + +static const char *amd_x_ecx[] = { + "lahf_lm", NULL, "svm", NULL, + "altmovcr8" +}; + +static const char *tm_x_edx[] = { + "recovery", "longrun", NULL, "lrti" +}; + +/* + * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 
+ */ +static const char *intc_x_edx[] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "nx", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "lm", NULL, NULL +}; + +static const char *intc_edx[] = { + "fpu", "vme", "de", "pse", + "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", + "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", + NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", + "ht", "tm", "ia64", "pbe" +}; + +/* + * "sse3" on linux is called "pni" (Prescott New Instructions). + */ +static const char *intc_ecx[] = { + "pni", NULL, NULL, "monitor", + "ds_cpl", NULL, NULL, "est", + "tm2", NULL, "cid", NULL, + NULL, "cx16", "xtpr" +}; + +static void +lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + int i; + uint32_t bits; + cpu_t *cp, *cpstart; + int pools_enabled; + const char **fp; + char brandstr[CPU_IDSTRLEN]; + struct cpuid_regs cpr; + int maxeax; + int std_ecx, std_edx, ext_ecx, ext_edx; + + ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO); + + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + cp = cpstart = CPU; + do { + /* + * This returns the maximum eax value for standard cpuid + * functions in eax. + */ + cpr.cp_eax = 0; + (void) cpuid_insn(cp, &cpr); + maxeax = cpr.cp_eax; + + /* + * Get standard x86 feature flags. + */ + cpr.cp_eax = 1; + (void) cpuid_insn(cp, &cpr); + std_ecx = cpr.cp_ecx; + std_edx = cpr.cp_edx; + + /* + * Now get extended feature flags. 
+ */ + cpr.cp_eax = 0x80000001; + (void) cpuid_insn(cp, &cpr); + ext_ecx = cpr.cp_ecx; + ext_edx = cpr.cp_edx; + + (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN); + + lxpr_uiobuf_printf(uiobuf, + "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n" + "stepping\t: %d\n" + "cpu MHz\t\t: %u.%03u\n", + cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp), + cpuid_getmodel(cp), brandstr, cpuid_getstep(cp), + (uint32_t)(cpu_freq_hz / 1000000), + ((uint32_t)(cpu_freq_hz / 1000)) % 1000); + + lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n", + getl2cacheinfo(cp, NULL, NULL, NULL) / 1024); + + if (is_x86_feature(x86_featureset, X86FSET_HTT)) { + /* + * 'siblings' is used for HT-style threads + */ + lxpr_uiobuf_printf(uiobuf, + "physical id\t: %lu\n" + "siblings\t: %u\n", + pg_plat_hw_instance_id(cp, PGHW_CHIP), + cpuid_get_ncpu_per_chip(cp)); + } + + /* + * Since we're relatively picky about running on older hardware, + * we can be somewhat cavalier about the answers to these ones. + * + * In fact, given the hardware we support, we just say: + * + * fdiv_bug : no (if we're on a 64-bit kernel) + * hlt_bug : no + * f00f_bug : no + * coma_bug : no + * wp : yes (write protect in supervsr mode) + */ + lxpr_uiobuf_printf(uiobuf, + "fdiv_bug\t: %s\n" + "hlt_bug \t: no\n" + "f00f_bug\t: no\n" + "coma_bug\t: no\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "flags\t\t:", +#if defined(__i386) + fpu_pentium_fdivbug ? "yes" : "no", +#else + "no", +#endif /* __i386 */ + fpu_exists ? "yes" : "no", fpu_exists ? 
"yes" : "no", + maxeax); + + for (bits = std_edx, fp = intc_edx, i = 0; + i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + /* + * name additional features where appropriate + */ + switch (x86_vendor) { + case X86_VENDOR_Intel: + for (bits = ext_edx, fp = intc_x_edx, i = 0; + i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + + case X86_VENDOR_AMD: + for (bits = ext_edx, fp = amd_x_edx, i = 0; + i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + for (bits = ext_ecx, fp = amd_x_ecx, i = 0; + i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + + case X86_VENDOR_TM: + for (bits = ext_edx, fp = tm_x_edx, i = 0; + i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + default: + break; + } + + for (bits = std_ecx, fp = intc_ecx, i = 0; + i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + lxpr_uiobuf_printf(uiobuf, "\n\n"); + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + mutex_exit(&cpu_lock); +} + +/* ARGSUSED */ +static void +lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD); + lxpr_uiobuf_seterr(uiobuf, EFAULT); +} + + + +/* + * lxpr_getattr(): Vnode operation for VOP_GETATTR() + */ +static int +lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, + caller_context_t *ct) +{ + register lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + extern uint_t nproc; + int 
error; + + /* + * Return attributes of underlying vnode if ATTR_REAL + * + * but keep fd files with the symlink permissions + */ + if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) { + vnode_t *rvp = lxpnp->lxpr_realvp; + + /* + * withold attribute information to owner or root + */ + if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) { + return (error); + } + + /* + * now its attributes + */ + if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) { + return (error); + } + + /* + * if it's a file in lx /proc/pid/fd/xx then set its + * mode and keep it looking like a symlink + */ + if (type == LXPR_PID_FD_FD) { + vap->va_mode = lxpnp->lxpr_mode; + vap->va_type = vp->v_type; + vap->va_size = 0; + vap->va_nlink = 1; + } + return (0); + } + + /* Default attributes, that may be overridden below */ + bzero(vap, sizeof (*vap)); + vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time; + vap->va_nlink = 1; + vap->va_type = vp->v_type; + vap->va_mode = lxpnp->lxpr_mode; + vap->va_fsid = vp->v_vfsp->vfs_dev; + vap->va_blksize = DEV_BSIZE; + vap->va_uid = lxpnp->lxpr_uid; + vap->va_gid = lxpnp->lxpr_gid; + vap->va_nodeid = lxpnp->lxpr_ino; + + switch (type) { + case LXPR_PROCDIR: + vap->va_nlink = nproc + 2 + PROCDIRFILES; + vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE; + break; + case LXPR_PIDDIR: + vap->va_nlink = PIDDIRFILES; + vap->va_size = PIDDIRFILES * LXPR_SDSIZE; + break; + case LXPR_SELF: + vap->va_uid = crgetruid(curproc->p_cred); + vap->va_gid = crgetrgid(curproc->p_cred); + break; + default: + break; + } + + vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size); + return (0); +} + + +/* + * lxpr_access(): Vnode operation for VOP_ACCESS() + */ +static int +lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) +{ + lxpr_node_t *lxpnp = VTOLXP(vp); + int shift = 0; + proc_t *tp; + + /* lx /proc is a read only file system */ + if (mode & VWRITE) + return (EROFS); + + /* + * If this is a restricted file, check access 
permissions. + */ + switch (lxpnp->lxpr_type) { + case LXPR_PIDDIR: + return (0); + case LXPR_PID_CURDIR: + case LXPR_PID_ENV: + case LXPR_PID_EXE: + case LXPR_PID_MAPS: + case LXPR_PID_MEM: + case LXPR_PID_ROOTDIR: + case LXPR_PID_FDDIR: + case LXPR_PID_FD_FD: + if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) + return (ENOENT); + if (tp != curproc && secpolicy_proc_access(cr) != 0 && + priv_proc_cred_perm(cr, tp, NULL, mode) != 0) { + lxpr_unlock(tp); + return (EACCES); + } + lxpr_unlock(tp); + default: + break; + } + + if (lxpnp->lxpr_realvp != NULL) { + /* + * For these we use the underlying vnode's accessibility. + */ + return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct)); + } + + /* If user is root allow access regardless of permission bits */ + if (secpolicy_proc_access(cr) == 0) + return (0); + + /* + * Access check is based on only + * one of owner, group, public. + * If not owner, then check group. + * If not a member of the group, then + * check public access. + */ + if (crgetuid(cr) != lxpnp->lxpr_uid) { + shift += 3; + if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) + shift += 3; + } + + mode &= ~(lxpnp->lxpr_mode << shift); + + if (mode == 0) + return (0); + + return (EACCES); +} + + + + +/* ARGSUSED */ +static vnode_t * +lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) +{ + return (NULL); +} + + +/* + * lxpr_lookup(): Vnode operation for VOP_LOOKUP() + */ +/* ARGSUSED */ +static int +lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, + int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, + int *direntflags, pathname_t *realpnp) +{ + lxpr_node_t *lxpnp = VTOLXP(dp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + int error; + + ASSERT(dp->v_type == VDIR); + ASSERT(type < LXPR_NFILES); + + /* + * we should never get here because the lookup + * is done on the realvp for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR); + + /* + * restrict lookup permission to owner or 
root + */ + if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) { + return (error); + } + + /* + * Just return the parent vnode + * if thats where we are trying to go + */ + if (strcmp(comp, "..") == 0) { + VN_HOLD(lxpnp->lxpr_parent); + *vpp = lxpnp->lxpr_parent; + return (0); + } + + /* + * Special handling for directory searches + * Note: null component name is synonym for + * current directory being searched. + */ + if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { + VN_HOLD(dp); + *vpp = dp; + return (0); + } + + *vpp = (lxpr_lookup_function[type](dp, comp)); + return ((*vpp == NULL) ? ENOENT : 0); +} + +/* + * Do a sequential search on the given directory table + */ +static vnode_t * +lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, + lxpr_dirent_t *dirtab, int dirtablen) +{ + lxpr_node_t *lxpnp; + int count; + + for (count = 0; count < dirtablen; count++) { + if (strcmp(dirtab[count].d_name, comp) == 0) { + lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + return (dp); + } + } + return (NULL); +} + + +static vnode_t * +lxpr_lookup_piddir(vnode_t *dp, char *comp) +{ + proc_t *p; + + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); + + p = lxpr_lock(VTOLXP(dp)->lxpr_pid); + if (p == NULL) + return (NULL); + + dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); + + lxpr_unlock(p); + + return (dp); +} + + +/* + * Lookup one of the process's open files. 
+ */ +static vnode_t * +lxpr_lookup_fddir(vnode_t *dp, char *comp) +{ + lxpr_node_t *dlxpnp = VTOLXP(dp); + lxpr_node_t *lxpnp; + vnode_t *vp = NULL; + proc_t *p; + file_t *fp; + uint_t fd; + int c; + uf_entry_t *ufp; + uf_info_t *fip; + + ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR); + + /* + * convert the string rendition of the filename + * to a file descriptor + */ + fd = 0; + while ((c = *comp++) != '\0') { + int ofd; + if (c < '0' || c > '9') + return (NULL); + + ofd = fd; + fd = 10*fd + c - '0'; + /* integer overflow */ + if (fd / 10 != ofd) + return (NULL); + } + + /* + * get the proc to work with and lock it + */ + p = lxpr_lock(dlxpnp->lxpr_pid); + if ((p == NULL)) + return (NULL); + + /* + * If the process is a zombie or system process + * it can't have any open files. + */ + if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { + lxpr_unlock(p); + return (NULL); + } + + /* + * get us a fresh node/vnode + */ + lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd); + + /* + * get open file info + */ + fip = (&(p)->p_user.u_finfo); + mutex_enter(&fip->fi_lock); + + /* + * got the fd data so now done with this proc + */ + lxpr_unlock(p); + + if (fd < fip->fi_nfiles) { + UF_ENTER(ufp, fip, fd); + /* + * ensure the fd is still kosher. + * it may have gone between the readdir and + * the lookup + */ + if (fip->fi_list[fd].uf_file == NULL) { + mutex_exit(&fip->fi_lock); + UF_EXIT(ufp); + lxpr_freenode(lxpnp); + return (NULL); + } + + if ((fp = ufp->uf_file) != NULL) + vp = fp->f_vnode; + UF_EXIT(ufp); + } + mutex_exit(&fip->fi_lock); + + if (vp == NULL) { + lxpr_freenode(lxpnp); + return (NULL); + } else { + /* + * Fill in the lxpr_node so future references will + * be able to find the underlying vnode. + * The vnode is held on the realvp. 
+ */ + lxpnp->lxpr_realvp = vp; + VN_HOLD(lxpnp->lxpr_realvp); + } + + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + + return (dp); +} + + +static vnode_t * +lxpr_lookup_netdir(vnode_t *dp, char *comp) +{ + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); + + dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); + + return (dp); +} + + +static vnode_t * +lxpr_lookup_procdir(vnode_t *dp, char *comp) +{ + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); + + /* + * We know all the names of files & dirs in our + * file system structure except those that are pid names. + * These change as pids are created/deleted etc. + * So just look for a number as the first char to see if we + * are we doing pid lookups? + * + * Don't need to check for "self" as it is implemented as a symlink + */ + if (*comp >= '0' && *comp <= '9') { + pid_t pid = 0; + lxpr_node_t *lxpnp = NULL; + proc_t *p; + int c; + + while ((c = *comp++) != '\0') + pid = 10*pid + c - '0'; + + /* + * Can't continue if the process is still loading + * or it doesn't really exist yet (or maybe it just died!) 
+ */ + p = lxpr_lock(pid); + if (p == NULL) + return (NULL); + + if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { + lxpr_unlock(p); + return (NULL); + } + + /* + * allocate and fill in a new lx /proc node + */ + lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); + + lxpr_unlock(p); + + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + + return (dp); + + } + + /* Lookup fixed names */ + return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); +} + + + + +/* + * lxpr_readdir(): Vnode operation for VOP_READDIR() + */ +/* ARGSUSED */ +static int +lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp, + caller_context_t *ct, int flags) +{ + lxpr_node_t *lxpnp = VTOLXP(dp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + ssize_t uresid; + off_t uoffset; + int error; + + ASSERT(dp->v_type == VDIR); + ASSERT(type < LXPR_NFILES); + + /* + * we should never get here because the readdir + * is done on the realvp for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR); + + /* + * restrict readdir permission to owner or root + */ + if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0) + return (error); + + uoffset = uiop->uio_offset; + uresid = uiop->uio_resid; + + /* can't do negative reads */ + if (uoffset < 0 || uresid <= 0) + return (EINVAL); + + /* can't read directory entries that don't exist! 
*/ + if (uoffset % LXPR_SDSIZE) + return (ENOENT); + + return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp)); +} + + +/* ARGSUSED */ +static int +lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + return (ENOTDIR); +} + +/* + * This has the common logic for returning directory entries + */ +static int +lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp, + lxpr_dirent_t *dirtab, int dirtablen) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + + oresid = uiop->uio_resid; + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* + * Satisfy user request + */ + while ((uresid = uiop->uio_resid) > 0) { + int dirindex; + off_t uoffset; + int reclen; + int error; + + uoffset = uiop->uio_offset; + dirindex = (uoffset / LXPR_SDSIZE) - 2; + + if (uoffset == 0) { + + dirent->d_ino = lxpnp->lxpr_ino; + dirent->d_name[0] = '.'; + dirent->d_name[1] = '\0'; + reclen = DIRENT64_RECLEN(1); + + } else if (uoffset == LXPR_SDSIZE) { + + dirent->d_ino = lxpr_parentinode(lxpnp); + dirent->d_name[0] = '.'; + dirent->d_name[1] = '.'; + dirent->d_name[2] = '\0'; + reclen = DIRENT64_RECLEN(2); + + } else if (dirindex < dirtablen) { + int slen = strlen(dirtab[dirindex].d_name); + + dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type, + lxpnp->lxpr_pid, 0); + + ASSERT(slen < LXPNSIZ); + (void) strcpy(dirent->d_name, dirtab[dirindex].d_name); + reclen = DIRENT64_RECLEN(slen); + + } else { + /* Run out of table entries */ + if (eofp) { + *eofp = 1; + } + return (0); + } + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + /* + * if the size of the data to transfer is greater + * that that requested then we can't do it this transfer. + */ + if (reclen > uresid) { + /* + * Error if no entries have been returned yet. 
+ */ + if (uresid == oresid) { + return (EINVAL); + } + break; + } + + /* + * uiomove() updates both uiop->uio_resid and + * uiop->uio_offset by the same amount. But we want + * uiop->uio_offset to change in increments + * of LXPR_SDSIZE, which is different from the number of bytes + * being returned to the user. + * So we set uiop->uio_offset separately, ignoring what + * uiomove() does. + */ + if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))) { + return (error); + } + + uiop->uio_offset = uoffset + LXPR_SDSIZE; + } + + /* Have run out of space, but could have just done last table entry */ + if (eofp) { + *eofp = + (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0; + } + return (0); +} + + +static int +lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + off_t uoffset; + zoneid_t zoneid; + pid_t pid; + int error; + int ceof; + + ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR); + + oresid = uiop->uio_resid; + zoneid = LXPTOZ(lxpnp)->zone_id; + + /* + * We return directory entries in the order: + * "." and ".." then the unique lx procfs files, then the + * directories corresponding to the running processes. + * + * This is a good order because it allows us to more easily + * keep track of where we are betwen calls to getdents(). + * If the number of processes changes between calls then we + * can't lose track of where we are in the lx procfs files. 
+ */ + + /* Do the fixed entries */ + error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir, + PROCDIRFILES); + + /* Finished if we got an error or if we couldn't do all the table */ + if (error != 0 || ceof == 0) + return (error); + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* Do the process entries */ + while ((uresid = uiop->uio_resid) > 0) { + proc_t *p; + int len; + int reclen; + int i; + + uoffset = uiop->uio_offset; + + /* + * Stop when entire proc table has been examined. + */ + i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES; + if (i >= v.v_proc) { + /* Run out of table entries */ + if (eofp) { + *eofp = 1; + } + return (0); + } + mutex_enter(&pidlock); + + /* + * Skip indices for which there is no pid_entry, PIDs for + * which there is no corresponding process, a PID of 0, + * and anything the security policy doesn't allow + * us to look at. + */ + if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL || + p->p_pid == 0 || + secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { + mutex_exit(&pidlock); + goto next; + } + mutex_exit(&pidlock); + + /* + * Convert pid to the Linux default of 1 if we're the zone's + * init process, otherwise use the value from the proc + * structure + */ + pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ? + p->p_pid : 1); + + /* + * If this /proc was mounted in the global zone, view + * all procs; otherwise, only view zone member procs. + */ + if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) { + goto next; + } + + ASSERT(p->p_stat != 0); + + dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0); + len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid); + ASSERT(len < LXPNSIZ); + reclen = DIRENT64_RECLEN(len); + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + /* + * if the size of the data to transfer is greater + * that that requested then we can't do it this transfer. 
+ */ + if (reclen > uresid) { + /* + * Error if no entries have been returned yet. + */ + if (uresid == oresid) + return (EINVAL); + break; + } + + /* + * uiomove() updates both uiop->uio_resid and + * uiop->uio_offset by the same amount. But we want + * uiop->uio_offset to change in increments + * of LXPR_SDSIZE, which is different from the number of bytes + * being returned to the user. + * So we set uiop->uio_offset separately, in the + * increment of this for loop, ignoring what uiomove() does. + */ + if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))) + return (error); + +next: + uiop->uio_offset = uoffset + LXPR_SDSIZE; + } + + if (eofp) + *eofp = + (uiop->uio_offset >= + ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0; + + return (0); +} + + +static int +lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + proc_t *p; + + ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR); + + /* can't read its contents if it died */ + mutex_enter(&pidlock); + + p = prfind((lxpnp->lxpr_pid == 1) ? 
+ curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid); + + if (p == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES)); +} + + +static int +lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + ASSERT(lxpnp->lxpr_type == LXPR_NETDIR); + return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); +} + + +static int +lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + off_t uoffset; + int error; + int ceof; + proc_t *p; + int fddirsize; + uf_info_t *fip; + + + ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR); + + oresid = uiop->uio_resid; + + /* can't read its contents if it died */ + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) + return (ENOENT); + + /* Get open file info */ + fip = (&(p)->p_user.u_finfo); + + if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) + fddirsize = 0; + else + fddirsize = fip->fi_nfiles; + + mutex_enter(&fip->fi_lock); + lxpr_unlock(p); + + /* Do the fixed entries (in this case just "." & "..") */ + error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); + + /* Finished if we got an error or if we couldn't do all the table */ + if (error != 0 || ceof == 0) + return (error); + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* + * Loop until user's request is satisfied or until + * all file descriptors have been examined. 
+ */ + for (; (uresid = uiop->uio_resid) > 0; + uiop->uio_offset = uoffset + LXPR_SDSIZE) { + int reclen; + int fd; + int len; + + uoffset = uiop->uio_offset; + + /* + * Stop at the end of the fd list + */ + fd = (uoffset / LXPR_SDSIZE) - 2; + if (fd >= fddirsize) { + if (eofp) { + *eofp = 1; + } + goto out; + } + + if (fip->fi_list[fd].uf_file == NULL) + continue; + + dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); + len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); + ASSERT(len < LXPNSIZ); + reclen = DIRENT64_RECLEN(len); + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + if (reclen > uresid) { + /* + * Error if no entries have been returned yet. + */ + if (uresid == oresid) + error = EINVAL; + goto out; + } + + if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))) + goto out; + } + + if (eofp) + *eofp = + (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0; + +out: + mutex_exit(&fip->fi_lock); + return (error); +} + + +/* + * lxpr_readlink(): Vnode operation for VOP_READLINK() + */ +/* ARGSUSED */ +static int +lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) +{ + char bp[MAXPATHLEN + 1]; + size_t buflen = sizeof (bp); + lxpr_node_t *lxpnp = VTOLXP(vp); + vnode_t *rvp = lxpnp->lxpr_realvp; + pid_t pid; + int error = 0; + + /* must be a symbolic link file */ + if (vp->v_type != VLNK) + return (EINVAL); + + /* Try to produce a symlink name for anything that has a realvp */ + if (rvp != NULL) { + if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) + return (error); + if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) + return (error); + } else { + switch (lxpnp->lxpr_type) { + case LXPR_SELF: + /* + * Don't need to check result as every possible int + * will fit within MAXPATHLEN bytes + */ + + /* + * Convert pid to the Linux default of 1 if we're the + * zone's init process + */ + pid = ((curproc->p_pid != + 
curproc->p_zone->zone_proc_initpid) + ? curproc->p_pid : 1); + + (void) snprintf(bp, buflen, "%d", pid); + break; + case LXPR_PID_CURDIR: + case LXPR_PID_ROOTDIR: + case LXPR_PID_EXE: + return (EACCES); + default: + /* + * Need to return error so that nothing thinks + * that the symlink is empty and hence "." + */ + return (EINVAL); + } + } + + /* copy the link data to user space */ + return (uiomove(bp, strlen(bp), UIO_READ, uiop)); +} + + +/* + * lxpr_inactive(): Vnode operation for VOP_INACTIVE() + * Vnode is no longer referenced, deallocate the file + * and all its resources. + */ +/* ARGSUSED */ +static void +lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) +{ + lxpr_freenode(VTOLXP(vp)); +} + + +/* + * lxpr_sync(): Vnode operation for VOP_SYNC() + */ +static int +lxpr_sync() +{ + /* + * nothing to sync but this + * function must never fail + */ + return (0); +} + + +/* + * lxpr_cmp(): Vnode operation for VOP_CMP() + * Each vnode is first resolved through its chain of lxproc realvp + * pointers. If either one is still an lxproc vnode afterwards, the two + * match only when they are the same vnode; otherwise the comparison is + * handed to VOP_CMP() on the resolved vnodes. + */ +static int +lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) +{ + vnode_t *rvp; + + while (vn_matchops(vp1, lxpr_vnodeops) && + (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) + vp1 = rvp; + while (vn_matchops(vp2, lxpr_vnodeops) && + (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) + vp2 = rvp; + if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops)) + return (vp1 == vp2); + return (VOP_CMP(vp1, vp2, ct)); +} + + +/* + * lxpr_realvp(): Vnode operation for VOP_REALVP() + * Stores in *vpp the vnode underlying vp: the node's lxpr_realvp, further + * resolved through VOP_REALVP() when that call succeeds, or vp itself + * when the node has no realvp. Always returns 0. + */ +static int +lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) +{ + vnode_t *rvp; + + if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) { + vp = rvp; + if (VOP_REALVP(vp, &rvp, ct) == 0) + vp = rvp; + } + + *vpp = vp; + return (0); +} diff --git a/usr/src/uts/common/brand/lx/sys/ldlinux.h b/usr/src/uts/common/brand/lx/sys/ldlinux.h new file mode 100644 index 0000000000..b259c05d97 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/ldlinux.h @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the 
terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LDLINUX_H +#define _SYS_LDLINUX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The ldlinux streams module is only intended for use in lx branded zones. + * This streams module implements the following ioctls: + * TIOCSETLD and TIOCGETLD + * + * These ioctls are special ioctls supported only by the ldlinux streams + * module and invoked only by the lx brand emulation library. These ioctls + * do not exist on native Linux systems. + * + * The TIOCSETLD ioctl is used when emulating the following Linux ioctls: + * TCSETS/TCSETSW/TCSETSF + * TCSETA/TCSETAW/TCSETAF + * + * The TIOCGETLD ioctl is used when emulating the following Linux ioctls: + * TCGETS/TCGETA + * + * This module is needed to emulate these ioctls because the following arrays: + * termio.c_cc + * termios.c_cc + * which are parameters for the following ioctls: + * TCSETS/TCSETSW/TCSETSF + * TCSETA/TCSETAW/TCSETAF + * TCGETS/TCGETA + * + * are defined differently on Solaris and Linux. 
+ * + * According to the termio(7I) man page on Solaris the following is true of + * the members of the c_cc array: + * The VMIN element is the same element as the VEOF element. + * The VTIME element is the same element as the VEOL element. + * + * But on Linux the termios(3) man page states: + * These symbolic subscript values are all different, except that + * VTIME, VMIN may have the same value as VEOL, VEOF, respectively. + * + * While the man page indicates that these values may be the same, empirical + * tests show them to be different. Since these values are different on + * Linux systems it's possible that applications could set the members of + * the c_cc array to different values and then later expect to be able to + * read back those same separate values. The ldlinux module exists to provide + * a per-stream storage area where the lx_brand emulation library can save + * these values. The values are set and retrieved via the TIOCSETLD and + * TIOCGETLD ioctls respectively. + */ + +#include <sys/termios.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define LDLINUX_MOD "ldlinux" + +#ifdef _KERNEL + +/* + * LDLINUX_MODID - This should be a unique number associated with + * this particular module. Unfortunately there is no authority responsible + * for administering this name space, hence there's no real guarantee that + * whatever number we choose will be unique. Luckily, this constant + * is not really used anywhere by the system. It is used by some + * kernel subsystems to check for the presence of certain streams + * modules with known id values. Since no other kernel subsystem + * checks for the presence of this module we'll just set the id to 0. 
+ */ +#define LDLINUX_MODID 0 + +struct ldlinux { + int state; /* state information */ + /* Linux expects the next four c_cc values */ + /* to be distinct, whereas solaris (legally) */ + /* overlaps their storage */ + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +#define ISPTSTTY 0x01 + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LDLINUX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h new file mode 100644 index 0000000000..cbb3431c4b --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h @@ -0,0 +1,130 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_AUDIO_H +#define _LX_AUDIO_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zone.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * name for this driver + */ +#define LX_AUDIO_DRV "lx_audio" + +/* + * names for the minor nodes this driver exports + */ +#define LXA_MINORNAME_DEVCTL "lx_devctl" +#define LXA_MINORNAME_DSP "lx_dsp" +#define LXA_MINORNAME_MIXER "lx_mixer" + +/* + * minor numbers for the minor nodes this driver exports + */ +#define LXA_MINORNUM_DEVCTL 0 +#define LXA_MINORNUM_DSP 1 +#define LXA_MINORNUM_MIXER 2 +#define LXA_MINORNUM_COUNT 3 + +/* + * driver ioctls + * + * note that we're layering on top of solaris audio devices so we want + * to make sure that our ioctls namespace doesn't conflict with theirs. + * looking in sys/audioio.h and sys/mixer.h we see that they seem to + * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a.' + */ + +/* + * administrative ioctls. + * these ioctls are only supported on the DEVCTL minor node + */ +#define LXA_IOC_ZONE_REG (_IOR('a', 0, lxa_zone_reg_t)) +#define LXA_IOC_ZONE_UNREG (_IOR('a', 1, lxa_zone_reg_t)) + + +/* + * audio and mixer device ioctls + * these ioctls are supported on DSP and MIXER minor nodes. + */ +#define LXA_IOC_GETMINORNUM (_IOR('a', 20, int)) + +/* + * audio device ioctls. + * these ioctls are supported on DSP minor nodes. + */ +#define LXA_IOC_MMAP_OUTPUT (_IOR('a', 41, int)) +#define LXA_IOC_MMAP_PTR (_IOR('a', 42, int)) +#define LXA_IOC_GET_FRAG_INFO (_IOR('a', 43, lxa_frag_info_t)) +#define LXA_IOC_SET_FRAG_INFO (_IOR('a', 44, lxa_frag_info_t)) + +/* + * mixer device ioctls. + * these ioctls are supported on MIXER minor nodes. 
+ */ +#define LXA_IOC_MIXER_GET_VOL (_IOR('a', 60, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_VOL (_IOR('a', 61, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_GET_MIC (_IOR('a', 62, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_MIC (_IOR('a', 63, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_GET_PCM (_IOR('a', 64, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_PCM (_IOR('a', 65, lxa_mixer_levels_t)) + +/* command structure for LXA_IOC_ZONE_REG */ +#define LXA_INTSTRLEN 32 +typedef struct lxa_zone_reg { + char lxa_zr_zone_name[ZONENAME_MAX]; + char lxa_zr_inputdev[LXA_INTSTRLEN]; + char lxa_zr_outputdev[LXA_INTSTRLEN]; +} lxa_zone_reg_t; + +/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */ +typedef struct lxa_frag_info { + int lxa_fi_size; + int lxa_fi_cnt; +} lxa_frag_info_t; + +/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */ +typedef struct lxa_mixer_levels { + int lxa_ml_gain; + int lxa_ml_balance; +} lxa_mixer_levels_t; + +/* verify that a solaris mixer level structure has valid values */ +#define LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \ + ((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \ + ((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \ + ((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUDIO_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h new file mode 100644 index 0000000000..4436226deb --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h @@ -0,0 +1,334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_AUTOFS_H +#define _LX_AUTOFS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The lx_autofs filesystem exists to emulate the Linux autofs filesystem + * and provide support for the Linux "automount" automounter. + * + * + * + * +++ Linux automounter background. + * + * Linux has two automounters: "amd" and "automount" + * + * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem + * at an automount point, and it acts as the NFS server for the mount. When + * an access is done to that NFS filesystem, the access is redirected by the + * kernel to the "amd" process via rpc. "amd" then looks up any information + * required to resolve the requests, mounts real NFS filesystems if + * necessary, and returns. "amd" has its own strange configuration + * mechanism that doesn't seem to be very compatible with Solaris's network + * based automounter map support. + * + * 2) "automount" is the other Linux automounter. It utilizes a kernel + * filesystem (autofs) to provide its functionality. Basically, it mounts + * the autofs filesystem at any automounter controlled mount point. This + * filesystem then intercepts and redirects lookup operations (and only + * lookup ops) to the userland automounter process via a pipe. (The + * pipe to the automounter is established via mount options when the autofs + * filesystem is mounted.) 
When the automounter receives a request via this + * pipe, it does lookups to whatever backing store it's configured to use, + * does mkdir operations on the autofs filesystem, mounts remote NFS + * filesystems on any leaf directories it just created, and signals the + * autofs filesystem via an ioctl to let it know that the lookup can + * continue. + * + * + * + * +++ Linux autofs (and automount daemon) notes + * + * Since we're mimicking the behavior of the Linux autofs filesystem it's + * important to document some of its observed behavior here since there's + * no doubt that in the future this behavior will change. These comments + * apply to the behavior of the automounter as observed on a system + * running Linux v2.4.21 (autofs is bundled with the Linux kernel). + * + * A) Autofs allows root owned, non-automounter processes to create + * directories in the autofs filesystem. The autofs filesystem treats the + * automounter's process group as special, but it doesn't prevent root + * processes outside of the automounter's process group from creating new + * directories in the autofs filesystem. + * + * B) Autofs doesn't allow creation of any non-directory entries in the + * autofs filesystem. No entity can create files (e.g. /bin/touch or + * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within + * the autofs filesystem are directories. + * + * C) Autofs only intercepts vop lookup operations. Notably, it does _not_ + * intercept and re-direct vop readdir operations. This means that the + * observed behavior of the Linux automounter can be considerably different + * from that of the Solaris automounter. Specifically, on Solaris if an autofs + * mount point is mounted _without_ the -nobrowse option then if a user does + * an ls operation (which translates into a vop readdir operation) then the + * automounter will intercept that operation and list all the possible + * directories and mount points without actually mounting any filesystems. 
+ * Essentially, all automounter managed mount points on Linux will behave + * like "-nobrowse" mount points on Solaris. Here's an example to + * illustrate this. If /ws was mounted on Solaris without the -nobrowse + * option and an auto_ws yp map was setup as the backing store for this + * mount point, then an "ls /ws" would list all the keys in the map as + * valid directories, but an "ls /ws" on Linux would list an empty + * directory. + * + * D) NFS mounts are performed by the automount process. When the automount + * process gets a redirected lookup request, it determines _all_ the + * possible remote mount points for that request, creates directory paths + * via mkdir, and mounts the remote filesystems on the newly created paths. + * So for example, if a machine called mcescher exported /var/crash and + * /var/core, an "ls /net/mcescher" would result in the following actions + * being done by the automounter: + * mkdir /net/mcescher + * mkdir /net/mcescher/var + * mkdir /net/mcescher/var/crash + * mkdir /net/mcescher/var/core + * mount mcescher:/var/crash /net/mcescher/var/crash + * mount mcescher:/var/core /net/mcescher/var/core + * Once the automounter completed the work above it would signal the autofs + * filesystem (via an ioctl) that the lookup could continue. + * + * E.1) Autofs only redirects vop lookup operations for path entries that + * don't already exist in the autofs filesystem. So for the example above, + * an initial (after the start of the automounter) "ls /net/mcescher" would + * result in a request to the automounter. A subsequent "ls /net/mcescher" + * would not result in a request to the automounter. Even if + * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted + * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher" + * would not result in a new request to the automounter. + * + * E.2) Autofs lookup requests that are sent to the automounter only include + * the root directory path component. 
So for example, after starting up + * the automounter if a user were to do a "ls /net/mcescher/var/crash", the + * lookup request actually sent to the automounter would just be for + * "mcescher". (The same request as if the user had done "ls /net/mcescher".) + * + * E.3) The two statements above aren't entirely true. The Linux + * autofs filesystem will also redirect lookup operations for leaf + * directories that don't have a filesystem mounted on them. Using the + * example above, if a user did a "ls /net/mcescher", then manually + * unmounted /net/mcescher/var/crash, and then did an "ls + * /net/mcescher/var/crash", this would result in a request for + * "mcescher/var/crash" being sent to the automounter. The strange thing + * (a Linux bug perhaps) is that the automounter won't do anything with this + * request and the lookup will fail. + * + * F) The autofs filesystem communication protocol (what ioctls it supports + * and what data it passes to the automount process) is versioned. The + * source for the userland automount daemon (I looked at version v3.1.7) + * seemed to support two versions of the Linux kernel autofs implementation. + * Both versions supported communication with a pipe and the format of the + * structure passed via this pipe was the same. The difference between the + * two versions was in the functionality supported. (The v3 version has + * additional ioctls to support automount timeouts.) + * + * + * + * +++ lx_autofs notes + * + * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the + * Linux autofs filesystem with the following exceptions: + * + * 1.1) We don't bother to implement the E.3 functionality listed above + * since it doesn't appear to be of any use. + * + * 1.2) We only implement v2 of the automounter protocol since + * implementing v3 would take a _lot_ more work. If this proves to be a + * problem we can re-visit this decision later. 
(More details about v3 + * support are included in comments below.) + * + * 2) In general, the approach taken for lx_autofs is to keep it as simple + * as possible and to minimize its memory usage. To do this all information + * about the contents of the lx_autofs filesystem is mirrored in the + * underlying filesystem that lx_autofs is mounted on and most vop operations + * are simply passed onto this underlying filesystem. This means we don't + * have to implement most of the complex operations that a full filesystem + * normally has to implement. It also means that most of our filesystem state + * (wrt the contents of the filesystem) doesn't actually have to be stored + * in memory, we can simply go to the underlying filesystem to get it when + * it's requested. For the purposes of discussion, we'll call the underlying + * filesystem the "backing store." + * + * The backing store is actually a directory called ".lx_afs" which is created in + * the directory where the lx_autofs filesystem is mounted. When the lx_autofs + * filesystem is unmounted this backing store directory is deleted. If this + * directory exists at mount time (perhaps the system crashed while a previous + * lx_autofs instance was mounted at the same location) it will be deleted. + * There are a few implications of using a backing store worth mentioning. + * + * 2.1) lx_autofs can't be mounted on a read only filesystem. If this + * proves to be a problem we can probably move the location of the + * backing store. + * + * 2.2) If the backing store filesystem runs out of space then the + * automounter process won't be able to create more directories and mount + * new filesystems. Of course, strange failures usually happen when + * filesystems run out of space. + * + * 3) Why aren't we using gfs? gfs has two different usage models. + * + * 3.1) I'm my own filesystem but I'm using gfs to help with managing + * readdir operations. 
+ * + * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes + * + * We're not using the 3.1 interfaces because we don't implement readdir + * ourselves. We pass all readdir operations onto the backing store + * filesystem and utilize its readdir implementation. + * + * We're not using the 3.2 interfaces because they are really designed for + * in memory filesystems where all of the filesystem state is stored in + * memory. They don't lend themselves to filesystems where part of the + * state is in memory and part of the state is on disk. + * + * For more information on gfs take a look at the block comments in the + * top of gfs.c + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Note that the name of the actual Solaris filesystem is lx_afs and not + * lx_autofs. This is because filesystem names are stupidly limited to 8 + * characters. + */ +#define LX_AUTOFS_NAME "lx_afs" + +/* + * Mount options supported. + */ +#define LX_MNTOPT_FD "fd" +#define LX_MNTOPT_PGRP "pgrp" +#define LX_MNTOPT_MINPROTO "minproto" +#define LX_MNTOPT_MAXPROTO "maxproto" + +/* Version of the Linux kernel automount protocol we support. */ +#define LX_AUTOFS_PROTO_VERSION 2 + +/* + * Command structure sent to automount process from lx_autofs via a pipe. + * This structure is the same for v2 and v3 of the automount protocol + * (the communication pipe is established at mount time). + */ +typedef struct lx_autofs_pkt { + int lap_protover; /* protocol version number */ + int lap_constant; /* always set to 0 */ + int lap_id; /* every pkt must have a unique id */ + int lap_name_len; /* don't include newline or NULL */ + char lap_name[256]; /* path component to lookup */ +} lx_autofs_pkt_t; + +/* + * Ioctls supported (v2 protocol). + */ +#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */ +#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */ +#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */ + +/* + * Ioctls not supported (v3 protocol). 
+ * + * Initially we're only going to support v2 of the Linux kernel automount + * protocol. This means that we don't support the following ioctls. + * + * 1) The protocol version ioctl (by not supporting it the automounter + * will assume version 2). + * + * 2) Automounter timeout ioctls. For v3 and later the automounter can + * be started with a timeout option. It will notify the filesystem of + * this timeout and, if any automounter filesystem root directory entry + * is not in use, the filesystem will notify the automounter via the LX_AUTOFS_IOC_EXPIRE + * ioctl. For example, if the timeout is 60 seconds, the Linux + * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for + * timeouts more often than that. (v3.1.7 of the automount daemon would + * perform this ioctl every <timeout>/4 seconds.) Then the autofs + * filesystem will + * report top level directories that aren't in use to the automounter + * via this ioctl. If /net was managed by the automounter and + * there were the following mount points: + * /net/jurassic/var/crash + * /net/mcescher/var/crash + * and no one was looking at any crash dumps on mcescher but someone + * was analyzing a crash dump on jurassic, then after <timeout> seconds + * had passed the autofs filesystem would let the automounter know that + * "mcescher" could be unmounted. (Note the granularity of notification + * is directories in the root of the autofs filesystem.) Here are two + * ideas for how this functionality could be implemented on Solaris: + * + * 2.1) The easy incomplete way. Don't do any in-use detection. Simply + * tell the automounter it can try to unmount the filesystem every time + * the specified timeout passes. If the filesystem is in use then the + * unmount will fail. This would break down for remote hosts with multiple + * mounts. 
For example, if the automounter had mounted the following + * filesystems: + * /net/jurassic/var/crash + * /net/jurassic/var/core + * and the user was looking at a core file, and the timeout expired, the + * automounter would receive notification to unmount "jurassic". Then + * it would unmount crash (which would succeed) and then try to unmount + * core (which would fail). After that (since the automounter only + * performs mounts for failed lookups in the root autofs directory) + * future access to /net/jurassic/var/crash would result in access + * to an empty autofs directory. We might be able to work around + * this by caching which root autofs directories we've timed out, + * then any access to paths that contain those directories could be + * stalled and we could resend another request to the automounter. + * This could work if the automounter ignores mount failures. + * + * 2.2) The hard correct way. The real difficulty here is detecting + * files in use on other filesystems (say NFS) that have been mounted + * on top of autofs. (Detecting in use autofs vnodes should be easy.) + * To do this we would probably have to create a new brand op to intercept + * mount/umount filesystem operations. Then using this entry point we + * could detect mounts of other filesystems on top of lx_autofs. When + * a successful mount finishes we would use the FEM (file event + * monitoring) framework to push a module onto that filesystem and + * intercept VOP operations that allocate/free vnodes in that filesystem. + * (We would also then have to track mount operations on top of that + * filesystem, etc.) This would allow us to properly detect any + * usage of subdirectories of an autofs directory. 
+ */ +#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */ +#define LX_AUTOFS_IOC_EXPIRE 0x81109365 /* arg: lx_autofs_expire * */ +#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */ + +typedef struct lx_autofs_expire { + int lap_protover; /* protocol version number */ + int lap_constant; /* always set to 1 */ + int lap_name_len; /* don't include newline or NULL */ + char lap_name[256]; /* path component that has timed out */ +} lx_autofs_expire_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUTOFS_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h new file mode 100644 index 0000000000..9c5517b8d5 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_AUTOFS_IMPL_H +#define _LX_AUTOFS_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/file.h> +#include <sys/id_space.h> +#include <sys/modhash.h> +#include <sys/vnode.h> + +#include <sys/lx_autofs.h> + +/* + * Space key. + * Used to persist data across lx_autofs filesystem module unloads. + */ +#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev" + +/* + * Name of the backing store directory. + */ +#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME + +#define LX_AUTOFS_VFS_ID_HASH_SIZE 15 +#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15 +#define LX_AUTOFS_VFS_VN_HASH_SIZE 15 + +/* + * VFS data object. + */ +typedef struct lx_autofs_vfs { + /* Info about the underlying filesystem and backing store. */ + vnode_t *lav_mvp; + char *lav_bs_name; + vnode_t *lav_bs_vp; + + /* Info about the automounter process managing this filesystem. */ + int lav_fd; + pid_t lav_pgrp; + file_t *lav_fifo_wr; + file_t *lav_fifo_rd; + + /* Each automount request needs a unique id. */ + id_space_t *lav_ids; + + /* All remaining structure members are protected by lav_lock. */ + kmutex_t lav_lock; + + /* Hashes to keep track of outstanding automounter requests. */ + mod_hash_t *lav_path_hash; + mod_hash_t *lav_id_hash; + + /* We need to keep track of all our vnodes. */ + vnode_t *lav_root; + mod_hash_t *lav_vn_hash; +} lx_autofs_vfs_t; + +/* + * Structure to keep track of requests sent to the automounter. + */ +typedef struct lx_autofs_lookup_req { + /* Packet that gets sent to the automounter. */ + lx_autofs_pkt_t lalr_pkt; + + /* Reference count. Always updated atomically. */ + uint_t lalr_ref; + + /* + * Fields to keep track of and sync threads waiting on a lookup. + * Fields are protected by lalr_lock. + */ + kmutex_t lalr_lock; + kcondvar_t lalr_cv; + int lalr_complete; +} lx_autofs_lookup_req_t; + +/* + * Generic stack structure. 
+ */ +typedef struct stack_elem { + list_node_t se_list; + caddr_t se_ptr1; + caddr_t se_ptr2; + caddr_t se_ptr3; +} stack_elem_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUTOFS_IMPL_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h new file mode 100644 index 0000000000..b489fabc9f --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -0,0 +1,232 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_BRAND_H +#define _LX_BRAND_H + +#ifndef _ASM +#include <sys/types.h> +#include <sys/cpuvar.h> +#include <sys/zone.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_BRANDNAME "lx" + +/* + * Brand uname info + */ +#define LX_UNAME_SYSNAME "Linux" +#define LX_UNAME_RELEASE_2_6 "2.6.18" +#define LX_UNAME_RELEASE_2_4 "2.4.21" +#define LX_UNAME_VERSION "BrandZ fake linux" +#define LX_UNAME_MACHINE "i686" + +#define LX_LINKER_NAME "ld-linux.so.2" +#define LX_LINKER "/lib/" LX_LINKER_NAME +#define LX_LIBC_NAME "libc.so.6" +#define LIB_PATH "/native/usr/lib/" +#define LX_LIB "lx_brand.so.1" +#define LX_LIB_PATH LIB_PATH LX_LIB + +#define LX_NSYSCALLS_2_4 270 +#define LX_NSYSCALLS_2_6 317 +#define LX_NSYSCALLS LX_NSYSCALLS_2_6 + +#define LX_KERN_2_4 0 +#define LX_KERN_2_6 1 + +/* + * brand(2) subcommands + * + * Everything >= 128 is a brand-specific subcommand. + * 192 to 462 are reserved for system calls, although most of that space is + * unused. + */ +#define B_LPID_TO_SPAIR 128 +#define B_SYSENTRY 129 +#define B_SYSRETURN 130 +#define B_PTRACE_SYSCALL 131 +#define B_SET_AFFINITY_MASK 132 +#define B_GET_AFFINITY_MASK 133 + +#define B_EMULATE_SYSCALL 192 + +#define LX_VERSION_1 1 +#define LX_VERSION LX_VERSION_1 + +#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS +#define LX_KERN_VERSION_NUM (ZONE_ATTR_BRAND_ATTRS + 1) + +/* Aux vector containing phdr of linux executable, used by lx_librtld_db */ +#define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1 + +/* Aux vector containing hz value */ +#define AT_CLKTCK 17 + +#ifndef _ASM + +typedef struct lx_brand_registration { + uint_t lxbr_version; /* version number */ + void *lxbr_handler; /* base address of handler */ + void *lxbr_tracehandler; /* base address of trace handler */ + void *lxbr_traceflag; /* address of trace flag */ +} lx_brand_registration_t; + +#ifdef _SYSCALL32 +typedef struct lx_brand_registration32 { + uint32_t lxbr_version; /* version number */ + caddr32_t lxbr_handler; /* 
base address of handler */ + caddr32_t lxbr_tracehandler; /* base address of trace handler */ + caddr32_t lxbr_traceflag; /* address of trace flag */ +} lx_brand_registration32_t; +#endif + +typedef struct lx_regs { + long lxr_gs; + long lxr_edi; + long lxr_esi; + long lxr_ebp; + long lxr_esp; + long lxr_ebx; + long lxr_edx; + long lxr_ecx; + long lxr_eax; + long lxr_eip; + + long lxr_orig_eax; +} lx_regs_t; + +#endif /* _ASM */ + +/* + * GDT usage + */ +#define GDT_TLSMIN (GDT_BRANDMIN) +#define GDT_TLSMAX (GDT_TLSMIN + 2) +#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN) + +#ifndef _ASM + +/* + * Stores information needed by the lx linker to launch the main + * lx executable. + */ +typedef struct lx_elf_data { + int ed_phdr; + int ed_phent; + int ed_phnum; + int ed_entry; + int ed_base; + int ed_ldentry; +} lx_elf_data_t; + +#ifdef _KERNEL + +typedef struct lx_proc_data { + uintptr_t l_handler; /* address of user-space handler */ + uintptr_t l_tracehandler; /* address of user-space traced handler */ + uintptr_t l_traceflag; /* address of 32-bit tracing flag */ + void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */ + pid_t l_ppid; /* pid of originating parent proc */ + uint64_t l_ptrace; /* process being observed with ptrace */ + lx_elf_data_t l_elf_data; /* ELF data for linux executable */ +} lx_proc_data_t; + +#endif /* _KERNEL */ + +/* + * A data type big enough to bitmap all Linux possible cpus. + * The bitmap size is defined as 1024 cpus in the Linux 2.4 and 2.6 man pages + * for sched_getaffinity() and sched_getaffinity(). + */ +#define LX_NCPU (1024) +#define LX_AFF_ULONGS (LX_NCPU / (8 * sizeof (ulong_t))) +typedef ulong_t lx_affmask_t[LX_AFF_ULONGS]; + +#ifdef _KERNEL + +/* + * lx-specific data in the klwp_t + */ +typedef struct lx_lwp_data { + uint_t br_lwp_flags; /* misc. 
flags */ + klwp_t *br_lwp; /* back pointer to container lwp */ + int br_signal; /* signal to send to parent when */ + /* clone()'ed child terminates */ + int br_exitwhy; /* reason for thread (process) exit */ + int br_exitwhat; /* exit code / killing signal */ + lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */ + struct user_desc br_tls[LX_TLSNUM]; + /* descriptors used by libc for TLS */ + pid_t br_pid; /* converted pid for this thread */ + pid_t br_tgid; /* thread group ID for this thread */ + pid_t br_ppid; /* parent pid for this thread */ + id_t br_ptid; /* parent tid for this thread */ + void *br_clear_ctidp; /* clone thread id ptr */ + void *br_set_ctidp; /* clone thread id ptr */ + + /* + * The following struct is used by lx_clone() + * to pass info into fork() + */ + void *br_clone_args; + + uint_t br_ptrace; /* ptrace is active for this LWP */ +} lx_lwp_data_t; + +/* brand specific data */ +typedef struct lx_zone_data { + int lxzd_kernel_version; + int lxzd_max_syscall; +} lx_zone_data_t; + +#define BR_CPU_BOUND 0x0001 + +#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) +#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l)) +#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data) + +void lx_brand_int80_callback(void); +int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t); + +extern int lx_get_zone_kern_version(zone_t *); +extern int lx_get_kern_version(void); + +extern int lx_debug; +#define lx_print if (lx_debug) printf + +#endif /* _KERNEL */ +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_BRAND_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h new file mode 100644 index 0000000000..b5c5334bff --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_FUTEX_H +#define _SYS_LX_FUTEX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 +#define FUTEX_CMP_REQUEUE 4 +#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE + +#ifdef _KERNEL +extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout, + uintptr_t addr2, int val2); +extern void lx_futex_init(void); +extern int lx_futex_fini(void); +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_FUTEX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h new file mode 100644 index 0000000000..12f1aab2b3 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_IMPL_H +#define _LX_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int64_t (*llfcn_t)(); + +typedef struct lx_sysent { + int sy_flags; + char *sy_name; + llfcn_t sy_callc; + char sy_narg; +} lx_sysent_t; + +typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t, + ulong_t, ulong_t); + + +extern lx_sysent_t lx_sysent[]; + +extern lx_systrace_f *lx_systrace_entry_ptr; +extern lx_systrace_f *lx_systrace_return_ptr; + +extern void lx_brand_systrace_enable(void); +extern void lx_brand_systrace_disable(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_IMPL_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h new file mode 100644 index 0000000000..5080c3adae --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LINUX_LDT_H +#define _SYS_LINUX_LDT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/segments.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct ldt_info { + uint_t entry_number; + uint_t base_addr; + uint_t limit; + uint_t seg_32bit:1, + contents:2, + read_exec_only:1, + limit_in_pages:1, + seg_not_present:1, + useable:1; +}; + +#define LDT_INFO_EMPTY(info) \ + ((info)->base_addr == 0 && (info)->limit == 0 && \ + (info)->contents == 0 && (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && (info)->useable == 0) + +#if defined(__amd64) +#define SETMODE(desc) (desc)->usd_long = SDP_SHORT; +#else +#define SETMODE(desc) +#endif + +#define LDT_INFO_TO_DESC(info, desc) { \ + USEGD_SETBASE(desc, (info)->base_addr); \ + USEGD_SETLIMIT(desc, (info)->limit); \ + (desc)->usd_type = ((info)->contents << 2) | \ + ((info)->read_exec_only ^ 1) << 1 | 0x10; \ + (desc)->usd_dpl = SEL_UPL; \ + (desc)->usd_p = (info)->seg_not_present ^ 1; \ + (desc)->usd_def32 = (info)->seg_32bit; \ + (desc)->usd_gran = (info)->limit_in_pages; \ + (desc)->usd_avl = (info)->useable; \ + SETMODE(desc); \ +} + +#define DESC_TO_LDT_INFO(desc, info) { \ + bzero((info), sizeof (*(info))); \ + (info)->base_addr = USEGD_GETBASE(desc); \ + (info)->limit = USEGD_GETLIMIT(desc); \ + (info)->seg_not_present = (desc)->usd_p ^ 1; \ + (info)->contents = ((desc)->usd_type >> 2) & 3; \ + (info)->read_exec_only = (((desc)->usd_type 
>> 1) & 1) ^ 1; \ + (info)->seg_32bit = (desc)->usd_def32; \ + (info)->limit_in_pages = (desc)->usd_gran; \ + (info)->useable = (desc)->usd_avl; \ +} + +extern void lx_set_gdt(int, user_desc_t *); +extern void lx_clear_gdt(int); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_LDT_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h new file mode 100644 index 0000000000..80c8079f0b --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_PID_H +#define _SYS_LX_PID_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/note.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +struct lx_pid { + pid_t s_pid; /* the solaris pid and ... */ + id_t s_tid; /* ... 
tid pair */ + pid_t l_pid; /* the corresponding linux pid */ + time_t l_start; /* birthday of this pid */ + struct pid *l_pidp; + struct lx_pid *stol_next; /* link in stol hash table */ + struct lx_pid *ltos_next; /* link in ltos hash table */ +}; + +extern int lx_pid_assign(kthread_t *); +extern void lx_pid_reassign(kthread_t *); +extern void lx_pid_rele(pid_t, id_t); +extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *); +extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *); +extern void lx_pid_init(void); +extern void lx_pid_fini(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_PID_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h new file mode 100644 index 0000000000..74bbc939a3 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_PTM_LINUX_H +#define _SYS_PTM_LINUX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_PTM_DRV "lx_ptm" +#define LX_PTM_MINOR_NODE "lx_ptmajor" + +#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PTM_LINUX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h new file mode 100644 index 0000000000..b0ae748f3c --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LINUX_SCHED_H +#define _SYS_LINUX_SCHED_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/procset.h> +#include <sys/priocntl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Linux scheduler policies. 
+ */ +#define LX_SCHED_OTHER 0 +#define LX_SCHED_FIFO 1 +#define LX_SCHED_RR 2 + +#define LX_PRI_MAX 99 + +typedef int l_pid_t; + +struct lx_sched_param { + int lx_sched_prio; +}; + +extern int sched_setprocset(procset_t *, l_pid_t); +extern long do_priocntlsys(int, procset_t *, void *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_SCHED_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h new file mode 100644 index 0000000000..b4d41d5241 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LINUX_SYSCALLS_H +#define _SYS_LINUX_SYSCALLS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +extern long lx_brk(); +extern long lx_getpid(); +extern long lx_getppid(); +extern long lx_clone(); +extern long lx_kill(); +extern long lx_tkill(); +extern long lx_modify_ldt(); +extern long lx_gettid(); +extern long lx_futex(); +extern long lx_get_thread_area(); +extern long lx_sched_getparam(); +extern long lx_sched_getscheduler(); +extern long lx_sched_rr_get_interval(); +extern long lx_sched_setparam(); +extern long lx_sched_setscheduler(); +extern long lx_set_thread_area(); +extern long lx_set_tid_address(); +extern long lx_setresgid(); +extern long lx_setresgid16(); +extern long lx_setresuid(); +extern long lx_setresuid16(); +extern long lx_sysinfo(); +extern long lx_setgroups(); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_SYSCALLS_H */ diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c new file mode 100644 index 0000000000..19a7577ac0 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> + +/* + * The brk() system call needs to be in-kernel because Linux expects a call to + * brk(0) to return the current breakpoint. In Solaris, the process breakpoint + * is setup and managed by libc. Due to the way we link our libraries and the + * need for Linux to manage its own breakpoint, this has to remain in the + * kernel. + */ +extern int brk(caddr_t); + +long +lx_brk(caddr_t nva) +{ + proc_t *p = curproc; + klwp_t *lwp = ttolwp(curthread); + + if (nva != 0) { + (void) brk(nva); + + /* + * Despite claims to the contrary in the manpage, when Linux + * brk() fails, errno is left unchanged. + */ + lwp->lwp_errno = 0; + } + return ((long)(p->p_brkbase + p->p_brksize)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c new file mode 100644 index 0000000000..2af3c00bae --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_ldt.h> + +#define LX_CSIGNAL 0x000000ff +#define LX_CLONE_VM 0x00000100 +#define LX_CLONE_FS 0x00000200 +#define LX_CLONE_FILES 0x00000400 +#define LX_CLONE_SIGHAND 0x00000800 +#define LX_CLONE_PID 0x00001000 +#define LX_CLONE_PTRACE 0x00002000 +#define LX_CLONE_PARENT 0x00008000 +#define LX_CLONE_THREAD 0x00010000 +#define LX_CLONE_SYSVSEM 0x00040000 +#define LX_CLONE_SETTLS 0x00080000 +#define LX_CLONE_PARENT_SETTID 0x00100000 +#define LX_CLONE_CHILD_CLEARTID 0x00200000 +#define LX_CLONE_DETACH 0x00400000 +#define LX_CLONE_CHILD_SETTID 0x01000000 + +/* + * Our lwp has already been created at this point, so this routine is + * responsible for setting up all the state needed to track this as a + * linux cloned thread. 
+ */ +/* ARGSUSED */ +long +lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp) +{ + struct lx_lwp_data *lwpd = ttolxlwp(curthread); + struct ldt_info info; + struct user_desc descr; + int tls_index; + int entry = -1; + int signo; + + signo = flags & LX_CSIGNAL; + if (signo < 0 || signo > MAXSIG) + return (set_errno(EINVAL)); + + if (flags & LX_CLONE_SETTLS) { + if (copyin((caddr_t)ldtinfo, &info, sizeof (info))) + return (set_errno(EFAULT)); + + if (LDT_INFO_EMPTY(&info)) + return (set_errno(EINVAL)); + + entry = info.entry_number; + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + tls_index = entry - GDT_TLSMIN; + + /* + * Convert the user-space structure into a real x86 + * descriptor and copy it into this LWP's TLS array. We + * also load it into the GDT. + */ + LDT_INFO_TO_DESC(&info, &descr); + bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr)); + lx_set_gdt(entry, &lwpd->br_tls[tls_index]); + } else { + tls_index = -1; + bzero(&descr, sizeof (descr)); + } + + lwpd->br_clear_ctidp = + (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL; + + if (signo && ! 
(flags & LX_CLONE_DETACH)) + lwpd->br_signal = signo; + else + lwpd->br_signal = 0; + + if (flags & LX_CLONE_THREAD) + lwpd->br_tgid = curthread->t_procp->p_pid; + + if (flags & LX_CLONE_PARENT) + lwpd->br_ppid = 0; + + if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) && + (suword32(ctidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } + if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) && + (suword32(ptidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } + + return (lwpd->br_pid); +} + +long +lx_set_tid_address(int *tidp) +{ + struct lx_lwp_data *lwpd = ttolxlwp(curthread); + + lwpd->br_clear_ctidp = tidp; + + return (lwpd->br_pid); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c new file mode 100644 index 0000000000..ee5fa7993d --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c @@ -0,0 +1,471 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/debug.h> +#include <vm/as.h> +#include <vm/seg.h> +#include <vm/seg_vn.h> +#include <vm/page.h> +#include <sys/mman.h> +#include <sys/timer.h> +#include <sys/condvar.h> +#include <sys/inttypes.h> +#include <sys/lx_futex.h> + +/* + * Futexes are a Linux-specific implementation of inter-process mutexes. + * They are designed to use shared memory for simple, uncontested + * operations, and rely on the kernel to resolve any contention issues. + * + * Most of the information in this section comes from the paper "Futexes + * Are Tricky", by Ulrich Drepper. This paper is currently available at: + * http://people.redhat.com/~drepper/futex.pdf. + * + * A futex is itself a 4-byte integer, which must be 4-byte aligned. The + * value of this integer is expected to be modified using user-level atomic + * operations. The futex(4) design itself does not impose any semantic + * constraints on the value stored in the futex; it is up to the + * application to define its own protocol. + * + * When the application decides that kernel intervention is required, it + * will use the futex(2) system call. There are 5 different operations + * that can be performed on a futex, using this system call. Since this + * interface has evolved over time, there are several different prototypes + * available to the user. Fortunately, there is only a single kernel-level + * interface: + * + * long sys_futex(void *futex1, int cmd, int val1, + * struct timespec *timeout, void *futex2, int val2) + * + * The kernel-level operations that may be performed on a futex are: + * + * FUTEX_WAIT + * + * Atomically verify that futex1 contains the value val1. If it + * doesn't, return EWOULDBLOCK. If it does contain the expected + * value, the thread will sleep until somebody performs a FUTEX_WAKE + * on the futex.
The caller may also specify a timeout, indicating + * the maximum time the thread should sleep. If the timer expires, + * the call returns ETIMEDOUT. If the thread is awoken with a signal, + * the call returns EINTR. Otherwise, the call returns 0. + * + * FUTEX_WAKE + * + * Wake up val1 processes that are waiting on futex1. The call + * returns the number of blocked threads that were woken up. + * + * FUTEX_CMP_REQUEUE + * + * If the value stored in futex1 matches that passed in in val2, wake + * up val1 processes that are waiting on futex1. Otherwise, return + * EAGAIN. + * + * If there are more than val1 threads waiting on the futex, remove + * the remaining threads from this futex, and requeue them on futex2. + * The caller can limit the number of threads being requeued by + * encoding an integral numerical value in the position usually used + * for the timeout pointer. + * + * The call returns the number of blocked threads that were woken up + * or requeued. + * + * FUTEX_REQUEUE + * + * Identical to FUTEX_CMP_REQUEUE except that it does not use val2. + * This command has been declared broken and obsolete, but we still + * need to support it. + * + * FUTEX_FD + * + * Return a file descriptor, which can be used to refer to the futex. + * We don't support this operation. + */ + +/* + * This structure is used to track all the threads currently waiting on a + * futex. There is one fwaiter_t for each blocked thread. We store all + * fwaiter_t's in a hash structure, indexed by the memid_t of the integer + * containing the futex's value. + * + * At the moment, all fwaiter_t's for a single futex are simply dumped into + * the hash bucket. If futex contention ever becomes a hot path, we can + * chain a single futex's waiters together. 
+ */ +typedef struct fwaiter { + memid_t fw_memid; /* memid of the user-space futex */ + kcondvar_t fw_cv; /* cond var */ + struct fwaiter *fw_next; /* hash queue */ + struct fwaiter *fw_prev; /* hash queue */ + volatile int fw_woken; +} fwaiter_t; + +#define MEMID_COPY(s, d) \ + { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; } +#define MEMID_EQUAL(s, d) \ + ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1]) + +/* Borrowed from the page freelist hash code. */ +#define HASH_SHIFT_SZ 7 +#define HASH_SIZE (1 << HASH_SHIFT_SZ) +#define HASH_FUNC(id) \ + ((((uintptr_t)((id)->val[1]) >> PAGESHIFT) + \ + ((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) + \ + ((uintptr_t)((id)->val[0]) >> 3) + \ + ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \ + ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \ + (HASH_SIZE - 1)) + +static fwaiter_t *futex_hash[HASH_SIZE]; +static kmutex_t futex_hash_lock[HASH_SIZE]; + +static void +futex_hashin(fwaiter_t *fwp) +{ + int index; + + index = HASH_FUNC(&fwp->fw_memid); + ASSERT(MUTEX_HELD(&futex_hash_lock[index])); + + fwp->fw_prev = NULL; + fwp->fw_next = futex_hash[index]; + if (fwp->fw_next) + fwp->fw_next->fw_prev = fwp; + futex_hash[index] = fwp; +} + +static void +futex_hashout(fwaiter_t *fwp) +{ + int index; + + index = HASH_FUNC(&fwp->fw_memid); + ASSERT(MUTEX_HELD(&futex_hash_lock[index])); + + if (fwp->fw_prev) + fwp->fw_prev->fw_next = fwp->fw_next; + if (fwp->fw_next) + fwp->fw_next->fw_prev = fwp->fw_prev; + if (futex_hash[index] == fwp) + futex_hash[index] = fwp->fw_next; + + fwp->fw_prev = NULL; + fwp->fw_next = NULL; +} + +/* + * Go to sleep until somebody does a WAKE operation on this futex, we get a + * signal, or the timeout expires. 
+ */ +static int +futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout) +{ + int err, ret; + int32_t curval; + fwaiter_t fw; + int index; + + fw.fw_woken = 0; + MEMID_COPY(memid, &fw.fw_memid); + cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL); + + index = HASH_FUNC(&fw.fw_memid); + mutex_enter(&futex_hash_lock[index]); + + if (fuword32(addr, (uint32_t *)&curval)) { + err = set_errno(EFAULT); + goto out; + } + if (curval != val) { + err = set_errno(EWOULDBLOCK); + goto out; + } + + futex_hashin(&fw); + + err = 0; + while ((fw.fw_woken == 0) && (err == 0)) { + ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index], + timeout, timechanged); + if (ret < 0) + err = set_errno(ETIMEDOUT); + else if (ret == 0) + err = set_errno(EINTR); + } + + /* + * The futex is normally hashed out in wakeup. If we timed out or + * got a signal, we need to hash it out here instead. + */ + if (fw.fw_woken == 0) + futex_hashout(&fw); + +out: + mutex_exit(&futex_hash_lock[index]); + + return (err); +} + +/* + * Wake up to wake_threads threads that are blocked on the futex at memid. + */ +static int +futex_wake(memid_t *memid, int wake_threads) +{ + fwaiter_t *fwp, *next; + int index; + int ret = 0; + + index = HASH_FUNC(memid); + + mutex_enter(&futex_hash_lock[index]); + + for (fwp = futex_hash[index]; fwp && ret < wake_threads; fwp = next) { + next = fwp->fw_next; + if (MEMID_EQUAL(&fwp->fw_memid, memid)) { + futex_hashout(fwp); + fwp->fw_woken = 1; + cv_signal(&fwp->fw_cv); + ret++; + } + } + + mutex_exit(&futex_hash_lock[index]); + + return (ret); +} + +/* + * Wake up to wake_threads waiting on the futex at memid. If there are + * more than that many threads waiting, requeue the remaining threads on + * the futex at requeue_memid. 
+ */ +static int +futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads, + ulong_t requeue_threads, caddr_t addr, int *cmpval) +{ + fwaiter_t *fwp, *next; + int index1, index2; + int ret = 0; + int32_t curval; + kmutex_t *l1, *l2; + + /* + * To ensure that we don't miss a wakeup if the value of cmpval + * changes, we need to grab locks on both the original and new hash + * buckets. To avoid deadlock, we always grab the lower-indexed + * lock first. + */ + index1 = HASH_FUNC(memid); + index2 = HASH_FUNC(requeue_memid); + + if (index1 == index2) { + l1 = &futex_hash_lock[index1]; + l2 = NULL; + } else if (index1 < index2) { + l1 = &futex_hash_lock[index1]; + l2 = &futex_hash_lock[index2]; + } else { + l1 = &futex_hash_lock[index2]; + l2 = &futex_hash_lock[index1]; + } + + mutex_enter(l1); + if (l2 != NULL) + mutex_enter(l2); + + if (cmpval != NULL) { + if (fuword32(addr, (uint32_t *)&curval)) { + ret = -EFAULT; + goto out; + } + if (curval != *cmpval) { + ret = -EAGAIN; + goto out; + } + } + + for (fwp = futex_hash[index1]; fwp; fwp = next) { + next = fwp->fw_next; + if (!MEMID_EQUAL(&fwp->fw_memid, memid)) + continue; + + futex_hashout(fwp); + if (ret++ < wake_threads) { + fwp->fw_woken = 1; + cv_signal(&fwp->fw_cv); + } else { + MEMID_COPY(requeue_memid, &fwp->fw_memid); + futex_hashin(fwp); + + if ((ret - wake_threads) >= requeue_threads) + break; + } + } + +out: + if (l2 != NULL) + mutex_exit(l2); + mutex_exit(l1); + + if (ret < 0) + return (set_errno(-ret)); + return (ret); +} + +/* + * Copy in the relative timeout provided by the application and convert it + * to an absolute timeout. 
+ */ +static int +get_timeout(void *lx_timeout, timestruc_t *timeout) +{ + timestruc_t now; + + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyin(lx_timeout, timeout, sizeof (timestruc_t))) + return (EFAULT); + } +#ifdef _SYSCALL32_IMPL + else { + timestruc32_t timeout32; + if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t))) + return (EFAULT); + timeout->tv_sec = (time_t)timeout32.tv_sec; + timeout->tv_nsec = timeout32.tv_nsec; + } +#endif + gethrestime(&now); + + if (itimerspecfix(timeout)) + return (EINVAL); + + timespecadd(timeout, &now); + return (0); +} + +long +lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout, + uintptr_t addr2, int val2) +{ + struct as *as = curproc->p_as; + memid_t memid, requeue_memid; + timestruc_t timeout; + timestruc_t *tptr = NULL; + int requeue_threads = NULL; + int *requeue_cmp = NULL; + int rval = 0; + + /* must be aligned on int boundary */ + if (addr & 0x3) + return (set_errno(EINVAL)); + + /* Sanity check the futex command */ + if (cmd < 0 || cmd > FUTEX_MAX_CMD) + return (set_errno(EINVAL)); + + /* Copy in the timeout structure from userspace. */ + if (cmd == FUTEX_WAIT && lx_timeout != NULL) { + rval = get_timeout((timespec_t *)lx_timeout, &timeout); + if (rval != 0) + return (set_errno(rval)); + tptr = &timeout; + } + + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) { + if (cmd == FUTEX_CMP_REQUEUE) + requeue_cmp = &val2; + + /* + * lx_timeout is nominally a pointer to a userspace + * address. For these two commands, it actually contains + * an integer which indicates the maximum number of threads + * to requeue. This is horrible, and I'm sorry. + */ + requeue_threads = (int)lx_timeout; + } + + /* + * Translate the process-specific, user-space futex virtual + * address(es) to universal memid. 
+ */ + rval = as_getmemid(as, (void *)addr, &memid); + if (rval != 0) + return (set_errno(rval)); + + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) { + rval = as_getmemid(as, (void *)addr2, &requeue_memid); + if (rval) + return (set_errno(rval)); + } + + switch (cmd) { + case FUTEX_WAIT: + rval = futex_wait(&memid, (void *)addr, val, tptr); + break; + + case FUTEX_WAKE: + rval = futex_wake(&memid, val); + break; + + case FUTEX_CMP_REQUEUE: + case FUTEX_REQUEUE: + rval = futex_requeue(&memid, &requeue_memid, val, + requeue_threads, (void *)addr2, requeue_cmp); + + break; + } + + return (rval); +} + +void +lx_futex_init(void) +{ + int i; + + for (i = 0; i < HASH_SIZE; i++) + mutex_init(&futex_hash_lock[i], NULL, MUTEX_DEFAULT, NULL); + bzero(futex_hash, sizeof (futex_hash)); +} + +int +lx_futex_fini(void) +{ + int i, err; + + err = 0; + for (i = 0; (err == 0) && (i < HASH_SIZE); i++) { + mutex_enter(&futex_hash_lock[i]); + if (futex_hash[i] != NULL) + err = EBUSY; + mutex_exit(&futex_hash_lock[i]); + } + return (err); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c new file mode 100644 index 0000000000..aa8b2b40e1 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zone.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> + +/* + * return the pid + */ +long +lx_getpid() +{ + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + long rv; + + if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) { + rv = 1; + } else { + ASSERT(lwpd != NULL); + rv = lwpd->br_tgid; + } + + return (rv); +} + +/* + * return the parent pid + */ +long +lx_getppid(void) +{ + return (lx_lwp_ppid(ttolwp(curthread), NULL, NULL)); +} + +/* + * return the thread id + */ +long +lx_gettid(void) +{ + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + + return (lwpd->br_pid); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c new file mode 100644 index 0000000000..5ca18b7556 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/zone.h> +#include <sys/cred_impl.h> +#include <sys/policy.h> + +typedef ushort_t l_uid16_t; +typedef ushort_t l_gid16_t; +typedef uint_t l_uid_t; +typedef uint_t l_gid_t; + +#define LINUX_UID16_TO_UID32(uid16) \ + (((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16)) + +#define LINUX_GID16_TO_GID32(gid16) \ + (((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16)) + +#define LX_NGROUPS_MAX 32 +extern int setgroups(int, gid_t *); + +/* + * This function is based on setreuid in common/syscall/uid.c and exists + * because Solaris does not have a way to explicitly set the saved uid (suid) + * from any other system call. + */ +long +lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid) +{ + proc_t *p; + int error = 0; + int do_nocd = 0; + int uidchge = 0; + uid_t oldruid = ruid; + cred_t *cr, *newcr; + zoneid_t zoneid = getzoneid(); + + if ((ruid != -1 && (ruid > MAXUID)) || + (euid != -1 && (euid > MAXUID)) || + (suid != -1 && (suid > MAXUID))) { + error = EINVAL; + goto done; + } + + /* + * Need to pre-allocate the new cred structure before grabbing + * the p_crlock mutex. 
+ */ + newcr = cralloc(); + + p = ttoproc(curthread); + +retry: + mutex_enter(&p->p_crlock); + cr = p->p_cred; + + if (ruid != -1 && + ruid != cr->cr_ruid && ruid != cr->cr_uid && + ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) { + error = EPERM; + } else if (euid != -1 && + euid != cr->cr_ruid && euid != cr->cr_uid && + euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) { + error = EPERM; + } else if (suid != -1 && + suid != cr->cr_ruid && suid != cr->cr_uid && + suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) { + error = EPERM; + } else { + if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) { + /* + * The ruid of the process is going to change. In order + * to avoid a race condition involving the + * process count associated with the newly given ruid, + * we increment the count before assigning the + * credential to the process. + * To do that, we'll have to take pidlock, so we first + * release p_crlock. + */ + mutex_exit(&p->p_crlock); + uidchge = 1; + mutex_enter(&pidlock); + upcount_inc(ruid, zoneid); + mutex_exit(&pidlock); + /* + * As we released p_crlock we can't rely on the cr + * we read. So retry the whole thing. + */ + goto retry; + } + crhold(cr); + crcopy_to(cr, newcr); + p->p_cred = newcr; + + if (euid != -1) + newcr->cr_uid = euid; + if (suid != -1) + newcr->cr_suid = suid; + if (ruid != -1) { + oldruid = newcr->cr_ruid; + newcr->cr_ruid = ruid; + ASSERT(ruid != oldruid ? uidchge : 1); + } + + /* + * A process that gives up its privilege + * must be marked to produce no core dump. + */ + if ((cr->cr_uid != newcr->cr_uid || + cr->cr_ruid != newcr->cr_ruid || + cr->cr_suid != newcr->cr_suid)) + do_nocd = 1; + + crfree(cr); + } + mutex_exit(&p->p_crlock); + + /* + * We decrement the number of processes associated with the oldruid + * to match the increment above, even if the ruid of the process + * did not change or an error occurred (oldruid == uid). 
+ */ + if (uidchge) { + ASSERT(oldruid != -1 && ruid != -1); + mutex_enter(&pidlock); + upcount_dec(oldruid, zoneid); + mutex_exit(&pidlock); + } + + if (error == 0) { + if (do_nocd) { + mutex_enter(&p->p_lock); + p->p_flag |= SNOCD; + mutex_exit(&p->p_lock); + } + crset(p, newcr); /* broadcast to process threads */ + goto done; + } + crfree(newcr); +done: + if (error) + return (set_errno(error)); + else + return (0); +} + +long +lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16) +{ + long rval; + + rval = lx_setresuid( + LINUX_UID16_TO_UID32(ruid16), + LINUX_UID16_TO_UID32(euid16), + LINUX_UID16_TO_UID32(suid16)); + + return (rval); +} + +/* + * This function is based on setregid in common/syscall/gid.c + */ +long +lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid) +{ + proc_t *p; + int error = 0; + int do_nocd = 0; + cred_t *cr, *newcr; + + if ((rgid != -1 && (rgid > MAXUID)) || + (egid != -1 && (egid > MAXUID)) || + (sgid != -1 && (sgid > MAXUID))) { + error = EINVAL; + goto done; + } + + /* + * Need to pre-allocate the new cred structure before grabbing + * the p_crlock mutex. + */ + newcr = cralloc(); + + p = ttoproc(curthread); + mutex_enter(&p->p_crlock); + cr = p->p_cred; + + if (rgid != -1 && + rgid != cr->cr_rgid && rgid != cr->cr_gid && + rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) { + error = EPERM; + } else if (egid != -1 && + egid != cr->cr_rgid && egid != cr->cr_gid && + egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) { + error = EPERM; + } else if (sgid != -1 && + sgid != cr->cr_rgid && sgid != cr->cr_gid && + sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) { + error = EPERM; + } else { + crhold(cr); + crcopy_to(cr, newcr); + p->p_cred = newcr; + + if (egid != -1) + newcr->cr_gid = egid; + if (sgid != -1) + newcr->cr_sgid = sgid; + if (rgid != -1) + newcr->cr_rgid = rgid; + + /* + * A process that gives up its privilege + * must be marked to produce no core dump. 
+ */ + if ((cr->cr_gid != newcr->cr_gid || + cr->cr_rgid != newcr->cr_rgid || + cr->cr_sgid != newcr->cr_sgid)) + do_nocd = 1; + + crfree(cr); + } + mutex_exit(&p->p_crlock); + + if (error == 0) { + if (do_nocd) { + mutex_enter(&p->p_lock); + p->p_flag |= SNOCD; + mutex_exit(&p->p_lock); + } + crset(p, newcr); /* broadcast to process threads */ + goto done; + } + crfree(newcr); +done: + if (error) + return (set_errno(error)); + else + return (0); +} + +long +lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16) +{ + long rval; + + rval = lx_setresgid( + LINUX_GID16_TO_GID32(rgid16), + LINUX_GID16_TO_GID32(egid16), + LINUX_GID16_TO_GID32(sgid16)); + + return (rval); +} + +/* + * Linux defines NGROUPS_MAX to be 32, but on Solaris it is only 16. We employ + * the terrible hack below so that tests may proceed, if only on DEBUG kernels. + */ +long +lx_setgroups(int ngroups, gid_t *grouplist) +{ +#ifdef DEBUG + if (ngroups > ngroups_max && ngroups <= LX_NGROUPS_MAX) + ngroups = ngroups_max; +#endif /* DEBUG */ + + return (setgroups(ngroups, grouplist)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c new file mode 100644 index 0000000000..a3983eeb75 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c @@ -0,0 +1,253 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/zone.h> +#include <sys/thread.h> +#include <sys/signal.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> +#include <lx_signum.h> + +extern int kill(pid_t, int); + +/* + * Check if it is legal to send this signal to the init process. Linux + * kill(2) semantics dictate that no _unhandled_ signal may be sent to pid + * 1. + */ +static int +init_sig_check(int sig, pid_t pid) +{ + proc_t *p; + int rv = 0; + + mutex_enter(&pidlock); + + if (((p = prfind(pid)) == NULL) || (p->p_stat == SIDL)) + rv = ESRCH; + else if (sig && (sigismember(&cantmask, sig) || + (PTOU(p)->u_signal[sig-1] == SIG_DFL) || + (PTOU(p)->u_signal[sig-1] == SIG_IGN))) + rv = EPERM; + + mutex_exit(&pidlock); + + return (rv); +} + +long +lx_tkill(pid_t pid, int lx_sig) +{ + kthread_t *t; + proc_t *pp; + pid_t initpid; + sigqueue_t *sqp; + struct lx_lwp_data *br = ttolxlwp(curthread); + int tid = 1; /* default tid */ + int sig, rv; + + /* + * Unlike kill(2), Linux tkill(2) doesn't allow signals to + * be sent to process IDs <= 0 as it doesn't overlay any special + * semantics on the pid. + */ + if ((pid <= 0) || ((lx_sig < 0) || (lx_sig >= LX_NSIG)) || + ((sig = ltos_signo[lx_sig]) < 0)) + return (set_errno(EINVAL)); + + /* + * If the Linux pid is 1, translate the pid to the actual init + * pid for the zone. Note that Linux dictates that no unhandled + * signals may be sent to init, so check for that, too. + * + * Otherwise, extract the tid and real pid from the Linux pid. 
+ */ + initpid = curproc->p_zone->zone_proc_initpid; + if (pid == 1) + pid = initpid; + if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0)) + return (set_errno(rv)); + else if (lx_lpid_to_spair(pid, &pid, &tid) < 0) + return (set_errno(ESRCH)); + + sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + + /* + * Find the process for the passed pid... + */ + mutex_enter(&pidlock); + if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) { + mutex_exit(&pidlock); + rv = set_errno(ESRCH); + goto free_and_exit; + } + mutex_enter(&pp->p_lock); + mutex_exit(&pidlock); + + /* + * Deny permission to send the signal if either of the following + * is true: + * + * + The signal is SIGCONT and the target pid is not in the same + * session as the sender + * + * + prochasprocperm() shows the user lacks sufficient permission + * to send the signal to the target pid + */ + if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) || + (!prochasprocperm(pp, curproc, CRED()))) { + mutex_exit(&pp->p_lock); + rv = set_errno(EPERM); + goto free_and_exit; + } + + /* check for the tid */ + if ((t = idtot(pp, tid)) == NULL) { + mutex_exit(&pp->p_lock); + rv = set_errno(ESRCH); + goto free_and_exit; + } + + /* a signal of 0 means just check for the existence of the thread */ + if (lx_sig == 0) { + mutex_exit(&pp->p_lock); + rv = 0; + goto free_and_exit; + } + + sqp->sq_info.si_signo = sig; + sqp->sq_info.si_code = SI_LWP; + sqp->sq_info.si_pid = br->br_pid; + sqp->sq_info.si_uid = crgetruid(CRED()); + sigaddqa(pp, t, sqp); + + mutex_exit(&pp->p_lock); + + return (0); + +free_and_exit: + kmem_free(sqp, sizeof (sigqueue_t)); + return (rv); +} + +long +lx_kill(pid_t lx_pid, int lx_sig) +{ + pid_t s_pid, initpid; + sigsend_t v; + zone_t *zone = curproc->p_zone; + struct proc *p; + int err, sig, nfound; + + if ((lx_sig < 0) || (lx_sig >= LX_NSIG) || + ((sig = ltos_signo[lx_sig]) < 0)) + return (set_errno(EINVAL)); + + /* + * Since some linux apps rely on init(1M) having PID 1, we + 
* transparently translate 1 to the real init(1M)'s pid. We then + * check to be sure that it is legal for this process to send this + * signal to init(1M). + */ + initpid = zone->zone_proc_initpid; + if (lx_pid == 1 || lx_pid == -1) { + s_pid = initpid; + } else if (lx_pid == 0) { + s_pid = 0; + } else if (lx_pid > 0) { + if (lx_lpid_to_spair(lx_pid, &s_pid, NULL) != 0) { + /* + * If we didn't find this pid that means it doesn't + * exist in this zone. + */ + return (set_errno(ESRCH)); + } + } else { + ASSERT(lx_pid < 0); + if (lx_lpid_to_spair(-lx_pid, &s_pid, NULL) != 0) { + /* + * If we didn't find this pid it means that the + * process group leader doesn't exist in this zone. + * In this case assuming that the Linux pid is + * the same as the Solaris pid will get us the + * correct behavior. + */ + s_pid = -lx_pid; + } + } + + if ((s_pid == initpid) && ((err = init_sig_check(sig, s_pid)) != 0)) + return (set_errno(err)); + + /* + * For individual processes, kill() semantics are the same between + * Solaris and Linux. + */ + if (lx_pid >= 0) + return (kill(s_pid, sig)); + + /* + * In Solaris, sending a signal to -pid means "send a signal to + * everyone in process group pid." In Linux it means "send a + * signal to everyone in the group other than init." Sending a + * signal to -1 means "send a signal to every process except init + * and myself." + */ + + bzero(&v, sizeof (v)); + v.sig = sig; + v.checkperm = 1; + v.sicode = SI_USER; + err = 0; + + mutex_enter(&pidlock); + + p = (lx_pid == -1) ? practive : pgfind(s_pid); + nfound = 0; + while (err == 0 && p != NULL) { + if ((p->p_zone == zone) && (p->p_stat != SIDL) && + (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) { + nfound++; + err = sigsendproc(p, &v); + } + + p = (lx_pid == -1) ? p->p_next : p->p_pglink; + } + mutex_exit(&pidlock); + if (nfound == 0) + err = ESRCH; + else if (err == 0 && v.perm == 0) + err = EPERM; + return (err ? 
set_errno(err) : 0); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c new file mode 100644 index 0000000000..aa6e12a7d8 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/segments.h> +#include <sys/archsystm.h> +#include <sys/proc.h> +#include <sys/sysi86.h> +#include <sys/cmn_err.h> +#include <sys/lx_ldt.h> + +/* + * Read the ldt_info structure in from the Linux app, convert it to an ssd + * structure, and then call setdscr() to do all the heavy lifting. 
+ */ +static int +write_ldt(void *data, ulong_t count) +{ + user_desc_t usd; + struct ssd ssd; + struct ldt_info ldt_inf; + proc_t *pp = curthread->t_procp; + int err; + + if (count != sizeof (ldt_inf)) + return (set_errno(EINVAL)); + + if (copyin(data, &ldt_inf, sizeof (ldt_inf))) + return (set_errno(EFAULT)); + + if (ldt_inf.entry_number >= MAXNLDT) + return (set_errno(EINVAL)); + + LDT_INFO_TO_DESC(&ldt_inf, &usd); + usd_to_ssd(&usd, &ssd, SEL_LDT(ldt_inf.entry_number)); + + /* + * Get everyone into a safe state before changing the LDT. + */ + if (!holdlwps(SHOLDFORK1)) + return (set_errno(EINTR)); + + err = setdscr(&ssd); + + /* + * Release the hounds! + */ + mutex_enter(&pp->p_lock); + continuelwps(pp); + mutex_exit(&pp->p_lock); + + return (err ? set_errno(err) : 0); +} + +static int +read_ldt(void *uptr, ulong_t count) +{ + proc_t *pp = curproc; + int bytes; + + if (pp->p_ldt == NULL) + return (0); + + bytes = (pp->p_ldtlimit + 1) * sizeof (user_desc_t); + if (bytes > count) + bytes = count; + + if (copyout(pp->p_ldt, uptr, bytes)) + return (set_errno(EFAULT)); + + return (bytes); +} + +long +lx_modify_ldt(int op, void *data, ulong_t count) +{ + int rval; + + switch (op) { + case 0: + rval = read_ldt(data, count); + break; + + case 1: + rval = write_ldt(data, count); + break; + + default: + rval = set_errno(ENOSYS); + break; + } + + return (rval); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c new file mode 100644 index 0000000000..bb91a752d2 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c @@ -0,0 +1,513 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/cpu.h> +#include <sys/rtpriocntl.h> +#include <sys/tspriocntl.h> +#include <sys/processor.h> +#include <sys/brand.h> +#include <sys/lx_pid.h> +#include <sys/lx_sched.h> +#include <sys/lx_brand.h> + +extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t); + +int +lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp, + int64_t *rval) +{ + pid_t s_pid; + id_t s_tid; + kthread_t *t = curthread; + lx_lwp_data_t *lx_lwp; + + if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK) + return (set_errno(EINVAL)); + + /* + * The caller wants to know how large the mask should be. + */ + if (cmd == B_GET_AFFINITY_MASK && len == 0) { + *rval = sizeof (lx_affmask_t); + return (0); + } + + /* + * Otherwise, ensure they have a large enough mask. + */ + if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) { + *rval = -1; + return (set_errno(EINVAL)); + } + + if (pid == 0) { + s_pid = curproc->p_pid; + s_tid = curthread->t_tid; + } else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) { + return (set_errno(ESRCH)); + } + + /* + * For now, we only support manipulating threads in the + * same process. 
+ */ + if (curproc->p_pid != s_pid) + return (set_errno(EPERM)); + + /* + * We must hold the process lock so that the thread list + * doesn't change while we're looking at it. We'll hold + * the lock until we no longer reference the + * corresponding lwp. + */ + + mutex_enter(&curproc->p_lock); + + do { + if (t->t_tid == s_tid) + break; + t = t->t_forw; + } while (t != curthread); + + /* + * If the given PID is in the current thread's process, + * then we _must_ find it in the process's thread list. + */ + ASSERT(t->t_tid == s_tid); + + lx_lwp = t->t_lwp->lwp_brand; + + if (cmd == B_SET_AFFINITY_MASK) { + if (copyin_nowatch((void *)maskp, &lx_lwp->br_affinitymask, + sizeof (lx_affmask_t)) != 0) { + mutex_exit(&curproc->p_lock); + return (set_errno(EFAULT)); + } + + *rval = 0; + } else { + if (copyout_nowatch(&lx_lwp->br_affinitymask, (void *)maskp, + sizeof (lx_affmask_t)) != 0) { + mutex_exit(&curproc->p_lock); + return (set_errno(EFAULT)); + } + + *rval = sizeof (lx_affmask_t); + } + + mutex_exit(&curproc->p_lock); + return (0); +} + +long +lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + procset_t procset_cid; + pcparms_t pcparm; + pcinfo_t pcinfo; + struct lx_sched_param sched_param; + tsparms_t *tsp; + int prio, maxupri; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if ((rv = sched_setprocset(&procset, pid))) + return (rv); + + if (copyin(param, &sched_param, sizeof (sched_param))) + return (set_errno(EFAULT)); + + prio = sched_param.lx_sched_prio; + + if (policy < 0) { + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the current policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if 
(strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR; + else + return (set_errno(EINVAL)); + } + + bzero(&pcinfo, sizeof (pcinfo)); + bzero(&pcparm, sizeof (pcparm)); + setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0); + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (prio < 0 || + prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri) + return (set_errno(EINVAL)); + pcparm.pc_cid = pcinfo.pc_cid; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = + policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF; + break; + + case LX_SCHED_OTHER: + (void) strcpy(pcinfo.pc_clname, "TS"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri; + if (prio > maxupri || prio < -maxupri) + return (set_errno(EINVAL)); + + pcparm.pc_cid = pcinfo.pc_cid; + tsp = (tsparms_t *)pcparm.pc_clparms; + tsp->ts_upri = prio; + tsp->ts_uprilim = TS_NOCHANGE; + break; + + default: + return (set_errno(EINVAL)); + } + + /* + * finally set scheduling policy and parameters + */ + (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + + return (0); +} + +long +lx_sched_getscheduler(l_pid_t pid) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + int policy; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if ((rv = sched_setprocset(&procset, pid))) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify 
the equivalent linux policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR; + else + policy = set_errno(EINVAL); + + return (policy); +} + +long +lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + procset_t procset_cid; + pcparms_t pcparm; + pcinfo_t pcinfo; + struct lx_sched_param sched_param; + tsparms_t *tsp; + int policy; + int prio, maxupri; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if ((rv = sched_setprocset(&procset, pid))) + return (rv); + + if (copyin(param, &sched_param, sizeof (sched_param))) + return (set_errno(EFAULT)); + + prio = sched_param.lx_sched_prio; + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the current policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? 
LX_SCHED_FIFO : LX_SCHED_RR; + else + return (set_errno(EINVAL)); + + bzero(&pcinfo, sizeof (pcinfo)); + bzero(&pcparm, sizeof (pcparm)); + setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0); + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (prio < 0 || + prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri) + return (set_errno(EINVAL)); + pcparm.pc_cid = pcinfo.pc_cid; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = + policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF; + break; + + case LX_SCHED_OTHER: + (void) strcpy(pcinfo.pc_clname, "TS"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri; + if (prio > maxupri || prio < -maxupri) + return (set_errno(EINVAL)); + + pcparm.pc_cid = pcinfo.pc_cid; + tsp = (tsparms_t *)pcparm.pc_clparms; + tsp->ts_upri = prio; + tsp->ts_uprilim = TS_NOCHANGE; + break; + + default: + return (set_errno(EINVAL)); + } + + /* + * finally set scheduling policy and parameters + */ + (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + + return (0); +} + +long +lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + struct lx_sched_param local_param; + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + tsinfo_t *tsi; + int prio, scale; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if ((rv = sched_setprocset(&procset, pid))) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify the equivalent linux policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + 
(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + bzero(&local_param, sizeof (local_param)); + if (strcmp(pcinfo.pc_clname, "TS") == 0) { + /* + * Report the TS user priority scaled as -(upri * 20) / maxupri. + * The original author was unsure this is needed, believing the + * value cannot be changed from zero anyway. + */ + tsi = (tsinfo_t *)pcinfo.pc_clinfo; + prio = ((tsparms_t *)pcparm.pc_clparms)->ts_upri; + scale = tsi->ts_maxupri; + if (scale == 0) + local_param.lx_sched_prio = 0; + else + local_param.lx_sched_prio = -(prio * 20) / scale; + } else if (strcmp(pcinfo.pc_clname, "RT") == 0) + local_param.lx_sched_prio = + ((rtparms_t *)pcparm.pc_clparms)->rt_pri; + else + rv = set_errno(EINVAL); + + if (rv == 0) + if (copyout(&local_param, param, sizeof (local_param))) + return (set_errno(EFAULT)); + + return (rv); +} + +/* + * Linux sched_rr_get_interval(2) emulation. + * + * Copies out the time quantum (rt_tqsecs/rt_tqnsecs) of the lwp selected + * by pid when that lwp is in the RT class and its quantum is not + * RT_TQINF; every other case yields EINVAL. Fails with ESRCH for a + * negative pid or a target sched_setprocset() rejects, and EFAULT if the + * result cannot be copied out. + * + * NOTE(review): the result is copied out as a native struct timespec; + * confirm that this matches the layout the Linux caller expects. + */ +long +lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival) +{ + klwp_t *lwp = ttolwp(curthread); + struct timespec interval; + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if ((rv = sched_setprocset(&procset, pid))) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * look up the class id of the "RT" class so it can be compared + * with the target's class id obtained above (procset is reused + * for this lookup) + */ + setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0); + bzero(&pcinfo, sizeof (pcinfo)); + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (pcparm.pc_cid == pcinfo.pc_cid && + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) { + interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs; + interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs; + + if (copyout(&interval, ival, sizeof (interval))) + return (set_errno(EFAULT)); + + return (0); + } + + return (set_errno(EINVAL)); +} + +int 
+sched_setprocset(procset_t *procset, l_pid_t pid) +{ + id_t lid, rid; + idtype_t lidtype, ridtype; + + /* + * define the target lwp: pid 0 selects the caller itself + * (P_PID/P_MYID); any other Linux pid is translated with + * lx_lpid_to_spair() and must resolve to an lwp of the current + * process, otherwise ESRCH is returned + */ + if (pid == 0) { + ridtype = P_ALL; + lidtype = P_PID; + rid = 0; + lid = P_MYID; + } else { + if (lx_lpid_to_spair(pid, &pid, &lid) < 0) + return (set_errno(ESRCH)); + if (pid != curproc->p_pid) + return (set_errno(ESRCH)); + rid = 0; + ridtype = P_ALL; + lidtype = P_LWPID; + } + setprocset(procset, POP_AND, lidtype, lid, ridtype, rid); + + return (0); +} + +/* + * Thin wrapper around priocntl_common() for in-kernel callers: arg is + * passed with UIO_SYSSPACE. Callers in this file ignore the return value + * and inspect lwp_errno instead. + */ +long +do_priocntlsys(int cmd, procset_t *procset, void *arg) +{ + return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0, + UIO_SYSSPACE)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c new file mode 100644 index 0000000000..9fdb734805 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <vm/anon.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/zone.h> +#include <sys/time.h> + +struct lx_sysinfo { + int32_t si_uptime; /* Seconds since boot */ + uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */ + uint32_t si_totalram; /* Total memory size */ + uint32_t si_freeram; /* Available memory */ + uint32_t si_sharedram; /* Shared memory */ + uint32_t si_bufferram; /* Buffer memory */ + uint32_t si_totalswap; /* Total swap space */ + uint32_t si_freeswap; /* Avail swap space */ + uint16_t si_procs; /* Process count */ + uint32_t si_totalhigh; /* High memory size */ + uint32_t si_freehigh; /* Avail high memory */ + uint32_t si_mem_unit; /* Unit size of memory fields */ +}; + +/* + * Linux sysinfo(2) emulation: fill in a struct lx_sysinfo describing the + * current zone (uptime, load averages, lwp count) and the system memory + * and swap totals, then copy it out to sip. Returns 0 on success or + * EFAULT if the copyout fails. + */ +long +lx_sysinfo(struct lx_sysinfo *sip) +{ + struct lx_sysinfo si; + hrtime_t birthtime; + zone_t *zone = curthread->t_procp->p_zone; + proc_t *init_proc; + + /* + * Zero the whole structure before filling it in. The uint16_t + * si_procs member is followed by padding, and the copyout() below + * would otherwise hand uninitialized kernel stack bytes to userland. + */ + bzero(&si, sizeof (si)); + + /* + * We don't record the time a zone was booted, so we use the + * birthtime of that zone's init process instead. + */ + mutex_enter(&pidlock); + init_proc = prfind(zone->zone_proc_initpid); + if (init_proc != NULL) + birthtime = init_proc->p_mstart; + else + birthtime = p0.p_mstart; + mutex_exit(&pidlock); + si.si_uptime = (gethrtime() - birthtime) / NANOSEC; + + /* + * We scale down the load in avenrun to allow larger load averages + * to fit in 32 bits. Linux doesn't, so we remove the scaling + * here. + */ + si.si_loads[0] = avenrun[0] << FSHIFT; + si.si_loads[1] = avenrun[1] << FSHIFT; + si.si_loads[2] = avenrun[2] << FSHIFT; + + /* + * In linux each thread looks like a process, so we conflate the + * two in this stat as well. + */ + si.si_procs = (int32_t)zone->zone_nlwps; + + /* + * If the maximum memory stat is less than 2^20 pages (i.e. 4GB), + * then we report the result in bytes. Otherwise we use pages. + * Once we start supporting >1TB x86 systems, we'll need a third + * option. 
+ */ + if (MAX(physmem, k_anoninfo.ani_max) < 1024 * 1024) { + si.si_totalram = physmem * PAGESIZE; + si.si_freeram = freemem * PAGESIZE; + si.si_totalswap = k_anoninfo.ani_max * PAGESIZE; + si.si_freeswap = k_anoninfo.ani_free * PAGESIZE; + si.si_mem_unit = 1; + } else { + si.si_totalram = physmem; + si.si_freeram = freemem; + si.si_totalswap = k_anoninfo.ani_max; + si.si_freeswap = k_anoninfo.ani_free; + si.si_mem_unit = PAGESIZE; + } + si.si_bufferram = 0; + si.si_sharedram = 0; + + /* + * These two stats refer to high physical memory. If an + * application running in a Linux zone cares about this, then + * either it or we are broken. + */ + si.si_totalhigh = 0; + si.si_freehigh = 0; + + if (copyout(&si, sip, sizeof (si)) != 0) + return (set_errno(EFAULT)); + return (0); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c new file mode 100644 index 0000000000..f9751819f9 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/cpuvar.h> +#include <sys/archsystm.h> +#include <sys/proc.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_ldt.h> + +long +lx_get_thread_area(struct ldt_info *inf) +{ + struct lx_lwp_data *jlwp = ttolxlwp(curthread); + struct ldt_info ldt_inf; + user_desc_t *dscrp; + int entry; + + if (fuword32(&inf->entry_number, (uint32_t *)&entry)) + return (set_errno(EFAULT)); + + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + dscrp = jlwp->br_tls + entry - GDT_TLSMIN; + + /* + * convert the solaris ldt to the linux format expected by the + * caller + */ + DESC_TO_LDT_INFO(dscrp, &ldt_inf); + ldt_inf.entry_number = entry; + + if (copyout(&ldt_inf, inf, sizeof (struct ldt_info))) + return (set_errno(EFAULT)); + + return (0); +} + +long +lx_set_thread_area(struct ldt_info *inf) +{ + struct lx_lwp_data *jlwp = ttolxlwp(curthread); + struct ldt_info ldt_inf; + user_desc_t *dscrp; + int entry; + int i; + + if (copyin(inf, &ldt_inf, sizeof (ldt_inf))) + return (set_errno(EFAULT)); + + entry = ldt_inf.entry_number; + if (entry == -1) { + /* + * find an empty entry in the tls for this thread + */ + for (i = 0, dscrp = jlwp->br_tls; + i < LX_TLSNUM; i++, dscrp++) + if (((unsigned long *)dscrp)[0] == 0 && + ((unsigned long *)dscrp)[1] == 0) + break; + + if (i < LX_TLSNUM) { + /* + * found one + */ + entry = i + GDT_TLSMIN; + if (suword32(&inf->entry_number, entry)) + return (set_errno(EFAULT)); + } else { + return (set_errno(ESRCH)); + } + } + + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + /* + * convert the linux ldt info to standard intel descriptor + */ + dscrp = jlwp->br_tls + entry - GDT_TLSMIN; + + if (LDT_INFO_EMPTY(&ldt_inf)) { + ((unsigned long *)dscrp)[0] = 0; + ((unsigned long *)dscrp)[1] = 0; + } else { + LDT_INFO_TO_DESC(&ldt_inf, dscrp); + } + + /* + * 
update the gdt with the new descriptor + */ + kpreempt_disable(); + + for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++) + lx_set_gdt(GDT_TLSMIN + i, dscrp); + + kpreempt_enable(); + + return (0); +} diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c index d61928d578..ab733a07cc 100644 --- a/usr/src/uts/common/brand/sn1/sn1_brand.c +++ b/usr/src/uts/common/brand/sn1/sn1_brand.c @@ -94,9 +94,11 @@ struct brand_mach_ops sn1_mops = { struct brand_mach_ops sn1_mops = { sn1_brand_sysenter_callback, + NULL, sn1_brand_int91_callback, sn1_brand_syscall_callback, - sn1_brand_syscall32_callback + sn1_brand_syscall32_callback, + NULL }; #else /* ! __amd64 */ @@ -104,7 +106,9 @@ struct brand_mach_ops sn1_mops = { struct brand_mach_ops sn1_mops = { sn1_brand_sysenter_callback, NULL, + NULL, sn1_brand_syscall_callback, + NULL, NULL }; #endif /* __amd64 */ diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c index f24b864eef..2e2309a33e 100644 --- a/usr/src/uts/common/brand/solaris10/s10_brand.c +++ b/usr/src/uts/common/brand/solaris10/s10_brand.c @@ -99,9 +99,11 @@ struct brand_mach_ops s10_mops = { struct brand_mach_ops s10_mops = { s10_brand_sysenter_callback, + NULL, s10_brand_int91_callback, s10_brand_syscall_callback, - s10_brand_syscall32_callback + s10_brand_syscall32_callback, + NULL }; #else /* ! 
__amd64 */ @@ -109,7 +111,9 @@ struct brand_mach_ops s10_mops = { struct brand_mach_ops s10_mops = { s10_brand_sysenter_callback, NULL, + NULL, s10_brand_syscall_callback, + NULL, NULL }; #endif /* __amd64 */ diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c index 400e9ffd10..07ffddc123 100644 --- a/usr/src/uts/common/io/ptm.c +++ b/usr/src/uts/common/io/ptm.c @@ -447,6 +447,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp) return (0); } +/* + * Callback handed out by the PTMPTSOPENCB ioctl. arg is the pt_ttys + * pointer stored in ppocb_arg; the result is B_TRUE when its pt_nullmsg + * is non-NULL, sampled while holding the pt_ttys read lock. + */ +static boolean_t +ptmptsopencb(ptmptsopencb_arg_t arg) +{ + struct pt_ttys *ptmp = (struct pt_ttys *)arg; + boolean_t rval; + + PT_ENTER_READ(ptmp); + rval = (ptmp->pt_nullmsg != NULL); + PT_EXIT_READ(ptmp); + return (rval); +} + /* * The wput procedure will only handle ioctl and flush messages. */ @@ -574,6 +586,41 @@ ptmwput(queue_t *qp, mblk_t *mp) miocack(qp, mp, 0, 0); break; } + case PTMPTSOPENCB: + { + mblk_t *dp; /* ioctl reply data */ + ptmptsopencb_t *ppocb; + + /* only allow the kernel to invoke this ioctl */ + if (iocp->ioc_cr != kcred) { + miocnak(qp, mp, 0, EINVAL); + break; + } + + /* we don't support transparent ioctls */ + ASSERT(iocp->ioc_count != TRANSPARENT); + if (iocp->ioc_count == TRANSPARENT) { + miocnak(qp, mp, 0, EINVAL); + break; + } + + /* allocate a response message */ + dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED); + if (dp == NULL) { + miocnak(qp, mp, 0, EAGAIN); + break; + } + + /* initialize the ioctl results */ + ppocb = (ptmptsopencb_t *)dp->b_rptr; + ppocb->ppocb_func = ptmptsopencb; + ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp; + + /* send the reply data */ + mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0); + qreply(qp, mp); + break; + } } break; diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c index eb8c6e730a..532b74ec9b 100644 --- a/usr/src/uts/common/os/brand.c +++ b/usr/src/uts/common/os/brand.c @@ -45,7 +45,7 @@ struct brand_mach_ops native_mach_ops = { }; #else /* !__sparcv9 */ struct brand_mach_ops native_mach_ops = { - NULL, NULL, 
NULL, NULL + NULL, NULL, NULL, NULL, NULL, NULL }; #endif /* !__sparcv9 */ diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c index b555bb82b7..39db5cb27d 100644 --- a/usr/src/uts/common/os/pid.c +++ b/usr/src/uts/common/os/pid.c @@ -112,6 +112,18 @@ pid_lookup(pid_t pid) return (pidp); } +/* + * Locked wrapper around pid_lookup(): takes pidlinklock for the duration + * of the lookup and returns whatever pid_lookup() returned. + * NOTE(review): the lock is dropped before returning, so presumably the + * caller must otherwise guarantee the returned entry stays valid - confirm. + */ +struct pid * +pid_find(pid_t pid) +{ + struct pid *pidp; + + mutex_enter(&pidlinklock); + pidp = pid_lookup(pid); + mutex_exit(&pidlinklock); + + return (pidp); +} + void pid_setmin(void) { diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index c6ebe8b110..f9df89923f 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -3177,6 +3177,7 @@ job_control_type(int cmd) case JAGENT: /* Obsolete */ case JTRUN: /* Obsolete */ case JXTPROTO: /* Obsolete */ + case TIOCSETLD: return (JCSETP); } diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h index 6c79ee266d..ba8b2b1210 100644 --- a/usr/src/uts/common/sys/ptms.h +++ b/usr/src/uts/common/sys/ptms.h @@ -126,6 +126,12 @@ extern void ptms_logp(char *, uintptr_t); #define DDBGP(a, b) #endif +typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t; +typedef struct ptmptsopencb { + boolean_t (*ppocb_func)(ptmptsopencb_arg_t); + ptmptsopencb_arg_t ppocb_arg; +} ptmptsopencb_t; + #endif /* _KERNEL */ typedef struct pt_own { @@ -157,6 +163,19 @@ typedef struct pt_own { #define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */ #define OWNERPT (('P'<<8)|5) /* set owner/group for slave device */ +#ifdef _KERNEL +/* + * kernel ioctl commands + * + * PTMPTSOPENCB: Returns a callback function pointer and opaque argument. + * The return value of the callback function when it's invoked + * with the opaque argument passed to it will indicate if the + * pts slave device is currently open. 
+ */ +#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */ + +#endif /* _KERNEL */ + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h index e66ba0bc6b..1e1124d554 100644 --- a/usr/src/uts/common/sys/termios.h +++ b/usr/src/uts/common/sys/termios.h @@ -380,6 +380,24 @@ extern pid_t tcgetsid(); #define TCSETSF (_TIOC|16) /* + * linux terminal ioctls we need to be aware of + */ +#define TIOCSETLD (_TIOC|123) /* set line discipline parms */ +#define TIOCGETLD (_TIOC|124) /* get line discipline parms */ + +/* + * On Solaris, VMIN and VTIME overlap with VEOF and VEOL. This is + * perfectly legal, but Linux expects them to be separate, so we keep + * the four values separately here. + */ +struct lx_cc { + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +/* * NTP PPS ioctls */ #define TIOCGPPS (_TIOC|125) diff --git a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s index 61ef4ac6c3..68181be28a 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm.s +++ b/usr/src/uts/i86pc/ml/syscall_asm.s @@ -631,6 +631,36 @@ _sysenter_done: sysexit SET_SIZE(sys_sysenter) SET_SIZE(brand_sys_sysenter) +#endif /* __lint */ + +#if defined(__lint) +/* + * System call via an int80. This entry point is only used by the Linux + * application environment. Unlike the sysenter path, there is no default + * action to take if no callback is registered for this process. + */ +void +sys_int80() +{} + +#else /* __lint */ + + ENTRY_NP(brand_sys_int80) + BRAND_CALLBACK(BRAND_CB_INT80) + + ALTENTRY(sys_int80) + /* + * We hit an int80, but this process isn't of a brand with an int80 + * handler. Bad process! Make it look as if the INT failed. + * Modify %eip to point before the INT, push the expected error + * code and fake a GP fault. 
+ * + */ + subl $2, (%esp) /* int insn 2-bytes */ + pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) + jmp gptrap / GP fault + SET_SIZE(sys_int80) + SET_SIZE(brand_sys_int80) /* * Declare a uintptr_t which covers the entire pc range of syscall diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s index 443689cec3..fec8301bbc 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s +++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s @@ -1159,6 +1159,48 @@ sys_sysenter() SET_SIZE(brand_sys_sysenter) #endif /* __lint */ + +#if defined(__lint) +/* + * System call via an int80. This entry point is only used by the Linux + * application environment. Unlike the other entry points, there is no + * default action to take if no callback is registered for this process. + */ +void +sys_int80() +{} + +#else /* __lint */ + + ENTRY_NP(brand_sys_int80) + SWAPGS /* kernel gsbase */ + XPV_TRAP_POP + BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK()) + SWAPGS /* user gsbase */ + jmp nopop_int80 + + ENTRY_NP(sys_int80) + /* + * We hit an int80, but this process isn't of a brand with an int80 + * handler. Bad process! Make it look as if the INT failed. + * Modify %rip to point before the INT, push the expected error + * code and fake a GP fault. Note on 64-bit hypervisor we need + * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack + * because gptrap will pop them again with its own XPV_TRAP_POP. 
+ */ + XPV_TRAP_POP +nopop_int80: + subq $2, (%rsp) /* int insn 2-bytes */ + pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) +#if defined(__xpv) + push %r11 + push %rcx +#endif + jmp gptrap / GP fault + SET_SIZE(sys_int80) + SET_SIZE(brand_sys_int80) +#endif /* __lint */ + /* * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h index b632cea09c..8f9803290c 100644 --- a/usr/src/uts/i86pc/sys/apic.h +++ b/usr/src/uts/i86pc/sys/apic.h @@ -380,7 +380,7 @@ struct apic_io_intr { /* special or reserve vectors */ #define APIC_CHECK_RESERVE_VECTORS(v) \ (((v) == T_FASTTRAP) || ((v) == APIC_SPUR_INTR) || \ - ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET)) + ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET) || ((v) == 0x80)) /* cmos shutdown code for BIOS */ #define BIOS_SHUTDOWN 0x0a diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile index 72b2d89989..37e10e011e 100644 --- a/usr/src/uts/intel/Makefile +++ b/usr/src/uts/intel/Makefile @@ -64,7 +64,7 @@ install_h.prereq := TARGET= install_h .PARALLEL: $(PARALLEL_KMODS) $(XMODS) config $(LINT_DEPS) -def all install clean clobber modlist: $(KMODS) $(XMODS) config +def all install clean clobber modlist: genassym $(KMODS) $(XMODS) config clobber: clobber.targ @@ -106,7 +106,7 @@ CLOBBERFILES += $(PRIVS_C) # intel/dtrace depends on i86pc/genassym, so we need to build both # i86pc/genassym and intel/genassym. 
# -all.prereq install.prereq def.prereq: genunix FRC +all.prereq install.prereq def.prereq: genassym genunix FRC @cd ../i86pc/genassym; pwd; $(MAKE) $(@:%.prereq=%) # @@ -124,7 +124,7 @@ genunix: $(PRIVS_C) modlintlib clean.lint: $(LINT_KMODS) $(XMODS) -$(KMODS) $(SUBDIRS) config: FRC +genassym $(KMODS) $(SUBDIRS) config: FRC @cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET) $(XMODS): FRC diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 1661f3ff93..4321c3586e 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -99,6 +99,14 @@ GENUNIX_OBJS += \ # CORE_OBJS += \ prmachdep.o + +LX_PROC_OBJS += \ + lx_prsubr.o \ + lx_prvfsops.o \ + lx_prvnops.o + +LX_AUTOFS_OBJS += \ + lx_autofs.o # # ZFS file system module @@ -267,6 +275,23 @@ IOMMULIB_OBJS = iommulib.o SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o SNGL_BRAND_OBJS = sngl_brand.o sngl_brand_asm.o S10_BRAND_OBJS = s10_brand.o s10_brand_asm.o +LX_BRAND_OBJS = \ + lx_brand.o \ + lx_brand_asm.o \ + lx_brk.o \ + lx_clone.o \ + lx_futex.o \ + lx_getpid.o \ + lx_id.o \ + lx_kill.o \ + lx_misc.o \ + lx_modify_ldt.o \ + lx_pid.o \ + lx_sched.o \ + lx_signum.o \ + lx_syscall.o \ + lx_sysinfo.o \ + lx_thread_area.o # # special files diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 7e95f5fa23..b818b1860c 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -42,6 +42,7 @@ PLATFORM = i86pc # UNIX_DIR = $(UTSBASE)/i86pc/unix GENLIB_DIR = $(UTSBASE)/intel/genunix +GENASSYM_DIR = $(UTSBASE)/intel/genassym IPDRV_DIR = $(UTSBASE)/intel/ip MODSTUBS_DIR = $(UNIX_DIR) DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym @@ -134,6 +135,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS) ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS)) ASFLAGS += $(ASFLAGS_XARCH) +AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR) # # Define the base directory for installation. 
@@ -513,7 +515,9 @@ MISC_KMODS += md_sp # # Brand modules # -BRAND_KMODS += sn1_brand sngl_brand s10_brand +BRAND_KMODS += sn1_brand sngl_brand s10_brand lx_brand +DRV_KMODS += lx_systrace lx_ptm lx_audio +STRMOD_KMODS += ldlinux # # Exec Class Modules (/kernel/exec): @@ -529,7 +533,7 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC # File System Modules (/kernel/fs): # FS_KMODS += autofs cachefs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs -FS_KMODS += lofs lxprocfs mntfs namefs nfs objfs zfs zut +FS_KMODS += lofs lx_afs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs FS_KMODS += smbfs diff --git a/usr/src/uts/intel/brand/lx/lx_brand_asm.s b/usr/src/uts/intel/brand/lx/lx_brand_asm.s new file mode 100644 index 0000000000..7f3ed908f4 --- /dev/null +++ b/usr/src/uts/intel/brand/lx/lx_brand_asm.s @@ -0,0 +1,162 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#if defined(__lint) + +#include <sys/systm.h> + +#else /* __lint */ + +#include "genassym.h" +#include "../common/brand_asm.h" + +#endif /* __lint */ + +#ifdef __lint + +void +lx_brand_int80_callback(void) +{ +} + +#else /* __lint */ + +#if defined(__amd64) + +/* + * See "64-BIT INTERPOSITION STACK" in brand_asm.h. + * + * int80 callback for lx-branded processes: the syscall number in %rax is + * checked against the zone's lxzd_max_syscall (out-of-range numbers are + * replaced by 0), then control is redirected to the process's handler + * (L_HANDLER) or, when tracing has been patched in, to its trace handler + * (L_TRACEHANDLER). + */ +ENTRY(lx_brand_int80_callback) + GET_PROCP(SP_REG, 0, %r15) + movq P_ZONE(%r15), %r15 /* grab the zone pointer */ + /* grab the 'max syscall num' for this process from 'zone brand data' */ + movq ZONE_BRAND_DATA(%r15), %r15 /* grab the zone brand ptr */ + movl LXZD_MAX_SYSCALL(%r15), %r15d /* get the 'max sysnum' word */ + cmpq %r15, %rax /* is 0 <= syscall <= MAX? */ + jbe 0f /* yes, syscall is OK */ + xorl %eax, %eax /* no, zero syscall number */ +0: + + /* + * The byte at patch_point + 1 is rewritten by lx_brand_int80_enable + * and lx_brand_int80_disable below to steer this jmp to the trace or + * notrace path (presumably the 8-bit displacement of a short jmp). + */ +.lx_brand_int80_patch_point: + jmp .lx_brand_int80_notrace + +.lx_brand_int80_notrace: + CALC_TABLE_ADDR(%r15, L_HANDLER) +1: + movq %r15, %rax + GET_V(%rsp, 0, V_SSP, %rsp) /* restore intr. stack pointer */ + xchgq (%rsp), %rax /* swap %rax and return addr */ + jmp sys_sysint_swapgs_iret + +.lx_brand_int80_trace: + /* + * If tracing is active, we vector to an alternate trace-enabling + * handler table instead. + */ + CALC_TABLE_ADDR(%r15, L_TRACEHANDLER) + jmp 1b +SET_SIZE(lx_brand_int80_callback) + +#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1) +#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace) + +/* + * Set lx_systrace_brand_enabled and store PATCH_VAL (the distance from + * the notrace label to the trace label) at PATCH_POINT. + */ +ENTRY(lx_brand_int80_enable) + movl $1, lx_systrace_brand_enabled(%rip) + movq $PATCH_POINT, %r8 + movb $PATCH_VAL, (%r8) + ret +SET_SIZE(lx_brand_int80_enable) + +/* + * Store 0 at PATCH_POINT and then clear lx_systrace_brand_enabled. + */ +ENTRY(lx_brand_int80_disable) + movq $PATCH_POINT, %r8 + movb $0, (%r8) + movl $0, lx_systrace_brand_enabled(%rip) + ret +SET_SIZE(lx_brand_int80_disable) + + +#elif defined(__i386) + +/* + * See "32-BIT INTERPOSITION STACK" in brand_asm.h. 
+ */ +ENTRY(lx_brand_int80_callback) + GET_PROCP(SP_REG, 0, %ebx) + movl P_ZONE(%ebx), %ebx /* grab the zone pointer */ + /* grab the 'max syscall num' for this process from 'zone brand data' */ + movl ZONE_BRAND_DATA(%ebx), %ebx /* grab the zone brand data */ + movl LXZD_MAX_SYSCALL(%ebx), %ebx /* get the max sysnum */ + + cmpl %ebx, %eax /* is 0 <= syscall <= MAX? */ + jbe 0f /* yes, syscall is OK */ + xorl %eax, %eax /* no, zero syscall number */ +0: + + /* + * The byte at patch_point + 1 is rewritten by lx_brand_int80_enable + * and lx_brand_int80_disable below to steer this jmp to the trace or + * notrace path (presumably the 8-bit displacement of a short jmp). + */ +.lx_brand_int80_patch_point: + jmp .lx_brand_int80_notrace + +.lx_brand_int80_notrace: + CALC_TABLE_ADDR(%ebx, L_HANDLER) + +1: + movl %ebx, %eax + GET_V(%esp, 0, V_U_EBX, %ebx) /* restore scratch register */ + addl $V_END, %esp /* restore intr. stack ptr */ + xchgl (%esp), %eax /* swap new and orig. return addrs */ + jmp nopop_sys_rtt_syscall + +.lx_brand_int80_trace: + /* tracing is patched in: use the trace handler table entry instead */ + CALC_TABLE_ADDR(%ebx, L_TRACEHANDLER) + jmp 1b +SET_SIZE(lx_brand_int80_callback) + + +#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1) +#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace) + +/* + * Set lx_systrace_brand_enabled and store PATCH_VAL (the distance from + * the notrace label to the trace label) at PATCH_POINT; %ebx and %eax + * are saved and restored around the update. + */ +ENTRY(lx_brand_int80_enable) + pushl %ebx + pushl %eax + movl $1, lx_systrace_brand_enabled + movl $PATCH_POINT, %ebx + movl $PATCH_VAL, %eax + movb %al, (%ebx) + popl %eax + popl %ebx + ret +SET_SIZE(lx_brand_int80_enable) + +/* + * Store 0 at PATCH_POINT and then clear lx_systrace_brand_enabled. + */ +ENTRY(lx_brand_int80_disable) + pushl %ebx + movl $PATCH_POINT, %ebx + movb $0, (%ebx) + movl $0, lx_systrace_brand_enabled + popl %ebx + ret +SET_SIZE(lx_brand_int80_disable) + +#endif /* __i386 */ +#endif /* __lint */ diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile new file mode 100644 index 0000000000..ce01dc8610 --- /dev/null +++ b/usr/src/uts/intel/genassym/Makefile @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of genassym.h through +# compile time intialized data. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h +OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(GENASSYM_H) + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Overrides +# +CLEANFILES = Nothing_to_remove +CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +clean.lint: + +install: def + +# +# Create genassym.h +# +$(GENASSYM_H): $(OFFSETS_SRC) + $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@ + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in new file mode 100644 index 0000000000..f389fe6da3 --- /dev/null +++ b/usr/src/uts/intel/genassym/offsets.in @@ -0,0 +1,43 @@ +\ +\ CDDL HEADER START +\ +\ The contents of this file are subject to the terms of the +\ Common Development and Distribution License (the "License"). +\ You may not use this file except in compliance with the License. +\ +\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +\ or http://www.opensolaris.org/os/licensing. +\ See the License for the specific language governing permissions +\ and limitations under the License. +\ +\ When distributing Covered Code, include this CDDL HEADER in each +\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +\ If applicable, add the following below this CDDL HEADER, with the +\ fields enclosed by brackets "[]" replaced with your own identifying +\ information: Portions Copyright [yyyy] [name of copyright owner] +\ +\ CDDL HEADER END +\ +\ +\ Copyright 2010 Sun Microsystems, Inc. All rights reserved. +\ Use is subject to license terms. 
+\ + +\ +\ offsets.in: input file to produce the architecture-dependent genassym.h +\ using the ctfstabs program +\ + +#ifndef _GENASSYM +#define _GENASSYM +#endif + +#include <sys/lx_brand.h> + +lx_proc_data + l_handler + l_tracehandler + l_traceflag + +lx_zone_data + lxzd_max_syscall diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index d83b16d673..42cc0d4d10 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -632,6 +632,8 @@ getuserpc() static greg_t fix_segreg(greg_t sr, int iscs, model_t datamodel) { + kthread_t *t = curthread; + switch (sr &= 0xffff) { case 0: @@ -667,6 +669,19 @@ fix_segreg(greg_t sr, int iscs, model_t datamodel) break; } + /* + * Allow this process's brand to do any necessary segment register + * manipulation. + */ + if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) { + greg_t bsr = BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel); + + if (bsr == 0 && iscs == IS_CS) + return (0 | SEL_UPL); + else + return (bsr); + } + /* * Force it into the LDT in ring 3 for 32-bit processes, which by * default do not have an LDT, so that any attempt to use an invalid diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c index a05137eee6..97024b7b59 100644 --- a/usr/src/uts/intel/ia32/os/desctbls.c +++ b/usr/src/uts/intel/ia32/os/desctbls.c @@ -161,7 +161,7 @@ struct interposing_handler { * The brand infrastructure interposes on two handlers, and we use one as a * NULL signpost. */ -static struct interposing_handler brand_tbl[2]; +static struct interposing_handler brand_tbl[3]; /* * software prototypes for default local descriptor table @@ -976,6 +976,12 @@ init_idt_common(gate_desc_t *idt) set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0); + /* + * install "int80" handler at, well, 0x80. 
+ */ + set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL, + 0); + /* * install fast trap handler at 210. */ @@ -1001,21 +1007,27 @@ init_idt_common(gate_desc_t *idt) SDT_SYSIGT, TRP_UPL, 0); /* - * Prepare interposing descriptor for the syscall handler - * and cache copy of the default descriptor. +- * Prepare interposing descriptors for the branded "int80" +- * and syscall handlers and cache copies of the default +- * descriptors. */ - brand_tbl[0].ih_inum = T_SYSCALLINT; - brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; + brand_tbl[0].ih_inum = T_INT80; + brand_tbl[0].ih_default_desc = idt0[T_INT80]; + set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL, + SDT_SYSIGT, TRP_UPL, 0); + + brand_tbl[1].ih_inum = T_SYSCALLINT; + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT]; #if defined(__amd64) - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int, + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL, 0); #elif defined(__i386) - set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call, + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call, KCS_SEL, SDT_SYSIGT, TRP_UPL, 0); #endif /* __i386 */ - brand_tbl[1].ih_inum = 0; + brand_tbl[2].ih_inum = 0; } #if defined(__xpv) diff --git a/usr/src/uts/intel/ldlinux/Makefile b/usr/src/uts/intel/ldlinux/Makefile new file mode 100644 index 0000000000..5177fc5799 --- /dev/null +++ b/usr/src/uts/intel/ldlinux/Makefile @@ -0,0 +1,103 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/ldlinux/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the ldlinux streams kernel +# module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = ldlinux +OBJECTS = $(LDLINUX_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LDLINUX_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_STRMOD_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_afs/Makefile b/usr/src/uts/intel/lx_afs/Makefile new file mode 100644 index 0000000000..657ce7f4f0 --- /dev/null +++ b/usr/src/uts/intel/lx_afs/Makefile @@ -0,0 +1,108 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# This makefile drives the production of the lx_autofs file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +# Note that the name of the actual filesystem is lx_afs and +# not lx_autofs. This is because filesystem names are stupidly +# limited to 8 characters. 
+# +MODULE = lx_afs +OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_afs/Makefile.rules diff --git a/usr/src/uts/intel/lx_afs/Makefile.rules b/usr/src/uts/intel/lx_afs/Makefile.rules new file mode 100644 index 0000000000..2793fedaa4 --- /dev/null +++ b/usr/src/uts/intel/lx_afs/Makefile.rules @@ -0,0 +1,40 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_audio/Makefile b/usr/src/uts/intel/lx_audio/Makefile new file mode 100644 index 0000000000..9341fc7def --- /dev/null +++ b/usr/src/uts/intel/lx_audio/Makefile @@ -0,0 +1,100 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_audio/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# This makefile drives the production of the lx_audio driver +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lx_audio +OBJECTS = $(LX_AUDIO_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_AUDIO_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile new file mode 100644 index 0000000000..b2a430de51 --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile @@ -0,0 +1,107 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the kernel component of +# the lx brand +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. +# +MODULE = lx_brand +OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) +AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nexec/elfexec + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_brand/Makefile.rules diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules new file mode 100644 index 0000000000..0862baef84 --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile.rules @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -D_ELF32_COMPAT -o $@ $< + +$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -D_ELF32_COMPAT -o $@ $< + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -o $@ $< + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s + @($(LHEAD) $(LINT.s) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile new file mode 100644 index 0000000000..0aaf2cabfa --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile @@ -0,0 +1,113 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_proc/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the lxproc file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. +# +MODULE = lx_proc +OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) + +# +# Depends on procfs and lx_brand +# +LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_proc/Makefile.rules diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules new file mode 100644 index 0000000000..b8592d2fdd --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile.rules @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. 
+# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile new file mode 100644 index 0000000000..dcead27da7 --- /dev/null +++ b/usr/src/uts/intel/lx_ptm/Makefile @@ -0,0 +1,91 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_ptm/Makefile +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the lx_ptm driver +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lx_ptm +OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile new file mode 100644 index 0000000000..20c4a6a3a3 --- /dev/null +++ b/usr/src/uts/intel/lx_systrace/Makefile @@ -0,0 +1,80 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +UTSBASE = ../.. 
+ +MODULE = lx_systrace +OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +ROOTLINK = $(USR_DTRACE_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_STATIC_UNUSED + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +$(ROOTLINK): $(USR_DTRACE_DIR) $(ROOTMODULE) + -$(RM) $@; ln $(ROOTMODULE) $@ + +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h index 3f9ebdb6b7..c1d045515c 100644 --- a/usr/src/uts/intel/sys/machbrand.h +++ b/usr/src/uts/intel/sys/machbrand.h @@ -35,17 +35,20 @@ extern "C" { struct brand_mach_ops { void (*b_sysenter)(void); + void (*b_int80)(void); void (*b_int91)(void); void (*b_syscall)(void); void (*b_syscall32)(void); + greg_t (*b_fixsegreg)(greg_t, model_t); }; #endif /* _ASM */ #define BRAND_CB_SYSENTER 0 -#define BRAND_CB_INT91 1 -#define BRAND_CB_SYSCALL 2 -#define BRAND_CB_SYSCALL32 3 +#define BRAND_CB_INT80 1 +#define BRAND_CB_INT91 2 +#define BRAND_CB_SYSCALL 3 +#define BRAND_CB_SYSCALL32 4 #ifdef __cplusplus } diff --git 
a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h index c4b194fcd8..8a6e398eec 100644 --- a/usr/src/uts/intel/sys/segments.h +++ b/usr/src/uts/intel/sys/segments.h @@ -683,6 +683,8 @@ extern void _start(), cmnint(); extern void achktrap(), mcetrap(); extern void xmtrap(); extern void fasttrap(); +extern void sys_int80(); +extern void brand_sys_int80(); extern void dtrace_ret(); #if !defined(__amd64) |