diff options
author | nn35248 <none@none> | 2006-09-11 22:51:59 -0700 |
---|---|---|
committer | nn35248 <none@none> | 2006-09-11 22:51:59 -0700 |
commit | 9acbbeaf2a1ffe5c14b244867d427714fab43c5c (patch) | |
tree | d1ecd54896325c19a463220e9cbc50864874fc82 /usr | |
parent | da51466dc253d7c98dda4956059042bd0c476328 (diff) | |
download | illumos-joyent-9acbbeaf2a1ffe5c14b244867d427714fab43c5c.tar.gz |
PSARC/2005/471 BrandZ: Support for non-native zones
6374606 ::nm -D without an object may not work on processes in zones
6409350 BrandZ project integration into Solaris
6455289 pthread_setschedparam() should return EPERM rather than panic libc
6455591 setpriority(3C) gets errno wrong for deficient privileges failure
6458178 fifofs doesn't support lofs mounts of fifos
6460380 Attempted open() of a symlink with the O_NOFOLLOW flag set returns EINVAL, not ELOOP
6463857 renice(1) errors erroneously
--HG--
rename : usr/src/lib/libzonecfg/zones/SUNWblank.xml => usr/src/lib/brand/native/zone/SUNWblank.xml
rename : usr/src/lib/libzonecfg/zones/SUNWdefault.xml => usr/src/lib/brand/native/zone/SUNWdefault.xml
Diffstat (limited to 'usr')
438 files changed, 59554 insertions, 1876 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint index 63a2cf956d..cc0ea0206c 100644 --- a/usr/src/Makefile.lint +++ b/usr/src/Makefile.lint @@ -289,11 +289,13 @@ COMMON_SUBDIRS = \ cmd/ztest \ lib/abi \ lib/auditd_plugins \ + lib/brand/sn1 \ lib/crypt_modules \ lib/extendedFILE \ lib/libadt_jni \ lib/libaio \ lib/libavl \ + lib/libbrand \ lib/libbsdmalloc \ lib/libbsm \ lib/libc \ @@ -408,8 +410,9 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \ i386_SUBDIRS= \ cmd/biosdev \ - cmd/rtc \ cmd/fs.d/xmemfs \ + cmd/rtc \ + lib/brand/lx \ lib/cfgadm_plugins/sata sparc_SUBDIRS= \ diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master index db2c53d0f0..b6eaf71bce 100644 --- a/usr/src/Makefile.master +++ b/usr/src/Makefile.master @@ -915,10 +915,10 @@ PKGARCHIVE=$(SRC)/../../packages/$(MACH)/nightly$(PKGARCHIVESUFFIX) $(COMPILE.java) $< # -# Rules to create message catalogue files from .sh, .c, .y, and .l files. -# For .sh files, we extract all gettext strings with sed(1) (being careful -# to permit multiple gettext strings on the same line), weed out the dups, -# and build the catalogue with awk(1). +# Rules to create message catalogue files from .sh, .ksh, .c, .y, and .l +# files. For .sh and .ksh files, we extract all gettext strings with +# sed(1) (being careful to permit multiple gettext strings on the same +# line), weed out the dups, and build the catalogue with awk(1). # .sh.po: @@ -930,6 +930,15 @@ PKGARCHIVE=$(SRC)/../../packages/$(MACH)/nightly$(PKGARCHIVESUFFIX) -e "t a" \ $< | sort -u | awk '{ print "msgid\t" $$0 "\nmsgstr" }' > $@ +.ksh.po: + $(SED) -n -e ":a" \ + -e "h" \ + -e "s/.*gettext *\(\"[^\"]*\"\).*/\1/p" \ + -e "x" \ + -e "s/\(.*\)gettext *\"[^\"]*\"\(.*\)/\1\2/" \ + -e "t a" \ + $< | sort -u | awk '{ print "msgid\t" $$0 "\nmsgstr" }' > $@ + # # When using xgettext, we want messages to go to the default domain, # rather than the specified one. 
This special version of the diff --git a/usr/src/cmd/bsmrecord/audit_record_attr.txt b/usr/src/cmd/bsmrecord/audit_record_attr.txt index 0193082d12..45a7bfc605 100644 --- a/usr/src/cmd/bsmrecord/audit_record_attr.txt +++ b/usr/src/cmd/bsmrecord/audit_record_attr.txt @@ -475,6 +475,19 @@ label=AUE_BIND comment=1, socket type, "type": comment=socket address +label=AUE_BRANDSYS +# generic mechanism to allow user-space and kernel components of a brand +# to communicate. The interpretation of the arguments to the call is +# left entirely up to the brand. + format=arg1:arg2:arg3:arg4:arg5:arg6:arg7 + comment=1, command, "cmd": + comment=2, command args, "arg": + comment=3, command args, "arg": + comment=4, command args, "arg": + comment=5, command args, "arg": + comment=6, command args, "arg": + comment=7, command args, "arg": + label=AUE_BSMSYS skip=Not used. diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com index 247e1cb0fa..972f4e985c 100644 --- a/usr/src/cmd/devfsadm/Makefile.com +++ b/usr/src/cmd/devfsadm/Makefile.com @@ -29,6 +29,7 @@ include ../../Makefile.cmd COMMON = .. +UTSBASE = $(COMMON)/../../uts DEVFSADM_MOD = devfsadm @@ -66,7 +67,7 @@ LINK_SRCS = \ $(COMMON)/md_link.c \ $(COMMON)/dtrace_link.c \ $(COMMON)/zfs_link.c \ - $(MISC_LINK_ISA).c + $(LINK_SRCS_$(MACH)) LINT_MODULES = $(LINK_SRCS:.c=.ln) @@ -87,7 +88,7 @@ LINK_OBJS = \ md_link.o \ dtrace_link.o \ zfs_link.o \ - $(MISC_LINK_ISA).o + $(LINK_OBJS_$(MACH)) LINK_MODS = \ SUNW_disk_link.so \ @@ -106,7 +107,7 @@ LINK_MODS = \ SUNW_md_link.so \ SUNW_dtrace_link.so \ SUNW_zfs_link.so \ - SUNW_$(MISC_LINK_ISA).so + $(LINK_MODS_$(MACH)) DEVLINKTAB = devlink.tab DEVLINKTAB_SRC = $(COMMON)/$(DEVLINKTAB).sh @@ -114,15 +115,15 @@ DEVLINKTAB_SRC = $(COMMON)/$(DEVLINKTAB).sh COMPAT_LINKS = disks tapes ports audlinks devlinks drvconfig CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_REENTRANT \ - -I.. -I../../../uts/common -I$(MODLOADDIR) -CFLAGS += $(CCVERBOSE) $(C_PICFLAGS) -I..
-I$(MODLOADDIR) + -I$(COMMON) -I$(UTSBASE)/common -I$(MODLOADDIR) +CFLAGS += $(CCVERBOSE) $(C_PICFLAGS) LINTFLAGS += -erroff=E_NAME_USED_NOT_DEF2 LINTFLAGS += -erroff=E_NAME_DEF_NOT_USED2 LINTFLAGS += -erroff=E_NAME_MULTIPLY_DEF2 -LAZYLIBS = $(ZLAZYLOAD) -lzonecfg -lbsm $(ZNOLAZYLOAD) -lint := LAZYLIBS = -lzonecfg -lbsm +LAZYLIBS = $(ZLAZYLOAD) -lzonecfg -lbrand -lbsm $(ZNOLAZYLOAD) +lint := LAZYLIBS = -lzonecfg -lbrand -lbsm LDLIBS += -ldevinfo -lgen -lsysevent -lnvpair -lcmd -ldoor $(LAZYLIBS) -lnsl SRCS = $(DEVFSADM_SRC) $(LINK_SRCS) diff --git a/usr/src/cmd/devfsadm/i386/Makefile b/usr/src/cmd/devfsadm/i386/Makefile index b721ec3d10..d6f9fcc4bd 100644 --- a/usr/src/cmd/devfsadm/i386/Makefile +++ b/usr/src/cmd/devfsadm/i386/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,17 +19,17 @@ # CDDL HEADER END # # -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright (c) 1998 by Sun Microsystems, Inc. -# All rights reserved. -# -# cmd/devfsadm/i386/Makefile +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
# +#ident "%Z%%M% %I% %E% SMI" -MISC_LINK_ISA = misc_link_i386 - +LINK_SRCS_i386 = misc_link_i386.c lx_link_i386.c +LINK_OBJS_i386 = misc_link_i386.o lx_link_i386.o +LINK_MODS_i386 = SUNW_misc_link_i386.so SUNW_lx_link_i386.so include ../Makefile.com +lx_link_i386.o := CPPFLAGS += -I$(UTSBASE)/common/brand/lx + install: all diff --git a/usr/src/cmd/devfsadm/i386/lx_link_i386.c b/usr/src/cmd/devfsadm/i386/lx_link_i386.c new file mode 100644 index 0000000000..855f4f7383 --- /dev/null +++ b/usr/src/cmd/devfsadm/i386/lx_link_i386.c @@ -0,0 +1,86 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <devfsadm.h> +#include <strings.h> +#include <stdio.h> +#include <sys/lx_ptm.h> +#include <sys/lx_audio.h> + +static int lx_ptm(di_minor_t minor, di_node_t node); +static int lx_audio(di_minor_t minor, di_node_t node); +static int lx_systrace(di_minor_t minor, di_node_t node); + +static devfsadm_create_t lx_create_cbt[] = { + { "pseudo", "ddi_pseudo", LX_PTM_DRV, + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_ptm }, + { "pseudo", "ddi_pseudo", LX_AUDIO_DRV, + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_audio }, + { "pseudo", "ddi_pseudo", "lx_systrace", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_systrace }, +}; + +DEVFSADM_CREATE_INIT_V0(lx_create_cbt); + +static int +lx_ptm(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + + if (strcmp(LX_PTM_MINOR_NODE, mname) == 0) + (void) devfsadm_mklink("brand/lx/ptmx", node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + +static int +lx_audio(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + + if (strcmp(LXA_MINORNAME_DEVCTL, mname) == 0) + (void) devfsadm_mklink("brand/lx/audio_devctl", node, minor, 0); + if (strcmp(LXA_MINORNAME_DSP, mname) == 0) + (void) devfsadm_mklink("brand/lx/dsp", node, minor, 0); + if (strcmp(LXA_MINORNAME_MIXER, mname) == 0) + (void) devfsadm_mklink("brand/lx/mixer", node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + +static int +lx_systrace(di_minor_t minor, di_node_t node) +{ + char *mname = di_minor_name(minor); + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "dtrace/provider/%s", mname); + (void) devfsadm_mklink(path, node, minor, 0); + + return (DEVFSADM_CONTINUE); +} diff --git a/usr/src/cmd/devfsadm/sparc/Makefile b/usr/src/cmd/devfsadm/sparc/Makefile index f6cdca62a2..20fe372cdd 100644 --- a/usr/src/cmd/devfsadm/sparc/Makefile +++ b/usr/src/cmd/devfsadm/sparc/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common 
Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,14 @@ # CDDL HEADER END # # -#ident "%Z%%M% %I% %E% SMI" -# -# Copyright (c) 1998 by Sun Microsystems, Inc. -# All rights reserved. -# -# cmd/devfsadm/sparc/Makefile +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. # +#ident "%Z%%M% %I% %E% SMI" -MISC_LINK_ISA = misc_link_sparc +LINK_SRCS_sparc = misc_link_sparc.c +LINK_OBJS_sparc = misc_link_sparc.o +LINK_MODS_sparc = SUNW_misc_link_sparc.so include ../Makefile.com diff --git a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c index b761e47bc6..854279cea3 100644 --- a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c +++ b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c @@ -29,6 +29,8 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#define PORTMAP + #include <tiuser.h> #include <fcntl.h> #include <netconfig.h> @@ -36,6 +38,7 @@ #include <errno.h> #include <syslog.h> #include <rpc/rpc.h> +#include <rpc/pmap_prot.h> #include <sys/time.h> #include <sys/resource.h> #include <signal.h> @@ -119,6 +122,9 @@ static int num_conns; /* Current number of connections */ int (*Mysvc4)(int, struct netbuf *, struct netconfig *, int, struct netbuf *); +extern bool_t __pmap_set(const rpcprog_t program, const rpcvers_t version, + const struct netconfig *nconf, const struct netbuf *address); + /* * Called to create and prepare a transport descriptor for in-kernel * RPC service. 
@@ -467,7 +473,7 @@ nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf) */ void do_one(char *provider, NETSELDECL(proto), struct protob *protobp0, - int (*svc)(int, struct netbuf, struct netconfig *)) + int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap) { register int sock; struct protob *protobp; @@ -511,9 +517,31 @@ do_one(char *provider, NETSELDECL(proto), struct protob *protobp0, strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0) continue; - (void) rpcb_unset(protobp->program, vers, retnconf); - (void) rpcb_set(protobp->program, vers, retnconf, - retaddr); + if (use_pmap) { + /* + * Note that if we're using a portmapper + * instead of rpcbind then we can't do an + * unregister operation here. + * + * The reason is that the portmapper unset + * operation removes all the entries for a + * given program/version regardless of + * transport protocol. + * + * The caller of this routine needs to ensure + * that __pmap_unset() has been called for all + * program/version service pairs they plan + * to support before they start registering + * each program/version/protocol triplet.
+ */ + (void) __pmap_set(protobp->program, vers, + retnconf, retaddr); + } else { + (void) rpcb_unset(protobp->program, vers, + retnconf); + (void) rpcb_set(protobp->program, vers, + retnconf, retaddr); + } } } @@ -552,7 +580,7 @@ do_one(char *provider, NETSELDECL(proto), struct protob *protobp0, */ int do_all(struct protob *protobp, - int (*svc)(int, struct netbuf, struct netconfig *)) + int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap) { struct netconfig *nconf; NCONF_HANDLE *nc; @@ -570,7 +598,7 @@ do_all(struct protob *protobp, (protobp->program != NFS4_CALLBACK || strncasecmp(nconf->nc_proto, NC_UDP, l) != 0)) do_one(nconf->nc_device, nconf->nc_proto, - protobp, svc); + protobp, svc, use_pmap); } (void) endnetconfig(nc); return (0); diff --git a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.h b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.h index 028d6dc8d3..96eac79f0e 100644 --- a/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.h +++ b/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,8 @@ * CDDL HEADER END */ /* - * Copyright 1996, 1999, 2001-2003 Sun Microsystems, Inc. - * All rights reserved. Use is subject to license terms. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ /* @@ -77,9 +76,11 @@ extern int nfslib_bindit(struct netconfig *, struct netbuf **, struct nd_hostserv *, int); extern void nfslib_log_tli_error(char *, int, struct netconfig *); extern int do_all(struct protob *, - int (*)(int, struct netbuf, struct netconfig *)); + int (*)(int, struct netbuf, struct netconfig *), + int use_pmap); extern void do_one(char *, char *, struct protob *, - int (*)(int, struct netbuf, struct netconfig *)); + int (*)(int, struct netbuf, struct netconfig *), + int use_pmap); extern void poll_for_action(void); #ifdef __cplusplus diff --git a/usr/src/cmd/fs.d/nfs/nfs4cbd/nfs4cbd.c b/usr/src/cmd/fs.d/nfs/nfs4cbd/nfs4cbd.c index fc128dffce..c99453f6dd 100644 --- a/usr/src/cmd/fs.d/nfs/nfs4cbd/nfs4cbd.c +++ b/usr/src/cmd/fs.d/nfs/nfs4cbd/nfs4cbd.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -202,7 +201,7 @@ main(int argc, char *argv[]) protobp->program = NFS4_CALLBACK; protobp->next = NULL; - if (do_all(protobp, NULL) == -1) { + if (do_all(protobp, NULL, 0) == -1) { exit(1); } diff --git a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c index 169a3cd544..274d789ce4 100644 --- a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c +++ b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c @@ -536,7 +536,7 @@ main(int ac, char *av[]) protobp->next = (struct protob *)NULL; if (allflag) { - if (do_all(protobp0, nfssvc) == -1) + if (do_all(protobp0, nfssvc, 0) == -1) exit(1); } else if (proto) { /* there's more than one match for the same protocol */ @@ -551,7 +551,7 @@ main(int ac, char *av[]) if (strcmp(nconf->nc_proto, proto) == 0) { protoFound = TRUE; do_one(nconf->nc_device, NULL, - protobp0, nfssvc); + protobp0, nfssvc, 0); } } (void) endnetconfig(nc); @@ -560,12 +560,12 @@ main(int ac, char *av[]) for protocol %s", proto); } else if (provider) - do_one(provider, proto, protobp0, nfssvc); + do_one(provider, proto, protobp0, nfssvc, 0); else { for (providerp = defaultproviders; *providerp != NULL; providerp++) { provider = *providerp; - do_one(provider, NULL, protobp0, nfssvc); + do_one(provider, NULL, protobp0, nfssvc, 0); } } done: diff --git a/usr/src/cmd/fs.d/nfs/statd/sm_svc.c b/usr/src/cmd/fs.d/nfs/statd/sm_svc.c index bf0d9e2e0d..2ffb1e5662 100644 --- a/usr/src/cmd/fs.d/nfs/statd/sm_svc.c +++ b/usr/src/cmd/fs.d/nfs/statd/sm_svc.c @@ -79,12 +79,17 @@ #define backup1 "statmon/sm.bak/" #define state1 "statmon/state" +extern void __use_portmapper(int); +extern bool_t __pmap_unset(const rpcprog_t program, const rpcvers_t version); + /* * User and group IDs to run as. These are hardwired, rather than looked * up at runtime, because they are very unlikely to change and because they * provide some protection against bogus changes to the passwd and group * files. 
*/ +uid_t daemon_uid = DAEMON_UID; +gid_t daemon_gid = DAEMON_GID; char STATE[MAXPATHLEN], CURRENT[MAXPATHLEN], BACKUP[MAXPATHLEN]; static char statd_home[MAXPATHLEN]; @@ -437,6 +442,7 @@ main(int argc, char *argv[]) int mode; int sz; int connmaxrec = RPC_MAXDATASIZE; + int use_pmap = 0; addrix = 0; pathix = 0; @@ -445,7 +451,7 @@ main(int argc, char *argv[]) if (init_hostname() < 0) exit(1); - while ((c = getopt(argc, argv, "Dd:a:p:r")) != EOF) + while ((c = getopt(argc, argv, "a:Dd:G:Pp:rU:")) != EOF) switch (c) { case 'd': (void) sscanf(optarg, "%d", &debug); @@ -474,6 +480,16 @@ main(int argc, char *argv[]) (void) fprintf(stderr, "statd: -a exceeding maximum hostnames\n"); break; + case 'P': + __use_portmapper(1); + use_pmap = 1; + break; + case 'U': + (void) sscanf(optarg, "%d", &daemon_uid); + break; + case 'G': + (void) sscanf(optarg, "%d", &daemon_gid); + break; case 'p': if (strlen(optarg) < MAXPATHLEN) { /* If the path_name array has not yet */ @@ -557,7 +573,7 @@ main(int argc, char *argv[]) openlog("statd", LOG_PID, LOG_DAEMON); } - (void) _create_daemon_lock(STATD, DAEMON_UID, DAEMON_GID); + (void) _create_daemon_lock(STATD, daemon_uid, daemon_gid); /* * establish our lock on the lock file and write our pid to it. * exit if some other process holds the lock, or if there's any @@ -598,6 +614,11 @@ main(int argc, char *argv[]) syslog(LOG_INFO, "unable to set maximum RPC record size"); } + if (use_pmap) { + (void) __pmap_unset(SM_PROG, SM_VERS); + (void) __pmap_unset(NSM_ADDR_PROGRAM, NSM_ADDR_V1); + } + if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "netpath")) { syslog(LOG_ERR, "statd: unable to create (SM_PROG, SM_VERS) for netpath."); @@ -692,7 +713,7 @@ set_statmon_owner(void) can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP); if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, - DAEMON_UID, DAEMON_GID, can_do_mlp ? PRIV_NET_BINDMLP : NULL, + daemon_uid, daemon_gid, can_do_mlp ? 
PRIV_NET_BINDMLP : NULL, NULL) == -1) { syslog(LOG_ERR, "can't run unprivileged: %m"); exit(1); @@ -809,13 +830,13 @@ nftw_owner(const char *path, const struct stat *statp, int info, } /* If already owned by daemon, don't bother changing. */ - if (statp->st_uid == DAEMON_UID && - statp->st_gid == DAEMON_GID) + if (statp->st_uid == daemon_uid && + statp->st_gid == daemon_gid) return (0); if (debug) printf("lchown %s daemon:daemon\n", path); - if (lchown(path, DAEMON_UID, DAEMON_GID) < 0) { + if (lchown(path, daemon_uid, daemon_gid) < 0) { int error = errno; syslog(LOG_WARNING, "can't chown %s to daemon: %m", diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common index c58f2de0e0..329176e35b 100644 --- a/usr/src/cmd/mdb/Makefile.common +++ b/usr/src/cmd/mdb/Makefile.common @@ -32,6 +32,7 @@ COMMON_MODULES_PROC = \ libavl \ libc \ libnvpair \ + libproc \ libsysevent \ libumem \ libuutil \ diff --git a/usr/src/cmd/mdb/common/kmdb/kvm.h b/usr/src/cmd/mdb/common/kmdb/kvm.h index 757c7f4636..4c1786dbf6 100644 --- a/usr/src/cmd/mdb/common/kmdb/kvm.h +++ b/usr/src/cmd/mdb/common/kmdb/kvm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -95,8 +94,10 @@ typedef struct kmt_data { GElf_Sym _kmt_cmnint; GElf_Sym _kmt_cmntrap; GElf_Sym _kmt_sysenter; + GElf_Sym _kmt_brand_sysenter; #if defined(__amd64) GElf_Sym _kmt_syscall; + GElf_Sym _kmt_brand_syscall; #endif } kmt_intrsyms; #endif diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c index 6c41ae6471..d3e16c4aa6 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_main.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -583,9 +582,15 @@ main(int argc, char *argv[], char *envp[]) } } + if (rd_ctl(RD_CTL_SET_HELPPATH, (void *)mdb.m_root) != RD_OK) { + warn("cannot set librtld_db helper path to %s\n", mdb.m_root); + terminate(2); + } + if (mdb.m_debug & MDB_DBG_HELP) terminate(0); /* Quit here if we've printed out the tokens */ + if (Iflag != NULL && strchr(Iflag, ';') != NULL) { warn("macro path cannot contain semicolons\n"); terminate(2); diff --git a/usr/src/cmd/mdb/common/mdb/mdb_proc.c b/usr/src/cmd/mdb/common/mdb/mdb_proc.c index be50280b4c..52c1ea5ad8 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_proc.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_proc.c @@ -4519,6 +4519,18 @@ pt_stack_iter(mdb_tgt_t *t, const mdb_tgt_gregset_t *gsp, return (set_errno(EMDB_NOPROC)); } +static int +pt_auxv(mdb_tgt_t *t, const auxv_t **auxvp) +{ + if (t->t_pshandle != NULL) { + *auxvp = Pgetauxvec(t->t_pshandle); + return (0); + } + + return (set_errno(EMDB_NOPROC)); +} + + static const mdb_tgt_ops_t proc_ops = { pt_setflags, /* t_setflags */ (int (*)()) mdb_tgt_notsup, /* t_setcontext */ @@ -4570,7 +4582,8 @@ static const mdb_tgt_ops_t proc_ops = { pt_add_fault, /* t_add_fault */ pt_getareg, /* t_getareg */ pt_putareg, /* t_putareg */ - pt_stack_iter /* t_stack_iter */ + pt_stack_iter, /* t_stack_iter */ + pt_auxv /* t_auxv */ }; /* diff --git a/usr/src/cmd/mdb/common/mdb/mdb_pservice.c b/usr/src/cmd/mdb/common/mdb/mdb_pservice.c index 5f3778086f..dca6ccdffa 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_pservice.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_pservice.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -229,14 +228,13 @@ ps_plog(const char *format, ...) ps_err_e ps_pauxv(struct ps_prochandle *P, const auxv_t **auxvp) { - static const auxv_t auxv = { AT_NULL, 0 }; - mdb_tgt_t *t = mdb_tgt_from_pshandle(P); if (t == NULL) return (ps_ops.ps_pauxv(P, auxvp)); - *auxvp = &auxv; + if (mdb_tgt_auxv(t, auxvp) != 0) + return (PS_ERR); return (PS_OK); } diff --git a/usr/src/cmd/mdb/common/mdb/mdb_target.c b/usr/src/cmd/mdb/common/mdb/mdb_target.c index fa01e00693..cd05e72459 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_target.c +++ b/usr/src/cmd/mdb/common/mdb/mdb_target.c @@ -384,6 +384,12 @@ mdb_tgt_dmodel(mdb_tgt_t *t) return (t->t_ops->t_dmodel(t)); } +int +mdb_tgt_auxv(mdb_tgt_t *t, const auxv_t **auxvp) +{ + return (t->t_ops->t_auxv(t, auxvp)); +} + ssize_t mdb_tgt_aread(mdb_tgt_t *t, mdb_tgt_as_t as, void *buf, size_t n, mdb_tgt_addr_t addr) diff --git a/usr/src/cmd/mdb/common/mdb/mdb_target_impl.h b/usr/src/cmd/mdb/common/mdb/mdb_target_impl.h index b0c5dcae09..ecc5331a1e 100644 --- a/usr/src/cmd/mdb/common/mdb/mdb_target_impl.h +++ b/usr/src/cmd/mdb/common/mdb/mdb_target_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +32,7 @@ #include <mdb/mdb_module.h> #include <mdb/mdb_list.h> #include <mdb/mdb_gelf.h> +#include <sys/auxv.h> #ifdef __cplusplus extern "C" { @@ -133,6 +133,7 @@ typedef struct mdb_tgt_ops { int (*t_stack_iter)(mdb_tgt_t *, const mdb_tgt_gregset_t *, mdb_tgt_stack_f *, void *); + int (*t_auxv)(mdb_tgt_t *, const auxv_t **auxvp); } mdb_tgt_ops_t; /* @@ -324,6 +325,8 @@ extern int mdb_tgt_vespec_insert(mdb_tgt_t *, const mdb_se_ops_t *, extern mdb_vespec_t *mdb_tgt_vespec_lookup(mdb_tgt_t *, int); +extern int mdb_tgt_auxv(mdb_tgt_t *, const auxv_t **); + extern void mdb_tgt_vespec_hold(mdb_tgt_t *, mdb_vespec_t *); extern void mdb_tgt_vespec_rele(mdb_tgt_t *, mdb_vespec_t *); diff --git a/usr/src/cmd/mdb/common/modules/libproc/libproc.c b/usr/src/cmd/mdb/common/modules/libproc/libproc.c new file mode 100644 index 0000000000..f1a87503cd --- /dev/null +++ b/usr/src/cmd/mdb/common/modules/libproc/libproc.c @@ -0,0 +1,353 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libproc.h> +#include <Pcontrol.h> +#include <stddef.h> + +#include <mdb/mdb_modapi.h> + +typedef struct ps_prochandle ps_prochandle_t; + +/* + * addr::pr_symtab [-a | -n] + * + * -a Sort symbols by address + * -n Sort symbols by name + * + * Given a sym_tbl_t, dump its contents in tabular form. When given '-a' or + * '-n', we use the sorted tables 'sym_byaddr' or 'sym_byname', respectively. + */ +static int +pr_symtab(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + sym_tbl_t symtab; + Elf_Data data; +#ifdef _LP64 + Elf64_Sym sym; + int width = 16; +#else + Elf32_Sym sym; + int width = 8; +#endif + int i, idx, count; + char name[128]; + int byaddr = FALSE; + int byname = FALSE; + uint_t *symlist; + size_t symlistsz; + + if (mdb_getopts(argc, argv, + 'a', MDB_OPT_SETBITS, TRUE, &byaddr, + 'n', MDB_OPT_SETBITS, TRUE, &byname, + NULL) != argc) + return (DCMD_USAGE); + + if (byaddr && byname) { + mdb_warn("only one of '-a' or '-n' can be specified\n"); + return (DCMD_USAGE); + } + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (mdb_vread(&symtab, sizeof (sym_tbl_t), addr) == -1) { + mdb_warn("failed to read sym_tbl_t at %p", addr); + return (DCMD_ERR); + } + + if (symtab.sym_count == 0) { + mdb_warn("no symbols present\n"); + return (DCMD_ERR); + } + + if (mdb_vread(&data, sizeof (Elf_Data), + (uintptr_t)symtab.sym_data) == -1) { + mdb_warn("failed to read Elf_Data at %p", symtab.sym_data); + return (DCMD_ERR); + } + + symlist = NULL; + if (byaddr || byname) { + uintptr_t src = byaddr ?
(uintptr_t)symtab.sym_byaddr : + (uintptr_t)symtab.sym_byname; + + symlistsz = symtab.sym_count * sizeof (uint_t); + symlist = mdb_alloc(symlistsz, UM_SLEEP); + if (mdb_vread(symlist, symlistsz, src) == -1) { + mdb_warn("failed to read sorted symbols at %p", src); + /* don't leak the index buffer on the error path */ + mdb_free(symlist, symlistsz); + return (DCMD_ERR); + } + count = symtab.sym_count; + } else { + count = symtab.sym_symn; + } + + mdb_printf("%<u>%*s %*s %s%</u>\n", width, "ADDRESS", width, + "SIZE", "NAME"); + + for (i = 0; i < count; i++) { + if (byaddr || byname) + idx = symlist[i]; + else + idx = i; + + if (mdb_vread(&sym, sizeof (sym), (uintptr_t)data.d_buf + + idx * sizeof (sym)) == -1) { + mdb_warn("failed to read symbol at %p", + (uintptr_t)data.d_buf + idx * sizeof (sym)); + if (symlist) + mdb_free(symlist, symlistsz); + return (DCMD_ERR); + } + + if (mdb_readstr(name, sizeof (name), + (uintptr_t)symtab.sym_strs + sym.st_name) == -1) { + mdb_warn("failed to read symbol name at %p", + symtab.sym_strs + sym.st_name); + name[0] = '\0'; + } + + mdb_printf("%0?p %0?p %s\n", sym.st_value, sym.st_size, + name); + } + + if (symlist) + mdb_free(symlist, symlistsz); + + return (DCMD_OK); +} + +/* + * addr::pr_addr2map search + * + * Given a ps_prochandle_t, convert the given address to the corresponding + * map_info_t. Functionally equivalent to Paddr2mptr().
+ */ +static int +pr_addr2map(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + uintptr_t search; + uintptr_t maddr; + ps_prochandle_t psp; + map_info_t m; + int lo, hi, mid; + + if (!(flags & DCMD_ADDRSPEC) || argc != 1) + return (DCMD_USAGE); + + if (argv[0].a_type == MDB_TYPE_IMMEDIATE) + search = argv[0].a_un.a_val; + else + search = mdb_strtoull(argv[0].a_un.a_str); + + if (mdb_vread(&psp, sizeof (ps_prochandle_t), addr) == -1) { + mdb_warn("failed to read ps_prochandle at %p", addr); + return (DCMD_ERR); + } + + /* + * Binary search for the mapping that contains 'search'. As in the + * pr_map_info walker, psp.mappings is treated as a target address, + * so each candidate map_info_t is fetched with mdb_vread() rather + * than dereferenced locally. Valid indices are 0 .. map_count - 1. + */ + lo = 0; + hi = (int)psp.map_count - 1; + while (lo <= hi) { + mid = lo + (hi - lo) / 2; + maddr = (uintptr_t)psp.mappings + mid * sizeof (map_info_t); + + if (mdb_vread(&m, sizeof (map_info_t), maddr) == -1) { + mdb_warn("failed to read map_info_t at %p", maddr); + return (DCMD_ERR); + } + + if ((search - m.map_pmap.pr_vaddr) < m.map_pmap.pr_size) { + /* print the target address of the matching map_info_t */ + mdb_printf("%#lr\n", maddr); + return (DCMD_OK); + } + + if (search < m.map_pmap.pr_vaddr) + hi = mid - 1; + else + lo = mid + 1; + } + + mdb_warn("no corresponding map for %p\n", search); + return (DCMD_ERR); +} + +/* + * ::walk pr_file_info + * + * Given a ps_prochandle_t, walk all its file_info_t structures.
+ */
+typedef struct {
+	uintptr_t	fiw_next;	/* target address of next file_info_t */
+	int		fiw_count;	/* entries still to be visited */
+} file_info_walk_t;
+
+/*
+ * Start the walk: read the ps_prochandle at walk_addr and record the head of
+ * its file list together with the number of files.
+ */
+static int
+pr_file_info_walk_init(mdb_walk_state_t *wsp)
+{
+	file_info_walk_t *state;
+	ps_prochandle_t ph;
+
+	if (wsp->walk_addr == NULL) {
+		mdb_warn("pr_file_info doesn't support global walks\n");
+		return (WALK_ERR);
+	}
+
+	if (mdb_vread(&ph, sizeof (ph), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ps_prochandle at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	state = mdb_alloc(sizeof (*state), UM_SLEEP);
+	state->fiw_count = ph.num_files;
+	state->fiw_next = (uintptr_t)ph.file_head.list_forw;
+
+	wsp->walk_data = state;
+	return (WALK_NEXT);
+}
+
+/*
+ * Visit one file_info_t: read it from the target, hand it to the callback,
+ * then advance along the list and return the callback's status.
+ */
+static int
+pr_file_info_walk_step(mdb_walk_state_t *wsp)
+{
+	file_info_walk_t *state = wsp->walk_data;
+	uintptr_t cur = state->fiw_next;
+	file_info_t finfo;
+	int rv;
+
+	if (state->fiw_count == 0)
+		return (WALK_DONE);
+
+	if (mdb_vread(&finfo, sizeof (finfo), cur) == -1) {
+		mdb_warn("failed to read file_info_t at %p", cur);
+		return (WALK_ERR);
+	}
+
+	rv = wsp->walk_callback(cur, &finfo, wsp->walk_cbdata);
+
+	state->fiw_count--;
+	state->fiw_next = (uintptr_t)finfo.file_list.list_forw;
+
+	return (rv);
+}
+
+/*
+ * Release the walk state allocated by pr_file_info_walk_init().
+ */
+static void
+pr_file_info_walk_fini(mdb_walk_state_t *wsp)
+{
+	mdb_free(wsp->walk_data, sizeof (file_info_walk_t));
+}
+
+/*
+ * ::walk pr_map_info
+ *
+ * Given a ps_prochandle_t, walk all its map_info_t structures.
+ */
+typedef struct {
+	uintptr_t	miw_next;	/* target address of next map_info_t */
+	int		miw_count;	/* total number of mappings */
+	int		miw_current;	/* index of the next mapping to visit */
+} map_info_walk_t;
+
+/*
+ * Start the walk: read the ps_prochandle at walk_addr and record the base of
+ * its mappings array and the number of entries.
+ */
+static int
+pr_map_info_walk_init(mdb_walk_state_t *wsp)
+{
+	ps_prochandle_t	psp;
+	map_info_walk_t *miw;
+
+	if (wsp->walk_addr == NULL) {
+		mdb_warn("pr_map_info doesn't support global walks\n");
+		return (WALK_ERR);
+	}
+
+	if (mdb_vread(&psp, sizeof (ps_prochandle_t), wsp->walk_addr) == -1) {
+		mdb_warn("failed to read ps_prochandle at %p", wsp->walk_addr);
+		return (WALK_ERR);
+	}
+
+	miw = mdb_alloc(sizeof (map_info_walk_t), UM_SLEEP);
+
+	miw->miw_next = (uintptr_t)psp.mappings;
+	miw->miw_count = psp.map_count;
+	miw->miw_current = 0;
+	wsp->walk_data = miw;
+
+	return (WALK_NEXT);
+}
+
+/*
+ * Visit one map_info_t: read it from the target, hand it to the callback,
+ * then step to the next array element and return the callback's status.
+ */
+static int
+pr_map_info_walk_step(mdb_walk_state_t *wsp)
+{
+	map_info_walk_t *miw = wsp->walk_data;
+	map_info_t m;
+	int status;
+
+	if (miw->miw_current == miw->miw_count)
+		return (WALK_DONE);
+
+	if (mdb_vread(&m, sizeof (map_info_t), miw->miw_next) == -1) {
+		mdb_warn("failed to read map_info_t at %p", miw->miw_next);
+		/*
+		 * A failed read is an error, not a normal end of walk;
+		 * report it the same way pr_file_info_walk_step() does.
+		 */
+		return (WALK_ERR);
+	}
+
+	status = wsp->walk_callback(miw->miw_next, &m, wsp->walk_cbdata);
+
+	miw->miw_current++;
+	miw->miw_next += sizeof (map_info_t);
+
+	return (status);
+}
+
+/*
+ * Release the walk state allocated by pr_map_info_walk_init().
+ */
+static void
+pr_map_info_walk_fini(mdb_walk_state_t *wsp)
+{
+	map_info_walk_t *miw = wsp->walk_data;
+	mdb_free(miw, sizeof (map_info_walk_t));
+}
+
+/* Debugger commands exported by this module. */
+static const mdb_dcmd_t dcmds[] = {
+	{ "pr_addr2map", ":addr", "convert an address into a map_info_t",
+	    pr_addr2map },
+	{ "pr_symtab", ":[-a | -n]", "print the contents of a sym_tbl_t",
+	    pr_symtab },
+	{ NULL }
+};
+
+/* Walkers exported by this module. */
+static const mdb_walker_t walkers[] = {
+	{ "pr_file_info", "given a ps_prochandle, walk its file_info "
+	    "structures", pr_file_info_walk_init, pr_file_info_walk_step,
+	    pr_file_info_walk_fini },
+	{ "pr_map_info", "given a ps_prochandle, walk its map_info structures",
+	    pr_map_info_walk_init, pr_map_info_walk_step,
+	    pr_map_info_walk_fini },
+	{ NULL }
+};
+
+static const mdb_modinfo_t modinfo = {
+	
MDB_API_VERSION, dcmds, walkers +}; + +const mdb_modinfo_t * +_mdb_init(void) +{ + return (&modinfo); +} diff --git a/usr/src/cmd/mdb/intel/amd64/libproc/Makefile b/usr/src/cmd/mdb/intel/amd64/libproc/Makefile new file mode 100644 index 0000000000..4e98e2dee4 --- /dev/null +++ b/usr/src/cmd/mdb/intel/amd64/libproc/Makefile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libproc.so +MDBTGT = proc + +MODSRCS_DIR = ../../../common/modules/libproc + +MODSRCS = libproc.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.amd64 +include ../../../Makefile.module + +CFLAGS64 += -I../../../../../lib/libproc/common +LINTFLAGS64 += -I../../../../../lib/libproc/common + +%.o: $(MODSRCS_DIR)/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + +%.ln: $(MODSRCS_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/intel/ia32/libproc/Makefile b/usr/src/cmd/mdb/intel/ia32/libproc/Makefile new file mode 100644 index 0000000000..938998447c --- /dev/null +++ b/usr/src/cmd/mdb/intel/ia32/libproc/Makefile @@ -0,0 +1,46 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libproc.so +MDBTGT = proc + +MODSRCS_DIR = ../../../common/modules/libproc + +MODSRCS = libproc.c + +include ../../../../Makefile.cmd +include ../../Makefile.ia32 +include ../../../Makefile.module + +CFLAGS += -I../../../../../lib/libproc/common +LINTFLAGS += -I../../../../../lib/libproc/common + +%.o: $(MODSRCS_DIR)/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + +%.ln: $(MODSRCS_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/intel/kmdb/kaif.c b/usr/src/cmd/mdb/intel/kmdb/kaif.c index d5cdf1a1db..ceedccc96f 100644 --- a/usr/src/cmd/mdb/intel/kmdb/kaif.c +++ b/usr/src/cmd/mdb/intel/kmdb/kaif.c @@ -69,6 +69,7 @@ uint64_t *kaif_msr_wrexit_valp; uintptr_t kaif_kernel_handler; uintptr_t kaif_sys_sysenter; +uintptr_t kaif_brand_sys_sysenter; int kaif_trap_switch; @@ -921,6 +922,10 @@ kaif_init(kmdb_auxv_t *kav) "sys_sysenter")) == NULL) return (set_errno(ENOENT)); + if ((kaif_brand_sys_sysenter = kmdb_kdi_lookup_by_name("unix", + "brand_sys_sysenter")) == NULL) + return (set_errno(ENOENT)); + return (0); } diff --git a/usr/src/cmd/mdb/intel/kmdb/kaif.h b/usr/src/cmd/mdb/intel/kmdb/kaif.h index b827e580b5..c5392e7889 100644 --- a/usr/src/cmd/mdb/intel/kmdb/kaif.h +++ b/usr/src/cmd/mdb/intel/kmdb/kaif.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -80,6 +79,7 @@ extern int kaif_trap_switch; extern uintptr_t kaif_kernel_handler; extern uintptr_t kaif_sys_sysenter; +extern uintptr_t kaif_brand_sys_sysenter; extern void kaif_trap_set_debugger(void); extern void kaif_trap_set_saved(kaif_cpusave_t *); diff --git a/usr/src/cmd/mdb/intel/kmdb/kaif_start_isadep.c b/usr/src/cmd/mdb/intel/kmdb/kaif_start_isadep.c index 27ee02c0d3..708cbb390c 100644 --- a/usr/src/cmd/mdb/intel/kmdb/kaif_start_isadep.c +++ b/usr/src/cmd/mdb/intel/kmdb/kaif_start_isadep.c @@ -72,7 +72,8 @@ kaif_trap_pass(kaif_cpusave_t *cpusave) * See the comments in the kernel's T_SGLSTP handler for why we need to * do this. */ - if (tt == T_SGLSTP && pc == kaif_sys_sysenter) + if (tt == T_SGLSTP && + (pc == kaif_sys_sysenter || pc == kaif_brand_sys_sysenter)) return (1); return (0); diff --git a/usr/src/cmd/mdb/intel/kmdb/kvm_isadep.c b/usr/src/cmd/mdb/intel/kmdb/kvm_isadep.c index 9ccf5de65a..e80b2243ef 100644 --- a/usr/src/cmd/mdb/intel/kmdb/kvm_isadep.c +++ b/usr/src/cmd/mdb/intel/kmdb/kvm_isadep.c @@ -627,8 +627,12 @@ kmt_startup_isadep(mdb_tgt_t *t) "cmntrap", &kmt->kmt_intrsyms._kmt_cmntrap, NULL); (void) mdb_tgt_lookup_by_name(t, MDB_TGT_OBJ_EXEC, "sys_sysenter", &kmt->kmt_intrsyms._kmt_sysenter, NULL); + (void) mdb_tgt_lookup_by_name(t, MDB_TGT_OBJ_EXEC, + "brand_sys_sysenter", &kmt->kmt_intrsyms._kmt_brand_sysenter, NULL); #if defined(__amd64) (void) mdb_tgt_lookup_by_name(t, MDB_TGT_OBJ_EXEC, "sys_syscall", &kmt->kmt_intrsyms._kmt_syscall, NULL); + (void) mdb_tgt_lookup_by_name(t, MDB_TGT_OBJ_EXEC, + "brand_sys_syscall", &kmt->kmt_intrsyms._kmt_brand_syscall, NULL); #endif } diff --git a/usr/src/cmd/mdb/sparc/v7/libproc/Makefile b/usr/src/cmd/mdb/sparc/v7/libproc/Makefile new file mode 100644 index 0000000000..6c0562334d --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v7/libproc/Makefile @@ -0,0 +1,46 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the 
"License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libproc.so +MDBTGT = proc + +MODSRCS_DIR = ../../../common/modules/libproc + +MODSRCS = libproc.c + +include ../../../../Makefile.cmd +include ../../Makefile.sparcv7 +include ../../../Makefile.module + +CFLAGS += -I../../../../../lib/libproc/common +LINTFLAGS += -I../../../../../lib/libproc/common + +%.o: $(MODSRCS_DIR)/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + +%.ln: $(MODSRCS_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/mdb/sparc/v9/libproc/Makefile b/usr/src/cmd/mdb/sparc/v9/libproc/Makefile new file mode 100644 index 0000000000..77fd8f45ba --- /dev/null +++ b/usr/src/cmd/mdb/sparc/v9/libproc/Makefile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +MODULE = libproc.so +MDBTGT = proc + +MODSRCS_DIR = ../../../common/modules/libproc + +MODSRCS = libproc.c + +include ../../../../Makefile.cmd +include ../../../../Makefile.cmd.64 +include ../../Makefile.sparcv9 +include ../../../Makefile.module + +CFLAGS64 += -I../../../../../lib/libproc/common +LINTFLAGS64 += -I../../../../../lib/libproc/common + +%.o: $(MODSRCS_DIR)/%.c + $(COMPILE.c) $< + $(CTFCONVERT_O) + +%.ln: $(MODSRCS_DIR)/%.c + $(LINT.c) -c $< diff --git a/usr/src/cmd/ptools/Makefile.bld b/usr/src/cmd/ptools/Makefile.bld index 446bdac5ae..5afd76d753 100644 --- a/usr/src/cmd/ptools/Makefile.bld +++ b/usr/src/cmd/ptools/Makefile.bld @@ -42,7 +42,7 @@ LDLIBS_pfiles = -lproc -lnsl LDLIBS_pflags = -lproc LDLIBS_pldd = -lproc LDLIBS_plgrp = -lproc -llgrp -LDLIBS_pmap = -lproc +LDLIBS_pmap = -lproc -lzonecfg LDLIBS_pmadvise = -lproc LDLIBS_ppriv = -lproc LDLIBS_preap = -lproc diff --git a/usr/src/cmd/ptools/pargs/pargs.c b/usr/src/cmd/ptools/pargs/pargs.c index b1e01600dc..7bac7a00e8 100644 --- a/usr/src/cmd/ptools/pargs/pargs.c +++ b/usr/src/cmd/ptools/pargs/pargs.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -788,6 +787,9 @@ static struct aux_id aux_arr[] = { { AT_SUN_MMU, "AT_SUN_MMU", at_null }, { AT_SUN_LDDATA, "AT_SUN_LDDATA", at_null }, { AT_SUN_AUXFLAGS, "AT_SUN_AUXFLAGS", at_flags }, + { AT_SUN_EMULATOR, "AT_SUN_EMULATOR", at_str }, + { AT_SUN_BRANDNAME, "AT_SUN_BRANDNAME", at_str }, + { AT_SUN_BRAND_PHDR, "AT_SUN_BRAND_PHDR", at_null } }; #define N_AT_ENTS (sizeof (aux_arr) / sizeof (struct aux_id)) diff --git a/usr/src/cmd/ptools/pmap/pmap.c b/usr/src/cmd/ptools/pmap/pmap.c index 866964ef3a..f71348c649 100644 --- a/usr/src/cmd/ptools/pmap/pmap.c +++ b/usr/src/cmd/ptools/pmap/pmap.c @@ -44,6 +44,7 @@ #include <sys/mman.h> #include <sys/lgrp_user.h> #include <libproc.h> +#include <libzonecfg.h> #define KILOBYTE 1024 #define MEGABYTE (KILOBYTE * KILOBYTE) @@ -598,18 +599,48 @@ static char * make_name(struct ps_prochandle *Pr, uintptr_t addr, const char *mapname, char *buf, size_t bufsz) { - const pstatus_t *Psp = Pstatus(Pr); - char fname[100]; - struct stat statb; - int len; + const pstatus_t *Psp = Pstatus(Pr); + const psinfo_t *pi = Ppsinfo(Pr); + char fname[100]; + struct stat statb; + int len; + char zname[ZONENAME_MAX]; + char zpath[PATH_MAX]; + char objname[PATH_MAX]; if (!lflag && strcmp(mapname, "a.out") == 0 && Pexecname(Pr, buf, bufsz) != NULL) return (buf); - if (Pobjname(Pr, addr, buf, bufsz) != NULL) { + if (Pobjname(Pr, addr, objname, sizeof (objname)) != NULL) { + (void) strncpy(buf, objname, bufsz); + if (lflag) return (buf); + + if ((len = resolvepath(buf, buf, bufsz)) > 0) { + buf[len] = '\0'; + return (buf); + } + + /* + * If the target is in a non-global zone, attempt to prepend + * the zone path in order to give the 
global-zone caller the + * real path to the file. + */ + if (getzonenamebyid(pi->pr_zoneid, zname, + sizeof (zname)) != -1 && strcmp(zname, "global") != 0 && + zone_get_zonepath(zname, zpath, sizeof (zpath)) == Z_OK) { + (void) strncat(zpath, "/root", + MAXPATHLEN - strlen(zpath)); + + if (bufsz <= strlen(zpath)) + return (NULL); + + (void) strncpy(buf, zpath, bufsz); + (void) strncat(buf, objname, bufsz - strlen(zpath)); + } + if ((len = resolvepath(buf, buf, bufsz)) > 0) { buf[len] = '\0'; return (buf); diff --git a/usr/src/cmd/renice/renice.c b/usr/src/cmd/renice/renice.c index 8920e6b7a4..ef83b150c0 100644 --- a/usr/src/cmd/renice/renice.c +++ b/usr/src/cmd/renice/renice.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -337,7 +336,7 @@ donice(int which, id_t who, int prio, int increment, char *who_s) if (setpriority(which, who, prio) < 0) { (void) fprintf(stderr, gettext("renice: %s:"), who_s); - if (errno == EPERM && prio < oldprio) + if (errno == EACCES && prio < oldprio) (void) fprintf(stderr, gettext( " Cannot lower nice value.\n")); else diff --git a/usr/src/cmd/sgs/librtld_db/common/_rtld_db.h b/usr/src/cmd/sgs/librtld_db/common/_rtld_db.h index a3a3b8e31a..0f513c1d32 100644 --- a/usr/src/cmd/sgs/librtld_db/common/_rtld_db.h +++ b/usr/src/cmd/sgs/librtld_db/common/_rtld_db.h @@ -51,9 +51,9 @@ struct rd_agent { ulong_t rd_flags; /* flags */ ulong_t rd_rdebugvers; /* rtld_db_priv.vers */ int rd_dmodel; /* data model */ + rd_helper_t rd_helper; /* private to helper */ }; - /* * Values for rd_flags */ @@ -63,10 +63,12 @@ struct rd_agent { #define RDAGLOCK(x) (void) mutex_lock(&(x->rd_mutex)); #define RDAGUNLOCK(x) (void) mutex_unlock(&(x->rd_mutex)); -#define LOG(func) (void) mutex_lock(&glob_mutex); \ - if (rtld_db_logging) \ - func; \ - (void) mutex_unlock(&glob_mutex); +#define LOG(func) { \ + (void) mutex_lock(&glob_mutex); \ + if (rtld_db_logging) \ + func; \ + (void) mutex_unlock(&glob_mutex); \ + } extern mutex_t glob_mutex; extern int rtld_db_version; diff --git a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg index 6eb9c534e3..a68b417490 100644 --- a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg +++ b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg @@ -81,6 +81,10 @@ @ MSG_DB_NODYNDEBUG "rtld_db: fde: no %lld found in .dynamic" @ MSG_DB_FINDDYNAMIC "rtld_db: fde: DYNAMIC entry found tag: %d found. \ val: 0x%llx" +@ MSG_DB_HELPER_PREFIX "/usr/lib/brand/" + + + # Diagnostic messages @ MSG_DB_LOGENABLE "rtld_db: logging enabled!" 
@@ -95,6 +99,13 @@ @ MSG_DB_RDEVENTGETMSG "rtld_db: rd_event_getmsg(dmodel=%d, type=%d, \ state=%d)" @ MSG_DB_RDOBJPADE "rtld_db: rd_objpad_enable(padsize=0x%llx)" +@ MSG_DB_64BIT_PREFIX "64/"; +@ MSG_DB_BRAND_HELPERPATH_PREFIX "%s/%s/%s/%s%s_librtld_db.so.1" +@ MSG_DB_BRAND_HELPERPATH "%s/%s/%s%s_librtld_db.so.1" +@ MSG_DB_HELPERNOOPS "rtld_db: helper lib loaded but ops not preset" +@ MSG_DB_HELPERLOADED "rtld_db: helper library loaded for brand \"%s\"" +@ MSG_DB_HELPERLOADFAILED "rtld_db: couldn't load brand helper library %s" +@ MSG_DB_HELPERINITFAILED "rtld_db: brand helper library initialization failed" @ MSG_ER_OK "no error" @ MSG_ER_ERR "generic rtld_db.so error" @@ -111,3 +122,4 @@ @ MSG_SYM_DLACT "rtld_db_dlactivity" @ MSG_SYM_RTBIND "elf_rtbndr" @ MSG_SYM_DYNAMIC "_DYNAMIC" +@ MSG_SYM_BRANDOPS "rtld_db_brand_ops" diff --git a/usr/src/cmd/sgs/librtld_db/common/llib-lrtld_db b/usr/src/cmd/sgs/librtld_db/common/llib-lrtld_db index cf802b0244..f54aa47e17 100644 --- a/usr/src/cmd/sgs/librtld_db/common/llib-lrtld_db +++ b/usr/src/cmd/sgs/librtld_db/common/llib-lrtld_db @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -23,8 +22,8 @@ /* PROTOLIB1 */ /* - * Copyright (c) 1998 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
*/ #pragma ident "%Z%%M% %I% %E% SMI" @@ -38,6 +37,7 @@ rd_err_e rd_event_addr(rd_agent_t *, rd_event_e, rd_notify_t *); rd_err_e rd_event_enable(rd_agent_t *, int); rd_err_e rd_event_getmsg(rd_agent_t *, rd_event_msg_t *); rd_err_e rd_init(int); +rd_err_e rd_ctl(int, void *); rd_err_e rd_loadobj_iter(rd_agent_t *, rl_iter_f *, void *); void rd_log(const int); @@ -45,4 +45,6 @@ rd_agent_t * rd_new(struct ps_prochandle *); rd_err_e rd_objpad_enable(struct rd_agent *, size_t); rd_err_e rd_plt_resolution(rd_agent_t *, psaddr_t, lwpid_t, psaddr_t, rd_plt_info_t *); +void rd_fix_phdrs(struct rd_agent *, Elf32_Dyn *, size_t, + uintptr_t); rd_err_e rd_reset(struct rd_agent *); diff --git a/usr/src/cmd/sgs/librtld_db/common/mapfile-vers b/usr/src/cmd/sgs/librtld_db/common/mapfile-vers index 1b43116e9e..34adaf9766 100644 --- a/usr/src/cmd/sgs/librtld_db/common/mapfile-vers +++ b/usr/src/cmd/sgs/librtld_db/common/mapfile-vers @@ -1,13 +1,12 @@ # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. 
@@ -32,11 +31,13 @@ SUNW_1.1 { rd_event_enable; rd_event_getmsg; rd_init; + rd_ctl; rd_loadobj_iter; rd_log; rd_new; rd_objpad_enable; rd_plt_resolution; + rd_fix_phdrs; rd_reset; local: *; @@ -53,4 +54,5 @@ SUNW_1.1 { ps_plog = PARENT; ps_pread = PARENT; ps_pwrite = PARENT; + ps_pbrandname = PARENT; }; diff --git a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c index 570e533070..889af3226b 100644 --- a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c +++ b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -34,7 +33,9 @@ #include <rtld.h> #include <_rtld_db.h> #include <msg.h> - +#include <limits.h> +#include <string.h> +#include <sys/param.h> /* * 64-bit builds are going to compile this module twice, the @@ -63,7 +64,6 @@ #define TListnode Listnode #endif /* _LP64 */ - static rd_err_e validate_rdebug(struct rd_agent *rap) { @@ -87,14 +87,14 @@ validate_rdebug(struct rd_agent *rap) if (ps_pread(php, db_privp, (char *)&db_priv, sizeof (Rtld_db_priv)) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_READPRIVFAIL_1), - EC_ADDR(db_privp))); + EC_ADDR(db_privp))); return (RD_DBERR); } if ((db_priv.rtd_version < R_RTLDDB_VERSION1) || (db_priv.rtd_version > R_RTLDDB_VERSION)) { LOG(ps_plog(MSG_ORIG(MSG_DB_BADPVERS), - db_priv.rtd_version, R_RTLDDB_VERSION)); + db_priv.rtd_version, R_RTLDDB_VERSION)); return (RD_NOCAPAB); } @@ -114,7 +114,7 @@ validate_rdebug(struct rd_agent *rap) if (((rap->rd_flags & RDF_FL_COREFILE) == 0) && (db_priv.rtd_version != R_RTLDDB_VERSION)) { LOG(ps_plog(MSG_ORIG(MSG_DB_BADPVERS), - db_priv.rtd_version, R_RTLDDB_VERSION)); + db_priv.rtd_version, R_RTLDDB_VERSION)); return (RD_NOCAPAB); } @@ -122,8 +122,8 @@ validate_rdebug(struct rd_agent *rap) rap->rd_rtlddbpriv = db_privp; LOG(ps_plog(MSG_ORIG(MSG_DB_VALIDRDEBUG), EC_ADDR(rap->rd_rdebug), - R_RTLDDB_VERSION, rap->rd_rdebugvers, - rap->rd_flags & RDF_FL_COREFILE)); + R_RTLDDB_VERSION, rap->rd_rdebugvers, + rap->rd_flags & RDF_FL_COREFILE)); return (RD_OK); } @@ -140,7 +140,7 @@ find_dynamic_ent32(struct rd_agent *rap, psaddr_t dynaddr, if (ps_pread(php, dynaddr, (void *)(&d), sizeof (d)) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_READFAIL_4), - EC_ADDR(dynaddr))); + EC_ADDR(dynaddr))); return (RD_DBERR); } dynaddr += sizeof (d); @@ -157,6 +157,17 @@ find_dynamic_ent32(struct rd_agent *rap, psaddr_t dynaddr, return (RD_DBERR); } +extern char rtld_db_helper_path[MAXPATHLEN]; + +#ifndef _ELF64 +void +rd_fix_phdrs(struct rd_agent *rap, Elf32_Dyn *dp, size_t sz, uintptr_t a) +{ + if (rap->rd_helper.rh_ops 
!= NULL) + rap->rd_helper.rh_ops->rho_fix_phdrs(rap, dp, sz, a); +} +#endif + rd_err_e _rd_reset32(struct rd_agent *rap) { @@ -164,6 +175,8 @@ _rd_reset32(struct rd_agent *rap) struct ps_prochandle *php = rap->rd_psp; const auxv_t *auxvp = NULL; rd_err_e rc = RD_OK; + char brandname[MAXPATHLEN]; + char brandlib[MAXPATHLEN]; /* * librtld_db attempts three different methods to find @@ -192,6 +205,7 @@ _rd_reset32(struct rd_agent *rap) * * Scan the aux vector looking for AT_BASE & AT_SUN_LDDATA */ + if (ps_pauxv(php, &auxvp) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_NOAUXV))); rc = RD_ERR; @@ -228,12 +242,12 @@ _rd_reset32(struct rd_agent *rap) if (ps_pglobal_lookup(php, PS_OBJ_LDSO, MSG_ORIG(MSG_SYM_DEBUG), &symaddr) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_LOOKFAIL), - MSG_ORIG(MSG_SYM_DEBUG))); + MSG_ORIG(MSG_SYM_DEBUG))); rc = RD_DBERR; } else { rap->rd_rdebug = symaddr; LOG(ps_plog(MSG_ORIG(MSG_DB_SYMRDEBUG), - EC_ADDR(symaddr))); + EC_ADDR(symaddr))); rc = validate_rdebug(rap); } } @@ -263,11 +277,58 @@ _rd_reset32(struct rd_agent *rap) } } + /* + * If we are debugging a branded executable, load the appropriate helper + * library, and call its initialization routine. 
+ */ + if (ps_pbrandname(php, brandname, MAXPATHLEN) == PS_OK) { + const char *isa = ""; + +#ifdef __amd64 + isa = MSG_ORIG(MSG_DB_64BIT_PREFIX); +#endif /* __amd64 */ + + if (rtld_db_helper_path[0] != '\0') + (void) snprintf(brandlib, MAXPATHLEN, + MSG_ORIG(MSG_DB_BRAND_HELPERPATH_PREFIX), + rtld_db_helper_path, + MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa, + brandname); + else + (void) snprintf(brandlib, MAXPATHLEN, + MSG_ORIG(MSG_DB_BRAND_HELPERPATH), + MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa, + brandname); + + if ((rap->rd_helper.rh_dlhandle = dlopen(brandlib, + RTLD_LAZY | RTLD_LOCAL)) == NULL) { + LOG(ps_plog(MSG_ORIG(MSG_DB_HELPERLOADFAILED), + brandlib)); + return (RD_ERR); + } + + if ((rap->rd_helper.rh_ops = dlsym(rap->rd_helper.rh_dlhandle, + MSG_ORIG(MSG_SYM_BRANDOPS))) == NULL) { + LOG(ps_plog(MSG_ORIG(MSG_DB_HELPERNOOPS), + brandlib)); + return (RD_ERR); + } + + rap->rd_helper.rh_data = rap->rd_helper.rh_ops->rho_init(php); + if (rap->rd_helper.rh_data == NULL) { + LOG(ps_plog(MSG_ORIG(MSG_DB_HELPERINITFAILED))); + (void) dlclose(rap->rd_helper.rh_dlhandle); + rap->rd_helper.rh_dlhandle = NULL; + rap->rd_helper.rh_ops = NULL; + } else + LOG(ps_plog(MSG_ORIG(MSG_DB_HELPERLOADED), brandname)); + } + if ((rap->rd_flags & RDF_FL_COREFILE) == 0) { if (ps_pglobal_lookup(php, PS_OBJ_LDSO, MSG_ORIG(MSG_SYM_PREINIT), &symaddr) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_LOOKFAIL), - MSG_ORIG(MSG_SYM_PREINIT))); + MSG_ORIG(MSG_SYM_PREINIT))); return (RD_DBERR); } rap->rd_preinit = symaddr; @@ -275,7 +336,7 @@ _rd_reset32(struct rd_agent *rap) if (ps_pglobal_lookup(php, PS_OBJ_LDSO, MSG_ORIG(MSG_SYM_POSTINIT), &symaddr) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_LOOKFAIL), - MSG_ORIG(MSG_SYM_POSTINIT))); + MSG_ORIG(MSG_SYM_POSTINIT))); return (RD_DBERR); } rap->rd_postinit = symaddr; @@ -283,7 +344,7 @@ _rd_reset32(struct rd_agent *rap) if (ps_pglobal_lookup(php, PS_OBJ_LDSO, MSG_ORIG(MSG_SYM_DLACT), &symaddr) != PS_OK) { 
LOG(ps_plog(MSG_ORIG(MSG_DB_LOOKFAIL), - MSG_ORIG(MSG_SYM_DLACT))); + MSG_ORIG(MSG_SYM_DLACT))); return (RD_DBERR); } rap->rd_dlact = symaddr; @@ -293,7 +354,6 @@ _rd_reset32(struct rd_agent *rap) return (RD_OK); } - rd_err_e _rd_event_enable32(rd_agent_t *rap, int onoff) { @@ -357,7 +417,7 @@ _rd_event_getmsg32(rd_agent_t *rap, rd_event_msg_t *emsg) emsg->u.state = RD_NOSTATE; LOG(ps_plog(MSG_ORIG(MSG_DB_RDEVENTGETMSG), rap->rd_dmodel, - emsg->type, emsg->u.state)); + emsg->type, emsg->u.state)); return (RD_OK); } @@ -392,9 +452,6 @@ _rd_objpad_enable32(struct rd_agent *rap, size_t padsize) return (RD_OK); } - - - static rd_err_e iter_map(rd_agent_t *rap, unsigned long ident, psaddr_t lmaddr, rl_iter_f *cb, void *client_data, uint_t *abort_iter) @@ -431,9 +488,12 @@ iter_map(rd_agent_t *rap, unsigned long ident, psaddr_t lmaddr, } lobj.rl_base = (psaddr_t)ADDR(&rmap); - lobj.rl_lmident = ident; lobj.rl_flags = 0; lobj.rl_refnameaddr = (psaddr_t)REFNAME(&rmap); + if (rap->rd_helper.rh_dlhandle != NULL) + lobj.rl_lmident = LM_ID_BRAND; + else + lobj.rl_lmident = ident; /* * refnameaddr is only valid from a core file @@ -491,21 +551,20 @@ iter_map(rd_agent_t *rap, unsigned long ident, psaddr_t lmaddr, lobj.rl_data_base = phdr.p_vaddr; if (ehdr.e_type == ET_DYN) lobj.rl_data_base += - ADDR(&rmap); + ADDR(&rmap); break; } off += ehdr.e_phentsize; } } - /* * When we transfer control to the client we free the * lock and re-atain it after we've returned from the * client. This is to avoid any deadlock situations. 
*/ LOG(ps_plog(MSG_ORIG(MSG_DB_ITERMAP), cb, client_data, - EC_ADDR(lobj.rl_base), EC_ADDR(lobj.rl_lmident))); + EC_ADDR(lobj.rl_base), EC_ADDR(lobj.rl_lmident))); RDAGUNLOCK(rap); if ((*cb)(&lobj, client_data) == 0) { LOG(ps_plog(MSG_ORIG(MSG_DB_CALLBACKR0))); @@ -532,7 +591,7 @@ _rd_loadobj_iter32(rd_agent_t *rap, rl_iter_f *cb, void *client_data) uint_t abort_iter = 0; LOG(ps_plog(MSG_ORIG(MSG_DB_LOADOBJITER), rap->rd_dmodel, cb, - client_data)); + client_data)); if (ps_pread(rap->rd_psp, rap->rd_rtlddbpriv, (char *)&db_priv, sizeof (Rtld_db_priv)) != PS_OK) { @@ -543,20 +602,20 @@ _rd_loadobj_iter32(rd_agent_t *rap, rl_iter_f *cb, void *client_data) if (db_priv.rtd_dynlmlst == 0) { LOG(ps_plog(MSG_ORIG(MSG_DB_LKMAPNOINIT), - EC_ADDR((uintptr_t)db_priv.rtd_dynlmlst))); + EC_ADDR((uintptr_t)db_priv.rtd_dynlmlst))); return (RD_NOMAPS); } if (ps_pread(rap->rd_psp, (psaddr_t)db_priv.rtd_dynlmlst, (char *)&list, sizeof (TList)) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_READDBGFAIL_3), - EC_ADDR((uintptr_t)db_priv.rtd_dynlmlst))); + EC_ADDR((uintptr_t)db_priv.rtd_dynlmlst))); return (RD_DBERR); } if (list.head == 0) { LOG(ps_plog(MSG_ORIG(MSG_DB_LKMAPNOINIT_1), - EC_ADDR((uintptr_t)list.head))); + EC_ADDR((uintptr_t)list.head))); return (RD_NOMAPS); } @@ -575,14 +634,14 @@ _rd_loadobj_iter32(rd_agent_t *rap, rl_iter_f *cb, void *client_data) if (ps_pread(rap->rd_psp, (psaddr_t)lnp, (char *)&lnode, sizeof (TListnode)) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_READDBGFAIL_4), - EC_ADDR(lnp))); + EC_ADDR(lnp))); return (RD_DBERR); } if (ps_pread(rap->rd_psp, (psaddr_t)lnode.data, (char *)&lml, sizeof (Lm_list)) != PS_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_READDBGFAIL_5), - EC_ADDR((uintptr_t)lnode.data))); + EC_ADDR((uintptr_t)lnode.data))); return (RD_DBERR); } @@ -603,5 +662,13 @@ _rd_loadobj_iter32(rd_agent_t *rap, rl_iter_f *cb, void *client_data) if (abort_iter) break; } - return (rc); + + if (rc != RD_OK) + return (rc); + + if (rap->rd_helper.rh_ops != NULL) + return 
(rap->rd_helper.rh_ops->rho_loadobj_iter(rap->rd_psp, cb, + client_data, rap->rd_helper.rh_data)); + + return (RD_OK); } diff --git a/usr/src/cmd/sgs/librtld_db/common/rtld_db.c b/usr/src/cmd/sgs/librtld_db/common/rtld_db.c index 9301b16b65..a0324c6107 100644 --- a/usr/src/cmd/sgs/librtld_db/common/rtld_db.c +++ b/usr/src/cmd/sgs/librtld_db/common/rtld_db.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,13 +28,14 @@ #include <stdlib.h> #include <stdio.h> +#include <string.h> #include <proc_service.h> #include <link.h> #include <rtld_db.h> #include <rtld.h> #include <_rtld_db.h> #include <msg.h> - +#include <sys/param.h> /* * Mutex to protect global data @@ -43,6 +43,7 @@ mutex_t glob_mutex = DEFAULTMUTEX; int rtld_db_version = RD_VERSION1; int rtld_db_logging = 0; +char rtld_db_helper_path[MAXPATHLEN]; void @@ -92,9 +93,21 @@ rd_init(int version) return (RD_NOCAPAB); rtld_db_version = version; LOG(ps_plog(MSG_ORIG(MSG_DB_RDINIT), rtld_db_version)); + return (RD_OK); } +rd_err_e +rd_ctl(int cmd, void *arg) +{ + if (cmd != RD_CTL_SET_HELPPATH || arg == NULL || + strlen((char *)arg) >= MAXPATHLEN) + return (RD_ERR); + + (void) strcpy(rtld_db_helper_path, (char *)arg); + + return (RD_OK); +} rd_err_e rd_reset(struct rd_agent *rap) @@ -138,6 +151,7 @@ rd_new(struct ps_prochandle *php) rap->rd_psp = php; (void) mutex_init(&rap->rd_mutex, USYNC_THREAD, 0); if (rd_reset(rap) != RD_OK) { + (void) dlclose(rap->rd_helper.rh_dlhandle); free(rap); LOG(ps_plog(MSG_ORIG(MSG_DB_RESETFAIL))); return ((rd_agent_t *)0); @@ -220,7 +234,7 @@ rd_event_addr(rd_agent_t *rap, rd_event_e num, rd_notify_t *np) } if (rc == RD_OK) { LOG(ps_plog(MSG_ORIG(MSG_DB_RDEVENTADDR), num, - EC_ADDR(np->u.bptaddr))); + EC_ADDR(np->u.bptaddr))); } RDAGUNLOCK(rap); diff --git a/usr/src/cmd/sgs/rtld/amd64/_setup.c b/usr/src/cmd/sgs/rtld/amd64/_setup.c index 8e5a0961ff..d7773fab5d 100644 --- a/usr/src/cmd/sgs/rtld/amd64/_setup.c +++ b/usr/src/cmd/sgs/rtld/amd64/_setup.c @@ -69,7 +69,7 @@ _setup(Boot * ebp, Dyn * ld_dyn) auxv_t *auxv, *_auxv; uid_t uid = -1, euid = -1; gid_t gid = -1, egid = -1; - char *_platform = 0, *_execname = 0; + char *_platform = 0, *_execname = 0, *_emulator = 0; int auxflags = -1; /* * Scan the bootstrap structure to pick up the basics. 
@@ -153,6 +153,12 @@ _setup(Boot * ebp, Dyn * ld_dyn) case AT_SUN_HWCAP: hwcap_1 = (uint_t)auxv->a_un.a_val; break; +#ifdef AT_SUN_EMULATOR /* Emulation library name */ + case AT_SUN_EMULATOR: + /* name of emulation library, if any */ + _emulator = auxv->a_un.a_ptr; + break; +#endif } } @@ -208,6 +214,10 @@ _setup(Boot * ebp, Dyn * ld_dyn) reladdr += relaent; } + if (_emulator != NULL) { + rtld_flags2 |= RT_FL2_BRANDED; + } + /* * Initialize the dyn_plt_ent_size field. It currently contains the * size of the dyn_plt_template. It still needs to be aligned and have diff --git a/usr/src/cmd/sgs/rtld/common/_rtld.h b/usr/src/cmd/sgs/rtld/common/_rtld.h index 94354aaea4..c473c39ae7 100644 --- a/usr/src/cmd/sgs/rtld/common/_rtld.h +++ b/usr/src/cmd/sgs/rtld/common/_rtld.h @@ -310,6 +310,7 @@ typedef struct { #define RT_FL2_BINDNOW 0x00000100 /* LD_BIND_NOW in effect */ #define RT_FL2_BINDLAZY 0x00000200 /* disable RTLD_NOW (and LD_BIND_NOW) */ #define RT_FL2_PLMSETUP 0x00000400 /* primary link-map set up complete */ +#define RT_FL2_BRANDED 0x00000800 /* Process is branded */ /* * Information flags for env_info. diff --git a/usr/src/cmd/sgs/rtld/common/rtld.msg b/usr/src/cmd/sgs/rtld/common/rtld.msg index f9228905d7..473465867c 100644 --- a/usr/src/cmd/sgs/rtld/common/rtld.msg +++ b/usr/src/cmd/sgs/rtld/common/rtld.msg @@ -367,6 +367,8 @@ @ MSG_LD_VERBOSE "VERBOSE" @ MSG_LD_WARN "WARN" +@ MSG_LD_BRAND_PREFIX "BRAND_" + @ MSG_LD_FIX_1 "_FIX_1_" @ MSG_LC_ALL "ALL=" diff --git a/usr/src/cmd/sgs/rtld/common/util.c b/usr/src/cmd/sgs/rtld/common/util.c index 514779549d..6efaef19e6 100644 --- a/usr/src/cmd/sgs/rtld/common/util.c +++ b/usr/src/cmd/sgs/rtld/common/util.c @@ -2232,6 +2232,18 @@ ld_str_env(const char *s1, Word *lmflags, Word *lmtflags, uint_t env_flags, int flags; /* + * In a branded process we must ignore all LD_XXXX env vars + * because they are intended for the brand's linker. + * To affect the Solaris linker, use LD_BRAND_XXXX instead. 
+ */ + if (rtld_flags2 & RT_FL2_BRANDED) { + if (strncmp(s1, MSG_ORIG(MSG_LD_BRAND_PREFIX), + MSG_LD_BRAND_PREFIX_SIZE) != 0) + return; + s1 += MSG_LD_BRAND_PREFIX_SIZE; + } + + /* * Environment variables with no value (ie. LD_XXXX=) typically * have no impact, however if environment variables are defined * within a configuration file, these null user settings can be diff --git a/usr/src/cmd/sgs/rtld/i386/_setup.c b/usr/src/cmd/sgs/rtld/i386/_setup.c index 4a3b801265..6761a0d9e9 100644 --- a/usr/src/cmd/sgs/rtld/i386/_setup.c +++ b/usr/src/cmd/sgs/rtld/i386/_setup.c @@ -82,7 +82,8 @@ _setup(Boot * ebp, Dyn * ld_dyn) auxv_t *auxv, *_auxv; uid_t uid = -1, euid = -1; gid_t gid = -1, egid = -1; - char *_platform = 0, *_execname = 0; + char *_platform = 0, *_execname = 0, *_emulator = 0; + int _branded = 0; int auxflags = -1; /* * Scan the bootstrap structure to pick up the basics. @@ -174,6 +175,13 @@ _setup(Boot * ebp, Dyn * ld_dyn) hwcap_1 = (uint_t)auxv->a_un.a_val; break; #endif +#ifdef AT_SUN_EMULATOR /* Emulation library name */ + case AT_SUN_EMULATOR: + /* name of emulation library, if any */ + _emulator = auxv->a_un.a_ptr; + _branded = 1; + break; +#endif } } @@ -226,6 +234,16 @@ _setup(Boot * ebp, Dyn * ld_dyn) } /* + * If an emulation library is being used, use that as the linker's + * effective executable name. The real executable is not linked by this + * linker. + */ + if (_emulator != NULL) { + _execname = _emulator; + rtld_flags2 |= RT_FL2_BRANDED; + } + + /* * Initialize the dyn_plt_ent_size field. It currently contains the * size of the dyn_plt_template. It still needs to be aligned and have * space for the 'dyn_data' area added. 
diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c index ee8b7f0233..bc5d8c49a9 100644 --- a/usr/src/cmd/truss/systable.c +++ b/usr/src/cmd/truss/systable.c @@ -260,10 +260,7 @@ const struct systable systable[] = { {"kill", 2, DEC, NOV, DEC, SIG}, /* 37 */ {"fstatfs", 4, DEC, NOV, DEC, HEX, DEC, DEC}, /* 38 */ {"pgrpsys", 3, DEC, NOV, DEC, DEC, DEC}, /* 39 */ - -/* The following entry was xenix */ -{ NULL, 8, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX}, - +{"uucopystr", 3, DEC, NOV, STG, RST, UNS}, /* 40 */ {"dup", 1, DEC, NOV, DEC}, /* 41 */ {"pipe", 0, DEC, DEC}, /* 42 */ {"times", 1, DEC, NOV, HEX}, /* 43 */ @@ -403,7 +400,7 @@ const struct systable systable[] = { {"pwrite", 4, DEC, NOV, DEC, IOB, UNS, DEX}, /* 174 */ {"llseek", 4, LLO, NOV, DEC, LLO, HID, WHN}, /* 175 */ {"inst_sync", 2, DEC, NOV, STG, DEC}, /* 176 */ -{ NULL, 8, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX}, +{"brand", 6, DEC, NOV, DEC, HEX, HEX, HEX, HEX, HEX}, /* 177 */ {"kaio", 7, DEC, NOV, AIO, HEX, HEX, HEX, HEX, HEX, HEX}, /* 178 */ {"cpc", 5, DEC, NOV, CPC, DEC, HEX, HEX, HEX}, /* 179 */ {"lgrpsys", 3, DEC, NOV, DEC, DEC, HEX}, /* 180 */ @@ -480,7 +477,7 @@ const struct systable systable[] = { {"lwp_mutex_trylock", 1, DEC, NOV, HEX}, /* 251 */ {"lwp_mutex_init", 2, DEC, NOV, HEX, HEX}, /* 252 */ {"cladm", 3, DEC, NOV, CLC, CLF, HEX}, /* 253 */ -{ NULL, 8, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX, HEX}, +{"uucopy", 3, DEC, NOV, HEX, HEX, UNS}, /* 254 */ {"umount2", 2, DEC, NOV, STG, MTF}, /* 255 */ { NULL, -1, DEC, NOV}, }; diff --git a/usr/src/cmd/zlogin/Makefile b/usr/src/cmd/zlogin/Makefile index cae4ae625f..90d1b15a34 100644 --- a/usr/src/cmd/zlogin/Makefile +++ b/usr/src/cmd/zlogin/Makefile @@ -28,7 +28,7 @@ PROG = zlogin include ../Makefile.cmd -LDLIBS += -lsocket -lzonecfg -lcontract +LDLIBS += -lsocket -lzonecfg -lcontract -lbrand CFLAGS += $(CCVERBOSE) FILEMODE = 0555 GROUP = bin diff --git a/usr/src/cmd/zlogin/zlogin.c 
b/usr/src/cmd/zlogin/zlogin.c index 890b055f83..b1848eb40b 100644 --- a/usr/src/cmd/zlogin/zlogin.c +++ b/usr/src/cmd/zlogin/zlogin.c @@ -57,6 +57,7 @@ #include <sys/types.h> #include <sys/contract/process.h> #include <sys/ctfs.h> +#include <sys/brand.h> #include <alloca.h> #include <assert.h> @@ -84,6 +85,7 @@ #include <locale.h> #include <libzonecfg.h> #include <libcontract.h> +#include <libbrand.h> static int masterfd; static struct termios save_termios; @@ -110,7 +112,6 @@ static const char *pname; #define SUPATH "/usr/bin/su" #define FAILSAFESHELL "/sbin/sh" #define DEFAULTSHELL "/sbin/sh" -#define LOGINPATH "/usr/bin/login" #define DEF_PATH "/usr/sbin:/usr/bin" /* @@ -749,6 +750,64 @@ doio(int stdin_fd, int stdout_fd, int stderr_fd, boolean_t raw_mode) } } +static char ** +zone_login_cmd(brand_handle_t *bhp, const char *login) +{ + static char result_buf[ARG_MAX]; + char **new_argv, *ptr, *lasts; + int n, a; + + /* Get the login command for the target zone. */ + bzero(result_buf, sizeof (result_buf)); + if (brand_get_login_cmd(bhp, login, + result_buf, sizeof (result_buf)) != 0) + return (NULL); + + /* + * We got back a string that we'd like to execute. But since + * we're not doing the execution via a shell we'll need to convert + * the exec string to an array of strings. We'll do that here + * but we're going to be very simplistic about it and break stuff + * up based on spaces. We're not even going to support any kind + * of quoting or escape characters. It's truly amazing that + * there is no library function in OpenSolaris to do this for us. + */ + + /* + * Be paranoid. Since we're deliniating based on spaces make + * sure there are no adjacent spaces. + */ + if (strstr(result_buf, " ") != NULL) + return (NULL); + + /* Remove any trailing whitespace. */ + n = strlen(result_buf); + if (result_buf[n - 1] == ' ') + result_buf[n - 1] = '\0'; + + /* Count how many elements there are in the exec string. 
*/ + ptr = result_buf; + for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++) + ; + + /* Allocate the argv array that we're going to return. */ + if ((new_argv = malloc(sizeof (char *) * n)) == NULL) + return (NULL); + + /* Tokenize the exec string and return. */ + a = 0; + new_argv[a++] = result_buf; + if (n > 2) { + (void) strtok_r(result_buf, " ", &lasts); + while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL) + ; + } else { + new_argv[a++] = NULL; + } + assert(n == a); + return (new_argv); +} + /* * Prepare argv array for exec'd process; if we're passing commands to the * new process, then use su(1M) to do the invocation. Otherwise, use @@ -757,7 +816,7 @@ doio(int stdin_fd, int stdout_fd, int stderr_fd, boolean_t raw_mode) * checks). */ static char ** -prep_args(char *login, char **argv) +prep_args(brand_handle_t *bhp, const char *login, char **argv) { int argc = 0, a = 0, i, n = -1; char **new_argv; @@ -792,7 +851,7 @@ prep_args(char *login, char **argv) return (NULL); new_argv[a++] = SUPATH; - new_argv[a++] = login; + new_argv[a++] = (char *)login; } new_argv[a++] = "-c"; new_argv[a++] = subshell; @@ -805,25 +864,12 @@ prep_args(char *login, char **argv) return (NULL); new_argv[a++] = FAILSAFESHELL; new_argv[a++] = NULL; + assert(n == a); } else { - n = 6; - - if ((new_argv = malloc(sizeof (char *) * n)) == NULL) - return (NULL); - - new_argv[a++] = LOGINPATH; - new_argv[a++] = "-z"; - new_argv[a++] = "global"; /* hardcode, for now */ - new_argv[a++] = "-f"; - new_argv[a++] = login; - new_argv[a++] = NULL; + new_argv = zone_login_cmd(bhp, login); } } - /* - * If this assert ever trips, it's because we've botched the setup - * of ARGV above-- it's too large or too small. 
- */ - assert(n == a); + return (new_argv); } @@ -1332,8 +1378,10 @@ main(int argc, char **argv) char *slavename, slaveshortname[MAXPATHLEN]; priv_set_t *privset; int tmpl_fd; + char zonebrand[MAXNAMELEN]; struct stat sb; char kernzone[ZONENAME_MAX]; + brand_handle_t *bhp; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); @@ -1548,10 +1596,18 @@ main(int argc, char **argv) return (1); } - if ((new_args = prep_args(login, proc_args)) == NULL) { + /* Get a handle to the brand info for this zone */ + if ((zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) || + ((bhp = brand_open(zonebrand)) == NULL)) { + zerror(gettext("could not get brand for zone %s"), zonename); + return (1); + } + if ((new_args = prep_args(bhp, login, proc_args)) == NULL) { zperror(gettext("could not assemble new arguments")); + brand_close(bhp); return (1); } + brand_close(bhp); if ((new_env = prep_env()) == NULL) { zperror(gettext("could not assemble new environment")); @@ -1705,11 +1761,20 @@ main(int argc, char **argv) /* * In failsafe mode, we don't use login(1), so don't try * setting up a utmpx entry. + * + * A branded zone may have very different utmpx semantics. + * At the moment, we only have two brand types: + * Solaris-like (native, sn1) and Linux. In the Solaris + * case, we know exactly how to do the necessary utmpx + * setup. Fortunately for us, the Linux /bin/login is + * prepared to deal with a non-initialized utmpx entry, so + * we can simply skip it. If future brands don't fall into + * either category, we'll have to add a per-brand utmpx + * setup hook. 
#
# Return a list of running, non-global zones for which a shutdown via
# "/sbin/init 0" may work (typically only Solaris zones.)
#
# At present, this means any running "lx" zones don't qualify.
#
# "zoneadm list -p" prints one colon-separated record per zone:
#
#	zoneid:zonename:state:zonepath:uuid:brand
#
# so the zone name is field 2 and the brand is field 6.  Field 5 is the
# zone's UUID and must not be used for the brand comparison.
#
shutdown_zones()
{
	zoneadm list -p | nawk -F: '{
		if (($6 != "lx") && ($2 != "global")) {
			print $2
		}
	}'
}
+ if [ -n "$zonelist" ]; then + SHUTDOWN=0 + echo "Shutting down running zones (for up to $MAXSHUT" \ + "seconds):\c" - SHUTDOWN=0 - for zone in `zoneadm list`; do - if [ "$zone" != "global" ]; then + for zone in $zonelist; do echo " $zone\c" zlogin -S $zone /sbin/init 0 < /dev/null >&0 2>&0 & SHUTDOWN=1 - fi - done - [ $SHUTDOWN -eq 1 ] && echo "." + done - # Allow time for zones to shutdown cleanly + [ $SHUTDOWN -eq 1 ] && echo "." - while [ $MAXSHUT -gt 0 -a "`zoneadm list`" != "global" ]; do - MAXSHUT=`expr $MAXSHUT - 1` - sleep 1 # wait a bit longer - done + # Allow time for zones to shutdown cleanly - # Second, try halting them. + while [ $MAXSHUT -gt 0 -a "`shutdown_zones`" != "" ]; do + MAXSHUT=`expr $MAXSHUT - 1` + sleep 1 # wait a bit longer + done + fi + # + # Second, try halting any non-global zones still running + # WAITPIDS="" for zone in `zoneadm list`; do if [ "$zone" != "global" ]; then @@ -122,6 +144,7 @@ case "$1" in # If the halts complete but a zone is still not shutdown, it might # be in a state like 'shutting_down' or 'down'. So we give it some # time to come all the way down. 
+ while [ $MAXHALT -gt 0 -a "`zoneadm list`" != "global" ]; do MAXHALT=`expr $MAXHALT - 1` sleep 1 # wait a bit longer diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index f42464d89b..f809151403 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -55,6 +55,7 @@ #include <libintl.h> #include <libzonecfg.h> #include <bsm/adt.h> +#include <sys/brand.h> #include <sys/param.h> #include <sys/types.h> #include <sys/stat.h> @@ -72,6 +73,7 @@ #include <libgen.h> #include <fnmatch.h> #include <sys/modctl.h> +#include <libbrand.h> #include <pool.h> #include <sys/pool.h> @@ -86,12 +88,14 @@ typedef struct zone_entry { char zname[ZONENAME_MAX]; char *zstate_str; zone_state_t zstate_num; + char zbrand[MAXNAMELEN]; char zroot[MAXPATHLEN]; char zuuid[UUID_PRINTABLE_STRING_LENGTH]; } zone_entry_t; static zone_entry_t *zents; static size_t nzents; +static boolean_t is_native_zone = B_TRUE; #define LOOPBACK_IF "lo0" #define SOCKET_AF(af) (((af) == AF_UNSPEC) ? 
AF_INET : (af)) @@ -120,7 +124,7 @@ struct cmd { #define SHELP_REBOOT "reboot [-- boot_arguments]" #define SHELP_LIST "list [-cipv]" #define SHELP_VERIFY "verify" -#define SHELP_INSTALL "install [-x nodataset]" +#define SHELP_INSTALL "install [-x nodataset] [brand-specific args]" #define SHELP_UNINSTALL "uninstall [-F]" #define SHELP_CLONE "clone [-m method] [-s <ZFS snapshot>] zonename" #define SHELP_MOVE "move zonepath" @@ -128,6 +132,12 @@ struct cmd { #define SHELP_ATTACH "attach [-F] [-n <path>]" #define SHELP_MARK "mark incomplete" +#define EXEC_PREFIX "exec " +#define EXEC_LEN (strlen(EXEC_PREFIX)) +#define RMCOMMAND "/usr/bin/rm -rf" + +static int cleanup_zonepath(char *, boolean_t); + static int help_func(int argc, char *argv[]); static int ready_func(int argc, char *argv[]); static int boot_func(int argc, char *argv[]); @@ -145,7 +155,7 @@ static int detach_func(int argc, char *argv[]); static int attach_func(int argc, char *argv[]); static int mark_func(int argc, char *argv[]); static int sanity_check(char *zone, int cmd_num, boolean_t running, - boolean_t unsafe_when_running); + boolean_t unsafe_when_running, boolean_t force); static int cmd_match(char *cmd); static int verify_details(int); @@ -174,12 +184,28 @@ static struct cmd cmdtab[] = { /* set early in main(), never modified thereafter, used all over the place */ static char *execname; +static char target_brand[MAXNAMELEN]; static char *locale; char *target_zone; static char *target_uuid; /* used in do_subproc() and signal handler */ static volatile boolean_t child_killed; +static int do_subproc_cnt = 0; + +/* + * Used to indicate whether this zoneadm instance has another zoneadm + * instance in its ancestry. 
+ */ +static boolean_t zoneadm_is_nested = B_FALSE; + +/* used to track nested zone-lock operations */ +static int zone_lock_cnt = 0; + +/* used to communicate lock status to children */ +#define LOCK_ENV_VAR "_ZONEADM_LOCK_HELD" +static char zoneadm_lock_held[] = LOCK_ENV_VAR"=1"; +static char zoneadm_lock_not_held[] = LOCK_ENV_VAR"=0"; char * cmd_to_str(int cmd_num) @@ -233,7 +259,9 @@ long_help(int cmd_num) return (gettext("Install the configuration on to the system. " "The -x nodataset option\n\tcan be used to prevent the " "creation of a new ZFS file system for the\n\tzone " - "(assuming the zonepath is within a ZFS file system).")); + "(assuming the zonepath is within a ZFS file system).\n\t" + "All other arguments are passed to the brand installation " + "function;\n\tsee brand(4) for more information.")); case CMD_UNINSTALL: return (gettext("Uninstall the configuration from the system. " "The -F flag can be used\n\tto force the action.")); @@ -383,8 +411,8 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) assert(!(verbose && parsable)); if (firsttime && verbose) { firsttime = B_FALSE; - (void) printf("%*s %-16s %-14s %-30s\n", ZONEID_WIDTH, "ID", - "NAME", "STATUS", "PATH"); + (void) printf("%*s %-16s %-14s %-30s %-10s\n", ZONEID_WIDTH, + "ID", "NAME", "STATUS", "PATH", "BRAND"); } if (!verbose) { char *cp, *clim; @@ -403,7 +431,7 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) (void) printf("%.*s\\:", clim - cp, cp); cp = clim + 1; } - (void) printf("%s:%s\n", cp, zent->zuuid); + (void) printf("%s:%s:%s\n", cp, zent->zuuid, zent->zbrand); return; } if (zent->zstate_str != NULL) { @@ -411,8 +439,8 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable) (void) printf("%*s", ZONEID_WIDTH, "-"); else (void) printf("%*lu", ZONEID_WIDTH, zent->zid); - (void) printf(" %-16s %-14s %-30s\n", zent->zname, - zent->zstate_str, zent->zroot); + (void) printf(" %-16s %-14s %-30s %-10s\n", zent->zname, + 
zent->zstate_str, zent->zroot, zent->zbrand); } } @@ -425,6 +453,7 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent) (void) strlcpy(zent->zname, zone_name, sizeof (zent->zname)); (void) strlcpy(zent->zroot, "???", sizeof (zent->zroot)); + (void) strlcpy(zent->zbrand, "???", sizeof (zent->zbrand)); zent->zstate_str = "???"; zent->zid = zid; @@ -469,6 +498,11 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent) return (Z_ERR); } zent->zstate_str = zone_state_str(zent->zstate_num); + if (zone_get_brand(zent->zname, zent->zbrand, + sizeof (zent->zbrand)) != Z_OK) { + zperror2(zent->zname, gettext("could not get brand name")); + return (Z_ERR); + } return (Z_OK); } @@ -998,9 +1032,49 @@ validate_zonepath(char *path, int cmd_num) return (err ? Z_ERR : Z_OK); } +/* + * The following two routines implement a simple locking mechanism to + * ensure that only one instance of zoneadm at a time is able to manipulate + * a given zone. The lock is built on top of an fcntl(2) lock of + * [<altroot>]/var/run/zones/<zonename>.zoneadm.lock. If a zoneadm instance + * can grab that lock, it is allowed to manipulate the zone. + * + * Since zoneadm may call external applications which in turn invoke + * zoneadm again, we introduce the notion of "lock inheritance". Any + * instance of zoneadm that has another instance in its ancestry is assumed + * to be acting on behalf of the original zoneadm, and is thus allowed to + * manipulate its zone. + * + * This inheritance is implemented via the _ZONEADM_LOCK_HELD environment + * variable. When zoneadm is granted a lock on its zone, this environment + * variable is set to 1. When it releases the lock, the variable is set to + * 0. Since a child process inherits its parent's environment, checking + * the state of this variable indicates whether or not any ancestor owns + * the lock. 
+ */ static void release_lock_file(int lockfd) { + /* + * If we are cleaning up from a failed attempt to lock the zone for + * the first time, we might have a zone_lock_cnt of 0. In that + * error case, we don't want to do anything but close the lock + * file. + */ + assert(zone_lock_cnt >= 0); + if (zone_lock_cnt > 0) { + assert(getenv(LOCK_ENV_VAR) != NULL); + assert(atoi(getenv(LOCK_ENV_VAR)) == 1); + if (--zone_lock_cnt > 0) { + assert(lockfd == -1); + return; + } + if (putenv(zoneadm_lock_not_held) != 0) { + zperror(target_zone, B_TRUE); + exit(Z_ERR); + } + } + assert(lockfd >= 0); (void) close(lockfd); } @@ -1010,6 +1084,18 @@ grab_lock_file(const char *zone_name, int *lockfd) char pathbuf[PATH_MAX]; struct flock flock; + /* + * If we already have the lock, we can skip this expensive song + * and dance. + */ + if (zone_lock_cnt > 0) { + zone_lock_cnt++; + *lockfd = -1; + return (Z_OK); + } + assert(getenv(LOCK_ENV_VAR) != NULL); + assert(atoi(getenv(LOCK_ENV_VAR)) == 0); + if (snprintf(pathbuf, sizeof (pathbuf), "%s%s", zonecfg_get_root(), ZONES_TMPDIR) >= sizeof (pathbuf)) { zerror(gettext("alternate root path is too long")); @@ -1045,12 +1131,14 @@ grab_lock_file(const char *zone_name, int *lockfd) flock.l_whence = SEEK_SET; flock.l_start = (off_t)0; flock.l_len = (off_t)0; - if (fcntl(*lockfd, F_SETLKW, &flock) < 0) { + if ((fcntl(*lockfd, F_SETLKW, &flock) < 0) || + (putenv(zoneadm_lock_held) != 0)) { zerror(gettext("unable to lock %s: %s"), pathbuf, strerror(errno)); release_lock_file(*lockfd); return (Z_ERR); } + zone_lock_cnt = 1; return (Z_OK); } @@ -1315,7 +1403,8 @@ ready_func(int argc, char *argv[]) sub_usage(SHELP_READY, CMD_READY); return (Z_USAGE); } - if (sanity_check(target_zone, CMD_READY, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_READY, B_FALSE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_READY) != Z_OK) return (Z_ERR); @@ -1332,6 +1421,7 @@ static int boot_func(int argc, char *argv[]) { 
zone_cmd_arg_t zarg; + boolean_t force = B_FALSE; int arg; if (zonecfg_in_alt_root()) { @@ -1348,15 +1438,17 @@ boot_func(int argc, char *argv[]) * zoneadm -z myzone boot [here] -- -v -m verbose * * Where [here] can either be nothing, -? (in which case we bail - * and print usage), or -s. Support for -s is vestigal and - * obsolete, but is retained because it was a documented interface - * and there are known consumers including admin/install; the - * proper way to specify boot arguments like -s is: + * and print usage), -f (a private option to indicate that the + * boot operation should be 'forced'), or -s. Support for -s is + * vestigal and obsolete, but is retained because it was a + * documented interface and there are known consumers including + * admin/install; the proper way to specify boot arguments like -s + * is: * * zoneadm -z myzone boot -- -s -v -m verbose. */ optind = 0; - if ((arg = getopt(argc, argv, "?s")) != EOF) { + while ((arg = getopt(argc, argv, "?fs")) != EOF) { switch (arg) { case '?': sub_usage(SHELP_BOOT, CMD_BOOT); @@ -1365,6 +1457,9 @@ boot_func(int argc, char *argv[]) (void) strlcpy(zarg.bootbuf, "-s", sizeof (zarg.bootbuf)); break; + case 'f': + force = B_TRUE; + break; default: sub_usage(SHELP_BOOT, CMD_BOOT); return (Z_USAGE); @@ -1384,12 +1479,12 @@ boot_func(int argc, char *argv[]) return (Z_ERR); } } - - if (sanity_check(target_zone, CMD_BOOT, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_BOOT, B_FALSE, B_FALSE, force) + != Z_OK) return (Z_ERR); if (verify_details(CMD_BOOT) != Z_OK) return (Z_ERR); - zarg.cmd = Z_BOOT; + zarg.cmd = force ? 
Z_FORCEBOOT : Z_BOOT; if (call_zoneadmd(target_zone, &zarg) != 0) { zerror(gettext("call to %s failed"), "zoneadmd"); return (Z_ERR); @@ -1424,12 +1519,15 @@ fake_up_local_zone(zoneid_t zid, zone_entry_t *zeptr) if (is_system_labeled()) { (void) zone_getattr(zid, ZONE_ATTR_ROOT, zeptr->zroot, sizeof (zeptr->zroot)); + (void) strlcpy(zeptr->zbrand, NATIVE_BRAND_NAME, + sizeof (zeptr->zbrand)); } else { (void) strlcpy(zeptr->zroot, "/", sizeof (zeptr->zroot)); + (void) zone_getattr(zid, ZONE_ATTR_BRAND, zeptr->zbrand, + sizeof (zeptr->zbrand)); } zeptr->zstate_str = "running"; - if (zonecfg_get_uuid(zeptr->zname, uuid) == Z_OK && !uuid_is_null(uuid)) uuid_unparse(uuid, zeptr->zuuid); @@ -1487,8 +1585,8 @@ list_func(int argc, char *argv[]) if (zone_id == GLOBAL_ZONEID || is_system_labeled()) { retv = zone_print_list(min_state, verbose, parsable); } else { - retv = Z_OK; fake_up_local_zone(zone_id, &zent); + retv = Z_OK; zone_print(&zent, verbose, parsable); } return (retv); @@ -1552,6 +1650,7 @@ sigterm(int sig) * Ignore SIG{INT,TERM}, so we don't end up in an infinite loop, * then propagate the signal to our process group. */ + assert(sig == SIGINT || sig == SIGTERM); (void) sigset(SIGINT, SIG_IGN); (void) sigset(SIGTERM, SIG_IGN); (void) kill(0, sig); @@ -1564,6 +1663,7 @@ do_subproc(char *cmdbuf) char inbuf[1024]; /* arbitrary large amount */ FILE *file; + do_subproc_cnt++; child_killed = B_FALSE; /* * We use popen(3c) to launch child processes for [un]install; @@ -1574,8 +1674,9 @@ do_subproc(char *cmdbuf) * shell, so we close stdin and reopen it as /dev/null first. 
*/ (void) close(STDIN_FILENO); - (void) open("/dev/null", O_RDONLY); - (void) setpgid(0, 0); + (void) openat(STDIN_FILENO, "/dev/null", O_RDONLY); + if (!zoneadm_is_nested) + (void) setpgid(0, 0); (void) sigset(SIGINT, sigterm); (void) sigset(SIGTERM, sigterm); file = popen(cmdbuf, "r"); @@ -1590,15 +1691,55 @@ do_subproc(char *cmdbuf) } static int -subproc_status(const char *cmd, int status) +do_subproc_interactive(char *cmdbuf) +{ + void (*saveint)(int); + void (*saveterm)(int); + void (*savequit)(int); + void (*savehup)(int); + int pid, child, status; + + /* + * do_subproc() links stdin to /dev/null, which would break any + * interactive subprocess we try to launch here. Similarly, we + * can't have been launched as a subprocess ourselves. + */ + assert(do_subproc_cnt == 0 && !zoneadm_is_nested); + + if ((child = vfork()) == 0) { + (void) execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL); + } + + if (child == -1) + return (-1); + + saveint = sigset(SIGINT, SIG_IGN); + saveterm = sigset(SIGTERM, SIG_IGN); + savequit = sigset(SIGQUIT, SIG_IGN); + savehup = sigset(SIGHUP, SIG_IGN); + + while ((pid = waitpid(child, &status, 0)) != child && pid != -1) + ; + + (void) sigset(SIGINT, saveint); + (void) sigset(SIGTERM, saveterm); + (void) sigset(SIGQUIT, savequit); + (void) sigset(SIGHUP, savehup); + + return (pid == -1 ? 
-1 : status); +} + +static int +subproc_status(const char *cmd, int status, boolean_t verbose_failure) { if (WIFEXITED(status)) { int exit_code = WEXITSTATUS(status); - if (exit_code == 0) - return (Z_OK); - zerror(gettext("'%s' failed with exit code %d."), cmd, - exit_code); + if ((verbose_failure) && (exit_code != ZONE_SUBPROC_OK)) + zerror(gettext("'%s' failed with exit code %d."), cmd, + exit_code); + + return (exit_code); } else if (WIFSIGNALED(status)) { int signal = WTERMSIG(status); char sigstr[SIG2STR_MAX]; @@ -1613,7 +1754,13 @@ subproc_status(const char *cmd, int status) } else { zerror(gettext("'%s' failed for unknown reasons."), cmd); } - return (Z_ERR); + + /* + * Assume a subprocess that died due to a signal or an unknown error + * should be considered an exit code of ZONE_SUBPROC_FATAL, as the + * user will likely need to do some manual cleanup. + */ + return (ZONE_SUBPROC_FATAL); } /* @@ -1629,11 +1776,11 @@ subproc_status(const char *cmd, int status) */ static int sanity_check(char *zone, int cmd_num, boolean_t running, - boolean_t unsafe_when_running) + boolean_t unsafe_when_running, boolean_t force) { zone_entry_t *zent; priv_set_t *privset; - zone_state_t state; + zone_state_t state, min_state; char kernzone[ZONENAME_MAX]; FILE *fp; @@ -1691,6 +1838,12 @@ sanity_check(char *zone, int cmd_num, boolean_t running, return (Z_ERR); } + if (!is_native_zone && cmd_num == CMD_MOUNT) { + zerror(gettext("%s operation is invalid for branded zones."), + cmd_to_str(cmd_num)); + return (Z_ERR); + } + if (!zonecfg_in_alt_root()) { zent = lookup_running_zone(zone); } else if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { @@ -1707,7 +1860,7 @@ sanity_check(char *zone, int cmd_num, boolean_t running, /* * Look up from the kernel for 'running' zones. 
*/ - if (running) { + if (running && !force) { if (zent == NULL) { zerror(gettext("not running")); return (Z_ERR); @@ -1765,9 +1918,21 @@ sanity_check(char *zone, int cmd_num, boolean_t running, case CMD_BOOT: case CMD_MOUNT: case CMD_MARK: - if (state < ZONE_STATE_INSTALLED) { + if ((cmd_num == CMD_BOOT || cmd_num == CMD_MOUNT) && + force) + min_state = ZONE_STATE_INCOMPLETE; + else + min_state = ZONE_STATE_INSTALLED; + + if (force && cmd_num == CMD_BOOT && is_native_zone) { + zerror(gettext("Only branded zones may be " + "force-booted.")); + return (Z_ERR); + } + + if (state < min_state) { zerror(gettext("must be %s before %s."), - zone_state_str(ZONE_STATE_INSTALLED), + zone_state_str(min_state), cmd_to_str(cmd_num)); return (Z_ERR); } @@ -1824,7 +1989,8 @@ halt_func(int argc, char *argv[]) * so even though it seems that the fourth parameter below should * perhaps be B_TRUE, it really shouldn't be. */ - if (sanity_check(target_zone, CMD_HALT, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_HALT, B_FALSE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); zarg.cmd = Z_HALT; @@ -1875,7 +2041,8 @@ reboot_func(int argc, char *argv[]) * so even though it seems that the fourth parameter below should * perhaps be B_TRUE, it really shouldn't be. */ - if (sanity_check(target_zone, CMD_REBOOT, B_TRUE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_REBOOT, B_TRUE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_REBOOT) != Z_OK) return (Z_ERR); @@ -1885,6 +2052,48 @@ reboot_func(int argc, char *argv[]) } static int +verify_brand(zone_dochandle_t handle) +{ + char cmdbuf[MAXPATHLEN]; + int err; + char zonepath[MAXPATHLEN]; + brand_handle_t *bhp = NULL; + int status; + + /* + * Fetch the verify command from the brand configuration. + * "exec" the command so that the returned status is that of + * the command and not the shell. 
+ */ + if ((err = zonecfg_get_zonepath(handle, zonepath, sizeof (zonepath))) != + Z_OK) { + errno = err; + zperror(cmd_to_str(CMD_VERIFY), B_TRUE); + return (Z_ERR); + } + if ((bhp = brand_open(target_brand)) == NULL) { + zerror(gettext("missing or invalid brand")); + return (Z_ERR); + } + + /* + * If the brand has its own verification routine, execute it now. + */ + (void) strcpy(cmdbuf, EXEC_PREFIX); + err = brand_get_verify_adm(bhp, target_zone, zonepath, + cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN, 0, NULL); + brand_close(bhp); + if (err == 0 && strlen(cmdbuf) > EXEC_LEN) { + status = do_subproc_interactive(cmdbuf); + err = subproc_status(gettext("brand-specific verification"), + status, B_FALSE); + return ((err == ZONE_SUBPROC_OK) ? Z_OK : Z_BRAND_ERROR); + } + + return ((err == Z_OK) ? Z_OK : Z_BRAND_ERROR); +} + +static int verify_rctls(zone_dochandle_t handle) { struct zone_rctltab rctltab; @@ -2542,6 +2751,8 @@ no_net: return_code = Z_ERR; if (!in_alt_root && verify_pool(handle) != Z_OK) return_code = Z_ERR; + if (!in_alt_root && verify_brand(handle) != Z_OK) + return_code = Z_ERR; if (!in_alt_root && verify_datasets(handle) != Z_OK) return_code = Z_ERR; @@ -2643,31 +2854,90 @@ verify_func(int argc, char *argv[]) sub_usage(SHELP_VERIFY, CMD_VERIFY); return (Z_USAGE); } - if (sanity_check(target_zone, CMD_VERIFY, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_VERIFY, B_FALSE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); return (verify_details(CMD_VERIFY)); } -#define LUCREATEZONE "/usr/lib/lu/lucreatezone" +static int +addopt(char *buf, int opt, char *optarg, size_t bufsize) +{ + char optstring[4]; + + if (opt > 0) + (void) sprintf(optstring, " -%c", opt); + else + (void) strcpy(optstring, " "); + + if ((strlcat(buf, optstring, bufsize) > bufsize)) + return (Z_ERR); + if ((optarg != NULL) && (strlcat(buf, optarg, bufsize) > bufsize)) + return (Z_ERR); + return (Z_OK); +} static int install_func(int argc, char *argv[]) { - /* 9: "exec " 
and " -z " */ - char cmdbuf[sizeof (LUCREATEZONE) + ZONENAME_MAX + 9]; + char cmdbuf[MAXPATHLEN]; int lockfd; - int err, arg; + int arg, err, subproc_err; char zonepath[MAXPATHLEN]; + brand_handle_t *bhp = NULL; int status; boolean_t nodataset = B_FALSE; + char opts[128]; + + if (target_zone == NULL) { + sub_usage(SHELP_INSTALL, CMD_INSTALL); + return (Z_USAGE); + } if (zonecfg_in_alt_root()) { zerror(gettext("cannot install zone in alternate root")); return (Z_ERR); } + if ((err = zone_get_zonepath(target_zone, zonepath, + sizeof (zonepath))) != Z_OK) { + errno = err; + zperror2(target_zone, gettext("could not get zone path")); + return (Z_ERR); + } + + /* Fetch the install command from the brand configuration. */ + if ((bhp = brand_open(target_brand)) == NULL) { + zerror(gettext("missing or invalid brand")); + return (Z_ERR); + } + + (void) strcpy(cmdbuf, EXEC_PREFIX); + if (brand_get_install(bhp, target_zone, zonepath, cmdbuf + EXEC_LEN, + sizeof (cmdbuf) - EXEC_LEN, 0, NULL) != 0) { + zerror("invalid brand configuration: missing install resource"); + brand_close(bhp); + return (Z_ERR); + } + + (void) strcpy(opts, "?x:"); + if (!is_native_zone) { + /* + * Fetch the list of recognized command-line options from + * the brand configuration file. 
+ */ + if (brand_get_installopts(bhp, opts + strlen(opts), + sizeof (opts) - strlen(opts)) != 0) { + zerror("invalid brand configuration: missing " + "install options resource"); + brand_close(bhp); + return (Z_ERR); + } + } + brand_close(bhp); + optind = 0; - if ((arg = getopt(argc, argv, "?x:")) != EOF) { + while ((arg = getopt(argc, argv, opts)) != EOF) { switch (arg) { case '?': sub_usage(SHELP_INSTALL, CMD_INSTALL); @@ -2680,15 +2950,37 @@ install_func(int argc, char *argv[]) nodataset = B_TRUE; break; default: - sub_usage(SHELP_INSTALL, CMD_INSTALL); - return (Z_USAGE); + if (is_native_zone) { + sub_usage(SHELP_INSTALL, CMD_INSTALL); + return (Z_USAGE); + } + + /* + * This option isn't for zoneadm, so append it to + * the command line passed to the brand-specific + * install routine. + */ + if (addopt(cmdbuf, optopt, optarg, + sizeof (cmdbuf)) != Z_OK) { + zerror("Install command line too long"); + return (Z_ERR); + } + break; } } - if (argc > optind) { - sub_usage(SHELP_INSTALL, CMD_INSTALL); - return (Z_USAGE); + + if (!is_native_zone) { + for (; optind < argc; optind++) { + if (addopt(cmdbuf, 0, argv[optind], + sizeof (cmdbuf)) != Z_OK) { + zerror("Install command line too long"); + return (Z_ERR); + } + } } - if (sanity_check(target_zone, CMD_INSTALL, B_FALSE, B_TRUE) != Z_OK) + + if (sanity_check(target_zone, CMD_INSTALL, B_FALSE, B_TRUE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_INSTALL) != Z_OK) return (Z_ERR); @@ -2705,6 +2997,9 @@ install_func(int argc, char *argv[]) goto done; } + if (!nodataset) + create_zfs_zonepath(zonepath); + /* * According to the Application Packaging Developer's Guide, a * "checkinstall" script when included in a package is executed as @@ -2715,36 +3010,28 @@ install_func(int argc, char *argv[]) * has completed, regardless of whether it was successful, the * path to the zone is again restricted. 
*/ - if ((err = zone_get_zonepath(target_zone, zonepath, - sizeof (zonepath))) != Z_OK) { - errno = err; - zperror2(target_zone, gettext("could not get zone path")); - goto done; - } - - if (!nodataset) - create_zfs_zonepath(zonepath); - if (chmod(zonepath, DEFAULT_DIR_MODE) != 0) { zperror(zonepath, B_FALSE); err = Z_ERR; goto done; } - /* - * "exec" the command so that the returned status is that of - * LUCREATEZONE and not the shell. - */ - (void) snprintf(cmdbuf, sizeof (cmdbuf), "exec " LUCREATEZONE " -z %s", - target_zone); - status = do_subproc(cmdbuf); + if (is_native_zone) + status = do_subproc(cmdbuf); + else + status = do_subproc_interactive(cmdbuf); + if (chmod(zonepath, S_IRWXU) != 0) { zperror(zonepath, B_FALSE); err = Z_ERR; goto done; } - if ((err = subproc_status(LUCREATEZONE, status)) != Z_OK) + if ((subproc_err = + subproc_status(gettext("brand-specific installation"), status, + B_FALSE)) != ZONE_SUBPROC_OK) { + err = Z_ERR; goto done; + } if ((err = zone_set_state(target_zone, ZONE_STATE_INSTALLED)) != Z_OK) { errno = err; @@ -2753,6 +3040,25 @@ install_func(int argc, char *argv[]) } done: + /* + * If the install script exited with ZONE_SUBPROC_USAGE or + * ZONE_SUBPROC_NOTCOMPLETE, try to clean up the zone and leave the + * zone in the CONFIGURED state so that another install can be + * attempted without requiring an uninstall first. + */ + if ((subproc_err == ZONE_SUBPROC_USAGE) || + (subproc_err == ZONE_SUBPROC_NOTCOMPLETE)) { + if ((err = cleanup_zonepath(zonepath, B_FALSE)) != Z_OK) { + errno = err; + zperror2(target_zone, + gettext("cleaning up zonepath failed")); + } else if ((err = zone_set_state(target_zone, + ZONE_STATE_CONFIGURED)) != Z_OK) { + errno = err; + zperror2(target_zone, gettext("could not set state")); + } + } + release_lock_file(lockfd); return ((err == Z_OK) ? 
Z_OK : Z_ERR); } @@ -3064,8 +3370,8 @@ warn_ip_match(zone_dochandle_t s_handle, char *source_zone, } static void -warn_dataset_match(zone_dochandle_t s_handle, char *source_zone, - zone_dochandle_t t_handle, char *target_zone) +warn_dataset_match(zone_dochandle_t s_handle, char *source, + zone_dochandle_t t_handle, char *target) { int err; struct zone_dstab s_dstab; @@ -3073,14 +3379,14 @@ warn_dataset_match(zone_dochandle_t s_handle, char *source_zone, if ((err = zonecfg_setdsent(t_handle)) != Z_OK) { errno = err; - zperror2(target_zone, gettext("could not enumerate datasets")); + zperror2(target, gettext("could not enumerate datasets")); return; } while (zonecfg_getdsent(t_handle, &t_dstab) == Z_OK) { if ((err = zonecfg_setdsent(s_handle)) != Z_OK) { errno = err; - zperror2(source_zone, + zperror2(source, gettext("could not enumerate datasets")); (void) zonecfg_enddsent(t_handle); return; @@ -3089,8 +3395,8 @@ warn_dataset_match(zone_dochandle_t s_handle, char *source_zone, while (zonecfg_getdsent(s_handle, &s_dstab) == Z_OK) { if (strcmp(t_dstab.zone_dataset_name, s_dstab.zone_dataset_name) == 0) { - (void) fprintf(stderr, - gettext("WARNING: dataset '%s' " + target_zone = source; + zerror(gettext("WARNING: dataset '%s' " "is configured in both zones.\n"), t_dstab.zone_dataset_name); break; @@ -3102,6 +3408,37 @@ warn_dataset_match(zone_dochandle_t s_handle, char *source_zone, (void) zonecfg_enddsent(t_handle); } +/* + * Check that the clone and its source have the same brand type. 
+ */ +static int +valid_brand_clone(char *source_zone, char *target_zone) +{ + brand_handle_t *bhp; + char source_brand[MAXNAMELEN]; + + if ((zone_get_brand(source_zone, source_brand, + sizeof (source_brand))) != Z_OK) { + (void) fprintf(stderr, "%s: zone '%s': %s\n", + execname, source_zone, gettext("missing or invalid brand")); + return (Z_ERR); + } + + if (strcmp(source_brand, target_brand) != NULL) { + (void) fprintf(stderr, + gettext("%s: Zones '%s' and '%s' have different brand " + "types.\n"), execname, source_zone, target_zone); + return (Z_ERR); + } + + if ((bhp = brand_open(target_brand)) == NULL) { + zerror(gettext("missing or invalid brand")); + return (Z_ERR); + } + brand_close(bhp); + return (Z_OK); +} + static int validate_clone(char *source_zone, char *target_zone) { @@ -3131,6 +3468,11 @@ validate_clone(char *source_zone, char *target_zone) goto done; } + /* verify new zone has same brand type */ + err = valid_brand_clone(source_zone, target_zone); + if (err != Z_OK) + goto done; + /* verify new zone has same inherit-pkg-dirs */ err = valid_ipd_clone(s_handle, source_zone, t_handle, target_zone); @@ -3158,7 +3500,6 @@ copy_zone(char *src, char *dst) { boolean_t out_null = B_FALSE; int status; - int err; char *outfile; char cmdbuf[MAXPATHLEN * 2 + 128]; @@ -3182,11 +3523,11 @@ copy_zone(char *src, char *dst) status = do_subproc(cmdbuf); - if ((err = subproc_status("copy", status)) != Z_OK) { + if (subproc_status("copy", status, B_TRUE) != ZONE_SUBPROC_OK) { if (!out_null) (void) fprintf(stderr, gettext("\nThe copy failed.\n" "More information can be found in %s\n"), outfile); - return (err); + return (Z_ERR); } if (!out_null) @@ -3195,68 +3536,37 @@ copy_zone(char *src, char *dst) return (Z_OK); } -/* - * Run sys-unconfig on a zone. This will leave the zone in the installed - * state as long as there were no errors during the sys-unconfig. 
- */ static int -unconfigure_zone(char *zonepath) +zone_postclone(char *zonepath) { - int err; - int status; - struct stat unconfig_buf; - zone_cmd_arg_t zarg; - char cmdbuf[MAXPATHLEN + 51]; - - /* The zone has to be installed in order to mount the scratch zone. */ - if ((err = zone_set_state(target_zone, ZONE_STATE_INSTALLED)) != Z_OK) { - errno = err; - zperror2(target_zone, gettext("could not set state")); - return (Z_ERR); - } - - /* - * Trusted Extensions requires that cloned zones use the - * same sysid configuration, so it is not appropriate to - * unconfigure the zone. - */ - if (is_system_labeled()) - return (Z_OK); + char cmdbuf[MAXPATHLEN]; + int status; + brand_handle_t *bhp; + int err = Z_OK; /* - * Check if the zone is already sys-unconfiged. This saves us - * the work of bringing up the scratch zone so we can unconfigure it. + * Fetch the post-clone command, if any, from the brand + * configuration. */ - (void) snprintf(cmdbuf, sizeof (cmdbuf), "%s/root/etc/.UNCONFIGURED", - zonepath); - if (stat(cmdbuf, &unconfig_buf) == 0) - return (Z_OK); - - zarg.cmd = Z_MOUNT; - if (call_zoneadmd(target_zone, &zarg) != 0) { - zerror(gettext("call to %s failed"), "zoneadmd"); - (void) zone_set_state(target_zone, ZONE_STATE_INCOMPLETE); + if ((bhp = brand_open(target_brand)) == NULL) { + zerror(gettext("missing or invalid brand")); return (Z_ERR); } + (void) strcpy(cmdbuf, EXEC_PREFIX); + err = brand_get_postclone(bhp, target_zone, zonepath, cmdbuf + EXEC_LEN, + sizeof (cmdbuf) - EXEC_LEN, 0, NULL); + brand_close(bhp); - (void) snprintf(cmdbuf, sizeof (cmdbuf), - "/usr/sbin/zlogin -S %s /usr/sbin/sys-unconfig -R /a", target_zone); - - status = do_subproc(cmdbuf); - if ((err = subproc_status("sys-unconfig", status)) != Z_OK) { - errno = err; - zperror2(target_zone, gettext("sys-unconfig failed\n")); - (void) zone_set_state(target_zone, ZONE_STATE_INCOMPLETE); - } - - zarg.cmd = Z_UNMOUNT; - if (call_zoneadmd(target_zone, &zarg) != 0) { - zerror(gettext("call to %s 
failed"), "zoneadmd"); - (void) fprintf(stderr, gettext("could not unmount zone\n")); - return (Z_ERR); + if (err == 0 && strlen(cmdbuf) > EXEC_LEN) { + status = do_subproc(cmdbuf); + if ((err = subproc_status("postclone", status, B_FALSE)) + != ZONE_SUBPROC_OK) { + zerror(gettext("post-clone configuration failed.")); + err = Z_ERR; + } } - return ((err == Z_OK) ? Z_OK : Z_ERR); + return (err); } /* ARGSUSED */ @@ -3337,7 +3647,8 @@ clone_func(int argc, char *argv[]) return (Z_USAGE); } source_zone = argv[optind]; - if (sanity_check(target_zone, CMD_CLONE, B_FALSE, B_TRUE) != Z_OK) + if (sanity_check(target_zone, CMD_CLONE, B_FALSE, B_TRUE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_CLONE) != Z_OK) return (Z_ERR); @@ -3437,16 +3748,29 @@ clone_func(int argc, char *argv[]) err = clone_copy(source_zonepath, zonepath); } - if (err == Z_OK) - err = unconfigure_zone(zonepath); + /* + * Trusted Extensions requires that cloned zones use the same sysid + * configuration, so it is not appropriate to perform any + * post-clone reconfiguration. + */ + if ((err == Z_OK) && !is_system_labeled()) + err = zone_postclone(zonepath); done: + /* + * If everything went well, we mark the zone as installed. + */ + if (err == Z_OK) { + err = zone_set_state(target_zone, ZONE_STATE_INSTALLED); + if (err != Z_OK) { + errno = err; + zperror2(target_zone, gettext("could not set state")); + } + } release_lock_file(lockfd); return ((err == Z_OK) ? Z_OK : Z_ERR); } -#define RMCOMMAND "/usr/bin/rm -rf" - /* * Used when removing a zonepath after uninstalling or cleaning up after * the move subcommand. This handles a zonepath that has non-standard @@ -3538,7 +3862,8 @@ cleanup_zonepath(char *zonepath, boolean_t all) "removed.\n"), zonepath); - return (subproc_status(RMCOMMAND, status)); + return ((subproc_status(RMCOMMAND, status, B_TRUE) == + ZONE_SUBPROC_OK) ? 
Z_OK : Z_ERR); } /* @@ -3555,13 +3880,16 @@ cleanup_zonepath(char *zonepath, boolean_t all) (void) snprintf(cmdbuf, sizeof (cmdbuf), "exec " RMCOMMAND " %s/*", zonepath); status = do_subproc(cmdbuf); - return (subproc_status(RMCOMMAND, status)); + return ((subproc_status(RMCOMMAND, status, B_TRUE) == + ZONE_SUBPROC_OK) ? Z_OK : Z_ERR); } (void) snprintf(cmdbuf, sizeof (cmdbuf), "exec " RMCOMMAND " %s", zonepath); status = do_subproc(cmdbuf); - return (subproc_status(RMCOMMAND, status)); + + return ((subproc_status(RMCOMMAND, status, B_TRUE) == ZONE_SUBPROC_OK) + ? Z_OK : Z_ERR); } static int @@ -3602,7 +3930,8 @@ move_func(int argc, char *argv[]) return (Z_USAGE); } new_zonepath = argv[optind]; - if (sanity_check(target_zone, CMD_MOVE, B_FALSE, B_TRUE) != Z_OK) + if (sanity_check(target_zone, CMD_MOVE, B_FALSE, B_TRUE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_MOVE) != Z_OK) return (Z_ERR); @@ -3859,9 +4188,10 @@ detach_func(int argc, char *argv[]) return (Z_USAGE); } } + if (execute) { - if (sanity_check(target_zone, CMD_DETACH, B_FALSE, B_TRUE) - != Z_OK) + if (sanity_check(target_zone, CMD_DETACH, B_FALSE, B_TRUE, + B_FALSE) != Z_OK) return (Z_ERR); if (verify_details(CMD_DETACH) != Z_OK) return (Z_ERR); @@ -3984,10 +4314,11 @@ dev_fix(zone_dochandle_t handle) * "exec" the command so that the returned status is that of * RMCOMMAND and not the shell. 
*/ - (void) snprintf(cmdbuf, sizeof (cmdbuf), "exec " RMCOMMAND " %s", + (void) snprintf(cmdbuf, sizeof (cmdbuf), EXEC_PREFIX RMCOMMAND " %s", devpath); status = do_subproc(cmdbuf); - if ((err = subproc_status(RMCOMMAND, status)) != Z_OK) { + if ((err = subproc_status(RMCOMMAND, status, B_TRUE)) != + ZONE_SUBPROC_OK) { (void) fprintf(stderr, gettext("could not remove existing /dev\n")); return (Z_ERR); @@ -4119,6 +4450,7 @@ attach_func(int argc, char *argv[]) zone_dochandle_t handle; zone_dochandle_t athandle = NULL; char zonepath[MAXPATHLEN]; + char brand[MAXNAMELEN], atbrand[MAXNAMELEN]; boolean_t execute = B_TRUE; char *manifest_path; @@ -4154,7 +4486,8 @@ attach_func(int argc, char *argv[]) if (!execute) return (dryrun_attach(manifest_path)); - if (sanity_check(target_zone, CMD_ATTACH, B_FALSE, B_TRUE) != Z_OK) + if (sanity_check(target_zone, CMD_ATTACH, B_FALSE, B_TRUE, B_FALSE) + != Z_OK) return (Z_ERR); if (verify_details(CMD_ATTACH) != Z_OK) return (Z_ERR); @@ -4213,6 +4546,24 @@ attach_func(int argc, char *argv[]) goto done; } + /* + * Ensure that the detached and locally defined zones are both of + * the same brand. 
+ */ + if ((zonecfg_get_brand(handle, brand, sizeof (brand)) != 0) || + (zonecfg_get_brand(athandle, atbrand, sizeof (atbrand)) != 0)) { + err = Z_ERR; + zerror(gettext("missing or invalid brand")); + goto done; + } + + if (strcmp(atbrand, brand) != NULL) { + err = Z_ERR; + zerror(gettext("Trying to attach a '%s' zone to a '%s' " + "configuration."), atbrand, brand); + goto done; + } + /* sw_cmp prints error msgs as necessary */ if ((err = sw_cmp(handle, athandle, SW_CMP_NONE)) != Z_OK) goto done; @@ -4296,7 +4647,8 @@ uninstall_func(int argc, char *argv[]) return (Z_USAGE); } - if (sanity_check(target_zone, CMD_UNINSTALL, B_FALSE, B_TRUE) != Z_OK) + if (sanity_check(target_zone, CMD_UNINSTALL, B_FALSE, B_TRUE, B_FALSE) + != Z_OK) return (Z_ERR); if (!force) { @@ -4381,15 +4733,33 @@ static int mount_func(int argc, char *argv[]) { zone_cmd_arg_t zarg; + boolean_t force = B_FALSE; + int arg; - if (argc > 0) + /* + * The only supported subargument to the "mount" subcommand is + * "-f", which forces us to mount a zone in the INCOMPLETE state. + */ + optind = 0; + if ((arg = getopt(argc, argv, "f")) != EOF) { + switch (arg) { + case 'f': + force = B_TRUE; + break; + default: + return (Z_USAGE); + } + } + if (argc > optind) return (Z_USAGE); - if (sanity_check(target_zone, CMD_MOUNT, B_FALSE, B_FALSE) != Z_OK) + + if (sanity_check(target_zone, CMD_MOUNT, B_FALSE, B_FALSE, force) + != Z_OK) return (Z_ERR); if (verify_details(CMD_MOUNT) != Z_OK) return (Z_ERR); - zarg.cmd = Z_MOUNT; + zarg.cmd = force ? 
Z_FORCEMOUNT : Z_MOUNT; if (call_zoneadmd(target_zone, &zarg) != 0) { zerror(gettext("call to %s failed"), "zoneadmd"); return (Z_ERR); @@ -4405,7 +4775,8 @@ unmount_func(int argc, char *argv[]) if (argc > 0) return (Z_USAGE); - if (sanity_check(target_zone, CMD_UNMOUNT, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_UNMOUNT, B_FALSE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); zarg.cmd = Z_UNMOUNT; @@ -4423,7 +4794,8 @@ mark_func(int argc, char *argv[]) if (argc != 1 || strcmp(argv[0], "incomplete") != 0) return (Z_USAGE); - if (sanity_check(target_zone, CMD_MARK, B_FALSE, B_FALSE) != Z_OK) + if (sanity_check(target_zone, CMD_MARK, B_FALSE, B_FALSE, B_FALSE) + != Z_OK) return (Z_ERR); if (grab_lock_file(target_zone, &lockfd) != Z_OK) { @@ -4531,6 +4903,8 @@ main(int argc, char **argv) int arg; zoneid_t zid; struct stat st; + char *zone_lock_env; + int err; if ((locale = setlocale(LC_ALL, "")) == NULL) locale = "C"; @@ -4595,5 +4969,39 @@ main(int argc, char **argv) zperror(target_zone, B_TRUE); exit(Z_ERR); } - return (parse_and_run(argc - optind, &argv[optind])); + + /* + * See if we have inherited the right to manipulate this zone from + * a zoneadm instance in our ancestry. If so, set zone_lock_cnt to + * indicate it. If not, make that explicit in our environment. + */ + zone_lock_env = getenv(LOCK_ENV_VAR); + if (zone_lock_env == NULL) { + if (putenv(zoneadm_lock_not_held) != 0) { + zperror(target_zone, B_TRUE); + exit(Z_ERR); + } + } else { + zoneadm_is_nested = B_TRUE; + if (atoi(zone_lock_env) == 1) + zone_lock_cnt = 1; + } + + /* + * If we are going to be operating on a single zone, retrieve its + * brand type and determine whether it is native or not. 
+ */ + if ((target_zone != NULL) && + (strcmp(target_zone, GLOBAL_ZONENAME) != NULL)) { + if (zone_get_brand(target_zone, target_brand, + sizeof (target_brand)) != Z_OK) { + zerror(gettext("missing or invalid brand")); + exit(Z_ERR); + } + is_native_zone = (strcmp(target_brand, NATIVE_BRAND_NAME) == 0); + } + + err = parse_and_run(argc - optind, &argv[optind]); + + return (err); } diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile index 2d9082894a..7f5ab2c283 100644 --- a/usr/src/cmd/zoneadmd/Makefile +++ b/usr/src/cmd/zoneadmd/Makefile @@ -43,7 +43,7 @@ CFLAGS += $(CCVERBOSE) LAZYLIBS = $(ZLAZYLOAD) -ltsol $(ZNOLAZYLOAD) lint := LAZYLIBS = -ltsol LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair -lpool \ - -lgen -lbsm -lcontract -lzfs -ltsnet -luuid $(LAZYLIBS) + -lgen -lbsm -lcontract -lzfs -ltsnet -luuid -lbrand $(LAZYLIBS) XGETFLAGS += -a -x zoneadmd.xcl .KEEP_STATE: diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index 88b79de7db..842312133d 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -107,6 +107,8 @@ #include <pool.h> #include <sys/pool.h> +#include <libbrand.h> +#include <sys/brand.h> #include <libzonecfg.h> #include <synch.h> @@ -128,82 +130,6 @@ #define DFSTYPES "/etc/dfs/fstypes" #define MAXTNZLEN 2048 -/* - * This is the set of directories and devices (relative to <zone_root>/dev) - * which must be present in every zone. Users can augment this list with - * additional device rules in their zone configuration, but at present cannot - * remove any of the this set of standard devices. 
- */ -static const char *standard_devs[] = { - "arp", - "conslog", - "cpu/self/cpuid", - "crypto", - "cryptoadm", - "dsk", - "dtrace/*", - "dtrace/provider/*", - "fd", - "kstat", - "lo0", - "lo1", - "lo2", - "lo3", - "log", - "logindmux", - "null", -#ifdef __sparc - "openprom", -#endif - "poll", - "pool", - "ptmx", - "pts/*", - "random", - "rdsk", - "rmt", - "sad/user", - "swap", - "sysevent", - "tcp", - "tcp6", - "term", - "ticlts", - "ticots", - "ticotsord", - "tty", - "udp", - "udp6", - "urandom", - "zero", - "zfs", - NULL -}; - -struct source_target { - const char *source; - const char *target; -}; - -/* - * Set of symlinks (relative to <zone_root>/dev) which must be present in - * every zone. - */ -static struct source_target standard_devlinks[] = { - { "stderr", "./fd/2" }, - { "stdin", "./fd/0" }, - { "stdout", "./fd/1" }, - { "dtremote", "/dev/null" }, - { "console", "zconsole" }, - { "syscon", "zconsole" }, - { "sysmsg", "zconsole" }, - { "systty", "zconsole" }, - { "msglog", "zconsole" }, - { NULL, NULL } -}; - -static int vplat_mount_dev(zlog_t *); - /* for routing socket */ static int rts_seqno = 0; @@ -619,6 +545,8 @@ is_remote_fstype(const char *fstype, char *const *remote_fstypes) static void root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) { + assert(zone_isnative); + if (!isresolved && zonecfg_in_alt_root()) resolve_lofs(zlogp, zroot, zrootlen); (void) strcpy(strrchr(zroot, '/') + 1, "lu"); @@ -873,7 +801,7 @@ dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) * that would cost us an extra fork/exec without buying us anything. */ if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) - > sizeof (cmdbuf)) { + >= sizeof (cmdbuf)) { zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); return (-1); } @@ -904,7 +832,7 @@ domount(zlog_t *zlogp, const char *fstype, const char *opts, * that would cost us an extra fork/exec without buying us anything. 
*/ if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) - > sizeof (cmdbuf)) { + >= sizeof (cmdbuf)) { zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); return (-1); } @@ -1005,7 +933,7 @@ valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) * Make sure abspath has at least one '/' after its rootpath * component, and ends with '/'. */ - if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > + if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >= sizeof (abspath)) { zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, relpath); @@ -1025,12 +953,113 @@ valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) } static int +mount_one_dev_device_cb(void *arg, const char *match, const char *name) +{ + di_prof_t prof = arg; + + if (name == NULL) + return (di_prof_add_dev(prof, match)); + return (di_prof_add_map(prof, match, name)); +} + +static int +mount_one_dev_symlink_cb(void *arg, const char *source, const char *target) +{ + di_prof_t prof = arg; + + return (di_prof_add_symlink(prof, source, target)); +} + +/* + * Apply the standard lists of devices/symlinks/mappings and the user-specified + * list of devices (via zonecfg) to the /dev filesystem. The filesystem will + * use these as a profile/filter to determine what exists in /dev. 
+ */ +static int +mount_one_dev(zlog_t *zlogp, char *devpath) +{ + char brand[MAXNAMELEN]; + zone_dochandle_t handle = NULL; + brand_handle_t *bhp = NULL; + struct zone_devtab ztab; + di_prof_t prof = NULL; + int err; + int retval = -1; + + if (di_prof_init(devpath, &prof)) { + zerror(zlogp, B_TRUE, "failed to initialize profile"); + goto cleanup; + } + + /* Get a handle to the brand info for this zone */ + if ((zone_get_brand(zone_name, brand, sizeof (brand)) != Z_OK) || + (bhp = brand_open(brand)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine zone brand"); + goto cleanup; + } + + if (brand_platform_iter_devices(bhp, zone_name, + mount_one_dev_device_cb, prof) != 0) { + zerror(zlogp, B_TRUE, "failed to add standard device"); + goto cleanup; + } + + if (brand_platform_iter_link(bhp, + mount_one_dev_symlink_cb, prof) != 0) { + zerror(zlogp, B_TRUE, "failed to add standard symlink"); + goto cleanup; + } + + /* Add user-specified devices and directories */ + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_FALSE, "can't initialize zone handle"); + goto cleanup; + } + if (err = zonecfg_get_handle(zone_name, handle)) { + zerror(zlogp, B_FALSE, "can't get handle for zone " + "%s: %s", zone_name, zonecfg_strerror(err)); + goto cleanup; + } + if (err = zonecfg_setdevent(handle)) { + zerror(zlogp, B_FALSE, "%s: %s", zone_name, + zonecfg_strerror(err)); + goto cleanup; + } + while (zonecfg_getdevent(handle, &ztab) == Z_OK) { + if (di_prof_add_dev(prof, ztab.zone_dev_match)) { + zerror(zlogp, B_TRUE, "failed to add " + "user-specified device"); + goto cleanup; + } + } + (void) zonecfg_enddevent(handle); + + /* Send profile to kernel */ + if (di_prof_commit(prof)) { + zerror(zlogp, B_TRUE, "failed to commit profile"); + goto cleanup; + } + + retval = 0; + +cleanup: + if (bhp != NULL) + brand_close(bhp); + if (handle) + zonecfg_fini_handle(handle); + if (prof) + di_prof_fini(prof); + return (retval); +} + +static int mount_one(zlog_t *zlogp, struct 
zone_fstab *fsptr, const char *rootpath) { - char path[MAXPATHLEN]; - char specpath[MAXPATHLEN]; - char optstr[MAX_MNTOPT_STR]; + char path[MAXPATHLEN]; + char specpath[MAXPATHLEN]; + char optstr[MAX_MNTOPT_STR]; zone_fsopt_t *optptr; + int rv; if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", @@ -1136,8 +1165,23 @@ mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) sizeof (optstr)); } } - return (domount(zlogp, fsptr->zone_fs_type, optstr, - fsptr->zone_fs_special, path)); + + if ((rv = domount(zlogp, fsptr->zone_fs_type, optstr, + fsptr->zone_fs_special, path)) != 0) + return (rv); + + /* + * The mount succeeded. If this was not a mount of /dev then + * we're done. + */ + if (strcmp(fsptr->zone_fs_type, MNTTYPE_DEV) != 0) + return (0); + + /* + * We just mounted an instance of a /dev filesystem, so now we + * need to configure it. + */ + return (mount_one_dev(zlogp, path)); } static void @@ -1180,6 +1224,8 @@ build_mounted_pre_var(zlog_t *zlogp, char *rootpath, FILE *fp; uuid_t uuid; + assert(zone_isnative); + resolve_lofs(zlogp, rootpath, rootlen); (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); resolve_lofs(zlogp, luroot, sizeof (luroot)); @@ -1334,71 +1380,86 @@ build_mounted_post_var(zlog_t *zlogp, char *rootpath, const char *zonepath) return (B_TRUE); } -static int -mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) -{ - char rootpath[MAXPATHLEN]; - char zonepath[MAXPATHLEN]; - int num_fs = 0, i; - struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; - struct zone_fstab *fsp; - zone_dochandle_t handle = NULL; - zone_state_t zstate; +typedef struct plat_gmount_cb_data { + zlog_t *pgcd_zlogp; + struct zone_fstab **pgcd_fs_tab; + int *pgcd_num_fs; +} plat_gmount_cb_data_t; - if (zone_get_state(zone_name, &zstate) != Z_OK || - (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { - zerror(zlogp, B_FALSE, - "zone must be in '%s' or '%s' state to 
mount file-systems", - zone_state_str(ZONE_STATE_READY), - zone_state_str(ZONE_STATE_MOUNTED)); - goto bad; +/* + * plat_gmount_cb() is a callback function invoked by libbrand to iterate + * through all global brand platform mounts. + */ +int +plat_gmount_cb(void *data, const char *spec, const char *dir, + const char *fstype, const char *opt) +{ + plat_gmount_cb_data_t *cp = data; + zlog_t *zlogp = cp->pgcd_zlogp; + struct zone_fstab *fs_ptr = *cp->pgcd_fs_tab; + int num_fs = *cp->pgcd_num_fs; + struct zone_fstab *fsp, *tmp_ptr; + + num_fs++; + if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { + zerror(zlogp, B_TRUE, "memory allocation failed"); + return (-1); } - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone path"); - goto bad; - } + fs_ptr = tmp_ptr; + fsp = &fs_ptr[num_fs - 1]; - if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone root"); - goto bad; - } + /* update the callback struct passed in */ + *cp->pgcd_fs_tab = fs_ptr; + *cp->pgcd_num_fs = num_fs; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - goto bad; - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || - zonecfg_setfsent(handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - goto bad; + fsp->zone_fs_raw[0] = '\0'; + (void) strlcpy(fsp->zone_fs_special, spec, + sizeof (fsp->zone_fs_special)); + (void) strlcpy(fsp->zone_fs_dir, dir, sizeof (fsp->zone_fs_dir)); + (void) strlcpy(fsp->zone_fs_type, fstype, sizeof (fsp->zone_fs_type)); + fsp->zone_fs_options = NULL; + if (zonecfg_add_fs_option(fsp, (char *)opt) != Z_OK) { + zerror(zlogp, B_FALSE, "error adding property"); + return (-1); } - /* - * Iterate through the rest of the filesystems, first the IPDs, then - * the general FSs. Sort them all, then mount them in sorted order. 
- * This is to make sure the higher level directories (e.g., /usr) - * get mounted before any beneath them (e.g., /usr/local). - */ + return (0); +} + +static int +mount_filesystems_ipdent(zone_dochandle_t handle, zlog_t *zlogp, + struct zone_fstab **fs_tabp, int *num_fsp) +{ + struct zone_fstab *tmp_ptr, *fs_ptr, *fsp, fstab; + int num_fs; + + num_fs = *num_fsp; + fs_ptr = *fs_tabp; + if (zonecfg_setipdent(handle) != Z_OK) { zerror(zlogp, B_FALSE, "invalid configuration"); - goto bad; + return (-1); } while (zonecfg_getipdent(handle, &fstab) == Z_OK) { num_fs++; if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { zerror(zlogp, B_TRUE, "memory allocation failed"); - num_fs--; (void) zonecfg_endipdent(handle); - goto bad; + return (-1); } - fs_ptr = tmp_ptr; - fsp = &fs_ptr[num_fs - 1]; + + /* update the pointers passed in */ + *fs_tabp = tmp_ptr; + *num_fsp = num_fs; + /* * IPDs logically only have a mount point; all other properties * are implied. */ + fs_ptr = tmp_ptr; + fsp = &fs_ptr[num_fs - 1]; (void) strlcpy(fsp->zone_fs_dir, fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); fsp->zone_fs_special[0] = '\0'; @@ -1407,10 +1468,22 @@ mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) fsp->zone_fs_options = NULL; } (void) zonecfg_endipdent(handle); + return (0); +} + +static int +mount_filesystems_fsent(zone_dochandle_t handle, zlog_t *zlogp, + struct zone_fstab **fs_tabp, int *num_fsp, int mount_cmd) +{ + struct zone_fstab *tmp_ptr, *fs_ptr, *fsp, fstab; + int num_fs; + + num_fs = *num_fsp; + fs_ptr = *fs_tabp; if (zonecfg_setfsent(handle) != Z_OK) { zerror(zlogp, B_FALSE, "invalid configuration"); - goto bad; + return (-1); } while (zonecfg_getfsent(handle, &fstab) == Z_OK) { /* @@ -1425,10 +1498,13 @@ mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { zerror(zlogp, B_TRUE, "memory allocation failed"); - num_fs--; (void) zonecfg_endfsent(handle); - goto bad; + return 
(-1); } + /* update the pointers passed in */ + *fs_tabp = tmp_ptr; + *num_fsp = num_fs; + fs_ptr = tmp_ptr; fsp = &fs_ptr[num_fs - 1]; (void) strlcpy(fsp->zone_fs_dir, @@ -1442,19 +1518,116 @@ mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) fsp->zone_fs_options = fstab.zone_fs_options; } (void) zonecfg_endfsent(handle); + return (0); +} + +static int +mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) +{ + char rootpath[MAXPATHLEN]; + char zonepath[MAXPATHLEN]; + char brand[MAXNAMELEN]; + int i, num_fs = 0; + struct zone_fstab *fs_ptr = NULL; + zone_dochandle_t handle = NULL; + zone_state_t zstate; + brand_handle_t *bhp; + plat_gmount_cb_data_t cb; + + if (zone_get_state(zone_name, &zstate) != Z_OK || + (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { + zerror(zlogp, B_FALSE, + "zone must be in '%s' or '%s' state to mount file-systems", + zone_state_str(ZONE_STATE_READY), + zone_state_str(ZONE_STATE_MOUNTED)); + goto bad; + } + + if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { + zerror(zlogp, B_TRUE, "unable to determine zone path"); + goto bad; + } + + if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { + zerror(zlogp, B_TRUE, "unable to determine zone root"); + goto bad; + } + + if ((handle = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + goto bad; + } + if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || + zonecfg_setfsent(handle) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration"); + goto bad; + } + + /* Get a handle to the brand info for this zone */ + if ((zone_get_brand(zone_name, brand, sizeof (brand)) != Z_OK) || + (bhp = brand_open(brand)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine zone brand"); + return (-1); + } + + /* + * Get the list of global filesystems to mount from the brand + * configuration. 
+ */ + cb.pgcd_zlogp = zlogp; + cb.pgcd_fs_tab = &fs_ptr; + cb.pgcd_num_fs = &num_fs; + if (brand_platform_iter_gmounts(bhp, zonepath, + plat_gmount_cb, &cb) != 0) { + zerror(zlogp, B_FALSE, "unable to mount filesystems"); + brand_close(bhp); + return (-1); + } + brand_close(bhp); + + /* + * Iterate through the rest of the filesystems, first the IPDs, then + * the general FSs. Sort them all, then mount them in sorted order. + * This is to make sure the higher level directories (e.g., /usr) + * get mounted before any beneath them (e.g., /usr/local). + */ + if (mount_filesystems_ipdent(handle, zlogp, &fs_ptr, &num_fs) != 0) + goto bad; + + if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs, + mount_cmd) != 0) + goto bad; + zonecfg_fini_handle(handle); handle = NULL; /* - * When we're mounting a zone for administration, / is the - * scratch zone and dev is mounted at /dev. The to-be-upgraded - * zone is mounted at /a, and we set up that environment so that - * process can access both the running system's utilities - * and the to-be-modified zone's files. The only exception - * is the zone's /dev which isn't mounted at all, which is - * the same as global zone installation where /a/dev and - * /a/devices are not mounted. - * Zone mounting is done in three phases. + * Normally when we mount a zone all the zone filesystems + * get mounted relative to rootpath, which is usually + * <zonepath>/root. But when mounting a zone for administration + * purposes via the zone "mount" state, build_mounted_pre_var() + * updates rootpath to be <zonepath>/lu/a so we'll mount all + * the zones filesystems there instead. + * + * build_mounted_pre_var() and build_mounted_post_var() will + * also do some extra work to create directories and lofs mount + * a bunch of global zone file system paths into <zonepath>/lu. 
+ * + * This allows us to be able to enter the zone (now rooted at + * <zonepath>/lu) and run the upgrade/patch tools that are in the + * global zone and have them upgrade the to-be-modified zone's + * files mounted on /a. (Which mirrors the existing standard + * upgrade environment.) + * + * There is of course one catch. When doing the upgrade + * we need <zoneroot>/lu/dev to be the /dev filesystem + * for the zone and we don't want to have any /dev filesystem + * mounted at <zoneroot>/lu/a/dev. Since /dev is specified + * as a normal zone filesystem by default we'll try to mount + * it at <zoneroot>/lu/a/dev, so we have to detect this + * case and instead mount it at <zoneroot>/lu/dev. + * + * All this work is done in three phases: * 1) Create and populate lu directory (build_mounted_pre_var()). * 2) Mount the required filesystems as per the zone configuration. * 3) Set up the rest of the scratch zone environment @@ -1466,7 +1639,25 @@ mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) goto bad; qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); + for (i = 0; i < num_fs; i++) { + if (mount_cmd && + strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { + size_t slen = strlen(rootpath) - 2; + + /* + * By default we'll try to mount /dev as /a/dev + * but /dev is special and always goes at the top + * so strip the trailing '/a' from the rootpath. 
+ */ + assert(zone_isnative); + assert(strcmp(&rootpath[slen], "/a") == 0); + rootpath[slen] = '\0'; + if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) + goto bad; + rootpath[slen] = '/'; + continue; + } if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) goto bad; } @@ -2270,14 +2461,6 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd) zone_dochandle_t handle; char *privname = NULL; - if (mount_cmd) { - if (zonecfg_default_privset(privs) == Z_OK) - return (0); - zerror(zlogp, B_FALSE, - "failed to determine the zone's default privilege set"); - return (-1); - } - if ((handle = zonecfg_init_handle()) == NULL) { zerror(zlogp, B_TRUE, "getting zone configuration handle"); return (-1); @@ -2288,6 +2471,18 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd) return (-1); } + if (mount_cmd) { + if (zonecfg_default_privset(privs) == Z_OK) { + /* release the handle on the success path as well */ + zonecfg_fini_handle(handle); + return (0); + } + zerror(zlogp, B_FALSE, + "failed to determine the zone's default privilege set"); + zonecfg_fini_handle(handle); + return (-1); + } + switch (zonecfg_get_privset(handle, privs, &privname)) { case Z_OK: error = 0; @@ -2795,7 +2987,7 @@ again: /* * Create auto_home_<zone> map for this zone - * in the global zone. The local zone entry + * in the global zone. The non-global zone entry * will be created by automount when the zone * is booted.
*/ @@ -3296,6 +3488,9 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) zoneid_t rval = -1; priv_set_t *privs; char rootpath[MAXPATHLEN]; + char modname[MAXPATHLEN]; + struct brand_attr attr; + brand_handle_t *bhp; char *rctlbuf = NULL; size_t rctlbufsz = 0; char *zfsbuf = NULL; @@ -3358,6 +3553,7 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) goto error; if (mount_cmd) { + assert(zone_isnative); root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); /* @@ -3444,6 +3640,33 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd) goto error; } + if ((zone_get_brand(zone_name, attr.ba_brandname, + MAXNAMELEN) != Z_OK) || + (bhp = brand_open(attr.ba_brandname)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine brand name"); + return (-1); + } + + /* + * If this brand requires any kernel support, now is the time to + * get it loaded and initialized. + */ + if (brand_get_modname(bhp, modname, MAXPATHLEN) < 0) { + zerror(zlogp, B_FALSE, "unable to determine brand kernel " + "module"); + return (-1); + } + + if (strlen(modname) > 0) { + (void) strlcpy(attr.ba_modname, modname, MAXPATHLEN); + if (zone_setattr(zoneid, ZONE_ATTR_BRAND, &attr, + sizeof (attr) != 0)) { + zerror(zlogp, B_TRUE, "could not set zone brand " + "attribute."); + goto error; + } + } + /* * The following is a warning, not an error, and is not performed when * merely mounting a zone for administrative use. 
@@ -3549,19 +3772,27 @@ write_index_file(zoneid_t zoneid) int vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid) { + char zonepath[MAXPATHLEN]; if (!mount_cmd && validate_datasets(zlogp) != 0) { lofs_discard_mnttab(); return (-1); } - if (mount_filesystems(zlogp, mount_cmd) != 0) { + /* + * Before we try to mount filesystems we need to create the + * attribute backing store for /dev + */ + if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { + lofs_discard_mnttab(); + return (-1); + } + if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE) != 0) { lofs_discard_mnttab(); return (-1); } - /* mount /dev for zone (both normal and scratch zone) */ - if (vplat_mount_dev(zlogp) != 0) { + if (mount_filesystems(zlogp, mount_cmd) != 0) { lofs_discard_mnttab(); return (-1); } @@ -3582,6 +3813,8 @@ lu_root_teardown(zlog_t *zlogp) { char zroot[MAXPATHLEN]; + assert(zone_isnative); + if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine zone root"); return (-1); @@ -3647,6 +3880,10 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) { char *kzone; zoneid_t zoneid; + char zroot[MAXPATHLEN]; + char cmdbuf[MAXPATHLEN]; + char brand[MAXNAMELEN]; + brand_handle_t *bhp = NULL; kzone = zone_name; if (zonecfg_in_alt_root()) { @@ -3679,6 +3916,38 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) goto error; } + /* Get the path to the root of this zone */ + if (zone_get_zonepath(zone_name, zroot, sizeof (zroot)) != Z_OK) { + zerror(zlogp, B_FALSE, "unable to determine zone root"); + goto error; + } + + /* Get a handle to the brand info for this zone */ + if ((zone_get_brand(zone_name, brand, sizeof (brand)) != Z_OK) || + (bhp = brand_open(brand)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine zone brand"); + goto error; + } + /* + * If there is a brand 'halt' callback, execute it now to give the + * brand a chance to cleanup any custom configuration.
+ */ + (void) strcpy(cmdbuf, EXEC_PREFIX); + if (brand_get_halt(bhp, zone_name, zroot, cmdbuf + EXEC_LEN, + sizeof (cmdbuf) - EXEC_LEN, 0, NULL) < 0) { + brand_close(bhp); + zerror(zlogp, B_FALSE, "unable to determine branded zone's " + "halt callback."); + goto error; + } + brand_close(bhp); + + if ((strlen(cmdbuf) > EXEC_LEN) && + (do_subproc(zlogp, cmdbuf) != Z_OK)) { + zerror(zlogp, B_FALSE, "%s failed", cmdbuf); + goto error; + } + if (!unmount_cmd && unconfigure_network_interfaces(zlogp, zoneid) != 0) { zerror(zlogp, B_FALSE, @@ -3722,129 +3991,3 @@ error: lofs_discard_mnttab(); return (-1); } - -/* - * Apply the standard lists of devices/symlinks/mappings and the user-specified - * list of devices (via zonecfg) to the /dev filesystem. The filesystem will - * use these as a profile/filter to determine what exists in /dev. - */ -static int -vplat_mount_dev(zlog_t *zlogp) -{ - char zonedevpath[MAXPATHLEN]; - zone_dochandle_t handle = NULL; - struct zone_devtab ztab; - zone_fsopt_t opt_attr; - di_prof_t prof = NULL; - int i, err, len; - int retval = -1; - - struct zone_fstab devtab = { - "/dev", - "/dev", - MNTTYPE_DEV, - NULL, - "" - }; - - if (err = zone_get_devroot(zone_name, zonedevpath, - sizeof (zonedevpath))) { - zerror(zlogp, B_FALSE, "can't get zone dev: %s", - zonecfg_strerror(err)); - return (-1); - } - - /* - * The old /dev was a lofs mount from <zonepath>/dev, with - * dev fs, that becomes a mount on <zonepath>/root/dev. - * However, we need to preserve device permission bits during - * upgrade. What we should do is migrate the attribute directory - * on upgrade, but for now, preserve it at <zonepath>/dev. 
- */ - (void) strcpy(opt_attr.zone_fsopt_opt, "attrdir="); - len = strlen(opt_attr.zone_fsopt_opt); - if (err = zone_get_zonepath(zone_name, - opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) { - zerror(zlogp, B_FALSE, "can't get zone path: %s", - zonecfg_strerror(err)); - return (-1); - } - - if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev", - DEFAULT_DIR_MODE) != 0) - return (-1); - - (void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR); - devtab.zone_fs_options = &opt_attr; - opt_attr.zone_fsopt_next = NULL; - - /* mount /dev inside the zone */ - i = strlen(zonedevpath); - if (mount_one(zlogp, &devtab, zonedevpath)) - return (-1); - - (void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath)); - if (di_prof_init(zonedevpath, &prof)) { - zerror(zlogp, B_TRUE, "failed to initialize profile"); - goto cleanup; - } - - /* Add the standard devices and directories */ - for (i = 0; standard_devs[i] != NULL; ++i) { - if (di_prof_add_dev(prof, standard_devs[i])) { - zerror(zlogp, B_TRUE, "failed to add " - "standard device"); - goto cleanup; - } - } - - /* Add the standard symlinks */ - for (i = 0; standard_devlinks[i].source != NULL; ++i) { - if (di_prof_add_symlink(prof, - standard_devlinks[i].source, - standard_devlinks[i].target)) { - zerror(zlogp, B_TRUE, "failed to add " - "standard symlink"); - goto cleanup; - } - } - - /* Add user-specified devices and directories */ - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_FALSE, "can't initialize zone handle"); - goto cleanup; - } - if (err = zonecfg_get_handle(zone_name, handle)) { - zerror(zlogp, B_FALSE, "can't get handle for zone " - "%s: %s", zone_name, zonecfg_strerror(err)); - goto cleanup; - } - if (err = zonecfg_setdevent(handle)) { - zerror(zlogp, B_FALSE, "%s: %s", zone_name, - zonecfg_strerror(err)); - goto cleanup; - } - while (zonecfg_getdevent(handle, &ztab) == Z_OK) { - if (di_prof_add_dev(prof, ztab.zone_dev_match)) { - zerror(zlogp, B_TRUE, "failed to add " 
- "user-specified device"); - goto cleanup; - } - } - (void) zonecfg_enddevent(handle); - - /* Send profile to kernel */ - if (di_prof_commit(prof)) { - zerror(zlogp, B_TRUE, "failed to commit profile"); - goto cleanup; - } - - retval = 0; - -cleanup: - if (handle) - zonecfg_fini_handle(handle); - if (prof) - di_prof_fini(prof); - return (retval); -} diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c index 4e775ccd57..a8961c8eaf 100644 --- a/usr/src/cmd/zoneadmd/zcons.c +++ b/usr/src/cmd/zoneadmd/zcons.c @@ -402,58 +402,28 @@ error: } /* - * prep_console_slave() takes care of setting up the console slave device - * (the side that the zone will eventually open). It is a helper for - * init_console_slave(). + * init_console_slave() sets up the console slave device; the device node + * itself has already been set up in the device tree; the primary job + * here is to do some STREAMS plumbing. * - * We have to mknod and setup the console device; then the slave side is - * opened, and the appropriate STREAMS modules are pushed on. A wrinkle is that - * 'ptem' must be anchored in place (see streamio(7i) since we always want the - * console to have terminal semantics. + * The slave side of the console is opened and the appropriate STREAMS + * modules are pushed on. A wrinkle is that 'ptem' must be anchored + * in place (see streamio(7i) since we always want the console to + * have terminal semantics.) 
*/ -static int -prep_console_slave(zlog_t *zlogp, char *devroot) +int +init_console_slave(zlog_t *zlogp) { - char slavename[MAXPATHLEN]; - char zoneslavename[MAXPATHLEN]; - char zonedev[MAXPATHLEN]; - di_prof_t prof = NULL; - - assert(slavefd == -1); + char zconspath[MAXPATHLEN]; - (void) snprintf(slavename, sizeof (slavename), - "zcons/%s/%s", zone_name, ZCONS_SLAVE_NAME); - - (void) snprintf(zoneslavename, sizeof (zoneslavename), - "%s/dev/zconsole", devroot); - - (void) snprintf(zonedev, sizeof (zonedev), - "%s/dev", devroot); - - /* - * Specify zconsole as a name map in the dev profile - */ - if (di_prof_init(zonedev, &prof)) { - zerror(zlogp, B_TRUE, "failed to initialize profile"); - goto error; - } - - if (di_prof_add_map(prof, slavename, "zconsole")) { - zerror(zlogp, B_TRUE, "failed to add zconsole map"); - goto error; - } - - /* Send profile to kernel */ - if (di_prof_commit(prof)) { - zerror(zlogp, B_TRUE, "failed to commit profile"); - goto error; - } + if (slavefd != -1) + return (0); - di_prof_fini(prof); - prof = NULL; + (void) snprintf(zconspath, sizeof (zconspath), + "/dev/zcons/%s/%s", zone_name, ZCONS_SLAVE_NAME); - if ((slavefd = open(zoneslavename, O_RDWR | O_NOCTTY)) < 0) { - zerror(zlogp, B_TRUE, "failed to open %s", zoneslavename); + if ((slavefd = open(zconspath, O_RDWR | O_NOCTTY)) < 0) { + zerror(zlogp, B_TRUE, "failed to open %s", zconspath); goto error; } @@ -501,43 +471,15 @@ prep_console_slave(zlog_t *zlogp, char *devroot) } return (0); + error: if (slavefd != -1) (void) close(slavefd); slavefd = -1; - if (prof) - di_prof_fini(prof); + zerror(zlogp, B_FALSE, "could not initialize console slave"); return (-1); } -/* - * init_console_slave() sets up the console slave device; the device node - * itself has already been set up in the device tree; the primary job - * here is to do some STREAMS plumbing (via prep_console_slave()) and then - * to establish some symlinks. Eventually we should move that functionality - * into devfsadm. 
- */ -int -init_console_slave(zlog_t *zlogp) -{ - char devroot[MAXPATHLEN]; - - if (slavefd != -1) - return (0); - - if (zone_get_devroot(zone_name, devroot, sizeof (devroot)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone root"); - return (-1); - } - - if (prep_console_slave(zlogp, devroot) == -1) { - zerror(zlogp, B_FALSE, "could not prep console slave"); - return (-1); - } - - return (0); -} - void destroy_console_slave(void) { diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index 9208b8177d..0612da87df 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -93,17 +93,19 @@ #include <wait.h> #include <limits.h> #include <zone.h> +#include <libbrand.h> #include <libcontract.h> #include <libcontract_priv.h> #include <sys/contract/process.h> #include <sys/ctfs.h> -#include <sys/objfs.h> #include <libzonecfg.h> #include "zoneadmd.h" static char *progname; char *zone_name; /* zone which we are managing */ +char brand_name[MAXNAMELEN]; +boolean_t zone_isnative; static zoneid_t zone_id; zlog_t logsys; @@ -123,8 +125,6 @@ boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ #endif -#define PATH_TO_INIT "/sbin/init" - #define DEFAULT_LOCALE "C" static const char * @@ -132,8 +132,8 @@ z_cmd_name(zone_cmd_t zcmd) { /* This list needs to match the enum in sys/zone.h */ static const char *zcmdstr[] = { - "ready", "boot", "reboot", "halt", "note_uninstalling", - "mount", "unmount" + "ready", "boot", "forceboot", "reboot", "halt", + "note_uninstalling", "mount", "forcemount", "unmount" }; if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) @@ -271,8 +271,6 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, bzero(outargs, BOOTARGS_MAX); bzero(badarg, BOOTARGS_MAX); - (void) strlcpy(init_file, PATH_TO_INIT, MAXPATHLEN); - /* * If the user didn't specify transient boot arguments, check * to see if there 
were any specified in the zone configuration, @@ -357,7 +355,7 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, optind = 0; opterr = 0; err = Z_OK; - while ((c = getopt(argc, argv, "i:m:s")) != -1) { + while ((c = getopt(argc, argv, "fi:m:s")) != -1) { switch (c) { case 'i': /* @@ -366,6 +364,9 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, */ (void) strlcpy(init_file, optarg, MAXPATHLEN); break; + case 'f': + /* This has already been processed by zoneadm */ + break; case 'm': case 's': /* These pass through unmolested */ @@ -498,10 +499,17 @@ init_template(void) return (fd); } +typedef struct fs_callback { + zlog_t *zlogp; + zoneid_t zoneid; +} fs_callback_t; + static int -mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, - const char *dir, char *fstype) +mount_early_fs(void *data, const char *spec, const char *dir, + const char *fstype, const char *opt) { + zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; + zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; pid_t child; int child_status; int tmpl_fd; @@ -519,6 +527,10 @@ mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, return (-1); } else if (child == 0) { /* child */ + char opt_buf[MAX_MNTOPT_STR]; + int optlen = 0; + int mflag = MS_DATA; + (void) ct_tmpl_clear(tmpl_fd); /* * Even though there are no procs running in the zone, we @@ -529,7 +541,23 @@ mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, if (zone_enter(zoneid) == -1) { _exit(errno); } - if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) + if (opt != NULL) { + /* + * The mount() system call is incredibly annoying. + * If options are specified, we need to copy them + * into a temporary buffer since the mount() system + * call will overwrite the options string. It will + * also fail if the new option string it wants to + * write is bigger than the one we passed in, so + * you must pass in a buffer of the maximum possible + * option string length. sigh. 
+ */ + (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); + opt = opt_buf; + optlen = MAX_MNTOPT_STR; + mflag = MS_OPTIONSTR; + } + if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) _exit(errno); _exit(0); } @@ -554,27 +582,35 @@ mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, return (0); } -static int -zone_mount_early(zlog_t *zlogp, zoneid_t zoneid) +int +do_subproc(zlog_t *zlogp, char *cmdbuf) { - if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) - return (-1); + char inbuf[1024]; /* arbitrary large amount */ + FILE *file; + int status; - if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) + file = popen(cmdbuf, "r"); + if (file == NULL) { + zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); return (-1); + } - if (mount_early_fs(zlogp, zoneid, "objfs", OBJFS_ROOT, "objfs") != 0) - return (-1); + while (fgets(inbuf, sizeof (inbuf), file) != NULL) + if (zlogp != &logsys) + zerror(zlogp, B_FALSE, "%s", inbuf); + status = pclose(file); - if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", - "tmpfs") != 0) + if (WIFSIGNALED(status)) { + zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " + "signal %d", cmdbuf, WTERMSIG(status)); return (-1); - - if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", - "mntfs") != 0) + } + assert(WIFEXITED(status)); + if (WEXITSTATUS(status) == ZEXIT_EXEC) { + zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); return (-1); - - return (0); + } + return (WEXITSTATUS(status)); } static int @@ -584,6 +620,9 @@ zone_bootup(zlog_t *zlogp, const char *bootargs) struct stat st; char zroot[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; char nbootargs[BOOTARGS_MAX]; + char cmdbuf[MAXPATHLEN]; + fs_callback_t cb; + brand_handle_t *bhp; int err; if (init_console_slave(zlogp) != 0) @@ -595,8 +634,52 @@ zone_bootup(zlog_t *zlogp, const char *bootargs) return (-1); } - if (zone_mount_early(zlogp, zoneid) != 0) + cb.zlogp = zlogp; + cb.zoneid = zoneid; 
+ + /* Get a handle to the brand info for this zone */ + if ((bhp = brand_open(brand_name)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine zone brand"); + return (-1); + } + + /* + * Get the list of filesystems to mount from the brand + * configuration. These mounts are done via a thread that will + * enter the zone, so they are done from within the context of the + * zone. + */ + if (brand_platform_iter_mounts(bhp, mount_early_fs, &cb) != 0) { + zerror(zlogp, B_FALSE, "unable to mount filesystems"); + brand_close(bhp); + return (-1); + } + + /* + * Get the brand's boot callback if it exists. + */ + if (zone_get_zonepath(zone_name, zroot, sizeof (zroot)) != Z_OK) { + zerror(zlogp, B_FALSE, "unable to determine zone root"); + return (-1); + } + (void) strcpy(cmdbuf, EXEC_PREFIX); + if (brand_get_boot(bhp, zone_name, zroot, cmdbuf + EXEC_LEN, + sizeof (cmdbuf) - EXEC_LEN, 0, NULL) != 0) { + zerror(zlogp, B_FALSE, + "unable to determine branded zone's boot callback"); + brand_close(bhp); + return (-1); + } + + /* Get the path for this zone's init(1M) (or equivalent) process. */ + if (brand_get_initname(bhp, init_file, MAXPATHLEN) != 0) { + zerror(zlogp, B_FALSE, + "unable to determine zone's init(1M) location"); + brand_close(bhp); return (-1); + } + + brand_close(bhp); err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, bad_boot_arg); @@ -607,14 +690,12 @@ zone_bootup(zlog_t *zlogp, const char *bootargs) assert(init_file[0] != '\0'); - /* - * Try to anticipate possible problems: Make sure whatever binary - * is supposed to be init is executable. - */ + /* Try to anticipate possible problems: Make sure init is executable. 
*/ if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine zone root"); return (-1); } + (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, init_file); if (stat(initpath, &st) == -1) { @@ -627,6 +708,17 @@ zone_bootup(zlog_t *zlogp, const char *bootargs) return (-1); } + /* + * If there is a brand 'boot' callback, execute it now to give the + * brand one last chance to do any additional setup before the zone + * is booted. + */ + if ((strlen(cmdbuf) > EXEC_LEN) && + (do_subproc(zlogp, cmdbuf) != Z_OK)) { + zerror(zlogp, B_FALSE, "%s failed", cmdbuf); + return (-1); + } + if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { zerror(zlogp, B_TRUE, "could not set zone boot file"); return (-1); @@ -736,6 +828,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, zlog_t *zlogp; zone_cmd_rval_t *rvalp; size_t rlen = getpagesize(); /* conservative */ + fs_callback_t cb; + brand_handle_t *bhp; /* LINTED E_BAD_PTR_CAST_ALIGN */ zargp = (zone_cmd_arg_t *)args; @@ -811,9 +905,9 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, /* * Check for validity of command. */ - if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && - cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && - cmd != Z_UNMOUNT) { + if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && + cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && + cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); goto out; } @@ -856,6 +950,14 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, zlogp = &logsys; /* Log errors to syslog */ } + /* + * If we are being asked to forcibly mount or boot a zone, we + * pretend that an INCOMPLETE zone is actually INSTALLED. 
+ */ + if (zstate == ZONE_STATE_INCOMPLETE && + (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) + zstate = ZONE_STATE_INSTALLED; + switch (zstate) { case ZONE_STATE_CONFIGURED: case ZONE_STATE_INCOMPLETE: @@ -876,6 +978,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, eventstream_write(Z_EVT_ZONE_READIED); break; case Z_BOOT: + case Z_FORCEBOOT: eventstream_write(Z_EVT_ZONE_BOOTING); if ((rval = zone_ready(zlogp, B_FALSE)) == 0) rval = zone_bootup(zlogp, zargp->bootbuf); @@ -916,23 +1019,51 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, eventstream_write(Z_EVT_ZONE_UNINSTALLING); break; case Z_MOUNT: + case Z_FORCEMOUNT: if (kernelcall) /* Invalid; can't happen */ abort(); + if (!zone_isnative) { + zerror(zlogp, B_FALSE, + "%s operation is invalid for branded " + "zones", z_cmd_name(cmd)); + rval = -1; + break; + } + rval = zone_ready(zlogp, B_TRUE); - if (rval == 0) { - eventstream_write(Z_EVT_ZONE_READIED); - rval = zone_mount_early(zlogp, zone_id); + if (rval != 0) + break; + + eventstream_write(Z_EVT_ZONE_READIED); + + /* Get a handle to the brand info for this zone */ + if ((bhp = brand_open(brand_name)) == NULL) { + rval = -1; + break; } /* + * Get the list of filesystems to mount from + * the brand configuration. These mounts are done + * via a thread that will enter the zone, so they + * are done from within the context of the zone. + */ + cb.zlogp = zlogp; + cb.zoneid = zone_id; + rval = brand_platform_iter_mounts(bhp, + mount_early_fs, &cb); + + brand_close(bhp); + + /* * Ordinarily, /dev/fd would be mounted inside the zone * by svc:/system/filesystem/usr:default, but since * we're not booting the zone, we need to do this * manually. 
*/ if (rval == 0) - rval = mount_early_fs(zlogp, zone_id, "fd", - "/dev/fd", "fd"); + rval = mount_early_fs(&cb, + "fd", "/dev/fd", "fd", NULL); break; case Z_UNMOUNT: if (kernelcall) /* Invalid; can't happen */ @@ -1245,6 +1376,7 @@ main(int argc, char *argv[]) priv_set_t *privset; zone_state_t zstate; char parents_locale[MAXPATHLEN]; + brand_handle_t *bhp; int err; pid_t pid; @@ -1347,13 +1479,22 @@ main(int argc, char *argv[]) zonecfg_strerror(err)); return (1); } - if (zstate < ZONE_STATE_INSTALLED) { + if (zstate < ZONE_STATE_INCOMPLETE) { zerror(zlogp, B_FALSE, "cannot manage a zone which is in state '%s'", zone_state_str(zstate)); return (1); } + /* Get a handle to the brand info for this zone */ + if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) + != Z_OK) || (bhp = brand_open(brand_name)) == NULL) { + zerror(zlogp, B_FALSE, "unable to determine zone brand"); + return (1); + } + zone_isnative = brand_is_native(bhp); + brand_close(bhp); + /* * Check that we have all privileges. It would be nice to pare * this down, but this is at least a first cut. diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index 28df67e8cd..cfb90f93f3 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -53,6 +53,9 @@ extern "C" { #define DEVFSADM "devfsadm" #define DEVFSADM_PATH "/usr/sbin/devfsadm" +#define EXEC_PREFIX "exec " +#define EXEC_LEN (strlen(EXEC_PREFIX)) + typedef struct zlog { FILE *logfile; /* file to log to */ @@ -75,6 +78,7 @@ extern boolean_t bringup_failure_recovery; extern char *zone_name; extern char boot_args[BOOTARGS_MAX]; extern char bad_boot_arg[BOOTARGS_MAX]; +extern boolean_t zone_isnative; extern void zerror(zlog_t *, boolean_t, const char *, ...); @@ -104,7 +108,6 @@ extern zoneid_t vplat_create(zlog_t *, boolean_t); extern int vplat_bringup(zlog_t *, boolean_t, zoneid_t); extern int vplat_teardown(zlog_t *, boolean_t); - /* * Console subsystem routines. 
*/ @@ -119,6 +122,11 @@ extern void serve_console(zlog_t *); */ extern int init_template(void); +/* + * Routine to manage child processes. + */ +extern int do_subproc(zlog_t *, char *); + #ifdef __cplusplus } #endif diff --git a/usr/src/cmd/zonecfg/Makefile b/usr/src/cmd/zonecfg/Makefile index ac0ce57047..f4a4d2d287 100644 --- a/usr/src/cmd/zonecfg/Makefile +++ b/usr/src/cmd/zonecfg/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -33,7 +32,7 @@ include ../Makefile.cmd LFLAGS = -t YFLAGS = -d -b zonecfg_grammar -LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs +LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs -lbrand CPPFLAGS += -I. 
CLEANFILES += zonecfg_lex.c zonecfg_grammar.tab.c zonecfg_grammar.tab.h diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c index c6842e3eb5..6047f14abf 100644 --- a/usr/src/cmd/zonecfg/zonecfg.c +++ b/usr/src/cmd/zonecfg/zonecfg.c @@ -56,6 +56,7 @@ #include <sys/sysmacros.h> #include <errno.h> +#include <fcntl.h> #include <strings.h> #include <unistd.h> #include <ctype.h> @@ -69,8 +70,11 @@ #include <libintl.h> #include <alloca.h> #include <signal.h> +#include <wait.h> #include <libtecla.h> #include <libzfs.h> +#include <sys/brand.h> +#include <libbrand.h> #include <libzonecfg.h> #include "zonecfg.h" @@ -80,6 +84,8 @@ #endif #define PAGER "/usr/bin/more" +#define EXEC_PREFIX "exec " +#define EXEC_LEN (strlen(EXEC_PREFIX)) struct help { uint_t cmd_num; @@ -93,6 +99,7 @@ extern int lex_lineno; #define MAX_LINE_LEN 1024 #define MAX_CMD_HIST 1024 +#define MAX_CMD_LEN 1024 /* * Each SHELP_ should be a simple string. @@ -155,6 +162,7 @@ static char *res_types[] = { "dataset", "limitpriv", "bootargs", + "brand", NULL }; @@ -180,6 +188,7 @@ static char *prop_types[] = { "raw", "limitpriv", "bootargs", + "brand", NULL }; @@ -252,6 +261,7 @@ static const char *select_cmds[] = { static const char *set_cmds[] = { "set zonename=", "set zonepath=", + "set brand=", "set autoboot=", "set pool=", "set limitpriv=", @@ -368,6 +378,9 @@ static zone_dochandle_t handle; static char zone[ZONENAME_MAX]; static char revert_zone[ZONENAME_MAX]; +/* global brand operations */ +static brand_handle_t *brand; + /* set in modifying functions, checked in read_input() */ static bool need_to_commit = FALSE; bool saw_error; @@ -929,6 +942,8 @@ usage(bool verbose, uint_t flags) (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_ZONEPATH)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), + pt_to_str(PT_BRAND)); + (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), pt_to_str(PT_AUTOBOOT)); (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"), 
pt_to_str(PT_BOOTARGS)); @@ -1009,10 +1024,22 @@ static int initialize(bool handle_expected) { int err; + char brandname[MAXNAMELEN]; if (zonecfg_check_handle(handle) != Z_OK) { if ((err = zonecfg_get_handle(zone, handle)) == Z_OK) { got_handle = TRUE; + if (zonecfg_get_brand(handle, brandname, + sizeof (brandname)) != Z_OK) { + zerr("Zone %s is inconsistent: missing " + "brand attribute", zone); + exit(Z_ERR); + } + if ((brand = brand_open(brandname)) == NULL) { + zerr("Zone %s uses non-existent brand \"%s\"." + " Unable to continue", zone, brandname); + exit(Z_ERR); + } } else { zone_perror(zone, err, handle_expected || got_handle); if (err == Z_NO_ZONE && !got_handle && @@ -1224,7 +1251,8 @@ create_func(cmd_t *cmd) (void) strlcpy(zone_template, "SUNWdefault", sizeof (zone_template)); optind = 0; - while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:")) != EOF) { + while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:")) + != EOF) { switch (arg) { case '?': if (optopt == '?') @@ -1349,6 +1377,7 @@ export_func(cmd_t *cmd) int err, arg; char zonepath[MAXPATHLEN], outfile[MAXPATHLEN], pool[MAXNAMELEN]; char bootargs[BOOTARGS_MAX]; + char brand[MAXNAMELEN]; char *limitpriv; FILE *of; boolean_t autoboot; @@ -1400,6 +1429,11 @@ export_func(cmd_t *cmd) (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), pt_to_str(PT_ZONEPATH), zonepath); + if ((zone_get_brand(zone, brand, sizeof (brand)) == Z_OK) && + (strcmp(brand, NATIVE_BRAND_NAME) != 0)) + (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), + pt_to_str(PT_BRAND), brand); + if (zonecfg_get_autoboot(handle, &autoboot) == Z_OK) (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET), pt_to_str(PT_AUTOBOOT), autoboot ? 
"true" : "false"); @@ -2734,6 +2768,8 @@ set_func(cmd_t *cmd) res_type = RT_ZONEPATH; } else if (prop_type == PT_AUTOBOOT) { res_type = RT_AUTOBOOT; + } else if (prop_type == PT_BRAND) { + res_type = RT_BRAND; } else if (prop_type == PT_POOL) { res_type = RT_POOL; } else if (prop_type == PT_LIMITPRIV) { @@ -2834,6 +2870,18 @@ set_func(cmd_t *cmd) else need_to_commit = TRUE; return; + case RT_BRAND: + if (state_atleast(ZONE_STATE_INSTALLED)) { + zerr(gettext("Zone %s already installed; %s %s not " + "allowed."), zone, cmd_to_str(CMD_SET), + rt_to_str(RT_BRAND)); + return; + } + if ((err = zonecfg_set_brand(handle, prop_id)) != Z_OK) + zone_perror(zone, err, TRUE); + else + need_to_commit = TRUE; + return; case RT_AUTOBOOT: if (strcmp(prop_id, "true") == 0) { autoboot = B_TRUE; @@ -3097,6 +3145,19 @@ info_zonepath(zone_dochandle_t handle, FILE *fp) } static void +info_brand(zone_dochandle_t handle, FILE *fp) +{ + char brand[MAXNAMELEN]; + + if (zonecfg_get_brand(handle, brand, sizeof (brand)) == Z_OK) + (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_BRAND), + brand); + else + (void) fprintf(fp, "%s %s\n", pt_to_str(PT_BRAND), + gettext("not specified")); +} + +static void info_autoboot(zone_dochandle_t handle, FILE *fp) { boolean_t autoboot; @@ -3464,7 +3525,6 @@ info_ds(zone_dochandle_t handle, FILE *fp, cmd_t *cmd) rt_to_str(RT_DATASET)); } - void info_func(cmd_t *cmd) { @@ -3483,8 +3543,6 @@ info_func(cmd_t *cmd) pager = PAGER; if ((fp = popen(pager, "w")) != NULL) need_to_close = TRUE; - else - fp = stdout; setbuf(fp, NULL); } @@ -3519,6 +3577,7 @@ info_func(cmd_t *cmd) case RT_UNKNOWN: info_zonename(handle, fp); info_zonepath(handle, fp); + info_brand(handle, fp); info_autoboot(handle, fp); info_bootargs(handle, fp); info_pool(handle, fp); @@ -3537,6 +3596,9 @@ info_func(cmd_t *cmd) case RT_ZONEPATH: info_zonepath(handle, fp); break; + case RT_BRAND: + info_brand(handle, fp); + break; case RT_AUTOBOOT: info_autoboot(handle, fp); break; @@ -3596,6 +3658,95 @@ 
check_reqd_prop(char *attr, int rt, int pt, int *ret_val) } } +static int +do_subproc(char *cmdbuf) +{ + char inbuf[MAX_CMD_LEN]; + FILE *file; + int status; + + file = popen(cmdbuf, "r"); + if (file == NULL) { + zerr(gettext("Could not launch: %s"), cmdbuf); + return (-1); + } + + while (fgets(inbuf, sizeof (inbuf), file) != NULL) + fprintf(stderr, "%s", inbuf); + status = pclose(file); + + if (WIFSIGNALED(status)) { + zerr(gettext("%s unexpectedly terminated due to signal %d"), + cmdbuf, WTERMSIG(status)); + return (-1); + } + assert(WIFEXITED(status)); + return (WEXITSTATUS(status)); +} + +static int +brand_verify(zone_dochandle_t handle) +{ + char *xml_file = "/tmp/zonecfg_verify.XXXXXX"; + char cmdbuf[MAX_CMD_LEN]; + brand_handle_t *bhp; + char brand[MAXNAMELEN]; + int err; + + if (zonecfg_get_brand(handle, brand, sizeof (brand)) != Z_OK) { + zerr("%s: %s\n", zone, gettext("could not get zone brand")); + return (Z_INVALID_DOCUMENT); + } + if ((bhp = brand_open(brand)) == NULL) { + zerr("%s: %s\n", zone, gettext("unknown brand.")); + return (Z_INVALID_DOCUMENT); + } + + /* + * Fetch the verify command, if any, from the brand configuration + * and build the command line to execute it. + */ + strcpy(cmdbuf, EXEC_PREFIX); + err = brand_get_verify_cfg(bhp, cmdbuf + EXEC_LEN, + sizeof (cmdbuf) - (EXEC_LEN + (strlen(xml_file) + 1))); + brand_close(bhp); + if (err != Z_OK) { + zerr("%s: %s\n", zone, + gettext("could not get brand verification command")); + return (Z_INVALID_DOCUMENT); + } + + /* + * If the brand doesn't provide a verification routine, we just + * return success. + */ + if (strlen(cmdbuf) == EXEC_LEN) + return (Z_OK); + + /* + * Dump the current config information for this zone to a file. + */ + if (mkstemp(xml_file) == NULL) + return (Z_TEMP_FILE); + if ((err = zonecfg_verify_save(handle, xml_file)) != Z_OK) { + (void) unlink(xml_file); + return (err); + } + + /* + * Execute the verification command. 
+ */ + if ((strlcat(cmdbuf, " ", MAX_CMD_LEN) >= MAX_CMD_LEN) || + (strlcat(cmdbuf, xml_file, MAX_CMD_LEN) >= MAX_CMD_LEN)) { + err = Z_BRAND_ERROR; + } else { + err = do_subproc(cmdbuf); + } + + (void) unlink(xml_file); + return ((err == Z_OK) ? Z_OK : Z_BRAND_ERROR); +} + /* * See the DTD for which attributes are required for which resources. * @@ -3614,6 +3765,7 @@ verify_func(cmd_t *cmd) struct zone_rctltab rctltab; struct zone_dstab dstab; char zonepath[MAXPATHLEN]; + char brand[MAXNAMELEN]; int err, ret_val = Z_OK, arg; bool save = FALSE; @@ -3654,6 +3806,17 @@ verify_func(cmd_t *cmd) saw_error = TRUE; } + if ((err = zonecfg_get_brand(handle, brand, sizeof (brand))) != Z_OK) { + zone_perror(zone, err, TRUE); + return; + } + if (strcmp(brand, NATIVE_BRAND_NAME) != 0) { + if ((err = brand_verify(handle)) != Z_OK) { + zone_perror(zone, err, TRUE); + return; + } + } + if ((err = zonecfg_setipdent(handle)) != Z_OK) { zone_perror(zone, err, TRUE); return; @@ -4780,6 +4943,8 @@ main(int argc, char *argv[]) err = one_command_at_a_time(argc - optind, &(argv[optind])); } zonecfg_fini_handle(handle); + if (brand != NULL) + brand_close(brand); (void) del_GetLine(gl); return (err); } diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h index bacf19225a..6e153d40c1 100644 --- a/usr/src/cmd/zonecfg/zonecfg.h +++ b/usr/src/cmd/zonecfg/zonecfg.h @@ -82,9 +82,10 @@ typedef int bool; #define RT_DATASET 11 #define RT_LIMITPRIV 12 /* really a property, but for info ... */ #define RT_BOOTARGS 13 /* really a property, but for info ... */ +#define RT_BRAND 14 /* really a property, but for info ... 
*/ #define RT_MIN RT_UNKNOWN -#define RT_MAX RT_BOOTARGS +#define RT_MAX RT_BRAND /* property types: increment PT_MAX when expanding this list */ #define PT_UNKNOWN 0 @@ -107,9 +108,10 @@ typedef int bool; #define PT_RAW 17 #define PT_LIMITPRIV 18 #define PT_BOOTARGS 19 +#define PT_BRAND 20 #define PT_MIN PT_UNKNOWN -#define PT_MAX PT_BOOTARGS +#define PT_MAX PT_BRAND #define MAX_EQ_PROP_PAIRS 3 diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y index d47aac79ae..dc391da0b9 100644 --- a/usr/src/cmd/zonecfg/zonecfg_grammar.y +++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y @@ -60,7 +60,7 @@ extern void yyerror(char *s); %token COMMIT REVERT EXIT SEMICOLON TOKEN ZONENAME ZONEPATH AUTOBOOT POOL NET %token FS IPD ATTR DEVICE RCTL SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL %token NAME MATCH PRIV LIMIT ACTION VALUE EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET -%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS +%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND %type <strval> TOKEN EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET property_value OPEN_PAREN CLOSE_PAREN COMMA simple_prop_val @@ -68,7 +68,7 @@ extern void yyerror(char *s); %type <ival> resource_type NET FS IPD DEVICE RCTL ATTR %type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT - ACTION + ACTION BRAND %type <cmd> command %type <cmd> add_command ADD %type <cmd> cancel_command CANCEL @@ -420,6 +420,15 @@ info_command: INFO $$->cmd_res_type = RT_ZONEPATH; $$->cmd_prop_nv_pairs = 0; } + | INFO BRAND + { + if (($$ = alloc_cmd()) == NULL) + YYERROR; + cmd = $$; + $$->cmd_handler = &info_func; + $$->cmd_res_type = RT_BRAND; + $$->cmd_prop_nv_pairs = 0; + } | INFO AUTOBOOT { if (($$ = alloc_cmd()) == NULL) @@ -720,6 +729,7 @@ property_name: SPECIAL { $$ = PT_SPECIAL; } | PRIV { $$ = PT_PRIV; } | LIMIT { $$ = PT_LIMIT; } | ACTION { $$ = PT_ACTION; } + | BRAND { $$ = 
PT_BRAND; } /* * The grammar builds data structures from the bottom up. Thus various diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l index 1f92fb0e34..aef16edbcb 100644 --- a/usr/src/cmd/zonecfg/zonecfg_lex.l +++ b/usr/src/cmd/zonecfg/zonecfg_lex.l @@ -165,6 +165,9 @@ char *safe_strdup(char *s); <TSTATE>zonepath { return ZONEPATH; } <CSTATE>zonepath { return ZONEPATH; } +<TSTATE>brand { return BRAND; } +<CSTATE>brand { return BRAND; } + <TSTATE>autoboot { return AUTOBOOT; } <CSTATE>autoboot { return AUTOBOOT; } diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c new file mode 100644 index 0000000000..1248931085 --- /dev/null +++ b/usr/src/common/brand/lx/lx_signum.c @@ -0,0 +1,219 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/signal.h> +#include <lx_signum.h> + +/* + * Delivering signals to a Linux process is complicated by differences in + * signal numbering, stack structure and contents, and the action taken when a + * signal handler exits. 
In addition, many signal-related structures, such as + * sigset_ts, vary between Solaris and Linux. + * + * The simplest transformation that must be done when sending signals is to + * translate between Linux and Solaris signal numbers. + * + * These are the major signal number differences between Linux and Solaris: + * + * ==================================== + * | Number | Linux | Solaris | + * | ====== | ========= | ========== | + * | 7 | SIGBUS | SIGEMT | + * | 10 | SIGUSR1 | SIGBUS | + * | 12 | SIGUSR2 | SIGSYS | + * | 16 | SIGSTKFLT | SIGUSR1 | + * | 17 | SIGCHLD | SIGUSR2 | + * | 18 | SIGCONT | SIGCHLD | + * | 19 | SIGSTOP | SIGPWR | + * | 20 | SIGTSTP | SIGWINCH | + * | 21 | SIGTTIN | SIGURG | + * | 22 | SIGTTOU | SIGPOLL | + * | 23 | SIGURG | SIGSTOP | + * | 24 | SIGXCPU | SIGTSTP | + * | 25 | SIGXFSZ | SIGCONT | + * | 26 | SIGVTALRM | SIGTTIN | + * | 27 | SIGPROF | SIGTTOU | + * | 28 | SIGWINCH | SIGVTALRM | + * | 29 | SIGPOLL | SIGPROF | + * | 30 | SIGPWR | SIGXCPU | + * | 31 | SIGSYS | SIGXFSZ | + * ==================================== + * + * Not every Linux signal maps to a Solaris signal, nor does every Solaris + * signal map to a Linux counterpart. However, when signals do map, the + * mapping is unique. + * + * One mapping issue is that Linux supports 32 real time signals, with SIGRTMIN + * typically starting at or near 32 (SIGRTMIN) and proceeding to 63 (SIGRTMAX) + * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one or more of + * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as + * needed.) Conversely, Solaris actively uses signals 32-40 for other purposes + * and only supports seven real time signals, in the range 41 (SIGRTMIN) + * to 48 (SIGRTMAX). 
+ * + * At present, attempting to translate a Linux signal greater than 39 but + * less than 62 will generate an error (we allow SIGRTMAX because a program + * should be able to send SIGRTMAX without getting an EINVAL, though obviously + * anything that loops through the signals from SIGRTMIN to SIGRTMAX will + * fail.) + * + * Similarly, attempting to translate a native Solaris signal in the range + * 32-40 will also generate an error as we don't want to support the receipt of + * those signals from the Solaris global zone. + */ + +/* + * Linux to Solaris signal map + * + * Usage: solaris_signal = ltos_signo[lx_signal]; + */ +const int +ltos_signo[LX_NSIG] = { + 0, + SIGHUP, + SIGINT, + SIGQUIT, + SIGILL, + SIGTRAP, + SIGABRT, + SIGBUS, + SIGFPE, + SIGKILL, + SIGUSR1, + SIGSEGV, + SIGUSR2, + SIGPIPE, + SIGALRM, + SIGTERM, + SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */ + SIGCHLD, + SIGCONT, + SIGSTOP, + SIGTSTP, + SIGTTIN, + SIGTTOU, + SIGURG, + SIGXCPU, + SIGXFSZ, + SIGVTALRM, + SIGPROF, + SIGWINCH, + SIGPOLL, + SIGPWR, + SIGSYS, + _SIGRTMIN, /* 32: Linux SIGRTMIN */ + _SIGRTMIN + 1, + _SIGRTMIN + 2, + _SIGRTMIN + 3, + _SIGRTMIN + 4, + _SIGRTMIN + 5, + _SIGRTMIN + 6, + -1, /* 39: Linux SIGRTMIN + 7 */ + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, + -1, /* 63: Linux SIGRTMIN + 32, or SIGRTMAX - 1 */ + _SIGRTMAX, /* 64: Linux SIGRTMAX */ +}; + +/* + * Solaris to Linux signal map + * + * Usage: lx_signal = stol_signo[solaris_signal]; + */ +const int +stol_signo[NSIG] = { + 0, + LX_SIGHUP, + LX_SIGINT, + LX_SIGQUIT, + LX_SIGILL, + LX_SIGTRAP, + LX_SIGABRT, + LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */ + LX_SIGFPE, + LX_SIGKILL, + LX_SIGBUS, + LX_SIGSEGV, + LX_SIGSYS, + LX_SIGPIPE, + LX_SIGALRM, + LX_SIGTERM, + LX_SIGUSR1, + LX_SIGUSR2, + LX_SIGCHLD, + LX_SIGPWR, + LX_SIGWINCH, + LX_SIGURG, + LX_SIGPOLL, + LX_SIGSTOP, + LX_SIGTSTP, + LX_SIGCONT, + 
LX_SIGTTIN, + LX_SIGTTOU, + LX_SIGVTALRM, + LX_SIGPROF, + LX_SIGXCPU, + LX_SIGXFSZ, + -1, /* 32: Solaris SIGWAITING */ + -1, /* 33: Solaris SIGLWP */ + -1, /* 34: Solaris SIGFREEZE */ + -1, /* 35: Solaris SIGTHAW */ + -1, /* 36: Solaris SIGCANCEL */ + -1, /* 37: Solaris SIGLOST */ + -1, /* 38: Solaris SIGXRES */ + -1, /* 39: Solaris SIGJVM1 */ + -1, /* 40: Solaris SIGJVM2 */ + LX_SIGRTMIN, /* 41: Solaris _SIGRTMIN */ + LX_SIGRTMIN + 1, + LX_SIGRTMIN + 2, + LX_SIGRTMIN + 3, + LX_SIGRTMIN + 4, + LX_SIGRTMIN + 5, + LX_SIGRTMIN + 6, + LX_SIGRTMAX, /* 48: Solaris _SIGRTMAX */ +}; diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h new file mode 100644 index 0000000000..1ec6fa09c9 --- /dev/null +++ b/usr/src/common/brand/lx/lx_signum.h @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_SIGNUM_H +#define _LX_SIGNUM_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_SIGHUP 1 +#define LX_SIGINT 2 +#define LX_SIGQUIT 3 +#define LX_SIGILL 4 +#define LX_SIGTRAP 5 +#define LX_SIGABRT 6 +#define LX_SIGIOT 6 +#define LX_SIGBUS 7 +#define LX_SIGFPE 8 +#define LX_SIGKILL 9 +#define LX_SIGUSR1 10 +#define LX_SIGSEGV 11 +#define LX_SIGUSR2 12 +#define LX_SIGPIPE 13 +#define LX_SIGALRM 14 +#define LX_SIGTERM 15 +#define LX_SIGSTKFLT 16 +#define LX_SIGCHLD 17 +#define LX_SIGCONT 18 +#define LX_SIGSTOP 19 +#define LX_SIGTSTP 20 +#define LX_SIGTTIN 21 +#define LX_SIGTTOU 22 +#define LX_SIGURG 23 +#define LX_SIGXCPU 24 +#define LX_SIGXFSZ 25 +#define LX_SIGVTALRM 26 +#define LX_SIGPROF 27 +#define LX_SIGWINCH 28 +#define LX_SIGIO 29 +#define LX_SIGPOLL LX_SIGIO +#define LX_SIGPWR 30 +#define LX_SIGSYS 31 +#define LX_SIGUNUSED 31 + +#define LX_NSIG_WORDS 2 +#define LX_NBPW 32 +#define LX_NSIG ((LX_NBPW * LX_NSIG_WORDS) + 1) + +#define LX_SIGRTMIN 32 +#define LX_SIGRTMAX LX_NSIG - 1 + +extern const int ltos_signo[]; +extern const int stol_signo[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SIGNUM_H */ diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h index 51b8dc7005..3af98c1a6b 100644 --- a/usr/src/head/libzonecfg.h +++ b/usr/src/head/libzonecfg.h @@ -48,6 +48,7 @@ extern "C" { #include <stdio.h> #include <rctl.h> #include <zone.h> +#include <libbrand.h> #include <sys/uuid.h> #define ZONE_ID_UNDEFINED -1 @@ -88,6 +89,7 @@ extern "C" { #define Z_PRIV_PROHIBITED 37 /* specified privilege is prohibited */ #define Z_PRIV_REQUIRED 38 /* required privilege is missing */ #define Z_PRIV_UNKNOWN 39 /* specified privilege is unknown */ +#define Z_BRAND_ERROR 40 /* brand-specific error */ /* * Warning: these are shared with the admin/install consolidation. 
@@ -213,6 +215,7 @@ extern int zonecfg_get_handle(const char *, zone_dochandle_t); extern int zonecfg_get_snapshot_handle(const char *, zone_dochandle_t); extern int zonecfg_get_template_handle(const char *, const char *, zone_dochandle_t); +extern int zonecfg_get_xml_handle(const char *, zone_dochandle_t); extern int zonecfg_check_handle(zone_dochandle_t); extern void zonecfg_fini_handle(zone_dochandle_t); extern int zonecfg_destroy(const char *, boolean_t); @@ -242,6 +245,12 @@ extern int zonecfg_get_bootargs(zone_dochandle_t, char *, size_t); extern int zonecfg_set_bootargs(zone_dochandle_t, char *); /* + * Set/retrieve the brand for the zone + */ +extern int zonecfg_get_brand(zone_dochandle_t, char *, size_t); +extern int zonecfg_set_brand(zone_dochandle_t, char *); + +/* * Filesystem configuration. */ extern int zonecfg_add_filesystem(zone_dochandle_t, struct zone_fstab *); @@ -335,6 +344,10 @@ extern int zonecfg_devwalk(zone_dochandle_t handle, extern int zonecfg_devperms_apply(zone_dochandle_t, const char *, uid_t, gid_t, mode_t, const char *); +/* + * External zone verification support. + */ +extern int zonecfg_verify_save(zone_dochandle_t, char *); /* * '*ent' iterator routines. @@ -383,6 +396,7 @@ extern int zonecfg_set_limitpriv(zone_dochandle_t, char *); /* * Higher-level routines. */ +extern int zone_get_brand(char *, char *, size_t); extern int zone_get_rootpath(char *, char *, size_t); extern int zone_get_devroot(char *, char *, size_t); extern int zone_get_zonepath(char *, char *, size_t); diff --git a/usr/src/head/link.h b/usr/src/head/link.h index 8fd331466c..59b33e6ae2 100644 --- a/usr/src/head/link.h +++ b/usr/src/head/link.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -103,7 +102,10 @@ extern void ld_section64(); #define LM_ID_BASE 0x00 #define LM_ID_LDSO 0x01 -#define LM_ID_NUM 2 +#define LM_ID_BRAND 0x02 /* marks branded objs in rd_loadobj_t */ + +#define LM_ID_NUM 3 + #define LM_ID_NEWLM 0xff /* create a new link-map */ diff --git a/usr/src/head/proc_service.h b/usr/src/head/proc_service.h index 85cbd1c0f1..1f726da0f2 100644 --- a/usr/src/head/proc_service.h +++ b/usr/src/head/proc_service.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -132,6 +131,7 @@ extern ps_err_e ps_lgetLDT(struct ps_prochandle *, lwpid_t, struct ssd *); #endif extern ps_err_e ps_pauxv(struct ps_prochandle *, const auxv_t **); +extern ps_err_e ps_pbrandname(struct ps_prochandle *, char *, size_t); extern ps_err_e ps_kill(struct ps_prochandle *, int sig); extern ps_err_e ps_lrolltoaddr(struct ps_prochandle *, diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h index 05f6012f59..c8a58a1981 100644 --- a/usr/src/head/regexp.h +++ b/usr/src/head/regexp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 1997-2002 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -101,6 +100,11 @@ int seof; int i, cflg; int iflag; /* used for non-ascii characters in brackets */ +#ifdef __lint + /* make lint happy */ + c = nodelim; +#endif + lastep = NULL; if ((c = GETC()) == eof || c == '\n') { if (c == '\n') { @@ -117,7 +121,7 @@ int seof; circf++; else UNGETC(c); - while (1) { + for (;;) { if (ep >= endbuf) ERROR(50); c = GETC(); @@ -308,6 +312,7 @@ int seof; *ep++ = (char)c; } } + /*NOTREACHED*/ } #ifdef __STDC__ @@ -367,7 +372,7 @@ register char *lp, *ep; register char neg; size_t ct; - while (1) { + for (;;) { neg = 0; switch (*ep++) { @@ -571,6 +576,7 @@ register char *lp, *ep; } } + /*NOTREACHED*/ } static void diff --git a/usr/src/head/rtld_db.h b/usr/src/head/rtld_db.h index 2d726730dc..52575a3bd3 100644 --- a/usr/src/head/rtld_db.h +++ b/usr/src/head/rtld_db.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -36,6 +35,7 @@ extern "C" { #include <sys/types.h> #include <sys/lwp.h> +#include <sys/elf.h> #include <link.h> #include <proc_service.h> @@ -129,6 +129,10 @@ typedef struct rd_loadobj { /* module which was dynamically */ /* loaded */ +/* + * Commands for rd_ctl() + */ +#define RD_CTL_SET_HELPPATH 0x01 /* Set the path used to find helpers */ typedef struct rd_agent rd_agent_t; #ifdef __STDC__ @@ -158,6 +162,28 @@ typedef struct rd_plt_info { } rd_plt_info_t; /* + * State kept for brand helper libraries + */ +typedef struct rd_helper_ops { + void *(*rho_init)(struct ps_prochandle *); + int (*rho_loadobj_iter)(struct ps_prochandle *, rl_iter_f *cb, + void *client_data, void *helper_data); + void (*rho_fix_phdrs)(struct rd_agent *, Elf32_Dyn *, size_t, + psaddr_t addr); +} rd_helper_ops_t; + +typedef struct rd_helper { + rd_helper_ops_t *rh_ops; + void *rh_data; + void *rh_dlhandle; +} rd_helper_t; + +/* + * Brand helper libraries must name their ops vector using this macro. + */ +#define RTLD_DB_BRAND_OPS rtld_db_brand_ops + +/* * Values for pi_flags */ #define RD_FLG_PI_PLTBOUND 0x0001 /* Indicates that the PLT */ @@ -177,6 +203,7 @@ extern rd_err_e rd_event_addr(rd_agent_t *, rd_event_e, rd_notify_t *); extern rd_err_e rd_event_enable(rd_agent_t *, int); extern rd_err_e rd_event_getmsg(rd_agent_t *, rd_event_msg_t *); extern rd_err_e rd_init(int); +extern rd_err_e rd_ctl(int, void *); extern rd_err_e rd_loadobj_iter(rd_agent_t *, rl_iter_f *, void *); extern void rd_log(const int); @@ -184,6 +211,8 @@ extern rd_agent_t *rd_new(struct ps_prochandle *); extern rd_err_e rd_objpad_enable(struct rd_agent *, size_t); extern rd_err_e rd_plt_resolution(rd_agent_t *, psaddr_t, lwpid_t, psaddr_t, rd_plt_info_t *); +extern void rd_fix_phdrs(struct rd_agent *, Elf32_Dyn *, + size_t, uintptr_t); extern rd_err_e rd_reset(struct rd_agent *); #else extern void rd_delete(); @@ -192,11 +221,13 @@ extern rd_err_e rd_event_addr(); extern rd_err_e rd_event_enable(); extern 
rd_err_e rd_event_getmsg(); extern rd_err_e rd_init(); +extern rd_err_e rd_ctl(); extern rd_err_e rd_loadobj_iter(); extern void rd_log(); extern rd_agent_t *rd_new(); extern rd_err_e rd_objpad_enable(); extern rd_err_e rd_plt_resolution(); +extern void rd_fix_phdrs(); extern rd_err_e rd_reset(); #endif diff --git a/usr/src/head/string.h b/usr/src/head/string.h index 7111355608..da057817bf 100644 --- a/usr/src/head/string.h +++ b/usr/src/head/string.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,6 +87,8 @@ extern char *strtok_r(char *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, defined(__XOPEN_OR_POSIX) extern void *memccpy(void *_RESTRICT_KYWD, const void *_RESTRICT_KYWD, int, size_t); +extern int uucopy(const void *_RESTRICT_KYWD, void *_RESTRICT_KYWD, size_t); +extern int uucopystr(const void *_RESTRICT_KYWD, void *_RESTRICT_KYWD, size_t); #endif #if defined(__EXTENSIONS__) || \ @@ -121,6 +122,8 @@ extern char *strtok_r(); #if defined(__EXTENSIONS__) || !defined(_STRICT_STDC) || \ defined(__XOPEN_OR_POSIX) extern void *memccpy(); +extern int uucopy(); +extern int uucopystr(); #endif #if defined(__EXTENSIONS__) || !defined(__XOPEN_OR_POSIX) diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index d2ee41d742..ad44c2be2d 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -186,6 +186,7 @@ SUBDIRS += \ libxcurses \ libxcurses2 \ libxnet \ + libbrand .WAIT \ libzonecfg \ libzoneinfo \ libtsnet \ @@ -218,6 +219,7 @@ SUBDIRS += \ libzfs \ libzfs_jni \ libmapid \ + brand \ $($(MACH)_SUBDIRS) sparc_SUBDIRS= .WAIT \ @@ -248,6 +250,7 @@ DCSUBDIRS = \ MSGSUBDIRS= \ abi \ auditd_plugins \ + brand \ cfgadm_plugins \ gss_mechs/mech_dh \ gss_mechs/mech_krb5 \ @@ -294,6 +297,7 @@ sparc_MSGSUBDIRS= \ HDRSUBDIRS= \ auditd_plugins \ + libbrand \ libbsm \ libc \ libcmdutils \ @@ -466,6 +470,10 @@ libsocket: libnsl libldap5: libsasl libsocket libnsl libmd libsldap: libldap5 libtsol libpool: libnvpair libexacct +libzonecfg: libc libsocket libnsl libuuid libnvpair libsysevent libsec \ + libbrand +libproc: ../cmd/sgs/librtld_db libzonecfg ../cmd/sgs/libelf libctf \ + libbrand libproject: libpool libproc libsecdb libtsnet: libnsl libtsol libsecdb libwrap: libnsl libsocket @@ -485,6 +493,7 @@ libzfs: libdevinfo libdevid libgen libnvpair libuutil libzfs_jni: libdiskmgt libnvpair libzfs libzpool: libavl libumem libnvpair libsec: libavl +brand: libc libsocket # # The reason this rule checks for the existence of the diff --git a/usr/src/lib/brand/Makefile 
b/usr/src/lib/brand/Makefile new file mode 100644 index 0000000000..6c265d50fb --- /dev/null +++ b/usr/src/lib/brand/Makefile @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/Makefile +# +# include global definitions +include ../../Makefile.master + +# +# Build everything in parallel; use .WAIT for dependencies +.PARALLEL: + +i386_SUBDIRS= lx +i386_MSGSUBDIRS= lx + +SUBDIRS= sn1 native $($(MACH)_SUBDIRS) +MSGSUBDIRS= $($(MACH)_MSGSUBDIRS) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +_msg := TARGET= _msg + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +_msg: $(MSGSUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/Makefile.brand b/usr/src/lib/brand/Makefile.brand new file mode 100644 index 0000000000..7811875d66 --- /dev/null +++ b/usr/src/lib/brand/Makefile.brand @@ -0,0 +1,79 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +include $(SRC)/Makefile.master + +ROOTDIR= $(ROOT)/usr/lib/brand +ROOTTEMPLATEDIR= $(ROOT)/etc/zones +ROOTBRANDDIR= $(ROOTDIR)/$(BRAND) +ROOTBRANDDIR64= $(ROOTDIR)/$(BRAND)/$(MACH64) + +ROOTPROGS= $(PROGS:%=$(ROOTBRANDDIR)/%) +ROOTTXTS= $(TXTS:%=$(ROOTBRANDDIR)/%) +ROOTXMLDOCS= $(XMLDOCS:%=$(ROOTBRANDDIR)/%) + +ROOTTEMPLATES= $(TEMPLATES:%=$(ROOTTEMPLATEDIR)/%) + +$(ROOTBRANDDIR) := FILEMODE = 755 +$(ROOTBRANDDIR) := OWNER = root +$(ROOTBRANDDIR) := GROUP = sys + +$(ROOTXMLDOCS) := FILEMODE = 444 +$(ROOTXMLDOCS) := OWNER = root +$(ROOTXMLDOCS) := GROUP = bin + +$(ROOTTEMPLATEDIR) := FILEMODE = 755 +$(ROOTTEMPLATEDIR) := OWNER = root +$(ROOTTEMPLATEDIR) := GROUP = sys + +$(ROOTTEMPLATES) := FILEMODE = 444 +$(ROOTTEMPLATES) := OWNER = root +$(ROOTTEMPLATES) := GROUP = bin + +$(ROOTTXTS) := FILEMODE = 444 +$(ROOTTXTS) := OWNER = root +$(ROOTTXTS) := GROUP = sys + +$(ROOTPROGS) := FILEMODE = 755 +$(ROOTPROGS) := OWNER = root +$(ROOTPROGS) := GROUP = bin + +$(ROOTBRANDDIR) $(ROOTTEMPLATEDIR): + $(INS.dir) + +$(ROOTBRANDDIR64): + $(INS.dir); cd $(ROOTBRANDDIR); \ + $(RM) 64; $(SYMLINK) $(MACH64) 64; + +$(ROOTBRANDDIR)/% $(ROOTBRANDDIR64)/% $(ROOTTEMPLATEDIR)/%: % + $(INS.file) + +.SUFFIXES: .ksh + +.ksh: + $(RM) $@ + $(CAT) $< > $@ + $(CHMOD) +x $@ diff --git a/usr/src/lib/brand/lx/Makefile b/usr/src/lib/brand/lx/Makefile new file mode 100644 index 0000000000..7fafad20da --- /dev/null +++ b/usr/src/lib/brand/lx/Makefile @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +default: all + +include Makefile.lx + +# Build everything in parallel; use .WAIT for dependencies +.PARALLEL: + +SUBDIRS= cmd librtld_db lx_support lx_brand lx_thunk netfiles zone \ + .WAIT lx_nametoaddr +MSGSUBDIRS= lx_brand lx_support zone + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint +_msg := TARGET= _msg + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +_msg: $(MSGSUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/Makefile.lx b/usr/src/lib/brand/lx/Makefile.lx new file mode 100644 index 0000000000..4db4679cef --- /dev/null +++ b/usr/src/lib/brand/lx/Makefile.lx @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/Makefile.lx +# +# include global definitions + +BRAND= lx + +include $(SRC)/lib/brand/Makefile.brand + diff --git a/usr/src/lib/brand/lx/cmd/Makefile b/usr/src/lib/brand/lx/cmd/Makefile new file mode 100644 index 0000000000..6b096dd129 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/Makefile @@ -0,0 +1,48 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +PROGS = lx_lockd lx_native lx_statd lx_thunk + +include ../Makefile.lx + +# override the install directory +ROOTBIN = $(ROOTBRANDDIR) +CLOBBERFILES = $(ROOTPROGS) + +.KEEP_STATE: + +lint: + +all: $(PROGS) + +install: all $(ROOTBRANDDIR) $(ROOTPROGS) + +clean: + $(RM) $(PROGS) + +clobber: clean + $(RM) $(ROOTPROGS) diff --git a/usr/src/lib/brand/lx/cmd/lx_lockd.sh b/usr/src/lib/brand/lx/cmd/lx_lockd.sh new file mode 100644 index 0000000000..9cd96d99d5 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_lockd.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LD_LIBRARY_PATH=/usr/lib/brand/lx +LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1 +export LD_LIBRARY_PATH LD_PRELOAD + +exec /native/usr/lib/brand/lx/lx_native \ + /native/usr/lib/nfs/lockd -P -U 29 -G 29 diff --git a/usr/src/lib/brand/lx/cmd/lx_native.sh b/usr/src/lib/brand/lx/cmd/lx_native.sh new file mode 100644 index 0000000000..8e8344a375 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_native.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +exit 0 diff --git a/usr/src/lib/brand/lx/cmd/lx_statd.sh b/usr/src/lib/brand/lx/cmd/lx_statd.sh new file mode 100644 index 0000000000..f218d7876a --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_statd.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LD_LIBRARY_PATH=/usr/lib/brand/lx +LD_PRELOAD=/native/usr/lib/brand/lx/lx_thunk.so.1 +export LD_LIBRARY_PATH LD_PRELOAD + +exec /native/usr/lib/brand/lx/lx_native \ + /native/usr/lib/nfs/statd -P -U 29 -G 29 diff --git a/usr/src/lib/brand/lx/cmd/lx_thunk.sh b/usr/src/lib/brand/lx/cmd/lx_thunk.sh new file mode 100644 index 0000000000..4e1e6cbc03 --- /dev/null +++ b/usr/src/lib/brand/lx/cmd/lx_thunk.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +exec /native/usr/lib/brand/lx/lx_thunk diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile b/usr/src/lib/brand/lx/librtld_db/Makefile new file mode 100644 index 0000000000..ce2b48a6a8 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/Makefile @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +default: all + +include ../../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +LINT_SUBDIRS= $(MACH) +$(BUILD64)LINT_SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber: $(ROOTBRANDDIR) $(ROOTBRANDDIR64) $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile.com b/usr/src/lib/brand/lx/librtld_db/Makefile.com new file mode 100644 index 0000000000..8c723ad86b --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/Makefile.com @@ -0,0 +1,59 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = lx_librtld_db.a +VERS = .1 +COBJS = lx_librtld_db.o +OBJECTS = $(COBJS) + +include ../../../../Makefile.lib +include ../../Makefile.lx + +CSRCS = $(COBJS:%o=../common/%c) +SRCS = $(CSRCS) + +SRCDIR = ../common +UTSBASE = ../../../../../uts + +LIBS = $(DYNLIB) +LDLIBS += -lc -lproc +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx \ + -I../../../../../cmd/sgs/librtld_db/common \ + -I../../../../../cmd/sgs/include \ + -I../../../../../cmd/sgs/include/$(MACH) + +ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx +ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64) + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../../Makefile.targ diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/Makefile b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile new file mode 100644 index 0000000000..4d6f84378e --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/librtld_db/amd64/Makefile + +ISASRCDIR=. 
+ +ASFLAGS += -P -D_ASM + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) + +install: $(ROOTLIBDIR64) all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c new file mode 100644 index 0000000000..db66f77274 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c @@ -0,0 +1,471 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/link.h> +#include <libproc.h> +#include <proc_service.h> +#include <rtld_db.h> +#include <synch.h> +#include <sys/lx_brand.h> + +static void *lx_ldb_client_init(struct ps_prochandle *); +static int lx_ldb_iter(struct ps_prochandle *, rl_iter_f *, void *, void *); +static void lx_ldb_fix_phdr(struct rd_agent *, Elf32_Dyn *, size_t, + psaddr_t); + +struct rd_agent { + mutex_t rd_mutex; + struct ps_prochandle *rd_psp; /* prochandle pointer */ + psaddr_t rd_rdebug; /* rtld r_debug */ + psaddr_t rd_preinit; /* rtld_db_preinit */ + psaddr_t rd_postinit; /* rtld_db_postinit */ + psaddr_t rd_dlact; /* rtld_db_dlact */ + psaddr_t rd_tbinder; /* tail of binder */ + psaddr_t rd_rtlddbpriv; /* rtld rtld_db_priv */ + ulong_t rd_flags; /* flags */ + ulong_t rd_rdebugvers; /* rtld_db_priv.vers */ + int rd_dmodel; /* data model */ + rd_helper_t rd_helper; /* private to helper */ +}; + +typedef struct lx_rd { + struct ps_prochandle *lr_php; /* prochandle of target */ + uint32_t lr_rdebug; /* address of lx r_debug */ + uint32_t lr_exec; /* base address of main executable */ +} lx_rd_t; + +rd_helper_ops_t RTLD_DB_BRAND_OPS = { + lx_ldb_client_init, + lx_ldb_iter, + lx_ldb_fix_phdr +}; + +struct lx_link_map +{ + uint32_t lxm_addr; /* Base address shared object is loaded at. */ + uint32_t lxm_name; /* Absolute file name object was found in. */ + uint32_t lxm_ld; /* Dynamic section of the shared object. */ + uint32_t lxm_next; /* Chain of loaded objects. */ + uint32_t lxm_prev; +}; + +struct lx_r_debug +{ + int r_version; /* Version number for this protocol. */ + uint32_t r_map; /* Head of the chain of loaded objects. 
*/ + + /* + * This is the address of a function internal to the run-time linker, + * that will always be called when the linker begins to map in a + * library or unmap it, and again when the mapping change is complete. + * The debugger can set a breakpoint at this address if it wants to + * notice shared object mapping changes. + */ + uint32_t r_brk; + r_state_e r_state; /* defined the same way between lx/solaris */ + uint32_t r_ldbase; /* Base address the linker is loaded at. */ +}; + +/* + * A key difference between the linux linker and ours' is that the linux + * linker adds the base address of segments to certain values in the + * segments' ELF header. As an example, look at the address of the + * DT_HASH hash table in a Solaris section - it is a relative address + * which locates the start of the hash table, relative to the beginning + * of the ELF file. However, when the linux linker loads a section, it + * modifies the in-memory ELF image by changing address of the hash + * table to be an absolute address. This is only done for libraries - not for + * executables. + * + * Solaris tools expect the relative address to remain relative, so + * here we will modify the in-memory ELF image so that it once again + * contains relative addresses. + * + * To accomplish this, we walk through all sections in the target. + * Linux sections are identified by pointing to the linux linker or libc in the + * DT_NEEDED section. For all matching sections, we subtract the segment + * base address to get back to relative addresses. + */ +static void +lx_ldb_fix_phdr(struct rd_agent *rap, Elf32_Dyn *dp, size_t size, + psaddr_t addr) +{ + struct ps_prochandle *php = rap->rd_psp; + int i; + int strsz = 0; + uint32_t strtab_p = NULL; + char *strtab; + + /* + * First we need to find the address of the string table. 
+ */ + for (i = 0; i < size / sizeof (Elf32_Dyn); i++) { + if (dp[i].d_tag == DT_STRTAB) + strtab_p = dp[i].d_un.d_ptr; + if (dp[i].d_tag == DT_STRSZ) + strsz = dp[i].d_un.d_val; + } + if (strtab_p == NULL) { + ps_plog("lx_librtld_db: couldn't find strtab\n"); + return; + } + if (strsz == 0) { + ps_plog("lx_librtld_db: couldn't find strsz\n"); + return; + } + + if ((strtab = malloc(strsz)) == NULL) + return; + if (Pread(php, strtab, strsz, strtab_p) != strsz) { + ps_plog("lx_librtld_db: couldn't read strtab at %x\n", + strtab_p); + free(strtab); + return; + } + + /* + * ELF binaries may have more than one DT_NEEDED entry - we must + * check them all. The linux linker segment also needs to be fixed, + * but it doesn't have a DT_NEEDED entry. Instead, look for a + * matching DT_SONAME. + */ + for (i = 0; i < size / sizeof (Elf32_Dyn); i++) { + if (dp[i].d_tag == DT_SONAME && + strncmp(strtab + dp[i].d_un.d_ptr, LX_LINKER_NAME, + sizeof (LX_LINKER_NAME)) == 0) + break; + + if (dp[i].d_tag != DT_NEEDED) + continue; + + if (strncmp(strtab + dp[i].d_un.d_ptr, + LX_LINKER_NAME, sizeof (LX_LINKER_NAME)) == 0 || + strncmp(strtab + dp[i].d_un.d_ptr, LX_LIBC_NAME, + sizeof (LX_LIBC_NAME)) == 0) + break; + } + free(strtab); + if (i == size / sizeof (Elf32_Dyn)) { + /* + * This is not a linux mapping, so we have nothing left to do. + */ + ps_plog("lx_librtld_db: %x doesn't appear to be an lx object\n", + addr); + return; + } + + /* + * The linux linker added the segment's base address to a bunch of the + * dynamic section addresses. Fix them back to their original, on-disk + * format so Solaris understands them. 
+ */ + for (i = 0; i < size / sizeof (Elf32_Dyn); i++) { + switch (dp[i].d_tag) { + case DT_INIT: + case DT_FINI: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_DEBUG: + case DT_PLTGOT: + case DT_JMPREL: + case DT_REL: + case DT_VERNEED: + case DT_VERSYM: + if (dp[i].d_un.d_val > addr) { + dp[i].d_un.d_ptr -= addr; + } + break; + default: + break; + } + } +} + +/* + * The linux linker has an r_debug structure somewhere in its data section that + * contains the address of the head of the link map list. To find this, we will + * use the DT_DEBUG token in the executable's dynamic section. The linux linker + * wrote the address of its r_debug structure to the DT_DEBUG dynamic entry. We + * get the address of the executable's program headers from the + * AT_SUN_BRAND_PHDR aux vector entry. From there, we calculate the address of + * the Elf header, and from there we can easily get to the DT_DEBUG entry. + * + * Returns a malloc'ed lx_rd_t on success, or NULL (after logging via + * ps_plog) if memory cannot be allocated, the target cannot be read, or the + * executable has no PT_DYNAMIC / DT_DEBUG entry. + */ +static void * +lx_ldb_client_init(struct ps_prochandle *php) +{ + lx_rd_t *rd = calloc(sizeof (lx_rd_t), 1); + uint32_t phdr_addr; + Elf32_Dyn *dp; + Elf32_Phdr *ph, phdr, *phdrs; + Elf32_Ehdr ehdr; + int i; + + /* Verify the allocation before the first store through rd. */ + if (rd == NULL) { + ps_plog("lx_ldb_client_init: cannot allocate memory\n"); + return (NULL); + } + + rd->lr_rdebug = 0; + + phdr_addr = Pgetauxval(php, AT_SUN_BRAND_PHDR); + + if (ps_pread(php, phdr_addr, &phdr, sizeof (phdr)) != PS_OK) { + ps_plog("lx_ldb_client_init: couldn't read phdr at %x\n", + phdr_addr); + free(rd); + return (NULL); + } + + rd->lr_exec = phdr.p_vaddr - phdr.p_offset; + + if (ps_pread(php, rd->lr_exec, &ehdr, sizeof (ehdr)) != + PS_OK) { + ps_plog("lx_ldb_client_init: couldn't read ehdr\n"); + free(rd); + return (NULL); + } + + if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) { + ps_plog("lx_ldb_client_init: couldn't alloc phdrs memory\n"); + free(rd); + return (NULL); + } + + if (ps_pread(php, phdr_addr, phdrs, ehdr.e_phnum * ehdr.e_phentsize) != + PS_OK) { + ps_plog("lx_ldb_client_init: couldn't read 
phdrs at %x\n", + phdr_addr); + free(rd); + free(phdrs); + return (NULL); + } + + for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++, + /*LINTED */ + ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) { + + if (ph->p_type == PT_DYNAMIC) + break; + } + if (i == ehdr.e_phnum) { + ps_plog("lx_ldb_client_init: no PT_DYNAMIC in executable\n"); + free(rd); + free(phdrs); + return (NULL); + } + + if ((dp = malloc(ph->p_filesz)) == NULL) { + ps_plog("lx_ldb_client_init: couldn't alloc for PT_DYNAMIC\n"); + free(rd); + free(phdrs); + return (NULL); + } + + if (ps_pread(php, ph->p_vaddr, dp, ph->p_filesz) != PS_OK) { + ps_plog("lx_ldb_client_init: couldn't read dynamic at %x\n", + ph->p_vaddr); + free(rd); + free(phdrs); + free(dp); + return (NULL); + } + + for (i = 0; i < ph->p_filesz / sizeof (Elf32_Dyn); i++) { + if (dp[i].d_tag == DT_DEBUG) { + rd->lr_rdebug = dp[i].d_un.d_ptr; + break; + } + } + free(phdrs); + free(dp); + + if (rd->lr_rdebug == 0) { + ps_plog("lx_ldb_client_init: no DT_DEBUG found in exe\n"); + free(rd); + return (NULL); + } + + return (rd); +} + +/* + * Given the address of an ELF object in the target, return its size and + * the proper link map ID. 
+ */ +static size_t +lx_elf_props(struct ps_prochandle *php, uint32_t addr, psaddr_t *data_addr) +{ + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *ph; + int i; + uintptr_t min = (uintptr_t)-1; + uintptr_t max = 0; + /* Stays 0 if no PT_LOAD segment ever updates it below. */ + size_t sz = 0; + + if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) { + ps_plog("lx_elf_props: Couldn't read ELF header at %x\n", addr); + return (0); + } + + if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) + return (0); + + if (ps_pread(php, addr + ehdr.e_phoff, phdrs, ehdr.e_phnum * + ehdr.e_phentsize) != PS_OK) { + ps_plog("lx_elf_props: Couldn't read program headers at %x\n", + addr + ehdr.e_phoff); + /* Don't leak the program header buffer on the error path. */ + free(phdrs); + return (0); + } + + for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++, + /*LINTED */ + ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) { + + if (ph->p_type != PT_LOAD) + continue; + + if ((ph->p_flags & (PF_W | PF_R)) == (PF_W | PF_R)) { + *data_addr = ph->p_vaddr; + if (ehdr.e_type == ET_DYN) + *data_addr += addr; + if (*data_addr & (ph->p_align - 1)) + *data_addr = *data_addr & (~(ph->p_align -1)); + } + + if (ph->p_vaddr < min) + min = ph->p_vaddr; + + if (ph->p_vaddr > max) { + max = ph->p_vaddr; + sz = ph->p_memsz + max - min; + if (sz & (ph->p_align - 1)) + sz = (sz & (~(ph->p_align - 1))) + ph->p_align; + } + } + + free(phdrs); + return (sz); +} + +static int +lx_ldb_iter(struct ps_prochandle *php, rl_iter_f *cb, void *client_data, + void *rd_addr) +{ + lx_rd_t *lx_rd = (lx_rd_t *)rd_addr; + struct lx_r_debug r_debug; + struct lx_link_map map; + psaddr_t p = NULL; + int rc; + rd_loadobj_t exec; + + if ((rc = ps_pread(php, (psaddr_t)lx_rd->lr_rdebug, &r_debug, + sizeof (r_debug))) != PS_OK) { + /* Log the target address that was read, not our local handle. */ + ps_plog("lx_ldb_iter: Couldn't read linux r_debug at %x\n", + lx_rd->lr_rdebug); + return (rc); + } + + p = r_debug.r_map; + + /* + * The first item on the link map list is for the executable, but it + * doesn't give us any useful information about it. We need to + * synthesize a rd_loadobj_t for the client. 
+ * + * Linux doesn't give us the executable name, so we'll get it from + * the AT_EXECNAME entry instead. + */ + if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != PS_OK) { + ps_plog("lx_ldb_iter: Couldn't read linux link map at %x\n", p); + return (rc); + } + + bzero(&exec, sizeof (exec)); + exec.rl_base = lx_rd->lr_exec; + exec.rl_dynamic = map.lxm_ld; + exec.rl_nameaddr = Pgetauxval(php, AT_SUN_EXECNAME); + exec.rl_lmident = LM_ID_BASE; + + exec.rl_bend = exec.rl_base + + lx_elf_props(php, lx_rd->lr_exec, &exec.rl_data_base); + + if ((*cb)(&exec, client_data) == 0) { + ps_plog("lx_ldb_iter: client callb failed for executable\n"); + return (PS_ERR); + } + + for (p = map.lxm_next; p != NULL; p = map.lxm_next) { + rd_loadobj_t obj; + + if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != + PS_OK) { + ps_plog("lx_ldb_iter: Couldn't read lk map at %x\n", p); + return (rc); + } + + /* + * The linux link map has less information than the Solaris one. + * We need to go fetch the missing information from the ELF + * headers. + */ + + obj.rl_nameaddr = (psaddr_t)map.lxm_name; + obj.rl_base = map.lxm_addr; + obj.rl_refnameaddr = (psaddr_t)map.lxm_name; + obj.rl_plt_base = NULL; + obj.rl_plt_size = 0; + obj.rl_lmident = LM_ID_BASE; + + /* + * Ugh - we have to walk the ELF stuff, find the PT_LOAD + * sections, and calculate the end of the file's mappings + * ourselves. 
+ */ + + obj.rl_bend = map.lxm_addr + + lx_elf_props(php, map.lxm_addr, &obj.rl_data_base); + obj.rl_padstart = obj.rl_base; + obj.rl_padend = obj.rl_bend; + obj.rl_dynamic = map.lxm_ld; + obj.rl_tlsmodid = 0; + + ps_plog("lx_ldb_iter: %x to %x\n", obj.rl_base, obj.rl_bend); + + if ((*cb)(&obj, client_data) == 0) { + /* + * lxm_name is a 32-bit address in the target process, + * not a string in our address space, so print it as a + * number rather than dereferencing it with %s. + */ + ps_plog("lx_ldb_iter: Client callback failed on %x\n", + map.lxm_name); + return (rc); + } + } + return (RD_OK); +} diff --git a/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers new file mode 100644 index 0000000000..e3c40016b3 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +{ + global: + rtld_db_brand_ops; + local: + *; +}; diff --git a/usr/src/lib/brand/lx/librtld_db/i386/Makefile b/usr/src/lib/brand/lx/librtld_db/i386/Makefile new file mode 100644 index 0000000000..b931c56348 --- /dev/null +++ b/usr/src/lib/brand/lx/librtld_db/i386/Makefile @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/librtld_db/i386/Makefile + +ISASRCDIR=. + +ASFLAGS += -P -D_ASM + +include ../Makefile.com + +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) + +install: $(ROOTLIBDIR) all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile b/usr/src/lib/brand/lx/lx_brand/Makefile new file mode 100644 index 0000000000..de4fa338a0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/Makefile @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../../Makefile.lib + +default: all + +SUBDIRS= $(MACH) + +LINT_SUBDIRS= $(MACH) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint +_msg := TARGET= _msg + +.KEEP_STATE: + +all install clean clobber _msg: $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com new file mode 100644 index 0000000000..1b58e78ba0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com @@ -0,0 +1,102 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +LX_CMN = $(SRC)/common/brand/lx + +LIBRARY = lx_brand.a +VERS = .1 +COBJS = clock.o \ + clone.o \ + debug.o \ + dir.o \ + file.o \ + fcntl.o \ + fork.o \ + id.o \ + ioctl.o \ + iovec.o \ + lx_brand.o \ + lx_thunk_server.o \ + mem.o \ + misc.o \ + module.o \ + mount.o \ + open.o \ + pgrp.o \ + poll_select.o \ + priority.o \ + ptrace.o \ + rlimit.o \ + sched.o \ + sendfile.o \ + signal.o \ + socket.o \ + stat.o \ + statfs.o \ + sysctl.o \ + sysv_ipc.o \ + time.o \ + truncate.o \ + wait.o + +CMNOBJS = lx_signum.o +ASOBJS = lx_handler.o lx_runexe.o lx_crt.o +OBJECTS = $(CMNOBJS) $(COBJS) $(ASOBJS) + +include ../../Makefile.lx +include ../../../../Makefile.lib + +CSRCS = $(COBJS:%o=../common/%c) $(CMNOBJS:%o=$(LX_CMN)/%c) +ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s) +SRCS = $(CSRCS) $(ASSRCS) + +SRCDIR = ../common +UTSBASE = ../../../../../uts + +LIBS = $(DYNLIB) +LDLIBS += -lc -lsocket -lmapmalloc -lproc -lrtld_db +DYNFLAGS += -Wl,-e_start -Wl,-I/native/lib/ld.so.1 -M../common/mapfile +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) +ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../ \ + -I$(UTSBASE)/common/brand/lx + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +include ../../../../Makefile.targ + +pics/%.o: $(ISASRCDIR)/%.s + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +pics/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) diff --git a/usr/src/lib/brand/lx/lx_brand/common/clock.c b/usr/src/lib/brand/lx/lx_brand/common/clock.c new file mode 100644 index 0000000000..45610c0e76 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/clock.c @@ 
-0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <time.h> +#include <sys/lx_misc.h> + +/* + * Linux uses different values for it clock identifiers, so we have to do basic + * translations between the two. Thankfully, both Linux and Solaris implement + * the same POSIX SUSv3 clock types, so the semantics should be identical. 
+ */
+
+static int ltos_clock[] = {
+	CLOCK_REALTIME,
+	CLOCK_MONOTONIC,
+	CLOCK_PROCESS_CPUTIME_ID,
+	CLOCK_THREAD_CPUTIME_ID
+};
+
+/*
+ * Number of entries in ltos_clock[].  The valid indices are therefore
+ * 0 through LX_CLOCK_MAX - 1; LX_CLOCK_MAX itself is one past the end.
+ */
+#define	LX_CLOCK_MAX	(sizeof (ltos_clock) / sizeof (ltos_clock[0]))
+
+/*
+ * Each wrapper below range-checks the clock id it was handed, maps it
+ * through ltos_clock[], and passes the call to the native routine of the
+ * same name.  An out-of-range id yields EINVAL without indexing the table.
+ */
+int
+lx_clock_gettime(int clock, struct timespec *tp)
+{
+	/* ">=", not ">": LX_CLOCK_MAX is the element count, not the last id */
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (EINVAL);
+
+	return (clock_gettime(ltos_clock[clock], tp));
+}
+
+int
+lx_clock_settime(int clock, struct timespec *tp)
+{
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (EINVAL);
+
+	return (clock_settime(ltos_clock[clock], tp));
+}
+
+int
+lx_clock_getres(int clock, struct timespec *tp)
+{
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (EINVAL);
+
+	return (clock_getres(ltos_clock[clock], tp));
+}
+
+int
+lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
+    struct timespec *rmtp)
+{
+	if (clock < 0 || clock >= LX_CLOCK_MAX)
+		return (EINVAL);
+
+	/* the TIMER_ABSTIME flag is the same on Linux */
+	return (clock_nanosleep(ltos_clock[clock], flags, rqtp, rmtp));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
new file mode 100644
index 0000000000..a1f7b82889
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -0,0 +1,502 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <ucontext.h> +#include <thread.h> +#include <strings.h> +#include <libintl.h> +#include <sys/regset.h> +#include <sys/syscall.h> +#include <sys/inttypes.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/segments.h> +#include <signal.h> +#include <sys/lx_misc.h> +#include <sys/lx_types.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_brand.h> +#include <sys/lx_debug.h> +#include <sys/lx_thread.h> + +#define LX_CSIGNAL 0x000000ff +#define LX_CLONE_VM 0x00000100 +#define LX_CLONE_FS 0x00000200 +#define LX_CLONE_FILES 0x00000400 +#define LX_CLONE_SIGHAND 0x00000800 +#define LX_CLONE_PID 0x00001000 +#define LX_CLONE_PTRACE 0x00002000 +#define LX_CLONE_VFORK 0x00004000 +#define LX_CLONE_PARENT 0x00008000 +#define LX_CLONE_THREAD 0x00010000 +#define LX_CLONE_SYSVSEM 0x00040000 +#define LX_CLONE_SETTLS 0x00080000 +#define LX_CLONE_PARENT_SETTID 0x00100000 +#define LX_CLONE_CHILD_CLEARTID 0x00200000 +#define LX_CLONE_DETACH 0x00400000 +#define LX_CLONE_CHILD_SETTID 0x01000000 + +#define SHARED_AS \ + (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND) +#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK) +#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH) + +#define IS_FORK(f) (((f) & SHARED_AS) == 0) +#define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK) + +#define LX_EXIT 1 +#define LX_EXIT_GROUP 2 + +/* + * This is dicey. This seems to be an internal glibc structure, and not + * part of any external interface. 
Thus, it is subject to change without + * notice. FWIW, clone(2) itself seems to be an internal (or at least + * unstable) interface, since strace(1) shows it differently than the man + * page. + */ +struct lx_desc +{ + uint32_t entry_number; + uint32_t base_addr; + uint32_t limit; + uint32_t seg_32bit:1; + uint32_t contents:2; + uint32_t read_exec_only:1; + uint32_t limit_in_pages:1; + uint32_t seg_not_present:1; + uint32_t useable:1; + uint32_t empty:25; +}; + +struct clone_state { + void *c_retaddr; /* instr after clone()'s int80 */ + int c_flags; /* flags to clone(2) */ + int c_sig; /* signal to send on thread exit */ + void *c_stk; /* %esp of new thread */ + void *c_ptidp; + struct lx_desc *c_ldtinfo; /* thread-specific segment */ + void *c_ctidp; + uintptr_t c_gs; /* Linux's %gs */ + sigset_t c_sigmask; /* signal mask */ + uint64_t c_affmask; /* CPU affinity mask */ + volatile int *c_clone_res; /* pid/error returned to cloner */ +}; + +extern void lx_setup_clone(uintptr_t, void *, void *); + +/* + * Counter incremented when we vfork(2) ourselves, and decremented when the + * vfork(2)ed child exit(2)s or exec(2)s. + */ +static int is_vforked = 0; + +int +lx_exit(uintptr_t p1) +{ + int ret, status = (int)p1; + lx_tsd_t *lx_tsd; + + /* + * If we are a vfork(2)ed child, we need to exit as quickly and + * cleanly as possible to avoid corrupting our parent. + */ + if (is_vforked != 0) { + is_vforked--; + _exit(status); + } + + if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "exit", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_exit = LX_EXIT; + lx_tsd->lxtsd_exit_status = status; + + /* + * Block all signals in the exit context to avoid taking any signals + * (to the degree possible) while exiting. + */ + (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask); + + /* + * This thread is exiting. 
Restore the state of the thread to + * what it was before we started running linux code. + */ + (void) setcontext(&lx_tsd->lxtsd_exit_context); + + /* + * If we returned from the setcontext(2), something is very wrong. + */ + lx_err_fatal(gettext("%s: unable to set exit context: %s"), + "exit", strerror(errno)); + + /*NOTREACHED*/ + return (0); +} + +int +lx_group_exit(uintptr_t p1) +{ + int ret, status = (int)p1; + lx_tsd_t *lx_tsd; + + /* + * If we are a vfork(2)ed child, we need to exit as quickly and + * cleanly as possible to avoid corrupting our parent. + */ + if (is_vforked != 0) { + is_vforked--; + _exit(status); + } + + if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "group_exit", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_exit = LX_EXIT_GROUP; + lx_tsd->lxtsd_exit_status = status; + + /* + * Block all signals in the exit context to avoid taking any signals + * (to the degree possible) while exiting. + */ + (void) sigfillset(&lx_tsd->lxtsd_exit_context.uc_sigmask); + + /* + * This thread is exiting. Restore the state of the thread to + * what it was before we started running linux code. + */ + (void) setcontext(&lx_tsd->lxtsd_exit_context); + + /* + * If we returned from the setcontext(2), something is very wrong. + */ + lx_err_fatal(gettext("%s: unable to set exit context: %s"), + "group_exit", strerror(errno)); + + /*NOTREACHED*/ + return (0); +} + +static void * +clone_start(void *arg) +{ + int rval; + struct clone_state *cs = (struct clone_state *)arg; + lx_tsd_t lx_tsd; + + /* + * Let the kernel finish setting up all the needed state for this + * new thread. + * + * We already created the thread using the thr_create(3C) library + * call, so most of the work required to emulate lx_clone(2) has + * been done by the time we get to this point. 
Instead of creating
+	 * a new brandsys(2) subcommand to perform the last few bits of
+	 * bookkeeping, we just use the lx_clone() slot in the syscall
+	 * table.
+	 */
+	lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
+	lx_debug("\tLX_SYS_clone(0x%x, 0x%p, 0x%p, 0x%p, 0x%p)",
+	    cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp);
+
+	rval = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_clone,
+	    cs->c_flags, cs->c_stk, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp,
+	    NULL);
+
+	/*
+	 * At this point the parent is waiting for cs->c_clone_res to go
+	 * non-zero to indicate the thread has been cloned.  The value set
+	 * in cs->c_clone_res will be used for the return value from
+	 * clone().
+	 */
+	if (rval < 0) {
+		*(cs->c_clone_res) = -errno;
+		lx_debug("\tkernel clone failed, errno %d\n", errno);
+		return (NULL);
+	}
+
+	if (lx_sched_setaffinity(0, sizeof (cs->c_affmask),
+	    (uintptr_t)&cs->c_affmask) != 0) {
+		*(cs->c_clone_res) = -errno;
+
+		lx_err_fatal(gettext(
+		    "Unable to set affinity mask in child thread: %s"),
+		    strerror(errno));
+	}
+
+	/* Initialize the thread specific data for this thread. */
+	bzero(&lx_tsd, sizeof (lx_tsd));
+	lx_tsd.lxtsd_gs = cs->c_gs;
+
+	/*
+	 * Use the address of the stack-allocated lx_tsd as the
+	 * per-thread storage area to cache various values for later
+	 * use.
+	 *
+	 * This address is only used by this thread, so there is no
+	 * danger of other threads using this storage area, nor of it
+	 * being accessed once this stack frame has been freed.
+	 *
+	 * thr_setspecific(3C) reports failure through its return value
+	 * and does not set errno, so store the result in errno before
+	 * using it.  rval must not be reused here: it still holds the
+	 * kernel clone result that is published to the parent further
+	 * down.
+	 */
+	if ((errno = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0) {
+		*(cs->c_clone_res) = -errno;
+		lx_err_fatal(
+		    gettext("Unable to set thread-specific ptr for clone: %s"),
+		    strerror(errno));
+	}
+
+	/*
+	 * Save the current context of this thread.
+	 *
+	 * We'll restore this context when this thread attempts to exit.
+ */ + if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) { + *(cs->c_clone_res) = -errno; + + lx_err_fatal(gettext( + "Unable to initialize thread-specific exit context: %s"), + strerror(errno)); + } + + /* + * Do the final stack twiddling, reset %gs, and return to the + * clone(2) path. + */ + if (lx_tsd.lxtsd_exit == 0) { + if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) { + *(cs->c_clone_res) = -errno; + + lx_err_fatal(gettext( + "Unable to release held signals for child " + "thread: %s"), strerror(errno)); + } + + /* + * Let the parent know that the clone has (effectively) been + * completed. + */ + *(cs->c_clone_res) = rval; + + lx_setup_clone(cs->c_gs, cs->c_retaddr, cs->c_stk); + + /* lx_setup_clone() should never return. */ + assert(0); + } + + /* + * We are here because the Linux application called the exit() or + * exit_group() system call. In turn the brand library did a + * setcontext() to jump to the thread context state saved in + * getcontext(), above. + */ + if (lx_tsd.lxtsd_exit == LX_EXIT) + thr_exit((void *)lx_tsd.lxtsd_exit_status); + else + exit(lx_tsd.lxtsd_exit_status); + + assert(0); + /*NOTREACHED*/ +} + +int +lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + struct clone_state *cs; + int flags = (int)p1; + void *cldstk = (void *)p2; + void *ptidp = (void *)p3; + struct lx_desc *ldtinfo = (void *)p4; + void *ctidp = (void *)p5; + thread_t tid; + volatile int clone_res; + int sig; + int rval; + lx_regs_t *rp; + sigset_t sigmask; + + if (flags & LX_CLONE_SETTLS) { + lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p " + "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp); + } else { + lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)", + flags, cldstk, ptidp); + } + + /* + * Only supported for pid 0 on Linux + */ + if (flags & LX_CLONE_PID) + return (-EINVAL); + + /* + * CLONE_THREAD require CLONE_SIGHAND. CLONE_THREAD and + * CLONE_DETACHED must both be either set or cleared. 
+ */ + if ((flags & CLONE_TD) && + (!(flags & LX_CLONE_SIGHAND) || ((flags & CLONE_TD) != CLONE_TD))) + return (-EINVAL); + + rp = lx_syscall_regs(); + + /* See if this is a fork() operation or a thr_create(). */ + if (IS_FORK(flags) || IS_VFORK(flags)) { + if (flags & LX_CLONE_PARENT) { + lx_unsupported(gettext( + "clone(2) only supports CLONE_PARENT " + "for threads.\n")); + return (-ENOTSUP); + } + + if (flags & LX_CLONE_PTRACE) + lx_ptrace_fork(); + + if (flags & LX_CLONE_VFORK) { + is_vforked++; + rval = vfork(); + if (rval != 0) + is_vforked--; + } else { + rval = fork1(); + } + + if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) + *((int *)ptidp) = rval; + + /* Parent just returns */ + if (rval != 0) + return ((rval < 0) ? -errno : rval); + + /* + * If provided, the child needs its new stack set up. + */ + if (cldstk) + lx_setup_clone(rp->lxr_gs, (void *)rp->lxr_eip, cldstk); + + return (0); + } + + /* + * We have very restricted support.... only exactly these flags are + * supported + */ + if (((flags & SHARED_AS) != SHARED_AS)) { + lx_unsupported(gettext( + "clone(2) requires that all or none of CLONE_VM " + "CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n")); + return (-ENOTSUP); + } + + if (cldstk == NULL) { + lx_unsupported(gettext( + "clone(2) requires the caller to allocate the " + "child's stack.\n")); + return (-ENOTSUP); + } + + /* + * If we want a signal-on-exit, ensure that the signal is valid. + */ + if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) { + lx_unsupported(gettext( + "clone(2) passed unsupported signal: %d"), sig); + return (-ENOTSUP); + } + + /* + * To avoid malloc() here, we steal a part of the new thread's + * stack to store all the info that thread might need for + * initialization. We also make it 64-bit aligned for good + * measure. 
+	 */
+	cs = (struct clone_state *)
+	    ((p2 - sizeof (struct clone_state)) & -((uintptr_t)8));
+	cs->c_flags = flags;
+	cs->c_sig = sig;
+	cs->c_stk = cldstk;
+	cs->c_ptidp = ptidp;
+	cs->c_ldtinfo = ldtinfo;
+	cs->c_ctidp = ctidp;
+	cs->c_clone_res = &clone_res;
+	cs->c_gs = rp->lxr_gs;
+
+	if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
+	    (uintptr_t)&cs->c_affmask) == -1)
+		lx_err_fatal(gettext(
+		    "Unable to get affinity mask for parent thread: %s"),
+		    strerror(errno));
+
+	/*
+	 * We want the new thread to return directly to the return site for
+	 * the system call.
+	 */
+	cs->c_retaddr = (void *)rp->lxr_eip;
+	clone_res = 0;
+
+	(void) sigfillset(&sigmask);
+
+	/*
+	 * Block all signals because the thread we create won't be able to
+	 * properly handle them until it's fully set up.
+	 */
+	if (sigprocmask(SIG_BLOCK, &sigmask, &cs->c_sigmask) < 0) {
+		lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
+		return (-errno);
+	}
+
+	/* NULL stack base and 0 stack size select the library defaults. */
+	rval = thr_create(NULL, 0, clone_start, cs, THR_DETACHED, &tid);
+
+	/*
+	 * Release any pending signals
+	 */
+	(void) sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL);
+
+	/*
+	 * thr_create(3C) returns a positive error number on failure.  Every
+	 * other failure path in this function returns a negative errno, so
+	 * negate it here; handing it back unchanged would look like the id
+	 * of a successfully created child.
+	 */
+	if (rval != 0)
+		return (-rval);
+
+	/*
+	 * Wait for the child to be created and have its tid assigned.
+	 * clone_start() stores either the new id or a negative errno in
+	 * clone_res once the kernel side of the clone has completed.
+	 */
+	while (clone_res == 0)
+		;
+
+	return (clone_res);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/debug.c b/usr/src/lib/brand/lx/lx_brand/common/debug.c
new file mode 100644
index 0000000000..dccdcbb419
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/debug.c
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <thread.h> +#include <unistd.h> + +#include <sys/modctl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <sys/lx_brand.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +/* internal debugging state */ +static char *lx_debug_path = NULL; /* debug output file path */ +static char lx_debug_path_buf[MAXPATHLEN]; + +void +lx_debug_enable(void) +{ + /* send all debugging output to /dev/tty */ + lx_debug_path = "/dev/tty"; + lx_debug("lx_debug: debugging output enabled: %s", lx_debug_path); +} + +void +lx_debug_init(void) +{ + if (getenv("LX_DEBUG") == NULL) + return; + + /* + * It's OK to use this value without any locking, as all callers can + * use the return value to decide whether extra work should be done + * before calling lx_debug(). + * + * If debugging is disabled after a routine calls this function it + * doesn't really matter as lx_debug() will see debugging is disabled + * and will not output anything. 
+	 */
+	lx_debug_enabled = 1;
+
+	/* check if there's a debug log file specified */
+	lx_debug_path = getenv("LX_DEBUG_FILE");
+	if (lx_debug_path == NULL) {
+		/* send all debugging output to /dev/tty */
+		lx_debug_path = "/dev/tty";
+	}
+
+	(void) strlcpy(lx_debug_path_buf, lx_debug_path,
+	    sizeof (lx_debug_path_buf));
+	lx_debug_path = lx_debug_path_buf;
+
+	lx_debug("lx_debug: debugging output ENABLED to path: \"%s\"",
+	    lx_debug_path);
+}
+
+/*
+ * Format a printf-style message, prefix it with "pid/tid: ", make sure it
+ * ends in a newline, and append it to the file named by lx_debug_path.
+ * Does nothing when lx_debug_enabled is 0.
+ *
+ * The caller's errno is saved on entry and restored on every exit path, so
+ * this routine can be called between a failing call and the code that
+ * inspects errno.
+ */
+void
+lx_debug(const char *msg, ...)
+{
+	va_list		ap;
+	char		buf[LX_MSG_MAXLEN + 1];
+	int		rv, fd, n;
+	int		errno_backup;
+
+	if (lx_debug_enabled == 0)
+		return;
+
+	errno_backup = errno;
+
+	/* prefix the message with pid/tid */
+	if ((n = snprintf(buf, sizeof (buf), "%u/%u: ",
+	    getpid(), thr_self())) == -1) {
+		errno = errno_backup;
+		return;
+	}
+
+	/* format the message */
+	va_start(ap, msg);
+	rv = vsnprintf(&buf[n], sizeof (buf) - n, msg, ap);
+	va_end(ap);
+	if (rv == -1) {
+		errno = errno_backup;
+		return;
+	}
+
+	/* add a newline if there isn't one already */
+	if ((buf[strlen(buf) - 1] != '\n') &&
+	    (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf))) {
+		errno = errno_backup;
+		return;
+	}
+
+	/*
+	 * Open the debugging output file.  Note that we don't protect
+	 * ourselves against exec or fork1 here.  If an MT process were
+	 * to exec/fork1 while we're doing this they'd end up with an
+	 * extra open descriptor in their fd space.  Ah well, it shouldn't
+	 * really matter.
+	 */
+	if ((fd = open(lx_debug_path,
+	    O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666)) == -1) {
+		/* open() has overwritten errno; put the caller's value back */
+		errno = errno_backup;
+		return;
+	}
+	(void) fchmod(fd, 0666);
+
+	/* we retry in case of EINTR */
+	do {
+		rv = write(fd, buf, strlen(buf));
+	} while ((rv == -1) && (errno == EINTR));
+	(void) fsync(fd);
+
+	(void) close(fd);
+	errno = errno_backup;
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/dir.c b/usr/src/lib/brand/lx/lx_brand/common/dir.c
new file mode 100644
index 0000000000..1c0a5aaf8f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/dir.c
@@ -0,0 +1,160 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+
+#define	LX_NAMEMAX	256
+
+struct lx_dirent {
+	long		d_ino;	/* not l_ino_t */
+	long		d_off;
+	ushort_t	d_reclen;
+	char		d_name[LX_NAMEMAX];
+};
+
+struct lx_dirent64 {
+	uint64_t	d_ino;
+	int64_t		d_off;
+	ushort_t	d_reclen;
+	uchar_t		d_type;
+	char		d_name[LX_NAMEMAX];
+};
+
+/*
+ * Size of an lx_dirent64 record holding a name of namelen bytes: the fixed
+ * header, the name, its terminating NUL, rounded up to a multiple of 8.
+ */
+#define	LX_RECLEN(namelen)	\
+	((offsetof(struct lx_dirent64, d_name) + 1 + (namelen) + 7) & ~7)
+
+/*
+ * Read in one dirent structure from fd into dirp.
+ * p3 (count) is ignored.
+ *
+ * Returns 1 when an entry was read, 0 at end of directory, or a negative
+ * errno on failure.
+ */
+/*ARGSUSED*/
+int
+lx_readdir(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (int)p1;
+	struct lx_dirent *dirp = (struct lx_dirent *)p2;
+	uint_t count = sizeof (struct lx_dirent);
+	int rc = 0;
+	struct lx_dirent _ld;
+	struct dirent *sd = (struct dirent *)&_ld;
+
+	/*
+	 * Start from a zeroed buffer.  At end of directory getdents()
+	 * returns 0 and writes nothing, so without this d_reclen below
+	 * would be read from uninitialized stack, and that same stack
+	 * garbage would be copied out to the caller.
+	 */
+	(void) memset(&_ld, 0, sizeof (_ld));
+
+	/*
+	 * The byte count returned by getdents() cannot be passed back
+	 * as-is, since it might have squeezed more than one dirent in the
+	 * buffer we provided.  Only "negative", "zero" and "positive" are
+	 * meaningful here.
+	 *
+	 * getdents() will deal with the case of dirp == NULL
+	 */
+	if ((rc = getdents(fd, sd, count)) < 0)
+		return (-errno);
+
+	/*
+	 * Set rc 1 (pass), or 0 (end of directory).
+	 */
+	rc = (rc == 0 || sd->d_reclen == 0) ? 0 : 1;
+
+	if (uucopy(sd, dirp, count) != 0)
+		return (-errno);
+
+	return (rc);
+}
+
+/*
+ * Read in dirent64 structures from p1 (fd) into p2 (buffer).
+ * p3 (count) is the size of the memory area.
+ */
+int
+lx_getdents64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+	int fd = (uint_t)p1;
+	void *buf = (void *)p2;
+	void *sbuf, *lbuf;
+	int lbufsz = (uint_t)p3;
+	int sbufsz;
+	int namelen;
+	struct dirent *sd;
+	struct lx_dirent64 *ld;
+	int bytes, rc;
+
+	/*
+	 * Compare as signed: against the unsigned sizeof result a negative
+	 * lbufsz would be converted to a huge value and pass the check.
+	 */
+	if (lbufsz < (int)sizeof (struct lx_dirent64))
+		return (-EINVAL);
+
+	/*
+	 * The Linux dirent64 is bigger than the Solaris dirent64.
To + * avoid inadvertently consuming more of the directory than we can + * pass back to the Linux app, we hand the kernel a smaller buffer + * than the app handed us. + */ + sbufsz = (lbufsz / 32) * 24; + + sbuf = SAFE_ALLOCA(sbufsz); + lbuf = SAFE_ALLOCA(lbufsz); + if (sbuf == NULL || lbuf == NULL) + return (-ENOMEM); + + if ((bytes = getdents(fd, sbuf, sbufsz)) < 0) + return (-errno); + + /* munge the Solaris buffer to a linux buffer. */ + sd = (struct dirent *)sbuf; + ld = (struct lx_dirent64 *)lbuf; + rc = 0; + while (bytes > 0) { + namelen = strlen(sd->d_name); + if (namelen >= LX_NAMEMAX) + namelen = LX_NAMEMAX - 1; + ld->d_ino = (uint64_t)sd->d_ino; + ld->d_off = (int64_t)sd->d_off; + ld->d_type = 0; + + (void) strncpy(ld->d_name, sd->d_name, namelen); + ld->d_name[namelen] = 0; + ld->d_reclen = (ushort_t)LX_RECLEN(namelen); + + bytes -= (int)sd->d_reclen; + rc += (int)ld->d_reclen; + + sd = (struct dirent *)(void *)((caddr_t)sd + sd->d_reclen); + ld = (struct lx_dirent64 *)(void *)((caddr_t)ld + ld->d_reclen); + } + + /* now copy the lbuf to the userland buffer */ + assert(rc <= lbufsz); + if (uucopy(lbuf, buf, rc) != 0) + return (-EFAULT); + + return (rc); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/fcntl.c b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c new file mode 100644 index 0000000000..22fcef1ed8 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c @@ -0,0 +1,385 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/filio.h> +#include <unistd.h> +#include <fcntl.h> +#include <stropts.h> +#include <libintl.h> +#include <errno.h> +#include <string.h> + +#include <sys/lx_fcntl.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +static int lx_fcntl_com(int fd, int cmd, ulong_t arg); +static void ltos_flock(struct lx_flock *l, struct flock *s); +static void stol_flock(struct flock *s, struct lx_flock *l); +static void ltos_flock64(struct lx_flock64 *l, struct flock64 *s); +static void stol_flock64(struct flock64 *s, struct lx_flock64 *l); +static short ltos_type(short l_type); +static short stol_type(short l_type); +static int lx_fcntl_getfl(int fd); +static int lx_fcntl_setfl(int fd, ulong_t arg); + +int +lx_dup2(uintptr_t p1, uintptr_t p2) +{ + int oldfd = (int)p1; + int newfd = (int)p2; + int rc; + + rc = fcntl(oldfd, F_DUP2FD, newfd); + return ((rc == -1) ? -errno : rc); +} + +int +lx_fcntl(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + ulong_t arg = (ulong_t)p3; + struct lx_flock lxflk; + struct flock fl; + int lk = 0; + int rc; + + /* + * The 64-bit fcntl commands must go through fcntl64(). 
+ */ + if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLK64 || + cmd == LX_F_SETLKW64) + return (-EINVAL); + + if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE || + cmd == LX_F_GETLEASE) { + lx_unsupported(gettext("%s(): unsupported command: %d"), + "fcntl", cmd); + return (-ENOTSUP); + } + + if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || + cmd == LX_F_SETLKW) { + if (uucopy((void *)p3, (void *)&lxflk, + sizeof (struct lx_flock)) != 0) + return (-errno); + lk = 1; + ltos_flock(&lxflk, &fl); + arg = (ulong_t)&fl; + } + + rc = lx_fcntl_com(fd, cmd, arg); + + if (lk) + stol_flock(&fl, (struct lx_flock *)p3); + + return (rc); +} + +int +lx_fcntl64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + struct lx_flock lxflk; + struct lx_flock64 lxflk64; + struct flock fl; + struct flock64 fl64; + int rc; + + if (cmd == LX_F_SETSIG || cmd == LX_F_GETSIG || cmd == LX_F_SETLEASE || + cmd == LX_F_GETLEASE) { + lx_unsupported(gettext("%s(): unsupported command: %d"), + "fcntl64", cmd); + return (-ENOTSUP); + } + + if (cmd == LX_F_GETLK || cmd == LX_F_SETLK || cmd == LX_F_SETLKW) { + if (uucopy((void *)p3, (void *)&lxflk, + sizeof (struct lx_flock)) != 0) + return (-errno); + ltos_flock(&lxflk, &fl); + rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl); + stol_flock(&fl, (struct lx_flock *)p3); + } else if (cmd == LX_F_GETLK64 || cmd == LX_F_SETLKW64 || \ + cmd == LX_F_SETLK64) { + if (uucopy((void *)p3, (void *)&lxflk64, + sizeof (struct lx_flock64)) != 0) + return (-errno); + ltos_flock64(&lxflk64, &fl64); + rc = lx_fcntl_com(fd, cmd, (ulong_t)&fl64); + stol_flock64(&fl64, (struct lx_flock64 *)p3); + } else { + rc = lx_fcntl_com(fd, cmd, (ulong_t)p3); + } + + return (rc); +} + +static int +lx_fcntl_com(int fd, int cmd, ulong_t arg) +{ + int rc = 0; + + switch (cmd) { + case LX_F_DUPFD: + rc = fcntl(fd, F_DUPFD, arg); + break; + + case LX_F_GETFD: + rc = fcntl(fd, F_GETFD, 0); + break; + + case LX_F_SETFD: + rc = fcntl(fd, F_SETFD, arg); + 
break; + + case LX_F_GETFL: + rc = lx_fcntl_getfl(fd); + break; + + case LX_F_SETFL: + rc = lx_fcntl_setfl(fd, arg); + break; + + case LX_F_GETLK: + rc = fcntl(fd, F_GETLK, arg); + break; + + case LX_F_SETLK: + rc = fcntl(fd, F_SETLK, arg); + break; + + case LX_F_SETLKW: + rc = fcntl(fd, F_SETLKW, arg); + break; + + case LX_F_GETLK64: + rc = fcntl(fd, F_GETLK64, arg); + break; + + case LX_F_SETLK64: + rc = fcntl(fd, F_SETLK64, arg); + break; + + case LX_F_SETLKW64: + rc = fcntl(fd, F_SETLKW64, arg); + break; + + case LX_F_SETOWN: + rc = fcntl(fd, FIOSETOWN, arg); + break; + + case LX_F_GETOWN: + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, FIOGETOWN, "FIOGETOWN"); + rc = ioctl(fd, FIOGETOWN, arg); + break; + + default: + return (-EINVAL); + } + + return ((rc == -1) ? -errno : rc); +} + + +#define LTOS_FLOCK(l, s) \ +{ \ + s->l_type = ltos_type(l->l_type); \ + s->l_whence = l->l_whence; \ + s->l_start = l->l_start; \ + s->l_len = l->l_len; \ + s->l_sysid = 0; /* not defined in linux */ \ + s->l_pid = (pid_t)l->l_pid; \ +} + +#define STOL_FLOCK(s, l) \ +{ \ + l->l_type = stol_type(s->l_type); \ + l->l_whence = s->l_whence; \ + l->l_start = s->l_start; \ + l->l_len = s->l_len; \ + l->l_pid = (int)s->l_pid; \ +} + +static void +ltos_flock(struct lx_flock *l, struct flock *s) +{ + LTOS_FLOCK(l, s) +} + +static void +stol_flock(struct flock *s, struct lx_flock *l) +{ + STOL_FLOCK(s, l) +} + +static void +ltos_flock64(struct lx_flock64 *l, struct flock64 *s) +{ + LTOS_FLOCK(l, s) +} + +static void +stol_flock64(struct flock64 *s, struct lx_flock64 *l) +{ + STOL_FLOCK(s, l) +} + +static short +ltos_type(short l_type) +{ + switch (l_type) { + case LX_F_RDLCK: + return (F_RDLCK); + case LX_F_WRLCK: + return (F_WRLCK); + case LX_F_UNLCK: + return (F_UNLCK); + default: + return (-1); + } +} + +static short +stol_type(short l_type) +{ + switch (l_type) { + case F_RDLCK: + return (LX_F_RDLCK); + case F_WRLCK: + return (LX_F_WRLCK); + case F_UNLCK: + return (LX_F_UNLCK); + 
default:
+		/* can't ever happen */
+		return (0);
+	}
+}
+
+/*
+ * lx_fcntl_getfl() - fetch the Solaris file status flags for fd and
+ * translate them into the Linux LX_O_* encoding.
+ *
+ * Returns the translated flag word, or a negated errno if the underlying
+ * fcntl(F_GETFL) fails.
+ */
+int
+lx_fcntl_getfl(int fd)
+{
+	int retval;
+	int rc;
+
+	/*
+	 * A failed F_GETFL returns -1; decoding that as a flag word would
+	 * report every flag as set instead of reporting the error.
+	 */
+	if ((retval = fcntl(fd, F_GETFL, 0)) == -1)
+		return (-errno);
+
+	if ((retval & O_ACCMODE) == O_RDONLY)
+		rc = LX_O_RDONLY;
+	else if ((retval & O_ACCMODE) == O_WRONLY)
+		rc = LX_O_WRONLY;
+	else
+		rc = LX_O_RDWR;
+	/* O_NDELAY != O_NONBLOCK, so we need to check for both */
+	if (retval & O_NDELAY)
+		rc |= LX_O_NDELAY;
+	if (retval & O_NONBLOCK)
+		rc |= LX_O_NONBLOCK;
+	if (retval & O_APPEND)
+		rc |= LX_O_APPEND;
+	if (retval & O_SYNC)
+		rc |= LX_O_SYNC;
+	if (retval & O_LARGEFILE)
+		rc |= LX_O_LARGEFILE;
+
+	return (rc);
+}
+
+/*
+ * lx_fcntl_setfl() - translate the Linux LX_O_* flags in arg into Solaris
+ * flags and apply them with fcntl(F_SETFL).  Returns 0 or a negated errno.
+ */
+int
+lx_fcntl_setfl(int fd, ulong_t arg)
+{
+	int new_arg;
+
+	new_arg = 0;
+	/* LX_O_NDELAY == LX_O_NONBLOCK, so we only check for one */
+	if (arg & LX_O_NDELAY)
+		new_arg |= O_NONBLOCK;
+	if (arg & LX_O_APPEND)
+		new_arg |= O_APPEND;
+	if (arg & LX_O_SYNC)
+		new_arg |= O_SYNC;
+	if (arg & LX_O_LARGEFILE)
+		new_arg |= O_LARGEFILE;
+
+	return ((fcntl(fd, F_SETFL, new_arg) == 0) ? 0 : -errno);
+}
+
+/*
+ * flock() applies or removes an advisory lock on the file
+ * associated with the file descriptor fd.
+ *
+ * Stolen verbatim from usr/src/ucblib/libucb/port/sys/flock.c
+ *
+ * operation is: LX_LOCK_SH, LX_LOCK_EX, LX_LOCK_UN, LX_LOCK_NB
+ */
+int
+lx_flock(uintptr_t p1, uintptr_t p2)
+{
+	int			fd = (int)p1;
+	int			operation = (int)p2;
+	struct flock		fl;
+	int			cmd;
+	int			ret;
+
+	/* In non-blocking lock, use F_SETLK for cmd, F_SETLKW otherwise */
+	if (operation & LX_LOCK_NB) {
+		cmd = F_SETLK;
+		operation &= ~LX_LOCK_NB; /* turn off this bit */
+	} else
+		cmd = F_SETLKW;
+
+	switch (operation) {
+	case LX_LOCK_UN:
+		fl.l_type = F_UNLCK;
+		break;
+	case LX_LOCK_SH:
+		fl.l_type = F_RDLCK;
+		break;
+	case LX_LOCK_EX:
+		fl.l_type = F_WRLCK;
+		break;
+	default:
+		return (-EINVAL);
+	}
+
+	fl.l_whence = 0;
+	fl.l_start = 0;
+	fl.l_len = 0;
+
+	ret = fcntl(fd, cmd, &fl);
+
+	if (ret == -1 && errno == EACCES)
+		return (-EWOULDBLOCK);
+
+	return ((ret == -1) ? 
-errno : ret); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/file.c b/usr/src/lib/brand/lx/lx_brand/common/file.c new file mode 100644 index 0000000000..a2f81c5b34 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/file.c @@ -0,0 +1,454 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/fstyp.h> +#include <sys/fsid.h> + +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/vnode.h> +#include <fcntl.h> +#include <string.h> +#include <utime.h> +#include <atomic.h> + +#include <sys/lx_syscall.h> +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +static int +install_checkpath(uintptr_t p1) +{ + int saved_errno = errno; + char path[MAXPATHLEN]; + + /* + * The "dev" RPM package wants to modify /dev/pts, but /dev/pts is a + * lofs mounted copy of /native/dev/pts, so that won't work. + * + * Instead, if we're trying to modify /dev/pts from install mode, just + * act as if it succeded. 
+ */ + if (uucopystr((void *)p1, path, MAXPATHLEN) == -1) + return (-errno); + + if (strcmp(path, "/dev/pts") == 0) + return (0); + + errno = saved_errno; + return (-errno); +} + +/* + * Miscellaneous file-related system calls. + */ + +/* + * Linux creates half-duplex unnamed pipes and Solaris creates full-duplex + * pipes. Thus, to get the correct semantics, our simple pipe() system + * call actually needs to create a named pipe, do three opens, a close, and + * an unlink. This is woefully expensive. If performance becomes a real + * issue, we can implement a half-duplex pipe() in the brand module. + */ +#define PIPENAMESZ 32 /* enough room for /tmp/.pipe.<pid>.<num> */ + +int +lx_pipe(uintptr_t p1) +{ + static uint32_t pipecnt = 0; + int cnt; + char pipename[PIPENAMESZ]; + int fds[3]; + int r = 0; + + fds[0] = -1; + fds[1] = -1; + fds[2] = -1; + + /* + * Construct a name for the named pipe: /tmp/.pipe.<pid>.<++cnt> + */ + cnt = atomic_inc_32_nv(&pipecnt); + + (void) snprintf(pipename, PIPENAMESZ, "/tmp/.pipe.%d.%d", + getpid(), cnt); + + if (mkfifo(pipename, 0600)) + return (-errno); + + /* + * To prevent either the read-only or write-only open from + * blocking, we first need to open the pipe for both reading and + * writing. + */ + if (((fds[2] = open(pipename, O_RDWR)) < 0) || + ((fds[0] = open(pipename, O_RDONLY)) < 0) || + ((fds[1] = open(pipename, O_WRONLY)) < 0)) { + r = errno; + } else { + /* + * Copy the two one-way fds back to the app's address + * space. + */ + if (uucopy(fds, (void *)p1, 2 * sizeof (int))) + r = errno; + } + + if (fds[2] >= 0) + (void) close(fds[2]); + (void) unlink(pipename); + + if (r != 0) { + if (fds[0] >= 0) + (void) close(fds[0]); + if (fds[1] >= 0) + (void) close(fds[1]); + } + + return (-r); +} + +/* + * On Linux, even root cannot create a link to a directory, so we have to + * add an explicit check. 
+ */ +int +lx_link(uintptr_t p1, uintptr_t p2) +{ + char *from = (char *)p1; + char *to = (char *)p2; + struct stat64 statbuf; + + if ((stat64(from, &statbuf) == 0) && S_ISDIR(statbuf.st_mode)) + return (-EPERM); + + return (link(from, to) ? -errno : 0); +} + +/* + * On Linux, an unlink of a directory returns EISDIR, not EPERM. + */ +int +lx_unlink(uintptr_t p) +{ + char *pathname = (char *)p; + struct stat64 statbuf; + + if ((lstat64(pathname, &statbuf) == 0) && S_ISDIR(statbuf.st_mode)) + return (-EISDIR); + + return (unlink(pathname) ? -errno : 0); +} + +/* + * fsync() and fdatasync() - On Solaris, these calls translate into a common + * fsync() syscall with a different parameter, so we layer on top of the librt + * functions instead. + */ +int +lx_fsync(uintptr_t fd) +{ + return (fsync((int)fd) ? -errno : 0); +} + +int +lx_fdatasync(uintptr_t fd) +{ + return (fdatasync((int)fd) ? -errno : 0); +} + +/* + * Linux, unlike Solaris, ALWAYS resets the setuid and setgid bits on a + * chown/fchown regardless of whether it was done by root or not. Therefore, + * we must do extra work after each chown/fchown call to emulate this behavior. + */ +#define SETUGID (S_ISUID | S_ISGID) + +/* + * [lf]chown16() - Translate the uid/gid and pass onto the real functions. 
+ */ +int +lx_chown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + struct stat64 statbuf; + + if (chown(filename, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3))) + return (-errno); + + if (stat64(filename, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) chmod(filename, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_fchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + struct stat64 statbuf; + + if (fchown(fd, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3))) + return (-errno); + + if (fstat64(fd, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) fchmod(fd, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_lchown16(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lchown((char *)p1, LX_UID16_TO_UID32((lx_gid16_t)p2), + LX_GID16_TO_GID32((lx_gid16_t)p3)) ? -errno : 0); +} + +int +lx_chown(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + struct stat64 statbuf; + int ret; + + ret = chown(filename, (uid_t)p2, (gid_t)p3); + + if (ret < 0) { + /* + * If chown() failed and we're in install mode, return success + * if the the reason we failed was because the source file + * didn't actually exist or if we're trying to modify /dev/pts. 
+ */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + if (stat64(filename, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) chmod(filename, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_fchown(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + struct stat64 statbuf; + + if (fchown(fd, (uid_t)p2, (gid_t)p3)) + return (-errno); + + if (fstat64(fd, &statbuf) == 0) { + statbuf.st_mode &= ~S_ISUID; + if (statbuf.st_mode & S_IXGRP) + statbuf.st_mode &= ~S_ISGID; + (void) fchmod(fd, (statbuf.st_mode & MODEMASK)); + } + + return (0); +} + +int +lx_chmod(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = chmod((const char *)p1, (mode_t)p2); + + if (ret < 0) { + /* + * If chmod() failed and we're in install mode, return success + * if the reason we failed was because the target file + * didn't actually exist or if we're trying to modify /dev/pts. + */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + return (0); +} + +int +lx_utime(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = utime((const char *)p1, (const struct utimbuf *)p2); + + if (ret < 0) { + /* + * If utime() failed and we're in install mode, return success + * if the reason we failed was because the target file + * didn't actually exist or if we're trying to modify /dev/pts. + */ + if ((lx_install != 0) && + ((errno == ENOENT) || (install_checkpath(p1) == 0))) + return (0); + + return (-errno); + } + + return (0); +} + +/* + * llseek() - The Linux implementation takes an additional parameter, which is + * the resulting position in the file. 
+ */ +int +lx_llseek(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + offset_t ret; + offset_t *res = (offset_t *)p4; + + /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */ + if ((int)p5 > SEEK_END) + return (-EINVAL); + + if ((ret = llseek((int)p1, LX_32TO64(p3, p2), p5)) < 0) + return (-errno); + + *res = ret; + return (0); +} + +/* + * seek() - When the resultant file offset cannot be represented in 32 bits, + * Linux performs the seek but Solaris doesn't, though both set EOVERFLOW. We + * call llseek() and then check to see if we need to return EOVERFLOW. + */ +int +lx_lseek(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + offset_t offset = (offset_t)(off_t)(p2); /* sign extend */ + offset_t ret; + off_t ret32; + + /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */ + if ((int)p3 > SEEK_END) + return (-EINVAL); + + if ((ret = llseek((int)p1, offset, p3)) < 0) + return (-errno); + + ret32 = (off_t)ret; + if ((offset_t)ret32 == ret) + return (ret32); + else + return (-EOVERFLOW); +} + +/* + * Neither Solaris nor Linux actually returns anything to the caller, but glibc + * expects to see SOME value returned, so placate it and return 0. + */ +int +lx_sync(void) +{ + sync(); + return (0); +} + +int +lx_rmdir(uintptr_t p1) +{ + int r; + + r = rmdir((char *)p1); + if (r < 0) + return ((errno == EEXIST) ? -ENOTEMPTY : -errno); + return (0); +} + +/* + * Exactly the same as Solaris' sysfs(2), except Linux numbers their fs indices + * starting at 0, and Solaris starts at 1. + */ +int +lx_sysfs(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int option = (int)p1; + int res; + + /* + * Linux actually doesn't have #defines for these; their sysfs(2) + * man page literally defines the "option" field as being 1, 2 or 3, + * corresponding to Solaris' GETFSIND, GETFSTYP and GETNFSTYP, + * respectively. 
+ */ + switch (option) { + case 1: + if ((res = sysfs(GETFSIND, (const char *)p2)) < 0) + return (-errno); + + return (res - 1); + + case 2: + if ((res = sysfs(GETFSTYP, (int)p2 + 1, + (char *)p3)) < 0) + return (-errno); + + return (0); + + case 3: + if ((res = sysfs(GETNFSTYP)) < 0) + return (-errno); + + return (res); + + default: + break; + } + + return (-EINVAL); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c new file mode 100644 index 0000000000..8cfeec4a5b --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/lx_misc.h> + +/* + * fork() and vfork() + * + * These cannot be pass thru system calls because we need libc to do its own + * initialization or else bad things will happen (i.e. ending up with a bad + * schedctl page). On Linux, there is no such thing as forkall(), so we use + * fork1() here. 
+ */ +int +lx_fork(void) +{ + int ret = fork1(); + + return (ret == -1 ? -errno : ret); +} + +/* + * For vfork(), we have a serious problem because the child is not allowed to + * return from the current frame because it will corrupt the parent's stack. + * Since the semantics of vfork() are rather ill-defined (other than "it's + * faster than fork"), we should theoretically be safe by falling back to + * fork1(). + */ +int +lx_vfork(void) +{ + int ret = fork1(); + + return (ret == -1 ? -errno : ret); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/id.c b/usr/src/lib/brand/lx/lx_brand/common/id.c new file mode 100644 index 0000000000..a9987cea52 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/id.c @@ -0,0 +1,269 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/zone.h> +#include <sys/lx_types.h> +#include <sys/lx_syscall.h> +#include <sys/cred_impl.h> +#include <sys/policy.h> +#include <sys/ucred.h> +#include <sys/syscall.h> +#include <alloca.h> +#include <errno.h> +#include <ucred.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/lx_misc.h> + +int +lx_setuid16(uintptr_t uid) +{ + return ((setuid(LX_UID16_TO_UID32((lx_uid16_t)uid))) ? -errno : 0); +} + +int +lx_getuid16(void) +{ + return ((int)LX_UID32_TO_UID16(getuid())); +} + +int +lx_setgid16(uintptr_t gid) +{ + return ((setgid(LX_GID16_TO_GID32((lx_gid16_t)gid))) ? -errno : 0); +} + +int +lx_getgid16(void) +{ + return ((int)LX_GID32_TO_GID16(getgid())); +} + +int +lx_geteuid16(void) +{ + return ((int)LX_UID32_TO_UID16(geteuid())); +} + +int +lx_getegid16(void) +{ + return ((int)LX_GID32_TO_GID16(getegid())); +} + +int +lx_geteuid(void) +{ + return ((int)geteuid()); +} + +int +lx_getegid(void) +{ + return ((int)getegid()); +} + +int +lx_getresuid(uintptr_t ruid, uintptr_t euid, uintptr_t suid) +{ + lx_uid_t lx_ruid, lx_euid, lx_suid; + ucred_t *cr; + size_t sz; + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. 
+ */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + if (cr == NULL) + return (-ENOMEM); + + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0) + return (-errno); + + if (((lx_ruid = (lx_uid_t)ucred_getruid(cr)) == (lx_uid_t)-1) || + ((lx_euid = (lx_uid_t)ucred_geteuid(cr)) == (lx_uid_t)-1) || + ((lx_suid = (lx_uid_t)ucred_getsuid(cr)) == (lx_uid_t)-1)) { + return (-errno); + } + + if (uucopy(&lx_ruid, (void *)ruid, sizeof (lx_uid_t)) != 0) + return (-errno); + + if (uucopy(&lx_euid, (void *)euid, sizeof (lx_uid_t)) != 0) + return (-errno); + + return ((uucopy(&lx_suid, (void *)suid, sizeof (lx_uid_t)) != 0) + ? -errno : 0); +} + +int +lx_getresuid16(uintptr_t ruid16, uintptr_t euid16, uintptr_t suid16) +{ + lx_uid_t lx_ruid, lx_euid, lx_suid; + lx_uid16_t lx_ruid16, lx_euid16, lx_suid16; + int rv; + + if ((rv = lx_getresuid((uintptr_t)&lx_ruid, (uintptr_t)&lx_euid, + (uintptr_t)&lx_suid)) != 0) + return (rv); + + lx_ruid16 = LX_UID32_TO_UID16(lx_ruid); + lx_euid16 = LX_UID32_TO_UID16(lx_euid); + lx_suid16 = LX_UID32_TO_UID16(lx_suid); + + if (uucopy(&lx_ruid16, (void *)ruid16, sizeof (lx_uid16_t)) != 0) + return (-errno); + + if (uucopy(&lx_euid16, (void *)euid16, sizeof (lx_uid16_t)) != 0) + return (-errno); + + return ((uucopy(&lx_suid16, (void *)suid16, sizeof (lx_uid16_t)) != 0) + ? -errno : 0); +} + +int +lx_getresgid(uintptr_t rgid, uintptr_t egid, uintptr_t sgid) +{ + ucred_t *cr; + lx_gid_t lx_rgid, lx_egid, lx_sgid; + size_t sz; + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. 
+ */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + if (cr == NULL) + return (-ENOMEM); + + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0) + return (-errno); + + if (((lx_rgid = (lx_gid_t)ucred_getrgid(cr)) == (lx_gid_t)-1) || + ((lx_egid = (lx_gid_t)ucred_getegid(cr)) == (lx_gid_t)-1) || + ((lx_sgid = (lx_gid_t)ucred_getsgid(cr)) == (lx_gid_t)-1)) { + return (-errno); + } + + if (uucopy(&lx_rgid, (void *)rgid, sizeof (lx_gid_t)) != 0) + return (-errno); + + if (uucopy(&lx_egid, (void *)egid, sizeof (lx_gid_t)) != 0) + return (-errno); + + return ((uucopy(&lx_sgid, (void *)sgid, sizeof (lx_gid_t)) != 0) + ? -errno : 0); +} + +int +lx_getresgid16(uintptr_t rgid16, uintptr_t egid16, uintptr_t sgid16) +{ + lx_gid_t lx_rgid, lx_egid, lx_sgid; + lx_gid16_t lx_rgid16, lx_egid16, lx_sgid16; + int rv; + + if ((rv = lx_getresgid((uintptr_t)&lx_rgid, (uintptr_t)&lx_egid, + (uintptr_t)&lx_sgid)) != 0) + return (rv); + + lx_rgid16 = LX_UID32_TO_UID16(lx_rgid); + lx_egid16 = LX_UID32_TO_UID16(lx_egid); + lx_sgid16 = LX_UID32_TO_UID16(lx_sgid); + + if (uucopy(&lx_rgid16, (void *)rgid16, sizeof (lx_gid16_t)) != 0) + return (-errno); + + if (uucopy(&lx_egid16, (void *)egid16, sizeof (lx_gid16_t)) != 0) + return (-errno); + + return ((uucopy(&lx_sgid16, (void *)sgid16, sizeof (lx_gid16_t)) != 0) + ? -errno : 0); +} + +int +lx_setreuid16(uintptr_t ruid, uintptr_t euid) +{ + return ((setreuid(LX_UID16_TO_UID32((lx_uid16_t)ruid), + LX_UID16_TO_UID32((lx_uid16_t)euid))) ? -errno : 0); +} + +int +lx_setregid16(uintptr_t rgid, uintptr_t egid) +{ + return ((setregid(LX_UID16_TO_UID32((lx_gid16_t)rgid), + LX_UID16_TO_UID32((lx_gid16_t)egid))) ? -errno : 0); +} + +/* + * The lx brand cannot support the setfs[ug]id16/setfs[ug]id calls as that + * would require significant rework of Solaris' privilege mechanisms, so + * instead return the current effective [ug]id. 
+ * + * In Linux, fsids track effective IDs, so returning the effective IDs works + * as a substitute; returning the current value also denotes failure of the + * call if the caller had specified something different. We don't need to + * worry about setting error codes because the Linux calls don't set any. + */ +/*ARGSUSED*/ +int +lx_setfsuid16(uintptr_t fsuid16) +{ + return (lx_geteuid16()); +} + +/*ARGSUSED*/ +int +lx_setfsgid16(uintptr_t fsgid16) +{ + return (lx_getegid16()); +} + +/*ARGSUSED*/ +int +lx_setfsuid(uintptr_t fsuid) +{ + return (geteuid()); +} + +/*ARGSUSED*/ +int +lx_setfsgid(uintptr_t fsgid) +{ + return (getegid()); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c new file mode 100644 index 0000000000..34edefdbde --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c @@ -0,0 +1,2716 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <fcntl.h> +#include <sys/types.h> +#include <signal.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stropts.h> +#include <strings.h> +#include <thread.h> +#include <errno.h> +#include <libintl.h> +#include <sys/bitmap.h> +#include <sys/lx_autofs.h> +#include <sys/modctl.h> +#include <sys/filio.h> +#include <sys/termios.h> +#include <sys/termio.h> +#include <sys/sockio.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <sys/ptms.h> +#include <sys/ldlinux.h> +#include <sys/lx_ptm.h> +#include <sys/lx_socket.h> +#include <sys/syscall.h> +#include <sys/brand.h> +#include <sys/lx_audio.h> +#include <sys/lx_ioctl.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/ptyvar.h> +#include <sys/audio.h> +#include <sys/mixer.h> + +/* Define _KERNEL to get the devt manipulation macros. */ +#define _KERNEL +#include <sys/sysmacros.h> +#undef _KERNEL + +/* Maximum number of modules on a stream that we can handle. */ +#define MAX_STRMODS 10 + +/* Maximum buffer size for debugging messages. */ +#define MSGBUF 1024 + +/* Structure used to define an ioctl translator. */ +typedef struct ioc_cmd_translator { + int ict_lx_cmd; + char *ict_lx_cmd_str; + int ict_cmd; + char *ict_cmd_str; + int (*ict_func)(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg); +} ioc_cmd_translator_t; + +/* + * Structures used to associate a group of ioctl translators with + * a specific device. + */ +typedef struct ioc_dev_translator { + char *idt_driver; + major_t idt_major; + + /* Array of command translators. */ + ioc_cmd_translator_t *idt_cmds; +} ioc_dev_translator_t; + +/* + * Structures used to associate a group of ioctl translators with + * a specific filesystem. + */ +typedef struct ioc_fs_translator { + char *ift_filesystem; + + /* Array of command translators. 
*/ + ioc_cmd_translator_t *ift_cmds; +} ioc_fs_translator_t; + +/* Structure used to define the error code returned for an unsupported ioctl. */ +typedef struct ioc_errno_translator { + int iet_lx_cmd; + char *iet_lx_cmd_str; + int iet_errno; +} ioc_errno_translator_t; + +/* Structure used to convert oss format flags into Solaris options. */ +typedef struct oss_fmt_translator { + int oft_oss_fmt; + int oft_encoding; + int oft_precision; +} oss_fmt_translator_t; + +/* Translator forward declarations. */ +static oss_fmt_translator_t oft_table[]; +static ioc_cmd_translator_t ioc_translators_file[]; +static ioc_cmd_translator_t ioc_translators_fifo[]; +static ioc_cmd_translator_t ioc_translators_sock[]; +static ioc_dev_translator_t ioc_translator_ptm; +static ioc_dev_translator_t *ioc_translators_dev[]; +static ioc_fs_translator_t *ioc_translators_fs[]; +static ioc_errno_translator_t ioc_translators_errno[]; + +/* + * Interface name table. + */ +typedef struct ifname_map { + char im_linux[IFNAMSIZ]; + char im_solaris[IFNAMSIZ]; + struct ifname_map *im_next; +} ifname_map_t; + +static ifname_map_t *ifname_map; +static mutex_t ifname_mtx; + +/* + * Macros and structures to help convert integers to string + * values that they represent (for displaying in debug output). 
+ */ +#define I2S_ENTRY(x) { x, #x }, +#define I2S_END { 0, NULL } + +typedef struct int2str { + int i2s_int; + char *i2s_str; +} int2str_t; + +static int2str_t st_mode_strings[] = { + I2S_ENTRY(S_IFIFO) + I2S_ENTRY(S_IFCHR) + I2S_ENTRY(S_IFDIR) + I2S_ENTRY(S_IFBLK) + I2S_ENTRY(S_IFREG) + I2S_ENTRY(S_IFLNK) + I2S_ENTRY(S_IFSOCK) + I2S_ENTRY(S_IFDOOR) + I2S_ENTRY(S_IFPORT) + I2S_END +}; + +static int2str_t oss_fmt_str[] = { + I2S_ENTRY(LX_OSS_AFMT_QUERY) + I2S_ENTRY(LX_OSS_AFMT_MU_LAW) + I2S_ENTRY(LX_OSS_AFMT_A_LAW) + I2S_ENTRY(LX_OSS_AFMT_IMA_ADPCM) + I2S_ENTRY(LX_OSS_AFMT_U8) + I2S_ENTRY(LX_OSS_AFMT_S16_LE) + I2S_ENTRY(LX_OSS_AFMT_S16_BE) + I2S_ENTRY(LX_OSS_AFMT_S8) + I2S_ENTRY(LX_OSS_AFMT_U16_LE) + I2S_ENTRY(LX_OSS_AFMT_U16_BE) + I2S_ENTRY(LX_OSS_AFMT_MPEG) + I2S_END +}; + +static void +lx_ioctl_msg(int fd, int cmd, char *lx_cmd_str, struct stat *stat, char *msg) +{ + int errno_backup = errno; + char *path, path_buf[MAXPATHLEN]; + + assert(msg != NULL); + + if (lx_debug_enabled == 0) + return; + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + if (lx_cmd_str == NULL) + lx_cmd_str = "?"; + + /* Display the initial error message and extended ioctl information. */ + lx_debug("\t%s", msg); + lx_debug("\tlx_ioctl(): cmd = 0x%x - %s, fd = %d - %s", + cmd, lx_cmd_str, fd, path); + + /* Display information about the target file, if it's available. */ + if (stat != NULL) { + major_t fd_major = getmajor(stat->st_rdev); + minor_t fd_minor = getminor(stat->st_rdev); + int fd_mode = stat->st_mode & S_IFMT; + char *fd_mode_str = "unknown"; + char buf[LX_MSG_MAXLEN]; + int i; + + /* Translate the file type bits into a string. 
*/ + for (i = 0; st_mode_strings[i].i2s_str != NULL; i++) { + if (fd_mode != st_mode_strings[i].i2s_int) + continue; + fd_mode_str = st_mode_strings[i].i2s_str; + break; + } + + (void) snprintf(buf, sizeof (buf), + "\tlx_ioctl(): mode = %s", fd_mode_str); + + if ((fd_mode == S_IFCHR) || (fd_mode == S_IFBLK)) { + char *fd_driver[MODMAXNAMELEN + 1]; + int i; + + /* This is a device so display the devt. */ + i = strlen(buf); + (void) snprintf(buf + i, sizeof (buf) - i, + "; rdev = [%d, %d]", fd_major, fd_minor); + + /* Try to display the drivers name. */ + if (modctl(MODGETNAME, + fd_driver, sizeof (fd_driver), &fd_major) == 0) + i = strlen(buf); + (void) snprintf(buf + i, sizeof (buf) - i, + "; driver = %s", fd_driver); + } + lx_debug(buf); + } + + /* Restore errno. */ + errno = errno_backup; +} + +static int +ldlinux_check(int fd, struct stat *stat) +{ + struct str_mlist mlist[MAX_STRMODS]; + struct str_list strlist; + int i; + + assert((stat->st_mode & S_IFMT) == S_IFCHR); + + /* Get the number of modules on the stream. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, I_LIST, "I_LIST"); + if ((i = ioctl(fd, I_LIST, (struct str_list *)NULL)) < 0) { + lx_debug("\tldlinux_check(): unable to count stream modules"); + return (-errno); + } + + /* Sanity check the number of modules on the stream. */ + assert(i <= MAX_STRMODS); + + /* Get the list of modules on the stream. 
*/ + strlist.sl_nmods = i; + strlist.sl_modlist = mlist; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, I_LIST, "I_LIST"); + if (ioctl(fd, I_LIST, &strlist) < 0) { + lx_debug("\tldlinux_check(): unable to list stream modules"); + return (-errno); + } + + for (i = 0; i < strlist.sl_nmods; i++) + if (strcmp(strlist.sl_modlist[i].l_name, LDLINUX_MOD) == 0) + return (1); + + return (0); +} + +static int +ioctl_istr(int fd, int cmd, char *cmd_str, void *arg, int arg_len) +{ + struct strioctl istr; + + istr.ic_cmd = cmd; + istr.ic_len = arg_len; + istr.ic_timout = 0; + istr.ic_dp = arg; + + lx_debug("\tioctl_istr(%d, 0x%x - %s, ...)", fd, cmd, cmd_str); + if (ioctl(fd, I_STR, &istr) < 0) + return (-1); + return (0); +} + +/* + * Add an interface name mapping if it doesn't already exist. + * + * Interfaces with IFF_LOOPBACK flag get renamed to loXXX. + * Interfaces with IFF_BROADCAST flag get renamed to ethXXX. + * + * Caller locks the name table. + */ +static int +ifname_add(char *if_name, int if_flags) +{ + static int eth_index = 0; + static int lo_index = 0; + ifname_map_t **im_pp; + + for (im_pp = &ifname_map; *im_pp; im_pp = &(*im_pp)->im_next) + if (strncmp((*im_pp)->im_solaris, if_name, IFNAMSIZ) == 0) + return (0); + + *im_pp = calloc(1, sizeof (ifname_map_t)); + if (*im_pp == NULL) + return (-1); + + (void) strlcpy((*im_pp)->im_solaris, if_name, IFNAMSIZ); + if (if_flags & IFF_LOOPBACK) { + /* Loopback */ + if (lo_index == 0) + (void) strlcpy((*im_pp)->im_linux, "lo", IFNAMSIZ); + else + (void) snprintf((*im_pp)->im_linux, IFNAMSIZ, + "lo:%d", lo_index); + lo_index++; + } else if (if_flags & IFF_BROADCAST) { + /* Assume ether if it has a broadcast address */ + (void) snprintf((*im_pp)->im_linux, IFNAMSIZ, + "eth%d", eth_index); + eth_index++; + } else { + /* Do not translate unknown interfaces */ + (void) strlcpy((*im_pp)->im_linux, if_name, IFNAMSIZ); + } + + lx_debug("map interface %s -> %s", if_name, (*im_pp)->im_linux); + + return (0); +} + +static int 
+ifname_cmp(const void *p1, const void *p2) +{ + struct ifreq *rp1 = (struct ifreq *)p1; + struct ifreq *rp2 = (struct ifreq *)p2; + + return (strncmp(rp1->ifr_name, rp2->ifr_name, IFNAMSIZ)); +} + +/* + * (Re-)scan all interfaces and add them to the name table. + * Caller locks the name table. + */ +static int +ifname_scan(void) +{ + struct ifconf conf; + int i, fd, ifcount; + + conf.ifc_buf = NULL; + + if ((fd = socket(PF_INET, SOCK_DGRAM, 0)) < 0) + goto fail; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFNUM, "SIOCGIFNUM"); + if (ioctl(fd, SIOCGIFNUM, &ifcount) < 0) { + lx_debug("\tifname_scan(): unable to get number of interfaces"); + goto fail; + } + + conf.ifc_len = ifcount * sizeof (struct ifreq); + if ((conf.ifc_buf = calloc(ifcount, sizeof (struct ifreq))) == NULL) + goto fail; + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF"); + if (ioctl(fd, SIOCGIFCONF, &conf) < 0) { + lx_debug("\tifname_scan(): unable to get interfaces"); + goto fail; + } + + /* Get the interface flags */ + for (i = 0; i < ifcount; i++) { + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, SIOCGIFFLAGS, "SIOCGIFFLAGS"); + if (ioctl(fd, SIOCGIFFLAGS, &conf.ifc_req[i]) < 0) { + conf.ifc_req[i].ifr_flags = 0; + lx_debug("\tifname_scan(): unable to get flags for %s", + conf.ifc_req[i].ifr_name); + } + } + + /* + * Sort the interfaces by name to preserve the order + * across reboots of this zone. Note that the order of + * interface names won't be consistent across network + * configuration changes. ie. If network interfaces + * are added or removed from a zone (either dynamically + * or statically) the network interfaces names to physical + * network interface mappings that linux apps see may + * change. 
+ */ + qsort(conf.ifc_req, ifcount, sizeof (struct ifreq), ifname_cmp); + + /* Add to the name table */ + for (i = 0; i < ifcount; i++) + if (ifname_add(conf.ifc_req[i].ifr_name, + conf.ifc_req[i].ifr_flags) != 0) + goto fail; + + (void) close(fd); + free(conf.ifc_buf); + + return (0); + +fail: + if (fd >= 0) + (void) close(fd); + if (conf.ifc_buf != NULL) + free(conf.ifc_buf); + + return (-1); +} + +static int +ifname_from_linux(char *name) +{ + int pass; + ifname_map_t *im_p; + + (void) mutex_lock(&ifname_mtx); + + for (pass = 0; pass < 2; pass++) { + for (im_p = ifname_map; im_p; im_p = im_p->im_next) + if (strncmp(im_p->im_linux, name, IFNAMSIZ) == 0) + break; + if (im_p != NULL || (pass == 0 && ifname_scan() != 0)) + break; + } + + (void) mutex_unlock(&ifname_mtx); + + if (im_p) { + (void) strlcpy(name, im_p->im_solaris, IFNAMSIZ); + return (0); + } + + return (-1); +} + +static int +ifname_from_solaris(char *name) +{ + int pass; + ifname_map_t *im_p; + + (void) mutex_lock(&ifname_mtx); + + for (pass = 0; pass < 2; pass++) { + for (im_p = ifname_map; im_p; im_p = im_p->im_next) + if (strncmp(im_p->im_solaris, name, IFNAMSIZ) == 0) + break; + if (im_p != NULL || (pass == 0 && ifname_scan() != 0)) + break; + } + + (void) mutex_unlock(&ifname_mtx); + + if (im_p) { + (void) strlcpy(name, im_p->im_linux, IFNAMSIZ); + return (0); + } + + return (-1); +} + +/* + * Called to initialize the ioctl translation subsystem. + */ +int +lx_ioctl_init() +{ + int i, ret; + + /* Figure out the major numbers for our devices translators. 
*/ + for (i = 0; ioc_translators_dev[i] != NULL; i++) { + ioc_dev_translator_t *idt = ioc_translators_dev[i]; + + ret = modctl(MODGETMAJBIND, + idt->idt_driver, strlen(idt->idt_driver) + 1, + &idt->idt_major); + + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_ioctl_init(): modctl(MODGETMAJBIND, ", + idt->idt_driver, strerror(errno)); + lx_err(gettext("%s: %s translator disabled for: %s\n"), + "lx_ioctl_init()", "ioctl", idt->idt_driver); + idt->idt_major = (major_t)-1; + } + } + + /* Create the interface name table */ + if (ifname_scan() != 0) + lx_err("lx_ioctl_init(): ifname_scan() failed\n"); + + return (0); +} + +/* + * Walk a command translator table, which ends at the first entry whose + * ict_func is NULL, and return the entry whose ict_lx_cmd equals cmd. + * Returns NULL if no entry matches. + */ +static ioc_cmd_translator_t * +lx_ioctl_find_ict_cmd(ioc_cmd_translator_t *ict, int cmd) +{ + assert(ict != NULL); + while ((ict != NULL) && (ict->ict_func != NULL)) { + if (cmd == ict->ict_lx_cmd) + return (ict); + ict++; + } + return (NULL); +} + +/* + * Main entry point for the ioctl translator. + * + * p1 is the file descriptor, p2 the Linux ioctl command and p3 its + * argument. Returns the selected translator's result, or a negated + * errno value on failure. + */ +int +lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + int cmd = (int)p2; + intptr_t arg = (uintptr_t)p3; + struct stat stat; + /* + * Start with no translator group selected: the S_IFCHR case below + * only assigns ict when a device translator matches the fd's major + * number, and ict is tested against NULL right after the switch. + */ + ioc_cmd_translator_t *ict = NULL; + ioc_errno_translator_t *iet = NULL; + major_t fd_major; + int i, ret; + + if (fstat(fd, &stat) != 0) { + lx_ioctl_msg(fd, cmd, NULL, NULL, + "lx_ioctl(): fstat() failed"); + + /* + * Linux ioctl(2) is only documented to return EBADF, EFAULT, + * EINVAL or ENOTTY. + * + * EINVAL is documented to be "Request or argp is not valid", + * so it's reasonable to force any errno that's not EBADF, + * EFAULT or ENOTTY to be EINVAL. + */ + if ((errno != EBADF) && (errno != EFAULT) && (errno != ENOTTY)) + errno = EINVAL; + + return (-errno); /* errno already set. */ + } + + switch (stat.st_mode & S_IFMT) { + default: + ict = NULL; + break; + case S_IFREG: + /* Use file translators. */ + ict = ioc_translators_file; + break; + + case S_IFSOCK: + /* Use socket translators. */ + ict = ioc_translators_sock; + break; + + case S_IFIFO: + /* Use fifo translators.
*/ + ict = ioc_translators_fifo; + break; + + case S_IFCHR: + fd_major = getmajor(stat.st_rdev); + + /* + * Look through all the device translators to see if there + * is one for this device. + */ + for (i = 0; ioc_translators_dev[i] != NULL; i++) { + if (fd_major != ioc_translators_dev[i]->idt_major) + continue; + + /* We found a translator for this device. */ + ict = ioc_translators_dev[i]->idt_cmds; + break; + } + break; + } + + /* + * Search the selected translator group to see if we have a + * translator for this specific command. + */ + if ((ict != NULL) && + ((ict = lx_ioctl_find_ict_cmd(ict, cmd)) != NULL)) { + /* We found a translator for this command, invoke it. */ + lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat, + "lx_ioctl(): emulating ioctl"); + + ret = ict->ict_func(fd, &stat, ict->ict_cmd, ict->ict_cmd_str, + arg); + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + /* + * If we didn't find a file or device translator for this + * command then try to find a filesystem translator for + * this command. + */ + for (i = 0; ioc_translators_fs[i] != NULL; i++) { + if (strcmp(stat.st_fstype, + ioc_translators_fs[i]->ift_filesystem) != 0) + continue; + + /* We found a translator for this filesystem. */ + ict = ioc_translators_fs[i]->ift_cmds; + break; + } + + /* + * Search the selected translator group to see if we have a + * translator for this specific command. + */ + if ((ict != NULL) && + ((ict = lx_ioctl_find_ict_cmd(ict, cmd)) != NULL)) { + /* We found a translator for this command, invoke it. */ + lx_ioctl_msg(fd, cmd, ict->ict_lx_cmd_str, &stat, + "lx_ioctl(): emulating ioctl"); + ret = ict->ict_func(fd, &stat, ict->ict_cmd, ict->ict_cmd_str, + arg); + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + /* + * No translator for this ioctl was found. + * Check if there is an errno translator. 
+ */ + for (iet = ioc_translators_errno; iet->iet_lx_cmd_str != NULL; iet++) { + if (cmd != iet->iet_lx_cmd) + continue; + + /* We found a an errno translator for this ioctl. */ + lx_ioctl_msg(fd, cmd, iet->iet_lx_cmd_str, &stat, + "lx_ioctl(): emulating errno"); + + ret = -iet->iet_errno; + + if ((ret < 0) && (ret != -EBADF) && (ret != -EFAULT) && + (ret != -ENOTTY)) + ret = -EINVAL; + + return (ret); + } + + lx_ioctl_msg(fd, cmd, NULL, &stat, + "lx_ioctl(): unsupported linux ioctl"); + lx_unsupported(gettext("lx_ioctl(): unsupported linux ioctl (%d)"), + cmd); + return (-EINVAL); +} + + +/* + * Ioctl translator functions. + */ +/* + * Used by translators that want to explicitly return EINVAL for an + * ioctl(2) instead of having the translation framework do it implicitly. + * This allows us to indicate which unsupported ioctl(2)s should not + * trigger a SIGSYS when running in LX_STRICT mode. + */ +/* ARGSUSED */ +static int +ict_einval(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + return (-EINVAL); +} + +static int +/*ARGSUSED*/ +ict_pass(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ret; + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, arg); + return (ret < 0 ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_tcsbrkp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ret, dur = 0; + + assert(cmd == LX_TCSBRKP); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCSBRK, "TCSBRK"); + ret = ioctl(fd, TCSBRK, (intptr_t)&dur); + return (ret < 0 ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_sioifoob(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int req, *reqp = (int *)arg; + int len, val; + + assert(cmd == SIOCATMARK); + + if (uucopy(reqp, &req, sizeof (req)) != 0) + return (-errno); + + len = sizeof (val); + + /* + * Linux expects a SIOCATMARK of a UDP socket to return EINVAL, while + * Solaris allows it. 
+ */ + if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &val, &len) < 0) { + lx_debug("ict_sioifoob: getsockopt failed, errno %d", errno); + return (-EINVAL); + } + + if ((len != sizeof (val)) || (val != SOCK_STREAM)) + return (-EINVAL); + + if (ioctl(fd, cmd, &req) < 0) + return (-errno); + + if (uucopy(&req, reqp, sizeof (req)) != 0) + return (-errno); + + return (0); +} + +/* + * Translate an interface ioctl that takes a struct ifreq: map the + * Linux interface name to the native one, issue the native ioctl with + * the same cmd, then map the name back before copying the result out. + * Returns 0 on success, -EINVAL if a name has no mapping, or a negated + * errno if a copy or the ioctl fails. + */ +static int +/*ARGSUSED*/ +ict_sioifreq(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct ifreq req, *reqp = (struct ifreq *)arg; + + assert(cmd == SIOCGIFFLAGS || cmd == SIOCSIFFLAGS || + cmd == SIOCGIFADDR || cmd == SIOCSIFADDR || + cmd == SIOCGIFDSTADDR || cmd == SIOCSIFDSTADDR || + cmd == SIOCGIFBRDADDR || cmd == SIOCSIFBRDADDR || + cmd == SIOCGIFNETMASK || cmd == SIOCSIFNETMASK || + cmd == SIOCGIFMETRIC || cmd == SIOCSIFMETRIC || + cmd == SIOCGIFMTU || cmd == SIOCSIFMTU); + + /* Copy in the data */ + if (uucopy(reqp, &req, sizeof (struct ifreq)) != 0) + return (-errno); + + if (ifname_from_linux(req.ifr_name) < 0) + return (-EINVAL); + + lx_debug("\tioctl(%d, 0x%x - %s, %.14s)", + fd, cmd, cmd_str, req.ifr_name); + + if (ioctl(fd, cmd, &req) < 0) + return (-errno); + + if (ifname_from_solaris(req.ifr_name) < 0) + return (-EINVAL); + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_siocgifconf(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct ifconf conf, *confp = (struct ifconf *)arg; + int i, ifcount, ret; + + assert(cmd == LX_SIOCGIFCONF); + + /* Copy in the data. */ + if (uucopy(confp, &conf, sizeof (conf)) != 0) + return (-errno); + + if (conf.ifc_len == 0) { + /* They want to know how many interfaces there are. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, SIOCGIFNUM, "SIOCGIFNUM"); + if (ioctl(fd, SIOCGIFNUM, (intptr_t)&ifcount) < 0) + return (-errno); + conf.ifc_len = ifcount * sizeof (struct ifreq); + + /* Check if we're done.
*/ + if (conf.ifc_buf == NULL) { + /* Copy out the data. */ + if (uucopy(&conf, confp, sizeof (conf)) != 0) + return (-errno); + return (0); + } + } + + /* Get interface configuration list. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGIFCONF, "SIOCGIFCONF"); + ret = ioctl(fd, SIOCGIFCONF, &conf); + if (ret < 0) + return (-errno); + + /* Rename interfaces to linux */ + for (i = 0; i < conf.ifc_len / sizeof (struct ifreq); i++) + if (ifname_from_solaris(conf.ifc_req[i].ifr_name) < 0) + return (-EINVAL); + + /* Copy out the data */ + if (uucopy(&conf, confp, sizeof (conf)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_siocifhwaddr(int fd, struct stat *stat, int cmd, char *cmd_str, + intptr_t arg) +{ + struct ifreq req, *reqp = (struct ifreq *)arg; + struct arpreq arpreq; + + assert(cmd == LX_SIOCGIFHWADDR || cmd == LX_SIOCSIFHWADDR); + + /* Copy in the data */ + if (uucopy(reqp, &req, sizeof (struct ifreq)) != 0) + return (-errno); + + lx_debug("\tioctl(%d, 0x%x - %s, lx %.14s)", + fd, cmd, + (cmd == LX_SIOCGIFHWADDR) ? "SIOCGIFHWADDR" : "SIOCSIFHWADDR", + req.ifr_name); + + /* + * We're not going to support SIOCSIFHWADDR, but we need to be + * able to check the result of the uucopy first to see if the command + * should have returned EFAULT. 
+ */ + if (cmd == LX_SIOCSIFHWADDR) { + lx_unsupported(gettext( + "lx_ioctl(): unsupported linux ioctl: %s"), + "SIOCSIFHWADDR"); + return (-EINVAL); + } + + if (strcmp(req.ifr_name, "lo") == 0 || + strncmp(req.ifr_name, "lo:", 3) == 0) { + /* Abuse ifr_addr for linux ifr_hwaddr */ + bzero(&req.ifr_addr, sizeof (struct sockaddr)); + req.ifr_addr.sa_family = LX_ARPHRD_LOOPBACK; + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); + } + + if (ifname_from_linux(req.ifr_name) < 0) + return (-EINVAL); + + lx_debug("\tioctl(%d, 0x%x - %s, %.14s)", + fd, SIOCGIFADDR, "SIOCGIFADDR", req.ifr_name); + + if (ioctl(fd, SIOCGIFADDR, &req) < 0) + return (-errno); + + bcopy(&req.ifr_addr, &arpreq.arp_pa, sizeof (struct sockaddr)); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, SIOCGARP, "SIOCGARP"); + + if (ioctl(fd, SIOCGARP, &arpreq) < 0) + return (-errno); + + if (ifname_from_solaris(req.ifr_name) < 0) + return (-EINVAL); + + /* Abuse ifr_addr for linux ifr_hwaddr */ + bcopy(&arpreq.arp_ha, &req.ifr_addr, sizeof (struct sockaddr)); + if (strncmp(req.ifr_name, "eth", 3) == 0) + req.ifr_addr.sa_family = LX_ARPHRD_ETHER; + else + req.ifr_addr.sa_family = LX_ARPHRD_VOID; + + /* Copy out the data */ + if (uucopy(&req, reqp, sizeof (struct ifreq)) != 0) + return (-errno); + + return (0); +} + +static void +l2s_termios(struct lx_termios *l_tios, struct termios *s_tios) +{ + assert((l_tios != NULL) && (s_tios != NULL)); + + bzero(s_tios, sizeof (*s_tios)); + + s_tios->c_iflag = l_tios->c_iflag; + s_tios->c_oflag = l_tios->c_oflag; + s_tios->c_cflag = l_tios->c_cflag; + + s_tios->c_lflag = l_tios->c_lflag; + if (s_tios->c_lflag & ICANON) { + s_tios->c_cc[VEOF] = l_tios->c_cc[LX_VEOF]; + s_tios->c_cc[VEOL] = l_tios->c_cc[LX_VEOL]; + } else { + s_tios->c_cc[VMIN] = l_tios->c_cc[LX_VMIN]; + s_tios->c_cc[VTIME] = l_tios->c_cc[LX_VTIME]; + } + + s_tios->c_cc[VEOL2] = l_tios->c_cc[LX_VEOL2]; + s_tios->c_cc[VERASE] = 
l_tios->c_cc[LX_VERASE]; + s_tios->c_cc[VKILL] = l_tios->c_cc[LX_VKILL]; + s_tios->c_cc[VREPRINT] = l_tios->c_cc[LX_VREPRINT]; + s_tios->c_cc[VLNEXT] = l_tios->c_cc[LX_VLNEXT]; + s_tios->c_cc[VWERASE] = l_tios->c_cc[LX_VWERASE]; + s_tios->c_cc[VINTR] = l_tios->c_cc[LX_VINTR]; + s_tios->c_cc[VQUIT] = l_tios->c_cc[LX_VQUIT]; + s_tios->c_cc[VSWTCH] = l_tios->c_cc[LX_VSWTC]; + s_tios->c_cc[VSTART] = l_tios->c_cc[LX_VSTART]; + s_tios->c_cc[VSTOP] = l_tios->c_cc[LX_VSTOP]; + s_tios->c_cc[VSUSP] = l_tios->c_cc[LX_VSUSP]; + s_tios->c_cc[VDISCARD] = l_tios->c_cc[LX_VDISCARD]; +} + +static void +l2s_termio(struct lx_termio *l_tio, struct termio *s_tio) +{ + assert((l_tio != NULL) && (s_tio != NULL)); + + bzero(s_tio, sizeof (*s_tio)); + + s_tio->c_iflag = l_tio->c_iflag; + s_tio->c_oflag = l_tio->c_oflag; + s_tio->c_cflag = l_tio->c_cflag; + + s_tio->c_lflag = l_tio->c_lflag; + if (s_tio->c_lflag & ICANON) { + s_tio->c_cc[VEOF] = l_tio->c_cc[LX_VEOF]; + } else { + s_tio->c_cc[VMIN] = l_tio->c_cc[LX_VMIN]; + s_tio->c_cc[VTIME] = l_tio->c_cc[LX_VTIME]; + } + + s_tio->c_cc[VINTR] = l_tio->c_cc[LX_VINTR]; + s_tio->c_cc[VQUIT] = l_tio->c_cc[LX_VQUIT]; + s_tio->c_cc[VERASE] = l_tio->c_cc[LX_VERASE]; + s_tio->c_cc[VKILL] = l_tio->c_cc[LX_VKILL]; + s_tio->c_cc[VSWTCH] = l_tio->c_cc[LX_VSWTC]; +} + +static void +termios2lx_cc(struct lx_termios *l_tios, struct lx_cc *lio) +{ + assert((l_tios != NULL) && (lio != NULL)); + + bzero(lio, sizeof (*lio)); + + lio->veof = l_tios->c_cc[LX_VEOF]; + lio->veol = l_tios->c_cc[LX_VEOL]; + lio->vmin = l_tios->c_cc[LX_VMIN]; + lio->vtime = l_tios->c_cc[LX_VTIME]; +} + +static void +termio2lx_cc(struct lx_termio *l_tio, struct lx_cc *lio) +{ + assert((l_tio != NULL) && (lio != NULL)); + + bzero(lio, sizeof (*lio)); + + lio->veof = l_tio->c_cc[LX_VEOF]; + lio->veol = 0; + lio->vmin = l_tio->c_cc[LX_VMIN]; + lio->vtime = l_tio->c_cc[LX_VTIME]; +} + +static void +s2l_termios(struct termios *s_tios, struct lx_termios *l_tios) +{ + assert((s_tios != NULL) 
&& (l_tios != NULL)); + + bzero(l_tios, sizeof (*l_tios)); + + l_tios->c_iflag = s_tios->c_iflag; + l_tios->c_oflag = s_tios->c_oflag; + l_tios->c_cflag = s_tios->c_cflag; + l_tios->c_lflag = s_tios->c_lflag; + + if (s_tios->c_lflag & ICANON) { + l_tios->c_cc[LX_VEOF] = s_tios->c_cc[VEOF]; + l_tios->c_cc[LX_VEOL] = s_tios->c_cc[VEOL]; + } else { + l_tios->c_cc[LX_VMIN] = s_tios->c_cc[VMIN]; + l_tios->c_cc[LX_VTIME] = s_tios->c_cc[VTIME]; + } + + l_tios->c_cc[LX_VEOL2] = s_tios->c_cc[VEOL2]; + l_tios->c_cc[LX_VERASE] = s_tios->c_cc[VERASE]; + l_tios->c_cc[LX_VKILL] = s_tios->c_cc[VKILL]; + l_tios->c_cc[LX_VREPRINT] = s_tios->c_cc[VREPRINT]; + l_tios->c_cc[LX_VLNEXT] = s_tios->c_cc[VLNEXT]; + l_tios->c_cc[LX_VWERASE] = s_tios->c_cc[VWERASE]; + l_tios->c_cc[LX_VINTR] = s_tios->c_cc[VINTR]; + l_tios->c_cc[LX_VQUIT] = s_tios->c_cc[VQUIT]; + l_tios->c_cc[LX_VSWTC] = s_tios->c_cc[VSWTCH]; + l_tios->c_cc[LX_VSTART] = s_tios->c_cc[VSTART]; + l_tios->c_cc[LX_VSTOP] = s_tios->c_cc[VSTOP]; + l_tios->c_cc[LX_VSUSP] = s_tios->c_cc[VSUSP]; + l_tios->c_cc[LX_VDISCARD] = s_tios->c_cc[VDISCARD]; +} + +static void +s2l_termio(struct termio *s_tio, struct lx_termio *l_tio) +{ + assert((s_tio != NULL) && (l_tio != NULL)); + + bzero(l_tio, sizeof (*l_tio)); + + l_tio->c_iflag = s_tio->c_iflag; + l_tio->c_oflag = s_tio->c_oflag; + l_tio->c_cflag = s_tio->c_cflag; + l_tio->c_lflag = s_tio->c_lflag; + + if (s_tio->c_lflag & ICANON) { + l_tio->c_cc[LX_VEOF] = s_tio->c_cc[VEOF]; + } else { + l_tio->c_cc[LX_VMIN] = s_tio->c_cc[VMIN]; + l_tio->c_cc[LX_VTIME] = s_tio->c_cc[VTIME]; + } + + l_tio->c_cc[LX_VINTR] = s_tio->c_cc[VINTR]; + l_tio->c_cc[LX_VQUIT] = s_tio->c_cc[VQUIT]; + l_tio->c_cc[LX_VERASE] = s_tio->c_cc[VERASE]; + l_tio->c_cc[LX_VKILL] = s_tio->c_cc[VKILL]; + l_tio->c_cc[LX_VSWTC] = s_tio->c_cc[VSWTCH]; +} + +static int +/*ARGSUSED*/ +ict_tcsets(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tios, *l_tiosp = (struct lx_termios *)arg; + 
struct termios s_tios; + struct lx_cc lio; + int ldlinux, ret; + + assert(cmd == TCSETS || cmd == TCSETSW || cmd == TCSETSF); + + /* Copy in the data. */ + if (uucopy(l_tiosp, &l_tios, sizeof (l_tios)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if ((ldlinux = ldlinux_check(fd, stat)) < 0) + return (ldlinux); + + if (ldlinux == 1) { + termios2lx_cc(&l_tios, &lio); + if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + } + + l2s_termios(&l_tios, &s_tios); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, (intptr_t)&s_tios); + return ((ret < 0) ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_tcseta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termio l_tio, *l_tiop = (struct lx_termio *)arg; + struct termio s_tio; + struct lx_cc lio; + int ldlinux, ret; + + assert(cmd == TCSETA || cmd == TCSETAW || cmd == TCSETAF); + + /* Copy in the data. */ + if (uucopy(l_tiop, &l_tio, sizeof (l_tio)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if ((ldlinux = ldlinux_check(fd, stat)) < 0) + return (ldlinux); + + if (ldlinux == 1) { + termio2lx_cc(&l_tio, &lio); + if (ioctl_istr(fd, TIOCSETLD, "TIOCSETLD", + &lio, sizeof (lio)) < 0) + return (-errno); + } + + l2s_termio(&l_tio, &s_tio); + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, cmd, cmd_str); + ret = ioctl(fd, cmd, (intptr_t)&s_tio); + return ((ret < 0) ? -errno : ret); +} + +/* + * The Solaris TIOCGPGRP ioctl does not have exactly the same semantics as + * the Linux one. To mimic Linux semantics we have to do some extra work + * normally done by the Solaris version of tcgetpgrp(). 
+ */ +static int +/*ARGSUSED*/ +ict_tiocgpgrp(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + pid_t ttysid, mysid; + int ret; + + assert(cmd == LX_TIOCGPGRP); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGSID, "TIOCGSID"); + if (ioctl(fd, TIOCGSID, (intptr_t)&ttysid) < 0) + return (-errno); + if ((mysid = getsid(0)) < 0) + return (-errno); + if (mysid != ttysid) + return (-ENOTTY); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGPGRP, "TIOCGPGRP"); + ret = ioctl(fd, TIOCGPGRP, arg); + return ((ret < 0) ? -errno : ret); +} + +static int +/*ARGSUSED*/ +ict_sptlock(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + assert(cmd == LX_TIOCSPTLCK); + + /* + * The success/fail return values are different between Linux + * and Solaris. Linux expects 0 or -1. Solaris can return + * positive number on success. + */ + if (ioctl_istr(fd, UNLKPT, "UNLKPT", NULL, 0) < 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_gptn(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + int ptyno, *ptynop = (int *)arg; + + assert(cmd == LX_TIOCGPTN); + assert(getmajor(stat->st_rdev) == ioc_translator_ptm.idt_major); + + /* This operation is only valid for the lx_ptm device. */ + ptyno = LX_PTM_DEV_TO_PTS(stat->st_rdev); + + /* Copy out the data. 
*/ + if (uucopy(&ptyno, ptynop, sizeof (ptyno)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tiocgwinsz(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct winsize winsize, *winsizep = (struct winsize *)arg; + + assert(cmd == LX_TIOCGWINSZ); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, TIOCGWINSZ, "TIOCGWINSZ"); + if (ioctl(fd, TIOCGWINSZ, arg) >= 0) + return (0); + if (errno != EINVAL) + return (-errno); + + bzero(&winsize, sizeof (winsize)); + if (uucopy(&winsize, winsizep, sizeof (winsize)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tcgets_emulate(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tio, *l_tiop = (struct lx_termios *)arg; + struct termios s_tio; + + assert(cmd == LX_TCGETS); + + if (syscall(SYS_brand, B_TTYMODES, &s_tio) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termios(&s_tio, &l_tio); + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tcgets_native(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termios l_tio, *l_tiop = (struct lx_termios *)arg; + struct termios s_tio; + struct lx_cc lio; + int ldlinux; + + assert(cmd == LX_TCGETS); + + if ((ldlinux = ldlinux_check(fd, stat)) < 0) + return (ldlinux); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCGETS, "TCGETS"); + if (ioctl(fd, TCGETS, (intptr_t)&s_tio) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termios(&s_tio, &l_tio); + + /* Copy out the data. */ + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. 
+ */ + if (ldlinux == 0) + return (0); + + if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD", &lio, sizeof (lio)) < 0) + return (-errno); + + l_tio.c_cc[LX_VEOF] = lio.veof; + l_tio.c_cc[LX_VEOL] = lio.veol; + l_tio.c_cc[LX_VMIN] = lio.vmin; + l_tio.c_cc[LX_VTIME] = lio.vtime; + + /* Copy out the data. */ + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + return (0); +} + +/* + * Emulate the Linux TCGETA ioctl: fetch the native termio settings, + * convert them to the Linux layout and copy them out. If the ldlinux + * module is on the stream, overlay the control characters it tracks + * and copy the result out again. Returns 0 on success or a negated + * errno on failure. + */ +static int +/*ARGSUSED*/ +ict_tcgeta(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + struct lx_termio l_tio, *l_tiop = (struct lx_termio *)arg; + struct termio s_tio; + struct lx_cc lio; + int ldlinux; + + assert(cmd == LX_TCGETA); + + if ((ldlinux = ldlinux_check(fd, stat)) < 0) + return (ldlinux); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TCGETA, "TCGETA"); + if (ioctl(fd, TCGETA, (intptr_t)&s_tio) < 0) + return (-errno); + + /* Now munge the data to how Linux wants it. */ + s2l_termio(&s_tio, &l_tio); + + /* Copy out the data. */ + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + /* + * The TIOCSETLD/TIOCGETLD ioctls are only supported by the + * ldlinux strmod. So make sure the module exists on the + * target stream before we invoke the ioctl. + */ + if (ldlinux == 0) + return (0); + + if (ioctl_istr(fd, TIOCGETLD, "TIOCGETLD", &lio, sizeof (lio)) < 0) + return (-errno); + + /* + * lio.veol is deliberately not copied back. The other termio + * helpers in this file (l2s_termio, s2l_termio, termio2lx_cc) + * never index the legacy c_cc array with LX_VEOL, and Linux's + * termio c_cc is shorter than its VEOL index, so storing there + * would write past the array. + * NOTE(review): confirm against the struct lx_termio definition. + */ + l_tio.c_cc[LX_VEOF] = lio.veof; + l_tio.c_cc[LX_VMIN] = lio.vmin; + l_tio.c_cc[LX_VTIME] = lio.vtime; + + /* Copy out the data. */ + if (uucopy(&l_tio, l_tiop, sizeof (l_tio)) != 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_tiocsctty(int fd, struct stat *stat, int cmd, char *cmd_str, intptr_t arg) +{ + pid_t mysid, ttysid; + + if ((mysid = getsid(0)) < 0) + return (-errno); + + /* Check if this fd is already our ctty.
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCGSID, "TIOCGSID"); + if (ioctl(fd, TIOCGSID, (intptr_t)&ttysid) >= 0) + if (mysid == ttysid) + return (0); + + /* + * Need to make sure we're a session leader, otherwise the + * TIOCSCTTY ioctl will fail. + */ + if (mysid != getpid()) + (void) setpgrp(); + + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, TIOCSCTTY, "TIOCSCTTY"); + if (ioctl(fd, TIOCSCTTY, 0) < 0) + return (-errno); + return (0); +} + +/* + * /dev/dsp ioctl translators and support + */ +static int +i_is_dsp_dev(int fd) +{ + int minor; + + /* + * This is a cloning device so we have to ask the driver + * what kind of minor node this is. + */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM"); + if (ioctl(fd, LXA_IOC_GETMINORNUM, &minor) < 0) + return (-EINVAL); + if (minor != LXA_MINORNUM_DSP) + return (-EINVAL); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_reset(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + int err; + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Nothing to really do on Solaris. */ + return (0); +} + +static void +i_oss_fmt_str(char *buf, int buf_size, uint_t mask) +{ + int i, first = 1; + + assert(buf != NULL); + + buf[0] = '\0'; + for (i = 0; oss_fmt_str[i].i2s_str != NULL; i++) { + if ((oss_fmt_str[i].i2s_int != mask) && + ((oss_fmt_str[i].i2s_int & mask) == 0)) + continue; + if (first) + first = 0; + else + (void) strlcat(buf, " | ", buf_size); + (void) strlcat(buf, oss_fmt_str[i].i2s_str, buf_size); + } +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getfmts(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + char buf[MSGBUF]; + uint_t *maskp = (uint_t *)arg; + uint_t mask = 0; + int i, amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETFMTS); + + /* Ioctl is only supported on dsp audio devices. 
*/ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Test to see what Linux oss formats the target device supports. */ + for (i = 0; oft_table[i].oft_oss_fmt != 0; i++) { + + /* Initialize the mode request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate a Linux oss format into Solaris settings. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) { + sa_info.record.encoding = oft_table[i].oft_encoding; + sa_info.record.precision = oft_table[i].oft_precision; + } + if ((amode == O_WRONLY) || (amode == O_RDWR)) { + sa_info.play.encoding = oft_table[i].oft_encoding; + sa_info.play.precision = oft_table[i].oft_precision; + } + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + if (ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) + continue; + + /* This Linux oss format is supported. */ + mask |= oft_table[i].oft_oss_fmt; + } + + if (lx_debug_enabled != 0) { + i_oss_fmt_str(buf, sizeof (buf), mask); + lx_debug("\toss formats supported = 0x%x (%s)", mask, buf); + } + if (uucopy(&mask, maskp, sizeof (mask)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_setfmts(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + char buf[MSGBUF]; + uint_t *maskp = (uint_t *)arg; + uint_t mask; + int i, amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETFMTS); + + /* Ioctl is only supported on dsp audio devices. 
*/ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(maskp, &mask, sizeof (mask)) != 0) + return (-errno); + + if (lx_debug_enabled != 0) { + i_oss_fmt_str(buf, sizeof (buf), mask); + lx_debug("\toss formats request = 0x%x (%s)", mask, buf); + } + + if ((mask == (uint_t)-1) || (mask == 0)) { + lx_debug("\tXXX: possible oss formats query?"); + return (-EINVAL); + } + + /* Check if multiple format bits were specified. */ + if (!BIT_ONLYONESET(mask)) + return (-EINVAL); + + /* Decode the oss format request into a native format. */ + for (i = 0; oft_table[i].oft_oss_fmt != 0; i++) { + if (oft_table[i].oft_oss_fmt == mask) + break; + } + if (oft_table[i].oft_oss_fmt == 0) + return (-EINVAL); + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the mode request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) { + sa_info.record.encoding = oft_table[i].oft_encoding; + sa_info.record.precision = oft_table[i].oft_precision; + } + if ((amode == O_WRONLY) || (amode == O_RDWR)) { + sa_info.play.encoding = oft_table[i].oft_encoding; + sa_info.play.precision = oft_table[i].oft_precision; + } + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + return ((ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_channels(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + uint_t *channelsp = (uint_t *)arg; + uint_t channels; + int amode, err; + + assert((cmd == LX_OSS_SNDCTL_DSP_CHANNELS) || + (cmd == LX_OSS_SNDCTL_DSP_STEREO)); + + /* Ioctl is only supported on dsp audio devices. 
*/ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(channelsp, &channels, sizeof (channels)) != 0) + return (-errno); + + lx_debug("\toss %s request = 0x%x (%u)", + (cmd == LX_OSS_SNDCTL_DSP_CHANNELS) ? "channel" : "stereo", + channels, channels); + + if (channels == (uint_t)-1) { + lx_debug("\tXXX: possible channel/stereo query?"); + return (-EINVAL); + } + + if (cmd == LX_OSS_SNDCTL_DSP_STEREO) { + /* + * There doesn't seem to be any documentation for + * SNDCTL_DSP_STEREO. Looking at source that uses or + * used this ioctl seems to indicate that the + * functionality provided by this ioctl has been + * subsumed by the SNDCTL_DSP_CHANNELS ioctl. It + * seems that the only arguments ever passed to + * the SNDCTL_DSP_STEREO. Ioctl are boolean values + * of '0' or '1'. Hence we'll start out strict and + * only support those values. + * + * Some online forum discussions about this ioctl + * seemed to indicate that in case of success it + * returns the "stereo" setting (ie, either + * '0' for mono or '1' for stereo). + */ + if ((channels != 0) && (channels != 1)) { + lx_debug("\tinvalid stereo request"); + return (-EINVAL); + } + channels += 1; + } else { + /* Limit the system to one or two channels. */ + if ((channels != 1) && (channels != 2)) { + lx_debug("\tinvalid channel request"); + return (-EINVAL); + } + } + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the channel request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) + sa_info.record.channels = channels; + if ((amode == O_WRONLY) || (amode == O_RDWR)) + sa_info.play.channels = channels; + + /* Send the request. 
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + if (ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) + return (-errno); + + if (cmd == LX_OSS_SNDCTL_DSP_STEREO) + return (channels - 1); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_speed(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + audio_info_t sa_info; + uint_t *speedp = (uint_t *)arg; + uint_t speed; + int amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SPEED); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(speedp, &speed, sizeof (speed)) != 0) + return (-errno); + + lx_debug("\toss speed request = 0x%x (%u)", speed, speed); + + if (speed == (uint_t)-1) { + lx_debug("\tXXX: possible oss speed query?"); + return (-EINVAL); + } + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* Initialize the speed request. */ + AUDIO_INITINFO(&sa_info); + + /* Translate the Linux oss request into a Solaris request. */ + if ((amode == O_RDONLY) || (amode == O_RDWR)) + sa_info.record.sample_rate = speed; + if ((amode == O_WRONLY) || (amode == O_RDWR)) + sa_info.play.sample_rate = speed; + + /* Send the request. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_SETINFO, "AUDIO_SETINFO"); + return ((ioctl(fd, AUDIO_SETINFO, &sa_info) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getblksize(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_frag_info_t fi; + uint_t *blksizep = (uint_t *)arg; + uint_t blksize; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETBLKSIZE); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment count and size. 
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + blksize = fi.lxa_fi_size; + + if (uucopy(&blksize, blksizep, sizeof (blksize)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getspace(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lx_oss_audio_buf_info_t *spacep = (lx_oss_audio_buf_info_t *)arg; + lx_oss_audio_buf_info_t space; + lxa_frag_info_t fi; + int err; + + assert((cmd == LX_OSS_SNDCTL_DSP_GETOSPACE) || + (cmd == LX_OSS_SNDCTL_DSP_GETISPACE)); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment count and size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + /* Return the current fragment count and size. */ + space.fragstotal = fi.lxa_fi_cnt; + space.fragsize = fi.lxa_fi_size; + + /* + * We'll lie and tell applications that they can always write + * out at least one fragment without blocking. 
+ */ + space.fragments = 1; + space.bytes = space.fragsize; + + if (cmd == LX_OSS_SNDCTL_DSP_GETOSPACE) + lx_debug("\toss get output space result = "); + if (cmd == LX_OSS_SNDCTL_DSP_GETISPACE) + lx_debug("\toss get input space result = "); + + lx_debug("\t\tbytes = 0x%x (%u), fragments = 0x%x (%u)", + space.bytes, space.bytes, space.fragments, space.fragments); + lx_debug("\t\tfragtotal = 0x%x (%u), fragsize = 0x%x (%u)", + space.fragstotal, space.fragstotal, + space.fragsize, space.fragsize); + + if (uucopy(&space, spacep, sizeof (space)) != 0) + return (-errno); + return (0); +} + +/* + * Translate the Linux OSS SNDCTL_DSP_SETFRAGMENT ioctl into the lx_audio + * LXA_IOC_SET_FRAG_INFO ioctl. The packed power-of-two request is + * validated and expanded before being passed to the driver. Returns 0 + * on success or a negated errno value on failure. + */ +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_setfragment(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_frag_info_t fi; + uint_t *fraginfop = (uint_t *)arg; + uint_t fraginfo, frag_size, frag_cnt; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETFRAGMENT); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(fraginfop, &fraginfo, sizeof (fraginfo)) != 0) + return (-errno); + + /* + * The argument to this ioctl is a 32-bit integer of the + * format 0x MMMM SSSS where: + * SSSS - requests a fragment size of 2^SSSS + * MMMM - requests a maximum fragment count of 2^MMMM + * if MMMM is 0x7fff then the application is requesting + * no limits on the number of fragments. + */ + + frag_size = fraginfo & 0xffff; + frag_cnt = fraginfo >> 16; + + lx_debug("\toss fragment request: " + "power size = 0x%x (%u), power cnt = 0x%x (%u)", + frag_size, frag_size, frag_cnt, frag_cnt); + + /* Limit the supported fragment size from 2^4 to 2^31. */ + if ((frag_size < 4) || (frag_size > 31)) + return (-EINVAL); + + /* Limit the number of fragments from 2^1 to 2^32. */ + if (((frag_cnt < 1) || (frag_cnt > 32)) && (frag_cnt != 0x7fff)) + return (-EINVAL); + + /* Expand the fragment values.
*/ + /* + * Shift an unsigned one: an exponent of 31 is accepted above and + * 2^31 is not representable in a signed int, so (1 << 31) would + * overflow. + */ + frag_size = 1U << frag_size; + if ((frag_cnt == 32) || (frag_cnt == 0x7fff)) { + frag_cnt = UINT_MAX; + } else { + frag_cnt = 1U << frag_cnt; + } + + lx_debug("\toss fragment request: " + "translated size = 0x%x (%u), translated cnt = 0x%x (%u)", + frag_size, frag_size, frag_cnt, frag_cnt); + + fi.lxa_fi_size = frag_size; + fi.lxa_fi_cnt = frag_cnt; + + /* Set the current fragment count and size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_SET_FRAG_INFO, "LXA_IOC_SET_FRAG_INFO"); + return ((ioctl(fd, LXA_IOC_SET_FRAG_INFO, &fi) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getcaps(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *capsp = (uint_t *)arg; + uint_t caps; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETCAPS); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* + * Report that we support mmap access + * this is where things start to get fun. + */ + caps = LX_OSS_DSP_CAP_MMAP | LX_OSS_DSP_CAP_TRIGGER; + + if (uucopy(&caps, capsp, sizeof (caps)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_settrigger(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *triggerp = (uint_t *)arg; + uint_t trigger; + int err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SETTRIGGER); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + if (uucopy(triggerp, &trigger, sizeof (trigger)) != 0) + return (-errno); + + lx_debug("\toss set trigger request = 0x%x (%u)", + trigger, trigger); + + /* We only support two types of trigger requests. */ + if ((trigger != LX_OSS_PCM_DISABLE_OUTPUT) && + (trigger != LX_OSS_PCM_ENABLE_OUTPUT)) + return (-EINVAL); + + /* + * We only support triggers on devices open for write access, + * but we don't need to check for that here since the driver will + * verify this for us.
+ */ + + /* Send the trigger command to the audio device. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MMAP_OUTPUT, "LXA_IOC_MMAP_OUTPUT"); + return ((ioctl(fd, LXA_IOC_MMAP_OUTPUT, &trigger) < 0) ? -errno : 0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_getoptr(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + static uint_t bytes = 0; + lx_oss_count_info_t ci; + lxa_frag_info_t fi; + audio_info_t ai; + int ptr, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_GETOPTR); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* Query the current fragment size. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GET_FRAG_INFO, "LXA_IOC_GET_FRAG_INFO"); + if (ioctl(fd, LXA_IOC_GET_FRAG_INFO, &fi) < 0) + return (-errno); + + /* Figure out how many samples have been played. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_GETINFO, "AUDIO_GETINFO"); + if (ioctl(fd, AUDIO_GETINFO, &ai) < 0) + return (-errno); + ci.bytes = ai.play.samples + ai.record.samples; + + /* + * Figure out how many fragments of audio have gone out since + * the last call to this ioctl. + */ + ci.blocks = (ci.bytes - bytes) / fi.lxa_fi_size; + bytes = ci.bytes; + + /* Figure out the current fragment offset for mmap audio output. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MMAP_PTR, "LXA_IOC_MMAP_PTR"); + if (ioctl(fd, LXA_IOC_MMAP_PTR, &ptr) < 0) { + /* + * We really should return an error here, but some + * application (*cough* *cough* flash) expect this + * ioctl to work even if they haven't mmaped the + * device. 
+ */ + ci.ptr = 0; + } else { + ci.ptr = ptr; + } + + lx_debug("\toss get output ptr result = "); + lx_debug("\t\t" + "bytes = 0x%x (%u), blocks = 0x%x (%u), ptr = 0x%x (%u)", + ci.bytes, ci.bytes, ci.blocks, ci.blocks, ci.ptr, ci.ptr); + + if (uucopy(&ci, (void *)arg, sizeof (ci)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_sndctl_dsp_sync(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + int amode, err; + + assert(cmd == LX_OSS_SNDCTL_DSP_SYNC); + + /* Ioctl is only supported on dsp audio devices. */ + if ((err = i_is_dsp_dev(fd)) != 0) + return (err); + + /* We need to know the access mode for the file. */ + if ((amode = fcntl(fd, F_GETFL)) < 0) + return (-EINVAL); + amode &= O_ACCMODE; + assert((amode == O_RDONLY) || (amode == O_WRONLY) || (amode == O_RDWR)); + + /* + * A sync is basically a noop for record only device. + * We check for this here because on Linux a sync on a record + * only device returns success immediately. But the Solaris + * equivalent to a drain operation is a AUDIO_DRAIN, and if + * it's issued to a record only device it will fail and return + * EINVAL. + */ + if (amode == O_RDONLY) + return (0); + + /* Drain any pending output. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, AUDIO_DRAIN, "AUDIO_DRAIN"); + return ((ioctl(fd, AUDIO_DRAIN, NULL) < 0) ? -errno : 0); +} + +/* + * /dev/mixer ioctl translators and support + * + * There are some interesting things to take note of for supporting + * /dev/mixer ioctls. + * + * 1) We report support for the following mixer resources: + * VOLUME, PCM, MIC + * + * 2) We assume the following number of channels for each mixer resource: + * VOLUME: 2 channels + * PCM: 2 channels + * MIC: 1 channel + * + * 3) OSS sets the gain on each channel independently but on Solaris + * there is only one gain value and a balance value. So we need + * to do some translation back and forth. 
+ * + * 4) OSS assumes direct access to hardware but Solaris provides + * virtualized audio device access (where everyone who opens /dev/audio + * gets a virtualized audio channel stream, all of which are merged + * together by a software mixer before reaching the hardware). Hence + * mapping OSS mixer resources to Solaris mixer resources takes some + * work. VOLUME and MIC resources are mapped to the actual underlying + * audio hardware resources. PCM resources are mapped to the virtual + * audio channel output level. This mapping becomes more complicated + * if there are no open audio output channels. In this case the + * lx_audio device caches the PCM channel settings for us and applies + * them to any new audio output channels that get opened. (This + * is the reason that we don't use AUDIO_SETINFO ioctls directly + * but instead the lx_audio driver's custom LXA_IOC_MIXER_SET_* + * and LXA_IOC_MIXER_GET_* ioctls.) For more information see + * the comments in lx_audio.c. + */ +static int +i_is_mixer_dev(int fd) +{ + int minor; + + /* + * This is a cloning device so we have to ask the driver + * what kind of minor node this is. + */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_GETMINORNUM, "LXA_IOC_GETMINORNUM"); + if (ioctl(fd, LXA_IOC_GETMINORNUM, &minor) < 0) + return (-EINVAL); + if (minor != LXA_MINORNUM_MIXER) + return (-EINVAL); + return (0); +} + +static int +i_oss_mixer_ml_to_val(lxa_mixer_levels_t *ml, uint_t *val) +{ + int range, val1, val2; + + /* Deal with the other easy case, both channels have the same level. */ + if (ml->lxa_ml_balance == AUDIO_MID_BALANCE) { + *val = LX_OSS_MIXER_ENC2( + LX_OSS_S2L_GAIN(ml->lxa_ml_gain), + LX_OSS_S2L_GAIN(ml->lxa_ml_gain)); + assert(LX_OSS_MIXER_2CH_OK(*val)); + return (0); + } + + /* Decode the balance/gain into two separate levels.
*/ + if (ml->lxa_ml_balance > AUDIO_MID_BALANCE) { + val2 = ml->lxa_ml_gain; + + range = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE; + val1 = AUDIO_RIGHT_BALANCE - ml->lxa_ml_balance; + val1 = (val2 * val1) / range; + } else { + assert(ml->lxa_ml_balance < AUDIO_MID_BALANCE); + val1 = ml->lxa_ml_gain; + + range = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE; + val2 = ml->lxa_ml_balance; + val2 = (val1 * val2) / range; + } + + *val = LX_OSS_MIXER_ENC2(LX_OSS_S2L_GAIN(val1), + LX_OSS_S2L_GAIN(val2)); + return (0); +} + +static int +i_oss_mixer_val_to_ml(uint_t val, lxa_mixer_levels_t *ml_old, + lxa_mixer_levels_t *ml) +{ + int range, val1, val2; + + if (!LX_OSS_MIXER_2CH_OK(val)) + return (-EINVAL); + + val1 = LX_OSS_MIXER_DEC1(val); + val2 = LX_OSS_MIXER_DEC2(val); + + /* + * Deal with the easy case. + * Both channels have the same non-zero level. + */ + if ((val1 != 0) && (val1 == val2)) { + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1); + ml->lxa_ml_balance = AUDIO_MID_BALANCE; + return (0); + } + + /* If both levels are zero, preserve the current balance setting. */ + if ((val1 == 0) && (val2 == 0)) { + ml->lxa_ml_gain = 0; + ml->lxa_ml_balance = ml_old->lxa_ml_balance; + return (0); + } + + /* + * First set the gain to match the highest channel value volume. + * Then use the balance to simulate lower volume on the second + * channel. 
+ */ + if (val1 > val2) { + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val1); + + range = AUDIO_MID_BALANCE - AUDIO_LEFT_BALANCE; + ml->lxa_ml_balance = 0; + ml->lxa_ml_balance += ((val2 * range) / val1); + } else { + assert(val1 < val2); + + ml->lxa_ml_gain = LX_OSS_L2S_GAIN(val2); + + range = AUDIO_RIGHT_BALANCE - AUDIO_MID_BALANCE; + ml->lxa_ml_balance = AUDIO_RIGHT_BALANCE; + ml->lxa_ml_balance -= ((val1 * range) / val2); + } + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_volume(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + char *cmd_txt; + int err, cmd_new; + + assert((cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) || + (cmd == LX_OSS_SOUND_MIXER_READ_PCM)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) { + cmd_new = LXA_IOC_MIXER_GET_VOL; + cmd_txt = "LXA_IOC_MIXER_GET_VOL"; + } + if (cmd == LX_OSS_SOUND_MIXER_READ_PCM) { + cmd_new = LXA_IOC_MIXER_GET_PCM; + cmd_txt = "LXA_IOC_MIXER_GET_PCM"; + } + + /* Attempt to get the device output gain and balance. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt); + if (ioctl(fd, cmd_new, &ml) < 0) + return (-errno); + + lx_debug("\tlx_audio mixer results, " + "gain = 0x%x (%u), balance = 0x%x (%u)", + ml.lxa_ml_gain, ml.lxa_ml_gain, + ml.lxa_ml_balance, ml.lxa_ml_balance); + + assert(LXA_MIXER_LEVELS_OK(&ml)); + + /* Translate the mixer levels struct to an OSS mixer value. */ + if ((err = i_oss_mixer_ml_to_val(&ml, &val)) != 0) + return (err); + assert(LX_OSS_MIXER_2CH_OK(val)); + + lx_debug("\toss get mixer %s result = 0x%x (%u)", + (cmd == LX_OSS_SOUND_MIXER_READ_VOLUME) ?
"volume" : "pcm", + val, val); + + if (uucopy(&val, valp, sizeof (val)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_write_volume(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml, ml_old; + uint_t *valp = (uint_t *)arg; + uint_t val; + char *cmd_txt; + int err, cmd_new; + + assert((cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) || + (cmd == LX_OSS_SOUND_MIXER_WRITE_PCM)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(valp, &val, sizeof (val)) != 0) + return (-errno); + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) { + cmd_new = LXA_IOC_MIXER_SET_VOL; + cmd_txt = "LXA_IOC_MIXER_SET_VOL"; + + /* Attempt to get the device output gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, + LXA_IOC_MIXER_GET_VOL, "LXA_IOC_MIXER_GET_VOL"); + if (ioctl(fd, LXA_IOC_MIXER_GET_VOL, &ml_old) < 0) + return (-errno); + } + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_PCM) { + cmd_new = LXA_IOC_MIXER_SET_PCM; + cmd_txt = "LXA_IOC_MIXER_SET_PCM"; + + /* Attempt to get the device output gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, + LXA_IOC_MIXER_GET_PCM, "LXA_IOC_MIXER_GET_PCM"); + if (ioctl(fd, LXA_IOC_MIXER_GET_PCM, &ml_old) < 0) + return (-errno); + } + + lx_debug("\toss set mixer %s request = 0x%x (%u)", + (cmd == LX_OSS_SOUND_MIXER_WRITE_VOLUME) ? "volume" : "pcm", + val, val); + + /* Translate an OSS mixer value to mixer levels. */ + if ((err = i_oss_mixer_val_to_ml(val, &ml_old, &ml)) != 0) + return (err); + assert(LXA_MIXER_LEVELS_OK(&ml)); + + lx_debug("\tlx_audio mixer request, " + "gain = 0x%x (%u), balance = 0x%x (%u)", + ml.lxa_ml_gain, ml.lxa_ml_gain, + ml.lxa_ml_balance, ml.lxa_ml_balance); + + /* Attempt to set the device output gain. 
*/ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", fd, cmd_new, cmd_txt); + if (ioctl(fd, cmd_new, &ml) < 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_mic(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + int err; + + assert((cmd == LX_OSS_SOUND_MIXER_READ_MIC) || + (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + /* Attempt to get the device input gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MIXER_GET_MIC, "LXA_IOC_MIXER_GET_MIC"); + if (ioctl(fd, LXA_IOC_MIXER_GET_MIC, &ml) < 0) + return (-errno); + + /* Report the mixer as having two channels. */ + val = LX_OSS_MIXER_ENC2( + LX_OSS_S2L_GAIN(ml.lxa_ml_gain), + LX_OSS_S2L_GAIN(ml.lxa_ml_gain)); + + if (cmd == LX_OSS_SOUND_MIXER_READ_MIC) + lx_debug("\toss get mixer mic result = 0x%x (%u)", val, val); + if (cmd == LX_OSS_SOUND_MIXER_READ_IGAIN) + lx_debug("\toss get mixer igain result = 0x%x (%u)", val, val); + + if (uucopy(&val, valp, sizeof (val)) != 0) + return (-errno); + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_write_mic(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + lxa_mixer_levels_t ml; + uint_t *valp = (uint_t *)arg; + uint_t val; + int err; + + assert((cmd == LX_OSS_SOUND_MIXER_WRITE_MIC) || + (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN)); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(valp, &val, sizeof (val)) != 0) + return (-errno); + + if (cmd == LX_OSS_SOUND_MIXER_WRITE_MIC) + lx_debug("\toss set mixer mic request = 0x%x (%u)", val, val); + if (cmd == LX_OSS_SOUND_MIXER_WRITE_IGAIN) + lx_debug("\toss set mixer igain request = 0x%x (%u)", val, val); + + /* The mic only supports one channel. 
*/ + val = LX_OSS_MIXER_DEC1(val); + + ml.lxa_ml_balance = AUDIO_MID_BALANCE; + ml.lxa_ml_gain = LX_OSS_L2S_GAIN(val); + + /* Attempt to set the device input gain. */ + lx_debug("\tioctl(%d, 0x%x - %s, ...)", + fd, LXA_IOC_MIXER_SET_MIC, "LXA_IOC_MIXER_SET_MIC"); + if (ioctl(fd, LXA_IOC_MIXER_SET_MIC, &ml) < 0) + return (-errno); + + return (0); +} + +static int +/*ARGSUSED*/ +ict_oss_mixer_read_devs(int fd, struct stat *stat, + int cmd, char *cmd_str, intptr_t arg) +{ + uint_t *resultp = (uint_t *)arg; + uint_t result = 0; + int err; + + if (cmd == LX_OSS_SOUND_MIXER_READ_DEVMASK) { + /* Bitmap of all the mixer channels we supposedly support. */ + result = ((1 << LX_OSS_SM_PCM) | + (1 << LX_OSS_SM_MIC) | + (1 << LX_OSS_SM_VOLUME)); + } + if (cmd == LX_OSS_SOUND_MIXER_READ_STEREODEVS) { + /* Bitmap of the stereo mixer channels we supposedly support. */ + result = ((1 << LX_OSS_SM_PCM) | + (1 << LX_OSS_SM_VOLUME)); + } + if ((cmd == LX_OSS_SOUND_MIXER_READ_RECMASK) || + (cmd == LX_OSS_SOUND_MIXER_READ_RECSRC)) { + /* Bitmap of the mixer input channels we supposedly support. */ + result = (1 << LX_OSS_SM_MIC); + } + assert(result != 0); + + /* Ioctl is only supported on mixer audio devices. */ + if ((err = i_is_mixer_dev(fd)) != 0) + return (err); + + if (uucopy(&result, resultp, sizeof (result)) != 0) + return (-errno); + + return (0); +} + +/* + * Audio ioctl conversion support structures. + */ +static oss_fmt_translator_t oft_table[] = { + { LX_OSS_AFMT_MU_LAW, AUDIO_ENCODING_ULAW, 8 }, + { LX_OSS_AFMT_A_LAW, AUDIO_ENCODING_ALAW, 8 }, + { LX_OSS_AFMT_S8, AUDIO_ENCODING_LINEAR, 8 }, + { LX_OSS_AFMT_U8, AUDIO_ENCODING_LINEAR8, 8 }, + { LX_OSS_AFMT_S16_NE, AUDIO_ENCODING_LINEAR, 16 }, + { 0, 0, 0 } +}; + +/* + * Ioctl translator definitions. + */ + +/* + * Defines to help with creating ioctl translators. + * + * IOC_CMD_TRANSLATOR_NONE - Ioctl has the same semantics and argument + * values on Solaris and Linux but may have different command values. 
+ * (Macro assumes the symbolic Linux name assigned to the ioctl command + * value is the same as the Solaris symbol but pre-pended with an "LX_") + * + * IOC_CMD_TRANSLATOR_PASS - Ioctl is a Linux specific ioctl and should + * be passed through unmodified. + * + * IOC_CMD_TRANSLATOR_FILTER - Ioctl has the same command name on + * Solaris and Linux and needs a translation function that is common to + * more than one ioctl. (Macro assumes the symbolic Linux name assigned + * to the ioctl command value is the same as the Solaris symbol but + * pre-pended with an "LX_") + * + * IOC_CMD_TRANSLATOR_CUSTOM - Ioctl needs special handling via a + * translation function. + */ +#define IOC_CMD_TRANSLATOR_NONE(ioc_cmd_sym) \ + { (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym, \ + ioc_cmd_sym, #ioc_cmd_sym, ict_pass }, + +#define IOC_CMD_TRANSLATOR_PASS(ioc_cmd_sym) \ + { (int)ioc_cmd_sym, #ioc_cmd_sym, \ + ioc_cmd_sym, #ioc_cmd_sym, ict_pass }, + +#define IOC_CMD_TRANSLATOR_FILTER(ioc_cmd_sym, ioct_handler) \ + { (int)LX_##ioc_cmd_sym, "LX_" #ioc_cmd_sym, \ + ioc_cmd_sym, #ioc_cmd_sym, ioct_handler }, + +#define IOC_CMD_TRANSLATOR_CUSTOM(ioc_cmd_sym, ioct_handler) \ + { (int)ioc_cmd_sym, #ioc_cmd_sym, \ + (int)ioc_cmd_sym, #ioc_cmd_sym, ioct_handler }, + +#define IOC_CMD_TRANSLATOR_END \ + { 0, NULL, 0, NULL, NULL } + +/* All files will need to support these ioctls. */ +#define IOC_CMD_TRANSLATORS_ALL \ + IOC_CMD_TRANSLATOR_NONE(FIONREAD) \ + IOC_CMD_TRANSLATOR_NONE(FIONBIO) + +/* Any files supporting streams semantics will need these ioctls. 
*/ +#define IOC_CMD_TRANSLATORS_STREAMS \ + IOC_CMD_TRANSLATOR_NONE(TCXONC) \ + IOC_CMD_TRANSLATOR_NONE(TCFLSH) \ + IOC_CMD_TRANSLATOR_NONE(TIOCEXCL) \ + IOC_CMD_TRANSLATOR_NONE(TIOCNXCL) \ + IOC_CMD_TRANSLATOR_NONE(TIOCSPGRP) \ + IOC_CMD_TRANSLATOR_NONE(TIOCSTI) \ + IOC_CMD_TRANSLATOR_NONE(TIOCSWINSZ) \ + IOC_CMD_TRANSLATOR_NONE(TIOCMBIS) \ + IOC_CMD_TRANSLATOR_NONE(TIOCMBIC) \ + IOC_CMD_TRANSLATOR_NONE(TIOCMSET) \ + IOC_CMD_TRANSLATOR_NONE(TIOCSETD) \ + IOC_CMD_TRANSLATOR_NONE(FIOASYNC) \ + IOC_CMD_TRANSLATOR_NONE(FIOSETOWN) \ + IOC_CMD_TRANSLATOR_NONE(TCSBRK) \ + \ + IOC_CMD_TRANSLATOR_FILTER(TCSETS, ict_tcsets) \ + IOC_CMD_TRANSLATOR_FILTER(TCSETSW, ict_tcsets) \ + IOC_CMD_TRANSLATOR_FILTER(TCSETSF, ict_tcsets) \ + IOC_CMD_TRANSLATOR_FILTER(TCSETA, ict_tcseta) \ + IOC_CMD_TRANSLATOR_FILTER(TCSETAW, ict_tcseta) \ + IOC_CMD_TRANSLATOR_FILTER(TCSETAF, ict_tcseta) \ + \ + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCSBRKP, ict_tcsbrkp) + + +/* + * Translators for non-device files. + */ +static ioc_cmd_translator_t ioc_translators_file[] = { + IOC_CMD_TRANSLATORS_ALL + IOC_CMD_TRANSLATOR_END +}; + +static ioc_cmd_translator_t ioc_translators_fifo[] = { + IOC_CMD_TRANSLATORS_ALL + IOC_CMD_TRANSLATORS_STREAMS + IOC_CMD_TRANSLATOR_END +}; + +static ioc_cmd_translator_t ioc_translators_sock[] = { + IOC_CMD_TRANSLATORS_ALL + + IOC_CMD_TRANSLATOR_NONE(FIOGETOWN) + IOC_CMD_TRANSLATOR_NONE(SIOCSPGRP) + IOC_CMD_TRANSLATOR_NONE(SIOCGPGRP) + + IOC_CMD_TRANSLATOR_FILTER(SIOCATMARK, ict_sioifoob) + + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFFLAGS, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFFLAGS, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFDSTADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFDSTADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFBRDADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFBRDADDR, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFNETMASK, 
ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFNETMASK, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMETRIC, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMETRIC, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMTU, ict_sioifreq) + IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMTU, ict_sioifreq) + + IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFCONF, ict_siocgifconf) + IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFHWADDR, ict_siocifhwaddr) + IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCSIFHWADDR, ict_siocifhwaddr) + + IOC_CMD_TRANSLATOR_END +}; + +/* + * Translators for devices. + */ +static ioc_cmd_translator_t ioc_cmd_translators_ptm[] = { + IOC_CMD_TRANSLATORS_ALL + IOC_CMD_TRANSLATORS_STREAMS + + IOC_CMD_TRANSLATOR_NONE(TIOCPKT) + + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP, ict_tiocgpgrp) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSPTLCK, ict_sptlock) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPTN, ict_gptn) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_emulate) + + IOC_CMD_TRANSLATOR_END +}; +static ioc_dev_translator_t ioc_translator_ptm = { + LX_PTM_DRV, /* idt_driver */ + 0, /* idt_major */ + ioc_cmd_translators_ptm +}; + +static ioc_cmd_translator_t ioc_cmd_translators_pts[] = { + IOC_CMD_TRANSLATORS_ALL + IOC_CMD_TRANSLATORS_STREAMS + + IOC_CMD_TRANSLATOR_NONE(TIOCGETD) + IOC_CMD_TRANSLATOR_NONE(TIOCGSID) + IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY) + + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPGRP, ict_tiocgpgrp) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_native) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA, ict_tcgeta) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY, ict_tiocsctty) + + IOC_CMD_TRANSLATOR_END +}; +static ioc_dev_translator_t ioc_translator_pts = { + "pts", /* idt_driver */ + 0, /* idt_major */ + ioc_cmd_translators_pts +}; + +static ioc_dev_translator_t ioc_translator_sy = { + "sy", /* idt_driver */ + 0, /* idt_major */ + + /* + * /dev/tty (which is implemented via the "sy" driver) is 
basically + * a layered driver that passes on requests to the ctty for the + * current process. Since ctty's are currently always implemented + * via the pts driver, we should make sure to support all the + * same ioctls on the sy driver as we do on the pts driver. + */ + ioc_cmd_translators_pts +}; + +static ioc_cmd_translator_t ioc_cmd_translators_zcons[] = { + IOC_CMD_TRANSLATORS_ALL + IOC_CMD_TRANSLATORS_STREAMS + + IOC_CMD_TRANSLATOR_NONE(TIOCNOTTY) + + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETS, ict_tcgets_native) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TCGETA, ict_tcgeta) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGWINSZ, ict_tiocgwinsz) + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSCTTY, ict_tiocsctty) + + IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCLINUX, ict_einval) + + IOC_CMD_TRANSLATOR_END +}; +static ioc_dev_translator_t ioc_translator_zcons = { + "zcons", /* idt_driver */ + 0, /* idt_major */ + ioc_cmd_translators_zcons +}; + +static ioc_cmd_translator_t ioc_cmd_translators_lx_audio[] = { + IOC_CMD_TRANSLATORS_ALL + + /* /dev/dsp ioctls */ + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_RESET, + ict_oss_sndctl_dsp_reset) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETFMTS, + ict_oss_sndctl_dsp_getfmts) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFMTS, + ict_oss_sndctl_dsp_setfmts) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_CHANNELS, + ict_oss_sndctl_dsp_channels) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_STEREO, + ict_oss_sndctl_dsp_channels) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SPEED, + ict_oss_sndctl_dsp_speed) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETBLKSIZE, + ict_oss_sndctl_dsp_getblksize) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SYNC, + ict_oss_sndctl_dsp_sync) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETFRAGMENT, + ict_oss_sndctl_dsp_setfragment) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOSPACE, + ict_oss_sndctl_dsp_getspace) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETCAPS, + ict_oss_sndctl_dsp_getcaps) + 
IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_SETTRIGGER, + ict_oss_sndctl_dsp_settrigger) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETOPTR, + ict_oss_sndctl_dsp_getoptr) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SNDCTL_DSP_GETISPACE, + ict_oss_sndctl_dsp_getspace) + + /* /dev/mixer level ioctls */ + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_VOLUME, + ict_oss_mixer_read_volume) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_PCM, + ict_oss_mixer_read_volume) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_MIC, + ict_oss_mixer_read_mic) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_IGAIN, + ict_oss_mixer_read_mic) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_VOLUME, + ict_oss_mixer_write_volume) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_PCM, + ict_oss_mixer_write_volume) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_MIC, + ict_oss_mixer_write_mic) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_WRITE_IGAIN, + ict_oss_mixer_write_mic) + + /* /dev/mixer capability ioctls */ + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_STEREODEVS, + ict_oss_mixer_read_devs) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_DEVMASK, + ict_oss_mixer_read_devs) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECMASK, + ict_oss_mixer_read_devs) + IOC_CMD_TRANSLATOR_CUSTOM(LX_OSS_SOUND_MIXER_READ_RECSRC, + ict_oss_mixer_read_devs) + + IOC_CMD_TRANSLATOR_END +}; +static ioc_dev_translator_t ioc_translator_lx_audio = { + "lx_audio", /* idt_driver */ + 0, /* idt_major */ + ioc_cmd_translators_lx_audio +}; + +/* + * An array of all the device translators. + */ +static ioc_dev_translator_t *ioc_translators_dev[] = { + &ioc_translator_lx_audio, + &ioc_translator_ptm, + &ioc_translator_pts, + &ioc_translator_sy, + &ioc_translator_zcons, + NULL +}; + +/* + * Translators for filesystems. 
+ */ +static ioc_cmd_translator_t ioc_cmd_translators_autofs[] = { + IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_READY) + IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_FAIL) + IOC_CMD_TRANSLATOR_PASS(LX_AUTOFS_IOC_CATATONIC) + IOC_CMD_TRANSLATOR_END +}; + +static ioc_fs_translator_t ioc_translator_autofs = { + LX_AUTOFS_NAME, /* ift_filesystem */ + ioc_cmd_translators_autofs +}; + +/* + * An array of all the filesystem translators. + */ +static ioc_fs_translator_t *ioc_translators_fs[] = { + &ioc_translator_autofs, + NULL +}; + +/* + * Ioctl error translator definitions. + */ +#define IOC_ERRNO_TRANSLATOR(iet_cmd_sym, iet_errno) \ + { (int)LX_##iet_cmd_sym, "LX_" #iet_cmd_sym, iet_errno }, + +#define IOC_ERRNO_TRANSLATOR_END \ + { 0, NULL, 0 } + +static ioc_errno_translator_t ioc_translators_errno[] = { + IOC_ERRNO_TRANSLATOR(TCGETS, ENOTTY) + IOC_ERRNO_TRANSLATOR(TCSETS, ENOTTY) + IOC_ERRNO_TRANSLATOR(TCSBRK, ENOTTY) + IOC_ERRNO_TRANSLATOR(TCXONC, ENOTTY) + IOC_ERRNO_TRANSLATOR(TCFLSH, ENOTTY) + IOC_ERRNO_TRANSLATOR(TIOCGPGRP, ENOTTY) + IOC_ERRNO_TRANSLATOR(TIOCSPGRP, ENOTTY) + IOC_ERRNO_TRANSLATOR(TIOCGWINSZ, ENOTTY) + IOC_ERRNO_TRANSLATOR_END +}; + +int +lx_vhangup(void) +{ + if (geteuid() != 0) + return (-EPERM); + + vhangup(); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/iovec.c b/usr/src/lib/brand/lx/lx_brand/common/iovec.c new file mode 100644 index 0000000000..c9b48fb173 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/iovec.c @@ -0,0 +1,206 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/uio.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <alloca.h> +#include <string.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> +#include <sys/lx_types.h> + +static int +lx_is_directory(int fd) +{ + struct stat64 sbuf; + + if (fstat64(fd, &sbuf) < 0) + sbuf.st_mode = 0; + + return ((sbuf.st_mode & S_IFMT) == S_IFDIR); +} + +int +lx_read(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + void *buf = (void *)p2; + size_t nbyte = (size_t)p3; + ssize_t ret; + + if (lx_is_directory(fd)) + return (-EISDIR); + + if ((ret = read(fd, buf, nbyte)) < 0) + return (-errno); + + return (ret); +} + +int +lx_pread64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) +{ + int fd = (int)p1; + void *buf = (void *)p2; + size_t nbyte = (size_t)p3; + uintptr_t off_lo = p4; + uintptr_t off_hi = p5; + ssize_t ret; + + if (lx_is_directory(fd)) + return (-EISDIR); + + ret = pread64(fd, buf, nbyte, (off64_t)LX_32TO64(off_lo, off_hi)); + + if (ret < 0) + return (-errno); + + return (ret); +} + +/* + * Implementation of Linux readv() and writev() system calls. + * + * The Linux system calls differ from the Solaris system calls in a few key + * areas: + * + * - On Solaris, the maximum number of I/O vectors that can be passed to readv() + * or writev() is IOV_MAX (16). Linux has a much larger restriction (1024). 
+ * + * - Passing 0 as a vector count is an error on Solaris, but on Linux results + * in a return value of 0. Even though the man page says the opposite. + * + * - If the Nth vector results in an error, Solaris will return an error code + * for the entire operation. Linux only returns an error if there has been + * no data transferred yet. Otherwise, it returns the number of bytes + * transferred up until that point. + * + * In order to accommodate these differences, we implement these functions as a + * series of ordinary read() or write() calls. + */ + +#define LX_IOV_MAX 1024 /* Also called MAX_IOVEC */ + +static int +lx_iovec_copy_and_check(const struct iovec *iovp, struct iovec *iov, int count) +{ + int i; + ssize_t cnt = 0; + + if (uucopy(iovp, (void *)iov, count * sizeof (struct iovec)) != 0) + return (-errno); + + for (i = 0; i < count; i++) { + cnt += iov[i].iov_len; + if (iov[i].iov_len < 0 || cnt < 0) + return (-EINVAL); + } + + return (0); +} + +int +lx_readv(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + const struct iovec *iovp = (const struct iovec *)p2; + int count = (int)p3; + struct iovec *iov; + ssize_t total = 0, ret; + int i; + + if (count == 0) + return (0); + + if (count < 0 || count > LX_IOV_MAX) + return (-EINVAL); + + if (lx_is_directory(fd)) + return (-EISDIR); + + iov = SAFE_ALLOCA(count * sizeof (struct iovec)); + if (iov == NULL) + return (-ENOMEM); + if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0) + return (ret); + + for (i = 0; i < count; i++) { + ret = read(fd, iov[i].iov_base, iov[i].iov_len); + + if (ret < 0) { + if (total > 0) + return (total); + return (-errno); + } + + total += ret; + } + + return (total); +} + +int +lx_writev(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + int fd = (int)p1; + const struct iovec *iovp = (const struct iovec *)p2; + int count = (int)p3; + struct iovec *iov; + ssize_t total = 0, ret; + int i; + + if (count == 0) + return (0); + + if (count < 0 || count > LX_IOV_MAX) 
+ return (-EINVAL); + + iov = SAFE_ALLOCA(count * sizeof (struct iovec)); + if (iov == NULL) + return (-ENOMEM); + if ((ret = lx_iovec_copy_and_check(iovp, iov, count)) != 0) + return (ret); + + for (i = 0; i < count; i++) { + ret = write(fd, iov[i].iov_base, iov[i].iov_len); + + if (ret < 0) { + if (total > 0) + return (total); + return (-errno); + } + + total += ret; + } + + return (total); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c new file mode 100644 index 0000000000..8392b770c8 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -0,0 +1,1210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#include <sys/inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/fstyp.h> +#include <sys/fsid.h> +#include <sys/systm.h> +#include <sys/auxv.h> +#include <sys/frame.h> +#include <sys/brand.h> + +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <errno.h> +#include <syslog.h> +#include <signal.h> +#include <fcntl.h> +#include <synch.h> +#include <libelf.h> +#include <pthread.h> +#include <utime.h> +#include <dirent.h> +#include <ucontext.h> +#include <libintl.h> +#include <locale.h> + +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_brand.h> +#include <sys/lx_types.h> +#include <sys/lx_stat.h> +#include <sys/lx_statfs.h> +#include <sys/lx_ioctl.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thread.h> +#include <sys/lx_thunk_server.h> + +/* + * Map solaris errno to the linux equivalent. + */ +static int stol_errno[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 35, 37, 22, 38, 22, /* 49 */ + 52, 53, 54, 55, 56, 57, 58, 59, 22, 22, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 22, 22, 72, 22, 22, 74, 36, 75, + 76, 77, 78, 79, 80, 81, 82, 83, 84, 38, + 40, 85, 86, 39, 87, 88, 89, 90, 91, 92, /* 99 */ + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, + 103, 104, 105, 106, 107, 22, 22, 22, 22, 22, + 22, 22, 22, 108, 109, 110, 111, 112, 113, 114, /* 149 */ + 115, 116 +}; + +char lx_release[128]; + +/* + * Map a linux locale ending string to the solaris equivalent. 
+ */ +struct lx_locale_ending { + const char *linux_end; /* linux ending string */ + const char *solaris_end; /* to transform with this string */ + int le_size; /* linux ending string length */ + int se_size; /* solaris ending string length */ +}; + +#define l2s_locale(lname, sname) \ + {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1} + +static struct lx_locale_ending lx_locales[] = { + l2s_locale(".utf8", ".UTF-8"), + l2s_locale(".utf8@euro", ".UTF-8"), + l2s_locale("@euro", ".ISO8859-15"), + l2s_locale(".iso885915", ".ISO8859-15"), + l2s_locale(".euckr", ".EUC"), + l2s_locale(".euctw", ".EUC"), + l2s_locale(".koi8r", ".KOI8-R"), + l2s_locale(".gb18030", ".GB18030"), + l2s_locale(".gbk", ".GBK"), + l2s_locale("@cyrillic", ".ISO8859-5") +}; + +#define MAXLOCALENAMELEN 30 +#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ +#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ +#endif + +/* + * This flag is part of the registration with the in-kernel brand module. It's + * used in lx_handler() to determine if we should go back into the kernel after + * a system call in case the kernel needs to perform some post-syscall work + * like tracing for example. + */ +int lx_traceflag; + +#define NOSYS_NULL 1 +#define NOSYS_NO_EQUIV 2 +#define NOSYS_KERNEL 3 +#define NOSYS_UNDOC 4 +#define NOSYS_OBSOLETE 5 + +/* + * SYS_PASSTHRU denotes a system call we can just call on behalf of the + * branded process without having to translate the arguments. + * + * The restriction on this is that the call in question MUST return -1 to + * denote an error. 
+ */ +#define SYS_PASSTHRU 5 + +static char *nosys_msgs[] = { + "Either not yet done, or we haven't come up with an excuse", + "No such Linux system call", + "No equivalent Solaris functionality", + "Reads/modifies Linux kernel state", + "Undocumented and/or rarely used system call", + "Unsupported, obsolete system call" +}; + +struct lx_sysent { + char *sy_name; + int (*sy_callc)(); + char sy_flags; + char sy_narg; +}; + +static struct lx_sysent sysents[LINUX_MAX_SYSCALL + 1]; + +static uintptr_t stack_bottom; + +int lx_install = 0; /* install mode enabled if non-zero */ +int lx_strict = 0; /* "strict" mode enabled if non-zero */ +int lx_verbose = 0; /* verbose mode enabled if non-zero */ +int lx_debug_enabled = 0; /* debugging output enabled if non-zero */ + +pid_t zoneinit_pid; /* zone init PID */ + +thread_key_t lx_tsd_key; + +int +uucopy_unsafe(const void *src, void *dst, size_t n) +{ + bcopy(src, dst, n); + return (0); +} + +int +uucopystr_unsafe(const void *src, void *dst, size_t n) +{ + (void) strncpy((char *)dst, (const char *)src, n); + return (0); +} + +static void +i_lx_msg(int fd, char *msg, va_list ap) +{ + int i; + char buf[LX_MSG_MAXLEN]; + + /* LINTED [possible expansion issues] */ + i = vsnprintf(buf, sizeof (buf), msg, ap); + buf[LX_MSG_MAXLEN - 1] = '\0'; + if (i == -1) + return; + + /* if debugging is enabled, send this message to debug output */ + if (lx_debug_enabled != 0) + lx_debug("%s", buf); + + /* + * If we are trying to print to stderr, we also want to send the + * message to syslog. + */ + if (fd == 2) { + syslog(LOG_ERR, "%s", buf); + + /* + * We let the user choose whether or not to see these + * messages on the console. + */ + if (lx_verbose == 0) + return; + } + + /* we retry in case of EINTR */ + do { + i = write(fd, buf, strlen(buf)); + } while ((i == -1) && (errno == EINTR)); +} + +/*PRINTFLIKE1*/ +void +lx_err(char *msg, ...) 
+{ + va_list ap; + + assert(msg != NULL); + + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); +} + +/* + * This is just a non-zero exit value which also isn't one that would allow + * us to easily detect if a branded process exited because of a recursive + * fatal error. + */ +#define LX_ERR_FATAL 42 + +/* + * Our own custom version of abort(), this routine will be used in place + * of the one located in libc. The primary difference is that this version + * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the + * SIGABRT sent causes us to dump core and is not caught by a user program. + */ +void +abort(void) +{ + static int aborting = 0; + + struct sigaction sa; + sigset_t sigmask; + + /* watch out for recursive calls to this function */ + if (aborting != 0) + exit(LX_ERR_FATAL); + + aborting = 1; + + /* + * Block all signals here to avoid taking any signals while exiting + * in an effort to avoid any strange user interaction with our death. + */ + (void) sigfillset(&sigmask); + (void) sigprocmask(SIG_BLOCK, &sigmask, NULL); + + /* + * Our own version of abort(3C) that we know will never call + * a user-installed SIGABRT handler first. We WANT to die. + * + * Do this by resetting the handler to SIG_DFL, and releasing any + * held SIGABRTs. + * + * If no SIGABRTs are pending, send ourselves one. + * + * The while loop is a bit of overkill, but abort(3C) does it to + * assure it never returns so we will as well. + */ + (void) sigemptyset(&sa.sa_mask); + sa.sa_sigaction = SIG_DFL; + sa.sa_flags = 0; + + for (;;) { + (void) sigaction(SIGABRT, &sa, NULL); + (void) sigrelse(SIGABRT); + (void) thr_kill(thr_self(), SIGABRT); + } + + /*NOTREACHED*/ +} + +/*PRINTFLIKE1*/ +void +lx_msg(char *msg, ...) +{ + va_list ap; + + assert(msg != NULL); + va_start(ap, msg); + i_lx_msg(STDOUT_FILENO, msg, ap); + va_end(ap); +} + +/*PRINTFLIKE1*/ +void +lx_err_fatal(char *msg, ...) 
+{ + va_list ap; + + assert(msg != NULL); + + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); + abort(); +} + +/* + * See if it is safe to alloca() sz bytes. Return 1 for yes, 0 for no. + */ +int +lx_check_alloca(size_t sz) +{ + uintptr_t sp = (uintptr_t)&sz; + uintptr_t end = sp - sz; + + return ((end < sp) && (end >= stack_bottom)); +} + +/*PRINTFLIKE1*/ +void +lx_unsupported(char *msg, ...) +{ + va_list ap; + + assert(msg != NULL); + + /* send the msg to the error stream */ + va_start(ap, msg); + i_lx_msg(STDERR_FILENO, msg, ap); + va_end(ap); + + /* + * If the user doesn't trust the application to responsibly + * handle ENOTSUP, we kill the application. + */ + if (lx_strict) + (void) kill(getpid(), SIGSYS); +} + +extern void lx_runexe(void *argv, int32_t entry); +int lx_init(int argc, char *argv[], char *envp[]); + +static int +lx_emulate_args(lx_regs_t *rp, struct lx_sysent *s, uintptr_t *args) +{ + /* + * If the system call takes 6 args, then libc has stashed them in + * memory at the address contained in %ebx. Except for some syscalls + * which store the 6th argument in %ebp. + */ + if (s->sy_narg == 6 && !(s->sy_flags & EBP_HAS_ARG6)) { + if (uucopy((void *)rp->lxr_ebx, args, + sizeof (args[0]) * 6) != 0) + return (-stol_errno[errno]); + } else { + args[0] = rp->lxr_ebx; + args[1] = rp->lxr_ecx; + args[2] = rp->lxr_edx; + args[3] = rp->lxr_esi; + args[4] = rp->lxr_edi; + args[5] = rp->lxr_ebp; + } + + return (0); +} + +void +lx_emulate(lx_regs_t *rp) +{ + struct lx_sysent *s; + uintptr_t args[6]; + uintptr_t gs = rp->lxr_gs & 0xffff; /* %gs is only 16 bits */ + int syscall_num, ret; + + syscall_num = rp->lxr_eax; + + if (syscall_num < 0 || syscall_num > LINUX_MAX_SYSCALL) + s = &sysents[0]; + else + s = &sysents[syscall_num]; + + if ((ret = lx_emulate_args(rp, s, args)) != 0) + goto out; + + /* + * If the tracing flag is enabled we call into the brand-specific + * kernel module to handle the tracing activity (DTrace or ptrace). 
+ * It would be tempting to perform DTrace activity in the brand + * module's syscall trap callback, rather than having to return + * to the kernel here, but -- since argument encoding can vary + * according to the specific system call -- that would require + * replicating the knowledge of argument decoding in the kernel + * module as well as here in the brand library. + */ + if (lx_traceflag != 0) { + /* + * Part of the ptrace "interface" is that on syscall entry + * %eax should be reported as -ENOSYS while the orig_eax + * field of the user structure needs to contain the actual + * system call number. If we end up stopping here, the + * controlling process will dig the lx_regs_t structure out of + * our stack. + */ + rp->lxr_orig_eax = syscall_num; + rp->lxr_eax = -stol_errno[ENOSYS]; + + (void) syscall(SYS_brand, B_SYSENTRY, syscall_num, args); + + /* + * The external tracer may have modified the arguments to this + * system call. Refresh the argument cache to account for this. + */ + if ((ret = lx_emulate_args(rp, s, args)) != 0) + goto out; + } + + if (s->sy_callc == NULL) { + lx_unsupported(gettext("unimplemented syscall #%d (%s): %s\n"), + syscall_num, s->sy_name, nosys_msgs[s->sy_flags]); + ret = -stol_errno[ENOTSUP]; + goto out; + } + + if (lx_debug_enabled != 0) { + const char *fmt; + + switch (s->sy_narg) { + case 0: + fmt = "calling %s()"; + break; + case 1: + fmt = "calling %s(0x%p)"; + break; + case 2: + fmt = "calling %s(0x%p, 0x%p)"; + break; + case 3: + fmt = "calling %s(0x%p, 0x%p, 0x%p)"; + break; + case 4: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p)"; + break; + case 5: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p)"; + break; + case 6: + fmt = "calling %s(0x%p, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)"; + break; + } + + lx_debug(fmt, s->sy_name, args[0], args[1], args[2], args[3], + args[4], args[5]); + } + + if (gs != LWPGS_SEL) { + lx_tsd_t *lx_tsd; + + /* + * While a %gs of 0 is technically legal (as long as the + * application never dereferences 
memory using %gs), Solaris + * has its own ideas as to how a zero %gs should be handled in + * _update_sregs(), such that any 32-bit user process with a + * %gs of zero running on a system with a 64-bit kernel will + * have its %gs hidden base register stomped on on return from + * a system call, leaving an incorrect base address in place + * until the next time %gs is actually reloaded (forcing a + * reload of the base address from the appropriate descriptor + * table.) + * + * Of course the kernel will once again stomp on THAT base + * address when returning from a system call, resulting in + * an application segmentation fault. + * + * To avoid this situation, disallow a save of a zero %gs + * here in order to try and capture any Linux process that + * attempts to make a syscall with a zero %gs installed. + */ + assert(gs != 0); + + if ((ret = thr_getspecific(lx_tsd_key, + (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "lx_emulate", strerror(ret)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_gs = gs; + + lx_debug("lx_emulate(): gsp 0x%p, saved gs: 0x%x", lx_tsd, gs); + } + + if (s->sy_flags == SYS_PASSTHRU) + lx_debug("\tCalling Solaris %s()", s->sy_name); + + ret = s->sy_callc(args[0], args[1], args[2], args[3], args[4], args[5]); + + if (ret > -65536 && ret < 65536) + lx_debug("\t= %d", ret); + else + lx_debug("\t= 0x%x", ret); + + if ((s->sy_flags == SYS_PASSTHRU) && (ret == -1)) { + ret = -stol_errno[errno]; + } else { + /* + * If the return value is between -4096 and 0 we assume it's an + * error, so we translate the Solaris error number into the + * Linux equivalent. + */ + if (ret < 0 && ret > -4096) { + if (-ret >= + sizeof (stol_errno) / sizeof (stol_errno[0])) { + lx_debug("Invalid return value from emulated " + "syscall %d (%s): %d\n", + syscall_num, s->sy_name, ret); + assert(0); + } + + ret = -stol_errno[-ret]; + } + } + +out: + /* + * %eax holds the return code from the system call. 
+ */ + rp->lxr_eax = ret; + + /* + * If the trace flag is set, bounce into the kernel to let it do + * any necessary tracing (DTrace or ptrace). + */ + if (lx_traceflag != 0) { + rp->lxr_orig_eax = syscall_num; + (void) syscall(SYS_brand, B_SYSRETURN, syscall_num, ret); + } +} + +/* Transform the Linux locale name to make it look like a Solaris locale name */ +static const char * +lx_translate_locale(char *translated_name_mem, int mem_size) +{ + char *loc; + int i; + size_t len; + + if ((loc = getenv("LC_ALL")) == NULL) + if ((loc = getenv("LANG")) == NULL) + return ("C"); + + if (strlcpy(translated_name_mem, loc, mem_size) >= mem_size) + return (""); + + len = strlen(loc); + + /* replace the end of the locale name if it's a known pattern */ + for (i = 0; i < sizeof (lx_locales) / sizeof (struct lx_locale_ending); + i++) { + if (len <= lx_locales[i].le_size) + continue; + + if (strncmp(loc + len - lx_locales[i].le_size, + lx_locales[i].linux_end, lx_locales[i].le_size)) + continue; /* don't match */ + + if (len - lx_locales[i].le_size + lx_locales[i].se_size + >= mem_size) + return ("C"); /* size too small for the new name */ + + (void) strlcpy(translated_name_mem + len - + lx_locales[i].le_size, lx_locales[i].solaris_end, + lx_locales[i].se_size + 1); + + return ((const char *)translated_name_mem); + } + + /* no match */ + return (""); +} + +static void +lx_close_fh(FILE *file) +{ + int fd, fd_new; + + if (file == NULL) + return; + + if ((fd = fileno(file)) < 0) + return; + + fd_new = dup(fd); + if (fd_new == -1) + return; + + (void) fclose(file); + (void) dup2(fd_new, fd); + (void) close(fd_new); +} + +extern int set_l10n_alternate_root(char *path); + +/*ARGSUSED*/ +int +lx_init(int argc, char *argv[], char *envp[]) +{ + char *r; + auxv_t *ap; + int *p, err; + lx_elf_data_t edp; + lx_brand_registration_t reg; + char locale_translated_name[MAXLOCALENAMELEN]; + static lx_tsd_t lx_tsd; + + if ((set_l10n_alternate_root("/native") == 0) && + (setlocale(LC_ALL, 
lx_translate_locale(locale_translated_name, + sizeof (locale_translated_name))) != NULL) && + (bindtextdomain(TEXT_DOMAIN, "/native/usr/lib/locale") != NULL)) { + (void) textdomain(TEXT_DOMAIN); + } + + stack_bottom = 2 * sysconf(_SC_PAGESIZE); + + /* + * We need to shutdown all libc stdio. libc stdio normally goes to + * file descriptors, but since we're actually part of a linux + * process we don't own these file descriptors and we can't make + * any assumptions about their state. + */ + lx_close_fh(stdin); + lx_close_fh(stdout); + lx_close_fh(stderr); + + lx_debug_init(); + + r = getenv("LX_RELEASE"); + if (r == NULL) + (void) strlcpy(lx_release, LX_UNAME_RELEASE, 128); + else + (void) strlcpy(lx_release, r, 128); + + lx_debug("lx_release: %s\n", lx_release); + + /* + * Should we kill an application that attempts an unimplemented + * system call? + */ + if (getenv("LX_STRICT") != NULL) { + lx_strict = 1; + lx_debug("STRICT mode enabled.\n"); + } + + /* + * Are we in install mode? + */ + if (getenv("LX_INSTALL") != NULL) { + lx_install = 1; + lx_debug("INSTALL mode enabled.\n"); + } + + /* + * Should we attempt to send messages to the screen? + */ + if (getenv("LX_VERBOSE") != NULL) { + lx_verbose = 1; + lx_debug("VERBOSE mode enabled.\n"); + } + + lx_debug("executing linux process: %s", argv[0]); + lx_debug("branding myself and setting handler to 0x%p", + (void *)lx_handler_table); + + reg.lxbr_version = LX_VERSION; + reg.lxbr_handler = (void *)&lx_handler_table; + reg.lxbr_tracehandler = (void *)&lx_handler_trace_table; + reg.lxbr_traceflag = &lx_traceflag; + + /* + * Register the address of the user-space handler with the lx + * brand module. + */ + if (syscall(SYS_brand, B_REGISTER, &reg)) + lx_err_fatal(gettext("failed to brand the process")); + + /* + * Download data about the lx executable from the kernel. 
+ */ + if (syscall(SYS_brand, B_ELFDATA, (void *)&edp)) + lx_err_fatal(gettext( + "failed to get required ELF data from the kernel")); + + if (lx_ioctl_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "ioctl"); + + if (lx_stat_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "stat"); + + if (lx_statfs_init() != 0) + lx_err_fatal(gettext("failed to setup the %s translator"), + "statfs"); + + /* + * Find the aux vector on the stack. + */ + p = (int *)envp; + while (*p != NULL) + p++; + /* + * p is now pointing at the 0 word after the environ pointers. After + * that is the aux vectors. + */ + p++; + for (ap = (auxv_t *)p; ap->a_type != 0; ap++) { + switch (ap->a_type) { + case AT_BASE: + ap->a_un.a_val = edp.ed_base; + break; + case AT_ENTRY: + ap->a_un.a_val = edp.ed_entry; + break; + case AT_PHDR: + ap->a_un.a_val = edp.ed_phdr; + break; + case AT_PHENT: + ap->a_un.a_val = edp.ed_phent; + break; + case AT_PHNUM: + ap->a_un.a_val = edp.ed_phnum; + break; + default: + break; + } + } + + /* Do any thunk server initialization. */ + lxt_server_init(argc, argv); + + /* Setup signal handler information. */ + if (lx_siginit()) + lx_err_fatal(gettext( + "failed to initialize lx signals for the branded process")); + + /* Setup thread-specific data area for managing linux threads. */ + if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0) + lx_err_fatal( + gettext("%s failed: %s"), "thr_keycreate(lx_tsd_key)", + strerror(err)); + + lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key); + + /* Initialize the thread specific data for this thread. 
*/ + bzero(&lx_tsd, sizeof (lx_tsd)); + lx_tsd.lxtsd_gs = LWPGS_SEL; + + if ((err = thr_setspecific(lx_tsd_key, &lx_tsd)) != 0) + lx_err_fatal(gettext( + "Unable to initialize thread-specific data: %s"), + strerror(err)); + + /* Look up the PID that serves as init for this zone */ + if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0) + lx_err_fatal(gettext( + "Unable to find PID for zone init process: %s"), + strerror(-err)); + + /* + * Save the current context of this thread. + * We'll restore this context when this thread attempts to exit. + */ + if (getcontext(&lx_tsd.lxtsd_exit_context) != 0) + lx_err_fatal(gettext( + "Unable to initialize thread-specific exit context: %s"), + strerror(errno)); + + if (lx_tsd.lxtsd_exit == 0) { + lx_runexe(argv, edp.ed_ldentry); + /* lx_runexe() never returns. */ + assert(0); + } + + /* + * We are here because the Linux application called the exit() or + * exit_group() system call. In turn the brand library did a + * setcontext() to jump to the thread context state we saved above. + */ + if (lx_tsd.lxtsd_exit == 1) + thr_exit((void *)lx_tsd.lxtsd_exit_status); + else + exit(lx_tsd.lxtsd_exit_status); + + assert(0); + + /*NOTREACHED*/ + return (0); +} + +/* + * Walk back through the stack until we find the lx_emulate() frame. + */ +lx_regs_t * +lx_syscall_regs(void) +{ + /* LINTED - alignment */ + struct frame *fr = (struct frame *)_getfp(); + + while (fr->fr_savpc != (uintptr_t)&lx_emulate_done) { + fr = (struct frame *)fr->fr_savfp; + assert(fr->fr_savpc != NULL); + } + + return ((lx_regs_t *)((uintptr_t *)fr)[2]); +} + +int +lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp) +{ + pid_t pid; + lwpid_t tid; + + if (lpid == 0) { + pid = getpid(); + tid = thr_self(); + } else { + if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &pid, &tid) < 0) + return (-errno); + + /* + * If the returned pid is -1, that indicates we tried to + * look up the PID for init, but that process no longer + * exists. 
+ */ + if (pid == -1) + return (-ESRCH); + } + + if (uucopy(&pid, spid, sizeof (pid_t)) != 0) + return (-errno); + + if (uucopy(&tid, slwp, sizeof (lwpid_t)) != 0) + return (-errno); + + return (0); +} + +int +lx_lpid_to_spid(pid_t lpid, pid_t *spid) +{ + lwpid_t slwp; + + return (lx_lpid_to_spair(lpid, spid, &slwp)); +} + +char * +lx_fd_to_path(int fd, char *buf, int buf_size) +{ + char path_proc[MAXPATHLEN]; + pid_t pid; + int n; + + assert((buf != NULL) && (buf_size >= 0)); + + if (fd < 0) + return (NULL); + + if ((pid = getpid()) == -1) + return (NULL); + + (void) snprintf(path_proc, MAXPATHLEN, + "/native/proc/%d/path/%d", pid, fd); + + if ((n = readlink(path_proc, buf, buf_size - 1)) == -1) + return (NULL); + buf[n] = '\0'; + + return (buf); +} + +/* + * Create a translation routine that jumps to a particular emulation + * module syscall. + */ +#define IN_KERNEL_SYSCALL(name, num) \ +int \ +lx_##name(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, \ + uintptr_t p5, uintptr_t p6) \ +{ \ + int r; \ + lx_debug("\tsyscall %d re-vectoring to lx kernel module " \ + "for " #name "()", num); \ + r = syscall(SYS_brand, B_EMULATE_SYSCALL + num, p1, p2, \ + p3, p4, p5, p6); \ + return ((r == -1) ? 
-errno : r); \ +} + +IN_KERNEL_SYSCALL(kill, 37) +IN_KERNEL_SYSCALL(brk, 45) +IN_KERNEL_SYSCALL(ustat, 62) +IN_KERNEL_SYSCALL(getppid, 64) +IN_KERNEL_SYSCALL(sysinfo, 116) +IN_KERNEL_SYSCALL(modify_ldt, 123) +IN_KERNEL_SYSCALL(adjtimex, 124) +IN_KERNEL_SYSCALL(setresuid16, 164) +IN_KERNEL_SYSCALL(setresgid16, 170) +IN_KERNEL_SYSCALL(setresuid, 208) +IN_KERNEL_SYSCALL(setresgid, 210) +IN_KERNEL_SYSCALL(gettid, 224) +IN_KERNEL_SYSCALL(tkill, 238) +IN_KERNEL_SYSCALL(futex, 240) +IN_KERNEL_SYSCALL(set_thread_area, 243) +IN_KERNEL_SYSCALL(get_thread_area, 244) +IN_KERNEL_SYSCALL(set_tid_address, 258) + +static struct lx_sysent sysents[] = { + {"nosys", NULL, NOSYS_NULL, 0}, /* 0 */ + {"exit", lx_exit, 0, 1}, /* 1 */ + {"fork", lx_fork, 0, 0}, /* 2 */ + {"read", lx_read, 0, 3}, /* 3 */ + {"write", write, SYS_PASSTHRU, 3}, /* 4 */ + {"open", lx_open, 0, 3}, /* 5 */ + {"close", close, SYS_PASSTHRU, 1}, /* 6 */ + {"waitpid", lx_waitpid, 0, 3}, /* 7 */ + {"creat", creat, SYS_PASSTHRU, 2}, /* 8 */ + {"link", lx_link, 0, 2}, /* 9 */ + {"unlink", lx_unlink, 0, 1}, /* 10 */ + {"execve", lx_execve, 0, 3}, /* 11 */ + {"chdir", chdir, SYS_PASSTHRU, 1}, /* 12 */ + {"time", lx_time, 0, 1}, /* 13 */ + {"mknod", lx_mknod, 0, 3}, /* 14 */ + {"chmod", lx_chmod, 0, 2}, /* 15 */ + {"lchown16", lx_lchown16, 0, 3}, /* 16 */ + {"break", NULL, NOSYS_OBSOLETE, 0}, /* 17 */ + {"stat", NULL, NOSYS_OBSOLETE, 0}, /* 18 */ + {"lseek", lx_lseek, 0, 3}, /* 19 */ + {"getpid", lx_getpid, 0, 0}, /* 20 */ + {"mount", lx_mount, 0, 5}, /* 21 */ + {"umount", lx_umount, 0, 1}, /* 22 */ + {"setuid16", lx_setuid16, 0, 1}, /* 23 */ + {"getuid16", lx_getuid16, 0, 0}, /* 24 */ + {"stime", stime, SYS_PASSTHRU, 1}, /* 25 */ + {"ptrace", lx_ptrace, 0, 4}, /* 26 */ + {"alarm", (int (*)())alarm, SYS_PASSTHRU, 1}, /* 27 */ + {"fstat", NULL, NOSYS_OBSOLETE, 0}, /* 28 */ + {"pause", pause, SYS_PASSTHRU, 0}, /* 29 */ + {"utime", lx_utime, 0, 2}, /* 30 */ + {"stty", NULL, NOSYS_OBSOLETE, 0}, /* 31 */ + {"gtty", NULL, 
NOSYS_OBSOLETE, 0}, /* 32 */ + {"access", access, SYS_PASSTHRU, 2}, /* 33 */ + {"nice", nice, SYS_PASSTHRU, 1}, /* 34 */ + {"ftime", NULL, NOSYS_OBSOLETE, 0}, /* 35 */ + {"sync", lx_sync, 0, 0}, /* 36 */ + {"kill", lx_kill, 0, 2}, /* 37 */ + {"rename", lx_rename, 0, 2}, /* 38 */ + {"mkdir", mkdir, SYS_PASSTHRU, 2}, /* 39 */ + {"rmdir", lx_rmdir, 0, 1}, /* 40 */ + {"dup", dup, SYS_PASSTHRU, 1}, /* 41 */ + {"pipe", lx_pipe, 0, 1}, /* 42 */ + {"times", lx_times, 0, 1}, /* 43 */ + {"prof", NULL, NOSYS_OBSOLETE, 0}, /* 44 */ + {"brk", lx_brk, 0, 1}, /* 45 */ + {"setgid16", lx_setgid16, 0, 1}, /* 46 */ + {"getgid16", lx_getgid16, 0, 0}, /* 47 */ + {"signal", lx_signal, 0, 2}, /* 48 */ + {"geteuid16", lx_geteuid16, 0, 0}, /* 49 */ + {"getegid16", lx_getegid16, 0, 0}, /* 50 */ + {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 51 */ + {"umount2", lx_umount2, 0, 2}, /* 52 */ + {"lock", NULL, NOSYS_OBSOLETE, 0}, /* 53 */ + {"ioctl", lx_ioctl, 0, 3}, /* 54 */ + {"fcntl", lx_fcntl, 0, 3}, /* 55 */ + {"mpx", NULL, NOSYS_OBSOLETE, 0}, /* 56 */ + {"setpgid", lx_setpgid, 0, 2}, /* 57 */ + {"ulimit", NULL, NOSYS_OBSOLETE, 0}, /* 58 */ + {"olduname", NULL, NOSYS_OBSOLETE, 0}, /* 59 */ + {"umask", (int (*)())umask, SYS_PASSTHRU, 1}, /* 60 */ + {"chroot", chroot, SYS_PASSTHRU, 1}, /* 61 */ + {"ustat", lx_ustat, 0, 2}, /* 62 */ + {"dup2", lx_dup2, 0, 2}, /* 63 */ + {"getppid", lx_getppid, 0, 0}, /* 64 */ + {"getpgrp", lx_getpgrp, 0, 0}, /* 65 */ + {"setsid", lx_setsid, 0, 0}, /* 66 */ + {"sigaction", lx_sigaction, 0, 3}, /* 67 */ + {"sgetmask", NULL, NOSYS_OBSOLETE, 0}, /* 68 */ + {"ssetmask", NULL, NOSYS_OBSOLETE, 0}, /* 69 */ + {"setreuid16", lx_setreuid16, 0, 2}, /* 70 */ + {"setregid16", lx_setregid16, 0, 2}, /* 71 */ + {"sigsuspend", lx_sigsuspend, 0, 1}, /* 72 */ + {"sigpending", lx_sigpending, 0, 1}, /* 73 */ + {"sethostname", lx_sethostname, 0, 2}, /* 74 */ + {"setrlimit", lx_setrlimit, 0, 2}, /* 75 */ + {"getrlimit", lx_oldgetrlimit, 0, 2}, /* 76 */ + {"getrusage", lx_getrusage, 0, 2}, 
/* 77 */ + {"gettimeofday", lx_gettimeofday, 0, 2}, /* 78 */ + {"settimeofday", lx_settimeofday, 0, 2}, /* 79 */ + {"getgroups16", lx_getgroups16, 0, 2}, /* 80 */ + {"setgroups16", lx_setgroups16, 0, 2}, /* 81 */ + {"select", NULL, NOSYS_OBSOLETE, 0}, /* 82 */ + {"symlink", symlink, SYS_PASSTHRU, 2}, /* 83 */ + {"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */ + {"readlink", readlink, SYS_PASSTHRU, 3}, /* 85 */ + {"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */ + {"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */ + {"reboot", lx_reboot, 0, 4}, /* 88 */ + {"readdir", lx_readdir, 0, 3}, /* 89 */ + {"mmap", lx_mmap, 0, 6}, /* 90 */ + {"munmap", munmap, SYS_PASSTHRU, 2}, /* 91 */ + {"truncate", lx_truncate, 0, 2}, /* 92 */ + {"ftruncate", lx_ftruncate, 0, 2}, /* 93 */ + {"fchmod", fchmod, SYS_PASSTHRU, 2}, /* 94 */ + {"fchown16", lx_fchown16, 0, 3}, /* 95 */ + {"getpriority", lx_getpriority, 0, 2}, /* 96 */ + {"setpriority", lx_setpriority, 0, 3}, /* 97 */ + {"profil", NULL, NOSYS_NO_EQUIV, 0}, /* 98 */ + {"statfs", lx_statfs, 0, 2}, /* 99 */ + {"fstatfs", lx_fstatfs, 0, 2}, /* 100 */ + {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 101 */ + {"socketcall", lx_socketcall, 0, 2}, /* 102 */ + {"syslog", NULL, NOSYS_KERNEL, 0}, /* 103 */ + {"setitimer", lx_setitimer, 0, 3}, /* 104 */ + {"getitimer", getitimer, SYS_PASSTHRU, 2}, /* 105 */ + {"stat", lx_stat, 0, 2}, /* 106 */ + {"lstat", lx_lstat, 0, 2}, /* 107 */ + {"fstat", lx_fstat, 0, 2}, /* 108 */ + {"uname", NULL, NOSYS_OBSOLETE, 0}, /* 109 */ + {"oldiopl", NULL, NOSYS_NO_EQUIV, 0}, /* 110 */ + {"vhangup", lx_vhangup, 0, 0}, /* 111 */ + {"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */ + {"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */ + {"wait4", lx_wait4, 0, 4}, /* 114 */ + {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */ + {"sysinfo", lx_sysinfo, 0, 1}, /* 116 */ + {"ipc", lx_ipc, 0, 5}, /* 117 */ + {"fsync", lx_fsync, 0, 1}, /* 118 */ + {"sigreturn", lx_sigreturn, 0, 1}, /* 119 */ + {"clone", lx_clone, 0, 5}, /* 120 */ + {"setdomainname", 
lx_setdomainname, 0, 2}, /* 121 */ + {"uname", lx_uname, 0, 1}, /* 122 */ + {"modify_ldt", lx_modify_ldt, 0, 3}, /* 123 */ + {"adjtimex", lx_adjtimex, 0, 1}, /* 124 */ + {"mprotect", lx_mprotect, 0, 3}, /* 125 */ + {"sigprocmask", lx_sigprocmask, 0, 3}, /* 126 */ + {"create_module", NULL, NOSYS_KERNEL, 0}, /* 127 */ + {"init_module", NULL, NOSYS_KERNEL, 0}, /* 128 */ + {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 129 */ + {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 130 */ + {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 131 */ + {"getpgid", lx_getpgid, 0, 1}, /* 132 */ + {"fchdir", fchdir, SYS_PASSTHRU, 1}, /* 133 */ + {"bdflush", NULL, NOSYS_KERNEL, 0}, /* 134 */ + {"sysfs", lx_sysfs, 0, 3}, /* 135 */ + {"personality", lx_personality, 0, 1}, /* 136 */ + {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 137 */ + {"setfsuid16", lx_setfsuid16, 0, 1}, /* 138 */ + {"setfsgid16", lx_setfsgid16, 0, 1}, /* 139 */ + {"llseek", lx_llseek, 0, 5}, /* 140 */ + {"getdents", getdents, SYS_PASSTHRU, 3}, /* 141 */ + {"select", lx_select, 0, 5}, /* 142 */ + {"flock", lx_flock, 0, 2}, /* 143 */ + {"msync", lx_msync, 0, 3}, /* 144 */ + {"readv", lx_readv, 0, 3}, /* 145 */ + {"writev", lx_writev, 0, 3}, /* 146 */ + {"getsid", lx_getsid, 0, 1}, /* 147 */ + {"fdatasync", lx_fdatasync, 0, 1}, /* 148 */ + {"sysctl", lx_sysctl, 0, 1}, /* 149 */ + {"mlock", lx_mlock, 0, 2}, /* 150 */ + {"munlock", lx_munlock, 0, 2}, /* 151 */ + {"mlockall", lx_mlockall, 0, 1}, /* 152 */ + {"munlockall", lx_munlockall, 0, 0}, /* 153 */ + {"sched_setparam", lx_sched_setparam, 0, 2}, /* 154 */ + {"sched_getparam", lx_sched_getparam, 0, 2}, /* 155 */ + {"sched_setscheduler", lx_sched_setscheduler, 0, 3}, /* 156 */ + {"sched_getscheduler", lx_sched_getscheduler, 0, 1}, /* 157 */ + {"sched_yield", (int (*)())yield, SYS_PASSTHRU, 0}, /* 158 */ + {"sched_get_priority_max", lx_sched_get_priority_max, 0, 1}, /* 159 */ + {"sched_get_priority_min", lx_sched_get_priority_min, 0, 1}, /* 160 */ + {"sched_rr_get_interval", 
lx_sched_rr_get_interval, 0, 2}, /* 161 */ + {"nanosleep", nanosleep, SYS_PASSTHRU, 2}, /* 162 */ + {"mremap", NULL, NOSYS_NO_EQUIV, 0}, /* 163 */ + {"setresuid16", lx_setresuid16, 0, 3}, /* 164 */ + {"getresuid16", lx_getresuid16, 0, 3}, /* 165 */ + {"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */ + {"query_module", lx_query_module, NOSYS_KERNEL, 5}, /* 167 */ + {"poll", lx_poll, 0, 3}, /* 168 */ + {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */ + {"setresgid16", lx_setresgid16, 0, 3}, /* 170 */ + {"getresgid16", lx_getresgid16, 0, 3}, /* 171 */ + {"prctl", NULL, NOSYS_UNDOC, 0}, /* 172 */ + {"rt_sigreturn", lx_rt_sigreturn, 0, 0}, /* 173 */ + {"rt_sigaction", lx_rt_sigaction, 0, 4}, /* 174 */ + {"rt_sigprocmask", lx_rt_sigprocmask, 0, 4}, /* 175 */ + {"rt_sigpending", lx_rt_sigpending, 0, 2}, /* 176 */ + {"rt_sigtimedwait", lx_rt_sigtimedwait, 0, 4}, /* 177 */ + {"sigqueueinfo", NULL, NOSYS_UNDOC, 0}, /* 178 */ + {"rt_sigsuspend", lx_rt_sigsuspend, 0, 2}, /* 179 */ + {"pread64", lx_pread64, 0, 5}, /* 180 */ + {"pwrite64", pwrite64, SYS_PASSTHRU, 5}, /* 181 */ + {"chown16", lx_chown16, 0, 3}, /* 182 */ + {"getcwd", lx_getcwd, 0, 2}, /* 183 */ + {"capget", NULL, NOSYS_NO_EQUIV, 0}, /* 184 */ + {"capset", NULL, NOSYS_NO_EQUIV, 0}, /* 185 */ + {"sigaltstack", lx_sigaltstack, 0, 2}, /* 186 */ + {"sendfile", lx_sendfile, 0, 4}, /* 187 */ + {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 188 */ + {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 189 */ + {"vfork", lx_vfork, 0, 0}, /* 190 */ + {"getrlimit", lx_getrlimit, 0, 2}, /* 191 */ + {"mmap2", lx_mmap2, EBP_HAS_ARG6, 6}, /* 192 */ + {"truncate64", lx_truncate64, 0, 3}, /* 193 */ + {"ftruncate64", lx_ftruncate64, 0, 3}, /* 194 */ + {"stat64", lx_stat64, 0, 2}, /* 195 */ + {"lstat64", lx_lstat64, 0, 2}, /* 196 */ + {"fstat64", lx_fstat64, 0, 2}, /* 197 */ + {"lchown", lchown, SYS_PASSTHRU, 3}, /* 198 */ + {"getuid", (int (*)())getuid, SYS_PASSTHRU, 0}, /* 199 */ + {"getgid", (int (*)())getgid, SYS_PASSTHRU, 0}, /* 200 */ + {"geteuid", 
lx_geteuid, 0, 0}, /* 201 */ + {"getegid", lx_getegid, 0, 0}, /* 202 */ + {"setreuid", setreuid, SYS_PASSTHRU, 0}, /* 203 */ + {"setregid", setregid, SYS_PASSTHRU, 0}, /* 204 */ + {"getgroups", getgroups, SYS_PASSTHRU, 2}, /* 205 */ + {"setgroups", lx_setgroups, 0, 2}, /* 206 */ + {"fchown", lx_fchown, 0, 3}, /* 207 */ + {"setresuid", lx_setresuid, 0, 3}, /* 208 */ + {"getresuid", lx_getresuid, 0, 3}, /* 209 */ + {"setresgid", lx_setresgid, 0, 3}, /* 210 */ + {"getresgid", lx_getresgid, 0, 3}, /* 211 */ + {"chown", lx_chown, 0, 3}, /* 212 */ + {"setuid", setuid, SYS_PASSTHRU, 1}, /* 213 */ + {"setgid", setgid, SYS_PASSTHRU, 1}, /* 214 */ + {"setfsuid", lx_setfsuid, 0, 1}, /* 215 */ + {"setfsgid", lx_setfsgid, 0, 1}, /* 216 */ + {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 217 */ + {"mincore", mincore, SYS_PASSTHRU, 3}, /* 218 */ + {"madvise", lx_madvise, 0, 3}, /* 219 */ + {"getdents64", lx_getdents64, 0, 3}, /* 220 */ + {"fcntl64", lx_fcntl64, 0, 3}, /* 221 */ + {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 222 */ + {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 223 */ + {"gettid", lx_gettid, 0, 0}, /* 224 */ + {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 225 */ + {"setxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 226 */ + {"lsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 227 */ + {"fsetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 228 */ + {"getxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 229 */ + {"lgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 230 */ + {"fgetxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 231 */ + {"listxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 232 */ + {"llistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 233 */ + {"flistxattr", NULL, NOSYS_NO_EQUIV, 0}, /* 234 */ + {"removexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 235 */ + {"lremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 236 */ + {"fremovexattr", NULL, NOSYS_NO_EQUIV, 0}, /* 237 */ + {"tkill", lx_tkill, 0, 2}, /* 238 */ + {"sendfile64", lx_sendfile64, 0, 4}, /* 239 */ + {"futex", lx_futex, EBP_HAS_ARG6, 6}, /* 240 */ + {"sched_setaffinity", lx_sched_setaffinity, 0, 3}, /* 241 */ + 
{"sched_getaffinity", lx_sched_getaffinity, 0, 3}, /* 242 */ + {"set_thread_area", lx_set_thread_area, 0, 1}, /* 243 */ + {"get_thread_area", lx_get_thread_area, 0, 1}, /* 244 */ + {"io_setup", NULL, NOSYS_NO_EQUIV, 0}, /* 245 */ + {"io_destroy", NULL, NOSYS_NO_EQUIV, 0}, /* 246 */ + {"io_getevents", NULL, NOSYS_NO_EQUIV, 0}, /* 247 */ + {"io_submit", NULL, NOSYS_NO_EQUIV, 0}, /* 248 */ + {"io_cancel", NULL, NOSYS_NO_EQUIV, 0}, /* 249 */ + {"fadvise64", NULL, NOSYS_UNDOC, 0}, /* 250 */ + {"nosys", NULL, 0, 0}, /* 251 */ + {"group_exit", lx_group_exit, 0, 1}, /* 252 */ + {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 253 */ + {"epoll_create", NULL, NOSYS_NO_EQUIV, 0}, /* 254 */ + {"epoll_ctl", NULL, NOSYS_NO_EQUIV, 0}, /* 255 */ + {"epoll_wait", NULL, NOSYS_NO_EQUIV, 0}, /* 256 */ + {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 257 */ + {"set_tid_address", lx_set_tid_address, 0, 1}, /* 258 */ + {"timer_create", NULL, NOSYS_UNDOC, 0}, /* 259 */ + {"timer_settime", NULL, NOSYS_UNDOC, 0}, /* 260 */ + {"timer_gettime", NULL, NOSYS_UNDOC, 0}, /* 261 */ + {"timer_getoverrun", NULL, NOSYS_UNDOC, 0}, /* 262 */ + {"timer_delete", NULL, NOSYS_UNDOC, 0}, /* 263 */ + {"clock_settime", lx_clock_settime, 0, 2}, /* 264 */ + {"clock_gettime", lx_clock_gettime, 0, 2}, /* 265 */ + {"clock_getres", lx_clock_getres, 0, 2}, /* 266 */ + {"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */ + {"statfs64", lx_statfs64, 0, 2}, /* 268 */ + {"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */ + {"tgkill", lx_tgkill, 0, 3} /* 270 */ +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c new file mode 100644 index 0000000000..e547762378 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_thunk_server.c @@ -0,0 +1,1026 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The BrandZ Linux thunking server. + * + * The interfaces defined in this file form the server side of a bridge + * to allow native solaris process to access Linux services. Currently + * the Linux services that is made accessible by these interfaces here + * are: + * - Linux host <-> address naming services + * - Linux service <-> port naming services + * - Linux syslog + * + * Access to all these services is provided through a doors server. + * Currently the only client of these interfaces and the process that + * initially starts up the doors server is lx_thunk.so. + * + * lx_thunk.so is a native solaris library that is loaded into native + * solaris process that need to run inside a Linux zone and have access + * to Linux services. When lx_thunk.so receives a request that requires + * accessing Linux services it creates a "thunk server" process by + * forking and executing the following shell script (which runs as + * a native /bin/sh Linux process): + * /native/usr/lib/brand/lx/lx_thunk + * + * The first and only thing this shell script attempts to do is re-exec + * itself. 
The brand library will detect when this script attempts to + * re-exec itself and take control of the process. The exec() system + * call made by the Linux shell will never return. + * + * At this point the process becomes a "thunk server" process. + * The first thing it does is a bunch of initialization: + * + * - Sanity check that a file descriptor based communication mechanism + * needed to talk to the parent process is correctly initialized. + * + * - Verify that two predetermined file descriptors are FIFOs. + * These FIFOs will be used to establish communications with + * the client program that spawned us and which will be sending + * us requests. + * + * - Use existing debugging libraries (libproc.so, librtld_db.so, + * and the BrandZ lx plug-in to librtld_db.so) and /native/proc to + * walk the Linux link maps in our own address space to determine + * the address of the Linux dlsym() function. + * + * - Use the native Linux dlsym() function to look up other symbols + * (for both functions and variables) that we will need access + * to service thunking requests. + * + * - Create a doors server and notify the parent process that we + * are ready to service requests. + * + * - Enter a service loop and wait for requests. + * + * At this point the lx_thunk process is ready to service door + * based requests. When a door service request is received the + * following happens inside the lx_thunk process: + * + * - The doors server function is invoked on a new solaris thread + * that the kernel injects into the lx_thunk process. We sanity + * check the incoming request, place it on a service queue, and + * wait for notification that the request has been completed. + * + * - A Linux thread takes this request off the service queue + * and dispatches it to a service function that will: + * - Decode the request. + * - Handle the request by invoking native Linux interfaces. + * - Encode the results for the request.
+ * + * - The Linux thread then notifies the requesting doors server + * thread that the request has been completed and goes to sleep + * until it receives another request. + * + * - The solaris door server thread returns the results of the + * operation to the caller. + * + * Notes: + * + * - The service request hand off operation from the solaris doors thread to + * the "Linux thread" is required because only "Linux threads" can call + * into Linux code. In this context a "Linux thread" is a thread that + * is either the initial thread of a Linux process or a thread that was + * created by calling the Linux version of thread_create(). The reason + * for this restriction is that any thread that invokes Linux code needs + * to have been initialized in the Linux threading libraries and have + * things like Linux thread local storage properly set up. + * + * But under solaris all door server threads are created and destroyed + * dynamically. This means that when a doors server function is invoked, + * it is invoked via a thread that hasn't been initialized in the Linux + * environment and therefore can't call directly into Linux code. + * + * - Currently when a thunk server process is starting up, it communicates + * with its parent via two FIFOs. These FIFOs are set up by the + * lx_thunk.so library. After creating the FIFOs and starting the lx_thunk + * server, lx_thunk.so writes the name of the file that the door should + * be attached to to the first pipe. The lx_thunk server reads in this + * value, initializes the server, fattach()s it to the file requested by + * lx_thunk.so and does a write to the second FIFO to let lx_thunk.so + * know that the server is ready to take requests. + * + * This negotiation could be simplified to use only one FIFO. + * lx_thunk.so would attempt to read from the FIFO and the lx_thunk + * server process could send the new door server file descriptor + * to this process via an I_SENDFD ioctl (see streamio.7I).
+ * + * - The lx_thunk server process will exit when the client process + * that it's handling requests for exists. (ie, when there are no + * more open file handles to the doors server.) + */ + +#include <assert.h> +#include <door.h> +#include <errno.h> +#include <libproc.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_thread.h> +#include <sys/lx_thunk_server.h> +#include <sys/varargs.h> +#include <thread.h> +#include <unistd.h> + +/* + * Generic interfaces used for looking up and calling Linux functions. + */ +typedef struct __lx_handle_dlsym *lx_handle_dlsym_t; +typedef struct __lx_handle_sym *lx_handle_sym_t; + +uintptr_t lx_call0(lx_handle_sym_t); +uintptr_t lx_call1(lx_handle_sym_t, uintptr_t); +uintptr_t lx_call2(lx_handle_sym_t, uintptr_t, uintptr_t); +uintptr_t lx_call3(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call4(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +uintptr_t lx_call5(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t); +uintptr_t lx_call6(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call7(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t); +uintptr_t lx_call8(lx_handle_sym_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +/* + * Flag indicating if this process is destined to become a thunking + * server process. + */ +static int lxt_server_processes = 0; + +/* + * Linux function call defines and handles. 
+ */ +static lx_handle_dlsym_t lxh_init = NULL; + +#define LXTH_GETHOSTBYNAME_R 0 +#define LXTH_GETHOSTBYADDR_R 1 +#define LXTH_GETSERVBYNAME_R 2 +#define LXTH_GETSERVBYPORT_R 3 +#define LXTH_OPENLOG 4 +#define LXTH_SYSLOG 5 +#define LXTH_CLOSELOG 6 +#define LXTH_PROGNAME 7 + +static struct lxt_handles { + int lxth_index; + char *lxth_name; + lx_handle_sym_t lxth_handle; +} lxt_handles[] = { + { LXTH_GETHOSTBYNAME_R, "gethostbyname_r", NULL }, + { LXTH_GETHOSTBYADDR_R, "gethostbyaddr_r", NULL }, + { LXTH_GETSERVBYNAME_R, "getservbyname_r", NULL }, + { LXTH_GETSERVBYPORT_R, "getservbyport_r", NULL }, + { LXTH_OPENLOG, "openlog", NULL }, + { LXTH_SYSLOG, "syslog", NULL }, + { LXTH_CLOSELOG, "closelog", NULL }, + { LXTH_PROGNAME, "__progname", NULL }, + { -1, NULL, NULL }, +}; + +/* + * Door server operations dispatch functions and table. + * + * When the doors server get's a request for a particlar operation + * this dispatch table controls what function will be invoked to + * service the request. The function is invoked via Linux thread + * so that it can call into native Linux code if necessary. 
+ */ +static void lxt_server_gethost(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_getserv(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_openlog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_syslog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); +static void lxt_server_closelog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); + +typedef void (*lxt_op_func_t)(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size); + +static struct lxt_operations { + int lxto_index; + lxt_op_func_t lxto_fp; +} lxt_operations[] = { + { LXT_SERVER_OP_PING, NULL }, + { LXT_SERVER_OP_NAME2HOST, lxt_server_gethost }, + { LXT_SERVER_OP_ADDR2HOST, lxt_server_gethost }, + { LXT_SERVER_OP_NAME2SERV, lxt_server_getserv }, + { LXT_SERVER_OP_PORT2SERV, lxt_server_getserv }, + { LXT_SERVER_OP_OPENLOG, lxt_server_openlog }, + { LXT_SERVER_OP_SYSLOG, lxt_server_syslog }, + { LXT_SERVER_OP_CLOSELOG, lxt_server_closelog }, +}; + +/* + * Structures for passing off requests from doors threads (which are + * solaris threads) to a Linux thread that that can handle them. + */ +typedef struct lxt_req { + lxt_server_arg_t *lxtr_request; + size_t lxtr_request_size; + char *lxtr_result; + size_t lxtr_result_size; + int lxtr_complete; + cond_t lxtr_complete_cv; +} lxt_req_t; + +static mutex_t lxt_req_lock = DEFAULTMUTEX; +static cond_t lxt_req_cv = DEFAULTCV; +static lxt_req_t *lxt_req_ptr = NULL; + +static mutex_t lxt_pid_lock = DEFAULTMUTEX; +static pid_t lxt_pid = NULL; + +/* + * Interfaces used to call from lx_brand.so into Linux code. 
+ */ +typedef struct lookup_cb_arg { + struct ps_prochandle *lca_ph; + caddr_t lca_ptr; +} lookup_cb_arg_t; + +static int +/*ARGSUSED*/ +lookup_cb(void *data, const prmap_t *pmp, const char *object) +{ + lookup_cb_arg_t *lcap = (lookup_cb_arg_t *)data; + prsyminfo_t si; + GElf_Sym sym; + + if (Pxlookup_by_name(lcap->lca_ph, + LM_ID_BASE, object, "dlsym", &sym, &si) != 0) + return (0); + + if (sym.st_shndx == SHN_UNDEF) + return (0); + + /* + * XXX: we should be more paranoid and verify that the symbol + * we just looked up is libdl.so.2`dlsym + */ + lcap->lca_ptr = (caddr_t)(uintptr_t)sym.st_value; + return (1); +} + +lx_handle_dlsym_t +lx_call_init(void) +{ + struct ps_prochandle *ph; + lookup_cb_arg_t lca; + extern int __libc_threaded; + int err; + + lx_debug("lx_call_init(): looking up Linux dlsym"); + + /* + * The handle is really the address of the Linux "dlsym" function. + * Once we have this address we can call into the Linux "dlsym" + * function to lookup other functions. It's the initial lookup + * of "dlsym" that's difficult. To do this we'll leverage the + * brand support that we added to librtld_db. We're going + * to fire up a seperate native solaris process that will + * attach to us via libproc/librtld_db and lookup the symbol + * for us. + */ + + /* Make sure we're single threaded. */ + if (__libc_threaded) { + lx_debug("lx_call_init() fail: " + "process must be single threaded"); + return (NULL); + } + + /* Tell libproc.so where the real procfs is mounted. */ + Pset_procfs_path("/native/proc"); + + /* Tell librtld_db.so where the real /native is */ + (void) rd_ctl(RD_CTL_SET_HELPPATH, "/native"); + + /* Grab ourselves but don't stop ourselves. 
*/ + if ((ph = Pgrab(getpid(), + PGRAB_FORCE | PGRAB_RDONLY | PGRAB_NOSTOP, &err)) == NULL) { + lx_debug("lx_call_init() fail: Pgrab failed: %s", + Pgrab_error(err)); + return (NULL); + } + + lca.lca_ph = ph; + if (Pobject_iter(ph, lookup_cb, &lca) == -1) { + lx_debug("lx_call_init() fail: couldn't find Linux dlsym"); + return (NULL); + } + + lx_debug("lx_call_init(): Linux dlsym = 0x%p", lca.lca_ptr); + return ((lx_handle_dlsym_t)lca.lca_ptr); +} + +#define LX_RTLD_DEFAULT ((void *)0) +#define LX_RTLD_NEXT ((void *) -1l) + +lx_handle_sym_t +lx_call_dlsym(lx_handle_dlsym_t lxh_dlsym, const char *str) +{ + lx_handle_sym_t result; + lx_debug("lx_call_dlsym: calling Linux dlsym for: %s", str); + result = (lx_handle_sym_t)lx_call2((lx_handle_sym_t)lxh_dlsym, + (uintptr_t)LX_RTLD_DEFAULT, (uintptr_t)str); + lx_debug("lx_call_dlsym: Linux sym: \"%s\" = 0x%p", str, result); + return (result); +} + +static uintptr_t +/*ARGSUSED*/ +lx_call(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, + uintptr_t p3, uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, + uintptr_t p8) +{ + typedef uintptr_t (*fp8_t)(uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + lx_regs_t *rp; + uintptr_t ret; + fp8_t lx_funcp = (fp8_t)lx_ch; + long cur_gs; + + rp = lx_syscall_regs(); + + lx_debug("lx_call: calling to Linux code at 0x%p", lx_ch); + lx_debug("lx_call: loading Linux gs, rp = 0x%p, gs = 0x%p", + rp, rp->lxr_gs); + + lx_swap_gs(rp->lxr_gs, &cur_gs); + ret = lx_funcp(p1, p2, p3, p4, p5, p6, p7, p8); + lx_swap_gs(cur_gs, &rp->lxr_gs); + + lx_debug("lx_call: returned from Linux code at 0x%p (%p)", lx_ch, ret); + lx_debug("lx_call: restored solaris gs 0x%p", cur_gs); + return (ret); +} + +uintptr_t +lx_call0(lx_handle_sym_t lx_ch) +{ + return (lx_call(lx_ch, 0, 0, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call1(lx_handle_sym_t lx_ch, uintptr_t p1) +{ + return (lx_call(lx_ch, p1, 0, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call2(lx_handle_sym_t 
lx_ch, uintptr_t p1, uintptr_t p2) +{ + return (lx_call(lx_ch, p1, p2, 0, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call3(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lx_call(lx_ch, p1, p2, p3, 0, 0, 0, 0, 0)); +} + +uintptr_t +lx_call4(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, 0, 0, 0, 0)); +} + +uintptr_t +lx_call5(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, 0, 0, 0)); +} + +uintptr_t +lx_call6(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, 0, 0)); +} + +uintptr_t +lx_call7(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, 0)); +} + +uintptr_t +lx_call8(lx_handle_sym_t lx_ch, uintptr_t p1, uintptr_t p2, uintptr_t p3, + uintptr_t p4, uintptr_t p5, uintptr_t p6, uintptr_t p7, uintptr_t p8) +{ + return (lx_call(lx_ch, p1, p2, p3, p4, p5, p6, p7, p8)); +} + +/* + * Linux Thunking Interfaces - Server Side + */ +static int +lxt_gethost_arg_check(lxt_gethost_arg_t *x, int x_size) +{ + if (x_size != sizeof (*x) + x->lxt_gh_buf_len - 1) + return (-1); + + if ((x->lxt_gh_token_len < 0) || (x->lxt_gh_buf_len < 0)) + return (-1); + + /* Token and buf should use up all the storage. 
*/ + if ((x->lxt_gh_token_len + x->lxt_gh_buf_len) != x->lxt_gh_storage_len) + return (-1); + + return (0); +} + +static void +lxt_server_gethost(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_gethost_arg_t *data; + struct hostent *result, *rv; + int token_len, buf_len, type, data_size, i; + char *token, *buf; + int h_errnop; + + assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) || + (request->lxt_sa_op == LXT_SERVER_OP_ADDR2HOST)); + + /*LINTED*/ + data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request) - 1; + + if (!lxt_gethost_arg_check(data, data_size)) { + lx_debug("lxt_server_gethost: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Unpack the arguments. */ + type = data->lxt_gh_type; + token = &data->lxt_gh_storage[0]; + token_len = data->lxt_gh_token_len; + result = &data->lxt_gh_result; + buf = &data->lxt_gh_storage[data->lxt_gh_token_len]; + buf_len = data->lxt_gh_buf_len - data->lxt_gh_token_len; + + if (request->lxt_sa_op == LXT_SERVER_OP_NAME2HOST) { + (void) lx_call6(lxt_handles[LXTH_GETHOSTBYNAME_R].lxth_handle, + (uintptr_t)token, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv, + (uintptr_t)&h_errnop); + } else { + (void) lx_call8(lxt_handles[LXTH_GETHOSTBYADDR_R].lxth_handle, + (uintptr_t)token, token_len, type, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv, + (uintptr_t)&h_errnop); + } + + if (rv == NULL) { + /* the lookup failed */ + request->lxt_sa_success = 0; + request->lxt_sa_errno = errno; + data->lxt_gh_h_errno = h_errnop; + *door_result = (char *)request; + *door_result_size = request_size; + return; + } + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + data->lxt_gh_h_errno = 0; + + /* + * The result structure that we would normally return contains a + * bunch of pointers, but those pointers are useless to our caller + * since they are in a different 
address space. So before returning + * we'll convert all the result pointers into offsets. The caller + * can then map the offsets back into pointers. + */ + for (i = 0; result->h_aliases[i] != NULL; i++) { + result->h_aliases[i] = + LXT_PTR_TO_OFFSET(result->h_aliases[i], buf); + } + for (i = 0; result->h_addr_list[i] != NULL; i++) { + result->h_addr_list[i] = + LXT_PTR_TO_OFFSET(result->h_addr_list[i], buf); + } + result->h_name = LXT_PTR_TO_OFFSET(result->h_name, buf); + result->h_aliases = LXT_PTR_TO_OFFSET(result->h_aliases, buf); + result->h_addr_list = LXT_PTR_TO_OFFSET(result->h_addr_list, buf); + + *door_result = (char *)request; + *door_result_size = request_size; +} + +static int +lxt_getserv_arg_check(lxt_getserv_arg_t *x, int x_size) +{ + if (x_size != sizeof (*x) + x->lxt_gs_buf_len - 1) + return (-1); + + if ((x->lxt_gs_token_len < 0) || (x->lxt_gs_buf_len < 0)) + return (-1); + + /* Token and buf should use up all the storage. */ + if ((x->lxt_gs_token_len + x->lxt_gs_buf_len) != x->lxt_gs_storage_len) + return (-1); + + return (0); +} + +static void +lxt_server_getserv(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_getserv_arg_t *data; + struct servent *result, *rv; + int token_len, buf_len, data_size, i, port; + char *token, *buf, *proto = NULL; + + assert((request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) || + (request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV)); + + /*LINTED*/ + data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request) - 1; + + if (!lxt_getserv_arg_check(data, data_size)) { + lx_debug("lxt_server_getserv: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Unpack the arguments. 
*/ + token = &data->lxt_gs_storage[0]; + token_len = data->lxt_gs_token_len; + result = &data->lxt_gs_result; + buf = &data->lxt_gs_storage[data->lxt_gs_token_len]; + buf_len = data->lxt_gs_buf_len - data->lxt_gs_token_len; + if (strlen(data->lxt_gs_proto) > 0) + proto = data->lxt_gs_proto; + + /* Do more sanity checks */ + if ((request->lxt_sa_op == LXT_SERVER_OP_PORT2SERV) && + (token_len != sizeof (int))) { + lx_debug("lxt_server_getserv: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + if (request->lxt_sa_op == LXT_SERVER_OP_NAME2SERV) { + (void) lx_call6(lxt_handles[LXTH_GETSERVBYNAME_R].lxth_handle, + (uintptr_t)token, (uintptr_t)proto, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv); + } else { + bcopy(token, &port, sizeof (int)); + (void) lx_call6(lxt_handles[LXTH_GETSERVBYPORT_R].lxth_handle, + port, (uintptr_t)proto, (uintptr_t)result, + (uintptr_t)buf, buf_len, (uintptr_t)&rv); + } + + if (rv == NULL) { + /* the lookup failed */ + request->lxt_sa_success = 0; + request->lxt_sa_errno = errno; + *door_result = (char *)request; + *door_result_size = request_size; + return; + } + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + + /* + * The result structure that we would normally return contains a + * bunch of pointers, but those pointers are useless to our caller + * since they are in a different address space. So before returning + * we'll convert all the result pointers into offsets. The caller + * can then map the offsets back into pointers. 
+ */ + for (i = 0; result->s_aliases[i] != NULL; i++) { + result->s_aliases[i] = + LXT_PTR_TO_OFFSET(result->s_aliases[i], buf); + } + result->s_proto = LXT_PTR_TO_OFFSET(result->s_proto, buf); + result->s_aliases = LXT_PTR_TO_OFFSET(result->s_aliases, buf); + result->s_name = LXT_PTR_TO_OFFSET(result->s_name, buf); + + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_openlog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_openlog_arg_t *data; + int data_size; + static char ident[128]; + + assert(request->lxt_sa_op == LXT_SERVER_OP_OPENLOG); + + /*LINTED*/ + data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request); + + if (data_size != sizeof (*data)) { + lx_debug("lxt_server_openlog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* + * Linux expects that the ident pointer passed to openlog() + * points to a static string that won't go away. Linux + * saves the pointer and references with syslog() is called. + * Hence we'll make a local copy of the ident string here. + */ + (void) mutex_lock(&lxt_pid_lock); + (void) strlcpy(ident, data->lxt_ol_ident, sizeof (ident)); + (void) mutex_unlock(&lxt_pid_lock); + + /* Call Linx openlog(). 
*/ + (void) lx_call3(lxt_handles[LXTH_OPENLOG].lxth_handle, + (uintptr_t)ident, data->lxt_ol_logopt, data->lxt_ol_facility); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_syslog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + lxt_syslog_arg_t *data; + int data_size; + char *progname_ptr_new; + char *progname_ptr_old; + + assert(request->lxt_sa_op == LXT_SERVER_OP_SYSLOG); + + /*LINTED*/ + data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0]; + data_size = request_size - sizeof (*request); + + if (data_size != sizeof (*data)) { + lx_debug("lxt_server_openlog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + progname_ptr_new = data->lxt_sl_progname; + + (void) mutex_lock(&lxt_pid_lock); + + /* + * Ensure the message has the correct pid. + * We do this by telling our getpid() system call to return a + * different value. + */ + lxt_pid = data->lxt_sl_pid; + + /* + * Ensure the message has the correct program name. + * Normally instead of a program name an "ident" string is + * used, this is the string passed to openlog(). But if + * openlog() wasn't called before syslog() then Linux + * syslog() will attempt to use the program name as + * the ident string, and the program name is determined + * by looking at the __progname variable. So we'll just + * update the Linux __progname variable while we do the + * call. + */ + (void) uucopy(lxt_handles[LXTH_PROGNAME].lxth_handle, + &progname_ptr_old, sizeof (char *)); + (void) uucopy(&progname_ptr_new, + lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *)); + + /* Call Linux syslog(). */ + (void) lx_call2(lxt_handles[LXTH_SYSLOG].lxth_handle, + data->lxt_sl_priority, (uintptr_t)data->lxt_sl_message); + + /* Restore pid and program name. 
*/ + (void) uucopy(&progname_ptr_old, + lxt_handles[LXTH_PROGNAME].lxth_handle, sizeof (char *)); + lxt_pid = NULL; + + (void) mutex_unlock(&lxt_pid_lock); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server_closelog(lxt_server_arg_t *request, size_t request_size, + char **door_result, size_t *door_result_size) +{ + int data_size; + + assert(request->lxt_sa_op == LXT_SERVER_OP_CLOSELOG); + + data_size = request_size - sizeof (*request); + if (data_size != 0) { + lx_debug("lxt_server_closelog: invalid request"); + *door_result = NULL; + *door_result_size = 0; + return; + } + + /* Call Linux closelog(). */ + (void) lx_call0(lxt_handles[LXTH_CLOSELOG].lxth_handle); + + request->lxt_sa_success = 1; + request->lxt_sa_errno = 0; + *door_result = (char *)request; + *door_result_size = request_size; +} + +static void +/*ARGSUSED*/ +lxt_server(void *cookie, char *argp, size_t request_size, + door_desc_t *dp, uint_t n_desc) +{ + /*LINTED*/ + lxt_server_arg_t *request = (lxt_server_arg_t *)argp; + lxt_req_t lxt_req; + char *door_path = cookie; + + /* Check if there's no callers left */ + if (argp == DOOR_UNREF_DATA) { + (void) fdetach(door_path); + (void) unlink(door_path); + lx_debug("lxt_thunk_server: no clients, exiting"); + exit(0); + } + + /* Sanity check the incomming request. */ + if (request_size < sizeof (*request)) { + /* the lookup failed */ + lx_debug("lxt_thunk_server: invalid request size"); + (void) door_return(NULL, 0, NULL, 0); + return; + } + + if ((request->lxt_sa_op < LXT_SERVER_OP_MIN) || + (request->lxt_sa_op > LXT_SERVER_OP_MAX)) { + lx_debug("lxt_thunk_server: invalid request op"); + (void) door_return(NULL, 0, NULL, 0); + return; + } + + /* Handle ping requests immediatly, return here. 
*/ + if (request->lxt_sa_op == LXT_SERVER_OP_PING) { + lx_debug("lxt_thunk_server: handling ping request"); + request->lxt_sa_success = 1; + (void) door_return((char *)request, request_size, NULL, 0); + return; + } + + lx_debug("lxt_thunk_server: hand off request to Linux thread, " + "request = 0x%p", request); + + /* Pack the request up so we can pass it to a Linux thread. */ + lxt_req.lxtr_request = request; + lxt_req.lxtr_request_size = request_size; + lxt_req.lxtr_result = NULL; + lxt_req.lxtr_result_size = 0; + lxt_req.lxtr_complete = 0; + (void) cond_init(&lxt_req.lxtr_complete_cv, USYNC_THREAD, NULL); + + /* Pass the request onto a Linux thread. */ + (void) mutex_lock(&lxt_req_lock); + while (lxt_req_ptr != NULL) + (void) cond_wait(&lxt_req_cv, &lxt_req_lock); + lxt_req_ptr = &lxt_req; + (void) cond_broadcast(&lxt_req_cv); + + /* Wait for the request to be completed. */ + while (lxt_req.lxtr_complete == 0) + (void) cond_wait(&lxt_req.lxtr_complete_cv, &lxt_req_lock); + assert(lxt_req_ptr != &lxt_req); + (void) mutex_unlock(&lxt_req_lock); + + lx_debug("lxt_thunk_server: hand off request completed, " + "request = 0x%p", request); + + /* + * If door_return() is successfull it never returns, so if we made + * it here there was some kind of error, but there's nothing we can + * really do about it. + */ + (void) door_return( + lxt_req.lxtr_result, lxt_req.lxtr_result_size, NULL, 0); +} + +static void +lxt_server_loop(void) +{ + lxt_req_t *lxt_req; + lxt_server_arg_t *request; + size_t request_size; + char *door_result; + size_t door_result_size; + + for (;;) { + /* Wait for a request from a doors server thread. */ + (void) mutex_lock(&lxt_req_lock); + while (lxt_req_ptr == NULL) + (void) cond_wait(&lxt_req_cv, &lxt_req_lock); + + /* We got a request, get a local pointer to it. */ + lxt_req = lxt_req_ptr; + lxt_req_ptr = NULL; + (void) cond_broadcast(&lxt_req_cv); + (void) mutex_unlock(&lxt_req_lock); + + /* Get a pointer to the request. 
*/ + request = lxt_req->lxtr_request; + request_size = lxt_req->lxtr_request_size; + + lx_debug("lxt_server_loop: Linux thread request recieved, " + "request = %p", request); + + /* Dispatch the request. */ + assert((request->lxt_sa_op > LXT_SERVER_OP_PING) || + (request->lxt_sa_op < LXT_SERVER_OP_MAX)); + lxt_operations[request->lxt_sa_op].lxto_fp( + request, request_size, &door_result, &door_result_size); + + lx_debug("lxt_server_loop: Linux thread request completed, " + "request = %p", request); + + (void) mutex_lock(&lxt_req_lock); + + /* Set the result pointers for the calling door thread. */ + lxt_req->lxtr_result = door_result; + lxt_req->lxtr_result_size = door_result_size; + + /* Let the door thread know we're done. */ + lxt_req->lxtr_complete = 1; + (void) cond_signal(&lxt_req->lxtr_complete_cv); + + (void) mutex_unlock(&lxt_req_lock); + } + /*NOTREACHED*/ +} + +static void +lxt_server_enter(int fifo1_wr, int fifo2_rd) +{ + struct stat stat; + char door_path[MAXPATHLEN]; + int i, dfd, junk = 0; + + /* + * Do some sanity checks. Make sure we've got the fifos + * we need passed to us on the correct file descriptors. + */ + if ((fstat(fifo1_wr, &stat) != 0) || + ((stat.st_mode & S_IFMT) != S_IFIFO) || + (fstat(fifo2_rd, &stat) != 0) || + ((stat.st_mode & S_IFMT) != S_IFIFO)) { + lx_err("lx_thunk server aborting, can't contact parent"); + exit(-1); + } + + /* + * Get the initial Linux call handle so we can invoke other + * Linux calls. + */ + lxh_init = lx_call_init(); + if (lxh_init == NULL) { + lx_err("lx_thunk server aborting, failed Linux call init"); + exit(-1); + } + + /* Now lookup other Linux symbols we'll need access to. 
*/ + for (i = 0; lxt_handles[i].lxth_name != NULL; i++) { + assert(lxt_handles[i].lxth_index == i); + if ((lxt_handles[i].lxth_handle = lx_call_dlsym(lxh_init, + lxt_handles[i].lxth_name)) == NULL) { + lx_err("lx_thunk server aborting, " + "failed Linux symbol lookup: %s", + lxt_handles[i].lxth_name); + exit(-1); + } + } + + /* get the path to the door server */ + if (read(fifo2_rd, door_path, sizeof (door_path)) < 0) { + lx_err("lxt_server_enter: failed to get door path"); + exit(-1); + } + (void) close(fifo2_rd); + + /* Create the door server. */ + if ((dfd = door_create(lxt_server, door_path, + DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { + lx_err("lxt_server_enter: door_create() failed"); + exit(-1); + } + + /* Attach the door to a file system path. */ + (void) fdetach(door_path); + if (fattach(dfd, door_path) < 0) { + lx_err("lxt_server_enter: fattach() failed"); + exit(-1); + } + + /* The door server is ready, signal this via a fifo write */ + (void) write(fifo1_wr, &junk, 1); + (void) close(fifo1_wr); + + lx_debug("lxt_server_enter: doors server initialized"); + lxt_server_loop(); + /*NOTREACHED*/ +} + +void +lxt_server_exec_check(void) +{ + if (lxt_server_processes == 0) + return; + + /* + * We're a thunk server process, so we take over control of + * the current Linux process here. + */ + lx_debug("lx_thunk server initalization starting"); + lxt_server_enter(LXT_SERVER_FIFO_WR_FD, LXT_SERVER_FIFO_RD_FD); + /*NOTREACHED*/ +} + +void +lxt_server_init(int argc, char *argv[]) +{ + /* + * The thunk server process is a shell script named LXT_SERVER_BINARY. + * It is executed without any parameters. Since it's a shell script + * the arguments passed to the shell's main entry point are: + * 1) the name of the shell + * 2) the name of the script to execute + * + * So to check if we're the thunk server process we first check + * for the expected number of arduments and then we'll look at + * the second parameter to see if it's LXT_SERVER_BINARY. 
+ */ + if ((argc != 2) || + (strcmp(argv[1], LXT_SERVER_BINARY) != 0)) + return; + + lxt_server_processes = 1; + lx_debug("lx_thunk server detected, delaying initalization"); +} + +int +lxt_server_pid(int *pid) +{ + if (lxt_server_processes == 0) + return (0); + *pid = lxt_pid; + return (1); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile b/usr/src/lib/brand/lx/lx_brand/common/mapfile new file mode 100644 index 0000000000..9ba563a2cb --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Scope everything local -- our .init section is our only public interface. +# +{ + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers new file mode 100644 index 0000000000..9ba563a2cb --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers @@ -0,0 +1,34 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Scope everything local -- our .init section is our only public interface. +# +{ + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/mem.c b/usr/src/lib/brand/lx/lx_brand/common/mem.c new file mode 100644 index 0000000000..15b077bd33 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mem.c @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> + +/* + * There are two forms of mmap, mmap() and mmap2(). The only difference is that + * the final argument to mmap2() specifies the number of pages, not bytes. + * Linux has a number of additional flags, but they are all deprecated. We also + * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris. + * + * The Linux mmap() returns ENOMEM in some cases where Solaris returns + * EOVERFLOW, so we translate the errno as necessary. + */ + +int pagesize; /* needed for mmap2() */ + +#define LX_MAP_ANONYMOUS 0x00020 +#define LX_MAP_NORESERVE 0x04000 + +static int +ltos_mmap_flags(int flags) +{ + int new_flags; + + new_flags = flags & (MAP_TYPE | MAP_FIXED); + if (flags & LX_MAP_ANONYMOUS) + new_flags |= MAP_ANONYMOUS; + if (flags & LX_MAP_NORESERVE) + new_flags |= MAP_NORESERVE; + + return (new_flags); +} + +static int +mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, off64_t p6) +{ + void *addr = (void *)p1; + size_t len = p2; + int prot = p3; + int flags = p4; + int fd = p5; + off64_t off = p6; + void *ret; + + if (lx_debug_enabled != 0) { + char *path, path_buf[MAXPATHLEN]; + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tmmap_common(): fd = %d - %s", fd, path); + } + + /* + * Under Linux, the file descriptor is ignored when mapping zfod + * anonymous memory, On Solaris, we want the fd set to -1 for the + * same functionality. 
+ */ + if (flags & LX_MAP_ANONYMOUS) + fd = -1; + + /* + * This is totally insane. The NOTES section in the linux mmap(2) man + * page claims that on some architectures, read protection may + * automatically include exec protection. It has been observed on a + * native linux system that the /proc/<pid>/maps file does indeed + * show that segments mmap'd from userland (such as libraries mapped in + * by the dynamic linker) all have exec the permission set, even for + * data segments. + */ + if (prot & PROT_READ) + prot |= PROT_EXEC; + + ret = mmap64(addr, len, prot, ltos_mmap_flags(flags), fd, off); + + if (ret == MAP_FAILED) + return (errno == EOVERFLOW ? -ENOMEM : -errno); + else + return ((int)ret); +} + +int +lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, uintptr_t p6) +{ + return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6)); +} + +int +lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5, uintptr_t p6) +{ + if (pagesize == 0) + pagesize = sysconf(_SC_PAGESIZE); + + return (mmap_common(p1, p2, p3, p4, p5, (off64_t)p6 * pagesize)); +} + + +/* + * The locking family of system calls, as well as msync(), are identical. On + * Solaris, they are layered on top of the memcntl syscall, so they cannot be + * pass-thru. + */ +int +lx_mlock(uintptr_t addr, uintptr_t len) +{ + uintptr_t addr1 = addr & PAGEMASK; + uintptr_t len1 = len + (addr & PAGEOFFSET); + + return (mlock((void *)addr1, (size_t)len1) ? -errno : 0); +} + +int +lx_mlockall(uintptr_t flags) +{ + return (mlockall(flags) ? -errno : 0); +} + +int +lx_munlock(uintptr_t addr, uintptr_t len) +{ + uintptr_t addr1 = addr & PAGEMASK; + uintptr_t len1 = len + (addr & PAGEOFFSET); + + return (munlock((void *)addr1, (size_t)len1) ? -errno : 0); +} + +int +lx_munlockall(void) +{ + return (munlockall() ? -errno : 0); +} + +int +lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags) +{ + return (msync((void *)addr, (size_t)len, flags) ? 
-errno : 0); +} + +/* + * Solaris recognizes more flags than Linux, so we don't want to inadvertently + * use what would be an invalid flag on Linux. Linux also allows the length to + * be zero, while Solaris does not. + */ +int +lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice) +{ + if (len == 0) + return (0); + + switch (advice) { + case MADV_NORMAL: + case MADV_RANDOM: + case MADV_SEQUENTIAL: + case MADV_WILLNEED: + case MADV_DONTNEED: + return (madvise((void *)start, len, advice) ? -errno : 0); + + default: + return (-EINVAL); + } +} + +/* + * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN + * and PROT_GROWSUP, which have no equivalent on Solaris. + */ +#define LX_PROT_GROWSDOWN 0x01000000 +#define LX_PROT_GROWSUP 0x02000000 + +int +lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot) +{ + prot &= ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN); + + return (mprotect((void *)start, len, prot) ? -errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c new file mode 100644 index 0000000000..4ff4b350bb --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c @@ -0,0 +1,492 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <alloca.h> +#include <errno.h> +#include <fcntl.h> +#include <strings.h> +#include <macros.h> +#include <sys/brand.h> +#include <sys/reboot.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/systeminfo.h> +#include <sys/types.h> +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_stat.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thunk_server.h> +#include <unistd.h> +#include <libintl.h> + +extern int sethostname(char *, int); + +/* ARGUSED */ +int +lx_rename(uintptr_t p1, uintptr_t p2) +{ + int ret; + + ret = rename((const char *)p1, (const char *)p2); + + if (ret < 0) { + /* + * If rename(2) failed and we're in install mode, return + * success if the the reason we failed was either because the + * source file didn't actually exist or if it was because we + * tried to rename it to be the name of a device currently in + * use (resulting in an EBUSY.) + * + * To help install along further, if the failure was due + * to an EBUSY, delete the original file so we don't leave + * extra files lying around. 
+ */ + if (lx_install != 0) { + if (errno == ENOENT) + return (0); + + if (errno == EBUSY) { + (void) unlink((const char *)p1); + return (0); + } + } + + return (-errno); + } + + return (0); +} + +/*ARGSUSED*/ +int +lx_reboot(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + int magic = (int)p1; + int magic2 = (int)p2; + uint_t flag = (int)p3; + int rc; + + if (magic != LINUX_REBOOT_MAGIC1) + return (-EINVAL); + if (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A && + magic2 != LINUX_REBOOT_MAGIC2B && magic2 != LINUX_REBOOT_MAGIC2C && + magic2 != LINUX_REBOOT_MAGIC2D) + return (-EINVAL); + + if (geteuid() != 0) + return (-EPERM); + + switch (flag) { + case LINUX_REBOOT_CMD_CAD_ON: + case LINUX_REBOOT_CMD_CAD_OFF: + /* ignored */ + rc = 0; + break; + case LINUX_REBOOT_CMD_POWER_OFF: + case LINUX_REBOOT_CMD_HALT: + rc = reboot(RB_HALT, NULL); + break; + case LINUX_REBOOT_CMD_RESTART: + case LINUX_REBOOT_CMD_RESTART2: + /* RESTART2 may need more work */ + lx_msg(gettext("Restarting system.\n")); + rc = reboot(RB_AUTOBOOT, NULL); + break; + default: + return (-EINVAL); + } + + return ((rc == -1) ? -errno : rc); +} + +/* + * getcwd() - Linux syscall semantics are slightly different; we need to return + * the length of the pathname copied (+ 1 for the terminating NULL byte.) + */ +int +lx_getcwd(uintptr_t p1, uintptr_t p2) +{ + char *buf; + size_t buflen = (size_t)p2; + size_t copylen, local_len; + size_t len = 0; + + if ((getcwd((char *)p1, (size_t)p2)) == NULL) + return (-errno); + + /* + * We need the length of the pathname getcwd() copied but we never want + * to dereference a Linux pointer for any reason. + * + * Thus, to get the string length we will uucopy() up to copylen bytes + * at a time into a local buffer and will walk each chunk looking for + * the string-terminating NULL byte. 
+ * + * We can use strlen() to find the length of the string in the + * local buffer by delimiting the buffer with a NULL byte in the + * last element that will never be overwritten. + */ + copylen = min(buflen, MAXPATHLEN + 1); + buf = SAFE_ALLOCA(copylen + 1); + if (buf == NULL) + return (-ENOMEM); + buf[copylen] = '\0'; + + for (;;) { + if (uucopy((char *)p1 + len, buf, copylen) != 0) + return (-errno); + + local_len = strlen(buf); + len += local_len; + + /* + * If the strlen() is less than copylen, we found the + * real end of the string -- not the NULL byte used to + * delimit the end of our buffer. + */ + if (local_len != copylen) + break; + + /* prepare to check the next chunk of the string */ + buflen -= copylen; + copylen = min(buflen, copylen); + } + + return (len + 1); +} + +int +lx_uname(uintptr_t p1) +{ + struct lx_utsname *un = (struct lx_utsname *)p1; + char buf[LX_SYS_UTS_LN + 1]; + + if (gethostname(un->nodename, sizeof (un->nodename)) == -1) + return (-errno); + + (void) strlcpy(un->sysname, LX_UNAME_SYSNAME, LX_SYS_UTS_LN); + (void) strlcpy(un->release, lx_release, LX_SYS_UTS_LN); + (void) strlcpy(un->version, LX_UNAME_VERSION, LX_SYS_UTS_LN); + (void) strlcpy(un->machine, LX_UNAME_MACHINE, LX_SYS_UTS_LN); + if ((sysinfo(SI_SRPC_DOMAIN, buf, LX_SYS_UTS_LN) < 0)) + un->domainname[0] = '\0'; + else + (void) strlcpy(un->domainname, buf, LX_SYS_UTS_LN); + + return (0); +} + +/* + * {get,set}groups16() - Handle the conversion between 16-bit Linux gids and + * 32-bit Solaris gids. 
+ */ +int +lx_getgroups16(uintptr_t p1, uintptr_t p2) +{ + int count = (int)p1; + lx_gid16_t *grouplist = (lx_gid16_t *)p2; + gid_t *grouplist32; + int ret; + int i; + + grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t)); + if (grouplist32 == NULL) + return (-ENOMEM); + if ((ret = getgroups(count, grouplist32)) < 0) + return (-errno); + + for (i = 0; i < ret; i++) + grouplist[i] = LX_GID32_TO_GID16(grouplist32[i]); + + return (ret); +} + +int +lx_setgroups16(uintptr_t p1, uintptr_t p2) +{ + int count = (int)p1; + lx_gid16_t *grouplist = (lx_gid16_t *)p2; + gid_t *grouplist32; + int i; + + grouplist32 = SAFE_ALLOCA(count * sizeof (gid_t)); + if (grouplist32 == NULL) + return (-ENOMEM); + for (i = 0; i < count; i++) + grouplist32[i] = LX_GID16_TO_GID32(grouplist[i]); + + return (setgroups(count, grouplist32) ? -errno : 0); +} + +/* + * personality() - Solaris doesn't support Linux personalities, but we have to + * emulate enough to show that we support the basic personality. + */ +#define LX_PER_LINUX 0x0 + +int +lx_personality(uintptr_t p1) +{ + int per = (int)p1; + + switch (per) { + case -1: + /* Request current personality */ + return (LX_PER_LINUX); + case LX_PER_LINUX: + return (0); + default: + return (-EINVAL); + } +} + +/* + * mknod() - Since we don't have the SYS_CONFIG privilege within a zone, the + * only mode we have to support is S_IFIFO. We also have to distinguish between + * an invalid type and insufficient privileges. 
+ */ +#define LX_S_IFMT 0170000 +#define LX_S_IFDIR 0040000 +#define LX_S_IFCHR 0020000 +#define LX_S_IFBLK 0060000 +#define LX_S_IFREG 0100000 +#define LX_S_IFIFO 0010000 +#define LX_S_IFLNK 0120000 +#define LX_S_IFSOCK 0140000 + +/*ARGSUSED*/ +int +lx_mknod(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *path = (char *)p1; + lx_dev_t lx_dev = (lx_dev_t)p3; + struct sockaddr_un sockaddr; + struct stat statbuf; + mode_t mode, type; + dev_t dev; + int fd; + + type = ((mode_t)p2 & LX_S_IFMT); + mode = ((mode_t)p2 & 07777); + + switch (type) { + case 0: + case LX_S_IFREG: + /* create a regular file */ + if (stat(path, &statbuf) == 0) + return (-EEXIST); + + if (errno != ENOENT) + return (-errno); + + if ((fd = creat(path, mode)) < 0) + return (-errno); + + (void) close(fd); + return (0); + + case LX_S_IFSOCK: + /* + * Create a UNIX domain socket. + * + * Most programmers aren't even aware you can do this. + * + * Note you can also do this via Solaris' mknod(2), but + * Linux allows anyone who can create a UNIX domain + * socket via bind(2) to create one via mknod(2); + * Solaris requires the caller to be privileged. + */ + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) + return (-errno); + + if (stat(path, &statbuf) == 0) + return (-EEXIST); + + if (errno != ENOENT) + return (-errno); + + if (uucopy(path, &sockaddr.sun_path, + sizeof (sockaddr.sun_path)) < 0) + return (-errno); + + /* assure NULL termination of sockaddr.sun_path */ + sockaddr.sun_path[sizeof (sockaddr.sun_path) - 1] = '\0'; + sockaddr.sun_family = AF_UNIX; + + if (bind(fd, (struct sockaddr *)&sockaddr, + strlen(sockaddr.sun_path) + + sizeof (sockaddr.sun_family)) < 0) + return (-errno); + + (void) close(fd); + return (0); + + case LX_S_IFIFO: + dev = 0; + break; + + case LX_S_IFCHR: + case LX_S_IFBLK: + /* + * The "dev" RPM package wants to create all possible Linux + * device nodes, so just report its mknod()s as having + * succeeded if we're in install mode. 
+ */ + if (lx_install != 0) { + lx_debug("lx_mknod: install mode spoofed creation of " + "Linux device [%lld, %lld]\n", + LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev)); + + return (0); + } + + dev = makedevice(LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev)); + break; + + default: + return (-EINVAL); + } + + return (mknod(path, mode | type, dev) ? -errno : 0); +} + +int +lx_sethostname(uintptr_t p1, uintptr_t p2) +{ + char *name = (char *)p1; + int len = (size_t)p2; + + return (sethostname(name, len) ? -errno : 0); +} + +int +lx_setdomainname(uintptr_t p1, uintptr_t p2) +{ + char *name = (char *)p1; + int len = (size_t)p2; + long rval; + + if (len < 0 || len >= LX_SYS_UTS_LN) + return (-EINVAL); + + rval = sysinfo(SI_SET_SRPC_DOMAIN, name, len); + + return ((rval < 0) ? -errno : 0); +} + +int +lx_getpid(void) +{ + int pid; + + /* First call the thunk server hook. */ + if (lxt_server_pid(&pid) != 0) + return (pid); + + pid = syscall(SYS_brand, B_EMULATE_SYSCALL + 20); + return ((pid == -1) ? -errno : pid); +} + +int +lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + char *filename = (char *)p1; + char **argv = (char **)p2; + char **envp = (char **)p3; + char *nullist[] = { NULL }; + char path[64]; + + /* First call the thunk server hook. */ + lxt_server_exec_check(); + + /* Get a copy of the executable we're trying to run */ + path[0] = '\0'; + (void) uucopystr(filename, path, sizeof (path)); + + /* Check if we're trying to run a native binary */ + if (strncmp(path, "/native/usr/lib/brand/lx/lx_native", + sizeof (path)) == 0) { + /* Skip the first element in the argv array */ + argv++; + + /* + * The name of the new program to execute was the first + * parameter passed to lx_native. + */ + if (uucopy(argv, &filename, sizeof (char *)) != 0) + return (-errno); + + (void) syscall(SYS_brand, B_EXEC_NATIVE, filename, argv, envp, + NULL, NULL, NULL); + return (-errno); + } + + if (argv == NULL) + argv = nullist; + + /* This is a normal exec call. 
*/ + (void) execve(filename, argv, envp); + + return (-errno); +} + +int +lx_setgroups(uintptr_t p1, uintptr_t p2) +{ + int ng = (int)p1; + gid_t *glist; + int i, r; + + lx_debug("\tlx_setgroups(%d, 0x%p", ng, p2); + + if (ng > 0) { + if ((glist = (gid_t *)SAFE_ALLOCA(ng * sizeof (gid_t))) == NULL) + return (-ENOMEM); + + if (uucopy((void *)p2, glist, ng * sizeof (gid_t)) != 0) + return (-errno); + + /* + * Linux doesn't check the validity of the group IDs, but + * Solaris does. Change any invalid group IDs to a known, valid + * value (yuck). + */ + for (i = 0; i < ng; i++) { + if (glist[i] < 0 || glist[i] > MAXUID) + glist[i] = MAXUID; + } + } + + r = syscall(SYS_brand, B_EMULATE_SYSCALL + LX_SYS_setgroups32, + ng, glist); + + return ((r == -1) ? -errno : r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/module.c b/usr/src/lib/brand/lx/lx_brand/common/module.c new file mode 100644 index 0000000000..3ec4164f71 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/module.c @@ -0,0 +1,90 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * We don't support Linux modules, but we have to emulate enough of the system + * calls to show that we don't have any modules installed. + */ + +#include <errno.h> +#include <sys/types.h> +#include <sys/lx_misc.h> + +/* + * For query_module(), we provide an empty list of modules, and return ENOENT + * on any request for a specific module. + */ +#define LX_QM_MODULES 1 +#define LX_QM_DEPS 2 +#define LX_QM_REFS 3 +#define LX_QM_SYMBOLS 4 +#define LX_QM_INFO 5 + +/*ARGSUSED*/ +int +lx_query_module(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + /* + * parameter p1 is the 'name' argument. + */ + int which = (int)p2; + char *buf = (char *)p3; + size_t bufsize = (size_t)p4; + size_t *ret = (size_t *)p5; + + switch (which) { + case 0: + /* + * Special case: always return 0 + */ + return (0); + + case LX_QM_MODULES: + /* + * Generate an empty list of modules. + */ + if (bufsize && buf) + buf[0] = '\0'; + if (ret) + *ret = 0; + return (0); + + case LX_QM_DEPS: + case LX_QM_REFS: + case LX_QM_SYMBOLS: + case LX_QM_INFO: + /* + * Any requests for specific module information return ENOENT. + */ + return (-ENOENT); + + default: + return (-EINVAL); + } +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount.c b/usr/src/lib/brand/lx/lx_brand/common/mount.c new file mode 100644 index 0000000000..bf349f4755 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/mount.c @@ -0,0 +1,709 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <alloca.h> +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <strings.h> +#include <nfs/mount.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <sys/lx_autofs.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_mount.h> + +/* + * support definitions + */ +union fh_buffer { + struct nfs_fid fh2; + struct nfs_fh3 fh3; + char fh_data[NFS3_FHSIZE + 2]; +}; + +typedef enum mount_opt_type { + MOUNT_OPT_INVALID = 0, + MOUNT_OPT_NORMAL = 1, /* option value: none */ + MOUNT_OPT_UINT = 2 /* option value: unsigned int */ +} mount_opt_type_t; + +typedef struct mount_opt { + char *mo_name; + mount_opt_type_t mo_type; +} mount_opt_t; + + +/* + * Globals + */ +mount_opt_t lofs_options[] = { + { NULL, MOUNT_OPT_INVALID } +}; + +mount_opt_t lx_proc_options[] = { + { NULL, MOUNT_OPT_INVALID } +}; + +mount_opt_t lx_autofs_options[] = { + { LX_MNTOPT_FD, MOUNT_OPT_UINT }, + { LX_MNTOPT_PGRP, MOUNT_OPT_UINT }, + { LX_MNTOPT_MINPROTO, MOUNT_OPT_UINT }, + { LX_MNTOPT_MAXPROTO, MOUNT_OPT_UINT }, +}; + + +/* + * i_lx_opt_verify() - Check the mount options. + * + * You might wonder why we're being so strict about the mount options + * we allow. The reason is that normally all mount option verification + * is done by the Solaris userland mount command. 
Once mount options + * are passed to the kernel, invalid options are simply ignored. So + * if we actually want to catch requests for functionality that we + * don't support, or if we want to make sure that we don't randomly + * enable options that we haven't check to make sure they have the + * same syntax on Linux and Solaris, we need to reject any options + * we don't know to be ok here. + */ +static int +i_lx_opt_verify(char *opts, mount_opt_t *mop) +{ + int opts_len = strlen(opts); + char *opts_tmp, *opt; + int opt_len, i; + + assert((opts != NULL) && (mop != NULL)); + + /* If no options were specified, there's no problem. */ + if (opts_len == 0) + return (1); + + /* If no options are allowed, fail. */ + if (mop[0].mo_name == NULL) + return (0); + + /* Don't accept leading or trailing ','. */ + if ((opts[0] == ',') || (opts[opts_len] == ',')) + return (0); + + /* Don't accept sequential ','. */ + for (i = 1; i < opts_len; i++) + if ((opts[i - 1] == ',') && (opts[i] == ',')) + return (0); + + /* + * We're going to use strtok() which modifies the target + * string so make a temporary copy. + */ + opts_tmp = SAFE_ALLOCA(opts_len); + if (opts_tmp == NULL) + return (-1); + bcopy(opts, opts_tmp, opts_len + 1); + + /* Verify each prop one at a time. */ + opt = strtok(opts_tmp, ","); + opt_len = strlen(opt); + for (;;) { + + /* Check for matching option/value pair. */ + for (i = 0; mop[i].mo_name != NULL; i++) { + char *ovalue; + int ovalue_len, mo_len; + + /* If the options is too short don't bother comparing */ + mo_len = strlen(mop[i].mo_name); + if (opt_len < mo_len) { + /* Keep trying to find a match. */ + continue; + } + + /* Compare the option to an allowed option. */ + if (strncmp(mop[i].mo_name, opt, mo_len) != 0) { + /* Keep trying to find a match. */ + continue; + } + + if (mop[i].mo_type == MOUNT_OPT_NORMAL) { + /* The option doesn't take a value. */ + if (opt_len == mo_len) { + /* This option is ok. */ + break; + } else { + /* Keep trying to find a match. 
*/ + continue; + } + } + + /* This options takes a value. */ + if ((opt_len == mo_len) || (opt[mo_len] != '=')) { + /* Keep trying to find a match. */ + continue; + } + + /* We have an option match. Verify option value. */ + ovalue = &opt[mo_len] + 1; + ovalue_len = strlen(ovalue); + + /* Value can't be zero length string. */ + if (ovalue_len == 0) + return (0); + + if (mop[i].mo_type == MOUNT_OPT_UINT) { + int j; + /* Verify that value is an unsigned int. */ + for (j = 0; j < ovalue_len; j++) + if (!isdigit(ovalue[j])) + return (0); + } else { + /* Unknown option type specified. */ + assert(0); + } + + /* The option is ok. */ + break; + } + + /* If there were no matches this is an unsupported option. */ + if (mop[i].mo_name == NULL) + return (0); + + /* This option is ok, move onto the next option. */ + if ((opt = strtok(NULL, ",")) == NULL) + break; + opt_len = strlen(opt); + }; + + /* We verified all the options. */ + return (1); +} + +static int +i_add_option(char *option, char *buf, size_t buf_size) +{ + char *fmt_str = NULL; + + assert((option != NULL) && (strlen(option) > 0)); + assert((buf != NULL) && (buf_size > 0)); + + if (buf[0] == '\0') { + fmt_str = "%s"; + } else { + fmt_str = ",%s"; + } + + buf_size -= strlen(buf); + buf += strlen(buf); + + /*LINTED*/ + if (snprintf(buf, buf_size, fmt_str, option) > (buf_size - 1)) + return (-EOVERFLOW); + return (0); +} + +static int +i_add_option_int(char *option, int val, char *buf, size_t buf_size) +{ + char *fmt_str = NULL; + + assert((option != NULL) && (strlen(option) > 0)); + assert((buf != NULL) && (buf_size > 0)); + + if (buf[0] == '\0') { + fmt_str = "%s=%d"; + } else { + fmt_str = ",%s=%d"; + } + + buf_size -= strlen(buf); + buf += strlen(buf); + + /*LINTED*/ + if (snprintf(buf, buf_size, fmt_str, option, val) > (buf_size - 1)) + return (-EOVERFLOW); + return (0); +} + +static int +i_make_nfs_args(lx_nfs_mount_data_t *lx_nmd, struct nfs_args *nfs_args, + struct netbuf *nfs_args_addr, struct knetconfig 
*nfs_args_knconf, + union fh_buffer *nfs_args_fh, struct sec_data *nfs_args_secdata, + char *fstype, char *options, int options_size) +{ + struct stat statbuf; + int i, rv, use_tcp; + + /* Sanity check the incomming Linux request. */ + if ((lx_nmd->nmd_rsize < 0) || (lx_nmd->nmd_wsize < 0) || + (lx_nmd->nmd_timeo < 0) || (lx_nmd->nmd_retrans < 0) || + (lx_nmd->nmd_acregmin < 0) || (lx_nmd->nmd_acregmax < 0) || + (lx_nmd->nmd_acdirmax < 0)) { + return (-EINVAL); + } + + /* + * Additional sanity checks of incomming request. + * + * Some of the sanity checks below should probably return + * EINVAL (or some other error code) instead or ENOTSUP, + * but without experiminting on Linux to see how it + * deals with certain strange values there is no way + * to really know what we should return, hence we return + * ENOTSUP to tell us that eventually if we see some + * application hitting the problem we can go to a real + * Linux system, figure out how it deals with the situation + * and update our code to handle it in the same fashion. 
+ */ + if (lx_nmd->nmd_version != 4) { + lx_unsupported("unsupported nfs mount request, " + "unrecognized NFS mount structure: %d\n", + lx_nmd->nmd_version); + return (-ENOTSUP); + } + if ((lx_nmd->nmd_flags & ~LX_NFS_MOUNT_SUPPORTED) != 0) { + lx_unsupported("unsupported nfs mount request, " + "flags: 0x%x\n", lx_nmd->nmd_flags); + return (-ENOTSUP); + } + if (lx_nmd->nmd_addr.sin_family != AF_INET) { + lx_unsupported("unsupported nfs mount request, " + "transport address family: 0x%x\n", + lx_nmd->nmd_addr.sin_family); + return (-ENOTSUP); + } + if (lx_nmd->nmd_root.lx_fh3_length != 0x20) { + lx_unsupported("unsupported nfs mount request, " + "nfs file handle length: 0x%x\n", + lx_nmd->nmd_root.lx_fh3_length); + return (-ENOTSUP); + } + for (i = 0; i < LX_NMD_MAXHOSTNAMELEN; i++) { + if (lx_nmd->nmd_hostname[i] == '\0') + break; + } + if (i == 0) { + lx_unsupported("unsupported nfs mount request, " + "no hostname specified\n"); + return (-ENOTSUP); + } + if (i == LX_NMD_MAXHOSTNAMELEN) { + lx_unsupported("unsupported nfs mount request, " + "hostname not terminated\n"); + return (-ENOTSUP); + } + if (lx_nmd->nmd_namlen < i) { + lx_unsupported("unsupported nfs mount request, " + "invalid namlen value: 0x%x\n", lx_nmd->nmd_namlen); + return (-ENOTSUP); + } + if (lx_nmd->nmd_bsize != 0) { + lx_unsupported("unsupported nfs mount request, " + "bsize value: 0x%x\n", lx_nmd->nmd_bsize); + return (-ENOTSUP); + } + + /* Initialize and clear the output structure pointers passed in. */ + bzero(nfs_args, sizeof (*nfs_args)); + bzero(nfs_args_addr, sizeof (*nfs_args_addr)); + bzero(nfs_args_knconf, sizeof (*nfs_args_knconf)); + bzero(nfs_args_fh, sizeof (*nfs_args_fh)); + bzero(nfs_args_secdata, sizeof (*nfs_args_secdata)); + nfs_args->addr = nfs_args_addr; + nfs_args->knconf = nfs_args_knconf; + nfs_args->fh = (caddr_t)nfs_args_fh; + nfs_args->nfs_ext_u.nfs_extB.secdata = nfs_args_secdata; + + /* Check if we're using tcp. 
*/ + use_tcp = (lx_nmd->nmd_flags & LX_NFS_MOUNT_TCP) ? 1 : 0; + + /* + * These seem to be the default flags used by Solaris for v2 and v3 + * nfs mounts. + * + * Don't bother with NFSMNT_TRYRDMA since we always specify a + * transport (either udp or tcp). + */ + nfs_args->flags = NFSMNT_NEWARGS | NFSMNT_KNCONF | NFSMNT_INT | + NFSMNT_HOSTNAME; + + /* Translate some Linux mount flags into Solaris mount flags. */ + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_INTR) + nfs_args->flags |= NFSMNT_INT; + if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOAC) + nfs_args->flags |= NFSMNT_NOAC; + + if ((lx_nmd->nmd_flags & LX_NFS_MOUNT_VER3) != 0) { + (void) strcpy(fstype, "nfs3"); + if ((rv = i_add_option_int("vers", 3, + options, options_size)) != 0) + return (rv); + + /* Set the v3 file handle info. */ + nfs_args_fh->fh3.fh3_length = lx_nmd->nmd_root.lx_fh3_length; + bcopy(&lx_nmd->nmd_root.lx_fh3_data, + nfs_args_fh->fh3.fh3_u.data, + sizeof (nfs_args_fh->fh3.fh3_u.data)); + } else { + /* + * Assume nfs v2. Note that this could also be a v1 + * mount request but there doesn't seem to be any difference + * in the parameters passed to the Linux mount system + * call for v1 or v2 mounts so there is no way of really + * knowing. + */ + (void) strcpy(fstype, "nfs"); + if ((rv = i_add_option_int("vers", 2, + options, options_size)) != 0) + return (rv); + + /* Solaris seems to add this flag when using v2. */ + nfs_args->flags |= NFSMNT_SECDEFAULT; + + /* Set the v2 file handle info. */ + bcopy(&lx_nmd->nmd_old_root, + nfs_args_fh, sizeof (nfs_args_fh->fh2)); + } + + /* + * We can't use getnetconfig() here because there is no netconfig + * database in linux. + */ + nfs_args_knconf->knc_protofmly = "inet"; + if (use_tcp) { + /* + * TCP uses NC_TPI_COTS_ORD semantics. + * See /etc/netconfig. 
+ */ + nfs_args_knconf->knc_semantics = NC_TPI_COTS_ORD; + nfs_args_knconf->knc_proto = "tcp"; + if ((rv = i_add_option("proto=tcp", + options, options_size)) != 0) + return (rv); + if (stat("/dev/tcp", &statbuf) != 0) + return (-errno); + nfs_args_knconf->knc_rdev = statbuf.st_rdev; + } else { + /* + * Assume UDP. UDP uses NC_TPI_CLTS semantics. + * See /etc/netconfig. + */ + nfs_args_knconf->knc_semantics = NC_TPI_CLTS; + nfs_args_knconf->knc_proto = "udp"; + if ((rv = i_add_option("proto=udp", + options, options_size)) != 0) + return (rv); + if (stat("/dev/udp", &statbuf) != 0) + return (-errno); + nfs_args_knconf->knc_rdev = statbuf.st_rdev; + } + + /* Set the server address. */ + nfs_args_addr->maxlen = nfs_args_addr->len = + sizeof (struct sockaddr_in); + nfs_args_addr->buf = (char *)&lx_nmd->nmd_addr; + + /* Set the server hostname string. */ + nfs_args->hostname = lx_nmd->nmd_hostname; + + /* Translate Linux nfs mount parameters into Solaris mount options. */ + if (lx_nmd->nmd_rsize != LX_NMD_DEFAULT_RSIZE) { + if ((rv = i_add_option_int("rsize", lx_nmd->nmd_rsize, + options, options_size)) != 0) + return (rv); + nfs_args->rsize = lx_nmd->nmd_rsize; + nfs_args->flags |= NFSMNT_RSIZE; + } + if (lx_nmd->nmd_wsize != LX_NMD_DEFAULT_WSIZE) { + if ((rv = i_add_option_int("wsize", lx_nmd->nmd_wsize, + options, options_size)) != 0) + return (rv); + nfs_args->wsize = lx_nmd->nmd_wsize; + nfs_args->flags |= NFSMNT_WSIZE; + } + if ((rv = i_add_option_int("timeo", lx_nmd->nmd_timeo, + options, options_size)) != 0) + return (rv); + nfs_args->timeo = lx_nmd->nmd_timeo; + nfs_args->flags |= NFSMNT_TIMEO; + if ((rv = i_add_option_int("retrans", lx_nmd->nmd_retrans, + options, options_size)) != 0) + return (rv); + nfs_args->retrans = lx_nmd->nmd_retrans; + nfs_args->flags |= NFSMNT_RETRANS; + if ((rv = i_add_option_int("acregmin", lx_nmd->nmd_acregmin, + options, options_size)) != 0) + return (rv); + nfs_args->acregmin = lx_nmd->nmd_acregmin; + nfs_args->flags |= 
NFSMNT_ACREGMIN; + if ((rv = i_add_option_int("acregmax", lx_nmd->nmd_acregmax, + options, options_size)) != 0) + return (rv); + nfs_args->acregmax = lx_nmd->nmd_acregmax; + nfs_args->flags |= NFSMNT_ACREGMAX; + if ((rv = i_add_option_int("acdirmin", lx_nmd->nmd_acdirmin, + options, options_size)) != 0) + return (rv); + nfs_args->acdirmin = lx_nmd->nmd_acdirmin; + nfs_args->flags |= NFSMNT_ACDIRMIN; + if ((rv = i_add_option_int("acdirmax", lx_nmd->nmd_acdirmax, + options, options_size)) != 0) + return (rv); + nfs_args->acdirmax = lx_nmd->nmd_acdirmax; + nfs_args->flags |= NFSMNT_ACDIRMAX; + + /* We only support nfs with a security type of AUTH_SYS. */ + nfs_args->nfs_args_ext = NFS_ARGS_EXTB; + nfs_args_secdata->secmod = AUTH_SYS; + nfs_args_secdata->rpcflavor = AUTH_SYS; + nfs_args_secdata->flags = 0; + nfs_args_secdata->uid = 0; + nfs_args_secdata->data = NULL; + nfs_args->nfs_ext_u.nfs_extB.next = NULL; + + /* + * The Linux nfs mount command seems to pass an open socket fd + * to the kernel during the mount system call. We don't need + * this fd on Solaris so just close it. + */ + (void) close(lx_nmd->nmd_fd); + + return (0); +} + +int +lx_mount(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, + uintptr_t p5) +{ + /* Linux input arguments. */ + const char *sourcep = (const char *)p1; + const char *targetp = (const char *)p2; + const char *fstypep = (const char *)p3; + unsigned int flags = (unsigned int)p4; + const void *datap = (const void *)p5; + + /* Variables needed for all mounts. */ + char source[MAXPATHLEN], target[MAXPATHLEN]; + char fstype[MAXPATHLEN], options[MAXPATHLEN]; + int sflags, rv; + + /* Variables needed for nfs mounts. */ + lx_nfs_mount_data_t lx_nmd; + struct nfs_args nfs_args; + struct netbuf nfs_args_addr; + struct knetconfig nfs_args_knconf; + union fh_buffer nfs_args_fh; + struct sec_data nfs_args_secdata; + char *sdataptr = NULL; + int sdatalen = 0; + + /* Initialize Solaris mount arguments. 
*/ + sflags = MS_OPTIONSTR; + options[0] = '\0'; + sdatalen = 0; + + /* Copy in parameters that are always present. */ + rv = uucopystr((void *)sourcep, &source, sizeof (source)); + if ((rv == -1) || (rv == sizeof (source))) + return (-EFAULT); + + rv = uucopystr((void *)targetp, &target, sizeof (target)); + if ((rv == -1) || (rv == sizeof (target))) + return (-EFAULT); + + rv = uucopystr((void *)fstypep, &fstype, sizeof (fstype)); + if ((rv == -1) || (rv == sizeof (fstype))) + return (-EFAULT); + + lx_debug("\tlinux mount source: %s", source); + lx_debug("\tlinux mount target: %s", target); + lx_debug("\tlinux mount fstype: %s", fstype); + + /* Make sure we support the requested mount flags. */ + if ((flags & ~LX_MS_SUPPORTED) != 0) { + lx_unsupported( + "unsupported mount flags: 0x%x", flags); + return (-ENOTSUP); + } + + /* Do filesystem specific mount work. */ + if (flags & LX_MS_BIND) { + + /* If MS_BIND is set, we turn this into a lofs mount. */ + (void) strcpy(fstype, "lofs"); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. */ + if (i_lx_opt_verify(options, lofs_options) == 0) { + lx_unsupported("unsupported lofs mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "proc") == 0) { + + /* Translate proc mount requests to lx_proc requests. */ + (void) strcpy(fstype, "lx_proc"); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. 
*/ + if (i_lx_opt_verify(options, lx_proc_options) == 0) { + lx_unsupported("unsupported lx_proc mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "autofs") == 0) { + + /* Translate proc mount requests to lx_afs requests. */ + (void) strcpy(fstype, LX_AUTOFS_NAME); + + /* Copy in Linux mount options. */ + if (datap != NULL) { + rv = uucopystr((void *)datap, + options, sizeof (options)); + if ((rv == -1) || (rv == sizeof (options))) + return (-EFAULT); + } + lx_debug("\tlinux mount options: \"%s\"", options); + + /* Verify Linux mount options. */ + if (i_lx_opt_verify(options, lx_autofs_options) == 0) { + lx_unsupported("unsupported lx_autofs mount options"); + return (-ENOTSUP); + } + } else if (strcmp(fstype, "nfs") == 0) { + + /* + * Copy in Linux mount options. Note that for Linux + * nfs mounts the mount options pointer (which normally + * points to a string) points to a structure. + */ + if (uucopy((void *)datap, &lx_nmd, sizeof (lx_nmd)) < 0) + return (-errno); + + /* + * For Solaris nfs mounts, the kernel expects a special + * strucutre, but a pointer to this structure is passed + * in via an extra parameter (sdataptr below.) + */ + if ((rv = i_make_nfs_args(&lx_nmd, &nfs_args, + &nfs_args_addr, &nfs_args_knconf, &nfs_args_fh, + &nfs_args_secdata, fstype, + options, sizeof (options))) != 0) + return (rv); + + /* + * For nfs mounts we need to tell the mount system call + * to expect extra parameters. + */ + sflags |= MS_DATA; + sdataptr = (char *)&nfs_args; + sdatalen = sizeof (nfs_args); + } else { + lx_unsupported( + "unsupported mount filesystem type: %s", fstype); + return (-ENOTSUP); + } + + /* Convert some Linux flags to Solaris flags. */ + if (flags & LX_MS_RDONLY) + sflags |= MS_RDONLY; + if (flags & LX_MS_NOSUID) + sflags |= MS_NOSUID; + if (flags & LX_MS_REMOUNT) + sflags |= MS_REMOUNT; + + /* Convert some Linux flags to Solaris option strings. 
*/ + if ((flags & LX_MS_NODEV) && + ((rv = i_add_option("nodev", options, sizeof (options))) != 0)) + return (rv); + if ((flags & LX_MS_NOEXEC) && + ((rv = i_add_option("noexec", options, sizeof (options))) != 0)) + return (rv); + if ((flags & LX_MS_NOATIME) && + ((rv = i_add_option("noatime", options, sizeof (options))) != 0)) + return (rv); + + lx_debug("\tsolaris mount fstype: %s", fstype); + lx_debug("\tsolaris mount options: \"%s\"", options); + + return (mount(source, target, sflags, fstype, sdataptr, sdatalen, + options, sizeof (options)) ? -errno : 0); +} + +/* + * umount() is identical, though it is implemented on top of umount2() in + * Solaris so it cannot be a pass-thru system call. + */ +int +lx_umount(uintptr_t p1) +{ + return (umount((char *)p1) ? -errno : 0); +} + +/* + * The Linux umount2() system call is identical but has a different value for + * MNT_FORCE (the logical equivalent to MS_FORCE). + */ +#define LX_MNT_FORCE 0x1 + +int +lx_umount2(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + int flags = 0; + + if (p2 & ~LX_MNT_FORCE) + return (-EINVAL); + + if (p2 & LX_MNT_FORCE) + flags |= MS_FORCE; + + return (umount2(path, flags) ? -errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/open.c b/usr/src/lib/brand/lx/lx_brand/common/open.c new file mode 100644 index 0000000000..c772f3ef4a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/open.c @@ -0,0 +1,136 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/inttypes.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <libintl.h> +#include <stdio.h> + +#include <sys/lx_types.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_fcntl.h> +#include <sys/lx_misc.h>

/*
 * Emulate the Linux open(2) system call.  p1 is the path, p2 the Linux
 * open flags and p3 the creation mode (only consulted when LX_O_CREAT is
 * set).  Returns the new file descriptor or a negative errno value.
 */
int
lx_open(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	/* Linux open flags that translate one-for-one to a Solaris flag. */
	static const struct {
		uintptr_t	lxf;
		int		sf;
	} xlate[] = {
		{ LX_O_CREAT,		O_CREAT },
		{ LX_O_EXCL,		O_EXCL },
		{ LX_O_NOCTTY,		O_NOCTTY },
		{ LX_O_TRUNC,		O_TRUNC },
		{ LX_O_APPEND,		O_APPEND },
		{ LX_O_NONBLOCK,	O_NONBLOCK },
		{ LX_O_SYNC,		O_SYNC },
		{ LX_O_LARGEFILE,	O_LARGEFILE },
		{ LX_O_NOFOLLOW,	O_NOFOLLOW }
	};
	const char	*pathname = (char *)p1;
	uintptr_t	accmode = p2 & O_ACCMODE;
	mode_t		cmode = 0;
	struct stat64	sb;
	int		oflags, newfd, err;
	size_t		idx;

	/* Asynchronous I/O notification isn't something we can emulate. */
	if (p2 & LX_O_ASYNC) {
		lx_unsupported(
		    gettext("%s(): operation with O_ASYNC flag set"), "open");
		return (-ENOTSUP);
	}

	/* Start from the access mode, then fold in the simple flags. */
	oflags = (accmode == LX_O_RDONLY) ? O_RDONLY :
	    (accmode == LX_O_WRONLY) ? O_WRONLY : O_RDWR;

	for (idx = 0; idx < sizeof (xlate) / sizeof (xlate[0]); idx++) {
		if (p2 & xlate[idx].lxf)
			oflags |= xlate[idx].sf;
	}

	/* The mode argument is only meaningful when creating a file. */
	if (p2 & LX_O_CREAT)
		cmode = (mode_t)p3;

	/*
	 * Linux uses the LX_O_DIRECT flag to do raw, synchronous I/O to the
	 * device backing the fd in question.  Solaris doesn't have similar
	 * functionality, but we can attempt to simulate it using the flags
	 * (O_RSYNC|O_SYNC) and directio(3C).
	 *
	 * The LX_O_DIRECT flag also requires that the transfer size and
	 * alignment of I/O buffers be a multiple of the logical block size for
	 * the underlying file system, but frankly there isn't an easy way to
	 * support that functionality without doing something like adding an
	 * fcntl(2) flag to denote LX_O_DIRECT mode.
	 *
	 * Since LX_O_DIRECT is merely a performance advisory, we'll just
	 * emulate what we can and trust that the only applications expecting
	 * an error when performing I/O from a misaligned buffer or when
	 * passing a transfer size that is not a multiple of the underlying
	 * file system block size will be test suites.
	 */
	if (p2 & LX_O_DIRECT)
		oflags |= (O_RSYNC|O_SYNC);

	lx_debug("\topen(%s, 0%o, 0%o)", pathname, oflags, cmode);

	newfd = open(pathname, oflags, cmode);
	if (newfd < 0)
		return (-errno);

	if ((p2 & LX_O_DIRECTORY) == 0) {
		/* Ordinary open: apply the direct I/O advice if requested. */
		if (p2 & LX_O_DIRECT)
			(void) directio(newfd, DIRECTIO_ON);
		return (newfd);
	}

	/*
	 * The caller insisted on a directory.  The file type is checked
	 * AFTER opening the file to avoid a race condition where the file
	 * could change types between a stat64() and an open().
	 */
	if (fstat64(newfd, &sb) < 0)
		err = errno;
	else if (!S_ISDIR(sb.st_mode))
		err = ENOTDIR;
	else
		return (newfd);

	/* Not usable: drop the descriptor and report why. */
	(void) close(newfd);
	return (-err);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/pgrp.c b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c new file mode 100644 index 0000000000..9ffd23ad8a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c @@ -0,0 +1,152 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <unistd.h> +#include <errno.h> +#include <sys/lx_misc.h>

/*
 * Convert the result of a native pgrp/session call into what a Linux
 * caller expects: an id equal to the zone's init process becomes
 * 'lx_init_id', a failure (-1) becomes the negated errno, and any other
 * value is passed through unchanged.
 */
static int
i_lx_xlate_id(int native, int lx_init_id)
{
	if (native == zoneinit_pid)
		return (lx_init_id);
	if (native == -1)
		return (-errno);
	return (native);
}

/* Emulate getpgrp(2) for the calling process. */
int
lx_getpgrp(void)
{
	return (i_lx_xlate_id(getpgrp(), LX_INIT_PGID));
}

/* Emulate getpgid(2); p1 is the Linux pid being queried. */
int
lx_getpgid(uintptr_t p1)
{
	int	lpid = (int)p1;
	pid_t	native_pid;
	int	err;

	if (lpid < 0)
		return (-ESRCH);

	/* The init process always reports the pgid Linux expects. */
	if (lpid == zoneinit_pid)
		return (LX_INIT_PGID);

	err = lx_lpid_to_spid(lpid, &native_pid);
	if (err < 0)
		return (err);

	return (i_lx_xlate_id(getpgid(native_pid), LX_INIT_PGID));
}

/* Emulate setpgid(2); p1 is the Linux pid and p2 the Linux pgid. */
int
lx_setpgid(uintptr_t p1, uintptr_t p2)
{
	pid_t	lpid = (pid_t)p1;
	pid_t	lpgid = (pid_t)p2;
	pid_t	native_pid, native_pgid;
	int	err;

	if (lpid < 0)
		return (-ESRCH);

	/* Both ids have to be translated into their native equivalents. */
	err = lx_lpid_to_spid(lpid, &native_pid);
	if (err < 0)
		return (err);

	err = lx_lpid_to_spid(lpgid, &native_pgid);
	if (err < 0)
		return (err);

	if (setpgid(native_pid, native_pgid) != 0)
		return (-errno);
	return (0);
}

/* Emulate getsid(2); p1 is the Linux pid being queried. */
int
lx_getsid(uintptr_t p1)
{
	int	lpid = (int)p1;
	pid_t	native_pid;
	int	err;

	if (lpid < 0)
		return (-ESRCH);

	/* The init process always reports the sid Linux expects. */
	if (lpid == zoneinit_pid)
		return (LX_INIT_SID);

	err = lx_lpid_to_spid(lpid, &native_pid);
	if (err < 0)
		return (err);

	return (i_lx_xlate_id(getsid(native_pid), LX_INIT_SID));
}

/* Emulate setsid(2) for the calling process. */
int
lx_setsid(void)
{
	return (i_lx_xlate_id(setsid(), LX_INIT_SID));
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c new file mode 100644 index 0000000000..e08d80e7e3 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c @@ -0,0 +1,215 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <alloca.h> +#include <signal.h> +#include <strings.h> +#include <sys/param.h> +#include <sys/brand.h> +#include <sys/poll.h> +#include <sys/syscall.h> +#include <sys/lx_debug.h> +#include <sys/lx_poll.h> +#include <sys/lx_syscall.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h>

extern int select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
	struct timeval *tv);

/*
 * Emulate the Linux select(2) system call.  p1 is nfds, p2/p3/p4 are the
 * (optional) read, write and exception fd sets and p5 is the (optional)
 * timeout.  The fd sets and timeout are copied in, handed to the native
 * select and copied back out.  Returns the number of ready descriptors
 * or a negative errno value.
 */
int
lx_select(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
    uintptr_t p5)
{
	int nfds = (int)p1;
	fd_set *rfdsp = NULL;
	fd_set *wfdsp = NULL;
	fd_set *efdsp = NULL;
	struct timeval tv, *tvp = NULL;
	size_t fd_set_len = 0;
	int r;
	hrtime_t start = 0, end;

	/* Log the arguments as passed in; the local copies aren't set yet. */
	lx_debug("\tselect(%d, 0x%p, 0x%p, 0x%p, 0x%p)",
	    nfds, (void *)p2, (void *)p3, (void *)p4, (void *)p5);

	if (nfds > 0) {
		/*
		 * Size the scratch sets in whole longs rather than bytes.
		 * NOTE(review): this assumes select() reads and writes fd
		 * sets a long word at a time (the Linux kernel copies them
		 * in long units), so a byte-granular buffer could be
		 * overrun -- confirm against the native implementation.
		 */
		fd_set_len = howmany(nfds, 8 * sizeof (long)) * sizeof (long);

		if (p2 != 0) {
			rfdsp = SAFE_ALLOCA(fd_set_len);
			if (rfdsp == NULL)
				return (-ENOMEM);
			if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
				return (-errno);
		}
		if (p3 != 0) {
			wfdsp = SAFE_ALLOCA(fd_set_len);
			if (wfdsp == NULL)
				return (-ENOMEM);
			if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
				return (-errno);
		}
		if (p4 != 0) {
			efdsp = SAFE_ALLOCA(fd_set_len);
			if (efdsp == NULL)
				return (-ENOMEM);
			if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
				return (-errno);
		}
	}
	if (p5 != 0) {
		tvp = &tv;
		if (uucopy((void *)p5, &tv, sizeof (tv)) != 0)
			return (-errno);
		/* Remember when we started so the timeout can be updated. */
		start = gethrtime();
	}

	if (nfds >= FD_SETSIZE)
		r = select_large_fdset(nfds, rfdsp, wfdsp, efdsp, tvp);
	else
		r = select(nfds, rfdsp, wfdsp, efdsp, tvp);
	if (r < 0)
		return (-errno);

	if (tvp != NULL) {
		long long tv_total;

		/*
		 * Linux updates the timeval parameter for select() calls
		 * with the amount of time that was left before the select
		 * would have timed out.
		 */
		end = gethrtime();
		tv_total = (tv.tv_sec * MICROSEC) + tv.tv_usec;
		tv_total -= ((end - start) / (NANOSEC / MICROSEC));
		if (tv_total < 0) {
			tv.tv_sec = 0;
			tv.tv_usec = 0;
		} else {
			tv.tv_sec = tv_total / MICROSEC;
			tv.tv_usec = tv_total % MICROSEC;
		}

		if (uucopy(&tv, (void *)p5, sizeof (tv)) != 0)
			return (-errno);
	}

	/* Copy the resulting fd sets back out to the caller. */
	if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
		return (-errno);
	if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
		return (-errno);
	if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
		return (-errno);

	return (r);
}

/*
 * Emulate the Linux poll(2) system call.  p1 points to the caller's
 * array of pollfd structures, p2 is the number of entries and p3 the
 * timeout in milliseconds.  Returns the value of the native poll() or a
 * negative errno value.
 */
int
lx_poll(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	struct pollfd	*lfds, *sfds;
	nfds_t		nfds = (nfds_t)p2;
	nfds_t		i;
	size_t		fds_size;
	int		rval, revents;

	/*
	 * nfds comes straight from the caller, so make sure the size of
	 * the array can be represented before allocating and walking it.
	 */
	fds_size = sizeof (struct pollfd) * nfds;
	if ((fds_size / sizeof (struct pollfd)) != nfds)
		return (-EINVAL);

	/*
	 * Note: we are assuming that the Linux and Solaris pollfd
	 * structures are identical.  Copy in the linux poll structure.
	 */
	lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
	if (lfds == NULL)
		return (-ENOMEM);
	if (uucopy((void *)p1, lfds, fds_size) != 0)
		return (-errno);

	/*
	 * The poll system call modifies the poll structures passed in
	 * so we'll need to make an extra copy of them.
	 */
	sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
	if (sfds == NULL)
		return (-ENOMEM);

	/* Convert the Linux events bitmask into the Solaris equivalent. */
	for (i = 0; i < nfds; i++) {
		/*
		 * If the caller is polling for an unsupported event, we
		 * have to bail out.
		 */
		if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
			lx_unsupported("unsupported poll events requested: "
			    "events=0x%x", lfds[i].events);
			return (-ENOTSUP);
		}

		sfds[i].fd = lfds[i].fd;
		sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
		if (lfds[i].events & LX_POLLWRNORM)
			sfds[i].events |= POLLWRNORM;
		if (lfds[i].events & LX_POLLWRBAND)
			sfds[i].events |= POLLWRBAND;
		sfds[i].revents = 0;
	}

	lx_debug("\tpoll(0x%p, %u, %d)", sfds, nfds, (int)p3);

	if ((rval = poll(sfds, nfds, (int)p3)) < 0)
		return (-errno);

	/* Convert the Solaris revents bitmask into the Linux equivalent */
	for (i = 0; i < nfds; i++) {
		revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
		if (sfds[i].revents & POLLWRBAND)
			revents |= LX_POLLWRBAND;

		/*
		 * Be careful because on solaris POLLOUT and POLLWRNORM
		 * are defined to the same values but on linux they
		 * are not.
		 */
		if (sfds[i].revents & POLLOUT) {
			if ((lfds[i].events & LX_POLLOUT) == 0)
				revents &= ~LX_POLLOUT;
			if (lfds[i].events & LX_POLLWRNORM)
				revents |= LX_POLLWRNORM;
		}

		lfds[i].revents = revents;
	}

	/* Copy out the results */
	if (uucopy(lfds, (void *)p1, fds_size) != 0)
		return (-errno);

	return (rval);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/priority.c b/usr/src/lib/brand/lx/lx_brand/common/priority.c new file mode 100644 index 0000000000..1519c18a71 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/priority.c @@ -0,0 +1,89 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <sys/types.h> +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> +#include <sys/lx_types.h> +#include <sys/resource.h> +#include <sys/lx_misc.h>

/*
 * Emulate the Linux getpriority(2) system call.  p1 is 'which' and p2 is
 * 'who'.  Returns the priority biased the way the Linux system call
 * reports it, or a negative errno value.
 */
int
lx_getpriority(uintptr_t p1, uintptr_t p2)
{
	int	which = (int)p1;
	id_t	who = (id_t)p2;
	int	ret;

	/*
	 * The only valid values for 'which' are positive integers, and unlike
	 * Solaris, linux doesn't support anything past PRIO_USER.
	 */
	if ((which < 0) || (which > PRIO_USER))
		return (-EINVAL);

	lx_debug("\tgetpriority(%d, %d)", which, who);

	/* -1 is a legitimate priority, so errno tells us about failure. */
	errno = 0;

	/* Linux pid 1 refers to the zone's init process. */
	if ((which == PRIO_PROCESS) && (who == 1))
		who = zoneinit_pid;

	ret = getpriority(which, who);
	if (ret == -1 && errno != 0)
		return (-errno);

	/*
	 * The return value of the getpriority syscall is biased by 20 to avoid
	 * returning negative values when successful.
	 */
	return (20 - ret);
}

/*
 * Emulate the Linux setpriority(2) system call.  p1 is 'which', p2 is
 * 'who' and p3 the new priority.  Returns 0 on success or a negative
 * errno value.
 */
int
lx_setpriority(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
	int	which = (int)p1;
	id_t	who = (id_t)p2;
	int	prio = (int)p3;
	int	rval;

	/* Apply the same range check on 'which' as lx_getpriority(). */
	if ((which < 0) || (which > PRIO_USER))
		return (-EINVAL);

	lx_debug("\tsetpriority(%d, %d, %d)", which, who, prio);

	/* Linux pid 1 refers to the zone's init process. */
	if ((which == PRIO_PROCESS) && (who == 1))
		who = zoneinit_pid;

	rval = setpriority(which, who, prio);

	return ((rval == -1) ? -errno : rval);
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c new file mode 100644 index 0000000000..d5ec2f94f1 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c @@ -0,0 +1,2121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_signal.h> +#include <sys/lx_thread.h> +#include <sys/lwp.h> +#include <unistd.h> +#include <fcntl.h> +#include <procfs.h> +#include <sys/frame.h> +#include <strings.h> +#include <signal.h> +#include <stddef.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <sys/auxv.h> +#include <thread.h> +#include <pthread.h> +#include <synch.h> +#include <elf.h> +#include <ieeefp.h> +#include <assert.h> +#include <libintl.h> + +/* + * Linux ptrace compatibility.
+ * + * The brand support for ptrace(2) is built on top of the Solaris /proc + * interfaces, mounted at /native/proc in the zone. This gets quite + * complicated due to the way ptrace works and the Solaris realization of the + * Linux threading model. + * + * ptrace can only interact with a process if we are tracing it, and it is + * currently stopped. There are two ways a process can begin tracing another + * process: + * + * PTRACE_TRACEME + * + * A child process can use PTRACE_TRACEME to indicate that it wants to be + * traced by the parent. This sets the ptrace compatibility flag in /proc + * which causes the ptrace consumer to be notified through the wait(2) + * system call of events of interest. PTRACE_TRACEME is typically used by + * the debugger by forking a process, using PTRACE_TRACEME, and finally + * doing an exec of the specified program. + * + * + * PTRACE_ATTACH + * + * We can attach to a process using PTRACE_ATTACH. This is considerably + * more complicated than the previous case. On Linux, the traced process is + * effectively reparented to the ptrace consumer so that event notification + * can go through the normal wait(2) system call. Solaris has no such + * ability to reparent a process (nor should it) so some trickery was + * required. + * + * When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child + * process. The monitor enables the /proc ptrace flag for itself and uses + * the native /proc mechanisms to observe the traced process and wait for + * events of interest. When the traced process stops, the monitor process + * sends itself a SIGTRAP thus rousting its parent process (the ptrace + * consumer) out of wait(2). We then translate the process id and status + * code from wait(2) to those of the traced process. + * + * To detach from the process we just have to clean up tracing flags and + * clean up the monitor.
 *
 * ptrace can only interact with a process if we have traced it, and it is
 * currently stopped (see is_traced()).  For threads, there's no way to
 * distinguish whether ptrace() has been called for all threads or some
 * subset.  Since most clients will be tracing all threads, and erroneously
 * allowing ptrace to access a non-traced thread is non-fatal (or at least
 * would be fatal on linux), we ignore this aspect of the problem.
 */

/*
 * Linux ptrace(2) request codes.
 */
#define	LX_PTRACE_TRACEME	0
#define	LX_PTRACE_PEEKTEXT	1
#define	LX_PTRACE_PEEKDATA	2
#define	LX_PTRACE_PEEKUSER	3
#define	LX_PTRACE_POKETEXT	4
#define	LX_PTRACE_POKEDATA	5
#define	LX_PTRACE_POKEUSER	6
#define	LX_PTRACE_CONT		7
#define	LX_PTRACE_KILL		8
#define	LX_PTRACE_SINGLESTEP	9
#define	LX_PTRACE_GETREGS	12
#define	LX_PTRACE_SETREGS	13
#define	LX_PTRACE_GETFPREGS	14
#define	LX_PTRACE_SETFPREGS	15
#define	LX_PTRACE_ATTACH	16
#define	LX_PTRACE_DETACH	17
#define	LX_PTRACE_GETFPXREGS	18
#define	LX_PTRACE_SETFPXREGS	19
#define	LX_PTRACE_SYSCALL	24

/*
 * This corresponds to the user_i387_struct Linux structure.
 */
typedef struct lx_user_fpregs {
	long lxuf_cwd;
	long lxuf_swd;
	long lxuf_twd;
	long lxuf_fip;
	long lxuf_fcs;
	long lxuf_foo;
	long lxuf_fos;
	long lxuf_st_space[20];
} lx_user_fpregs_t;

/*
 * This corresponds to the user_fxsr_struct Linux structure.
 */
typedef struct lx_user_fpxregs {
	uint16_t lxux_cwd;
	uint16_t lxux_swd;
	uint16_t lxux_twd;
	uint16_t lxux_fop;
	long lxux_fip;
	long lxux_fcs;
	long lxux_foo;
	long lxux_fos;
	long lxux_mxcsr;
	long lxux_reserved;
	long lxux_st_space[32];
	long lxux_xmm_space[32];
	long lxux_padding[56];
} lx_user_fpxregs_t;

/*
 * This corresponds to the user_regs_struct Linux structure.
 */
typedef struct lx_user_regs {
	long lxur_ebx;
	long lxur_ecx;
	long lxur_edx;
	long lxur_esi;
	long lxur_edi;
	long lxur_ebp;
	long lxur_eax;
	long lxur_xds;
	long lxur_xes;
	long lxur_xfs;
	long lxur_xgs;
	long lxur_orig_eax;
	long lxur_eip;
	long lxur_xcs;
	long lxur_eflags;
	long lxur_esp;
	long lxur_xss;
} lx_user_regs_t;

/*
 * Layout against which PEEKUSER/POKEUSER offsets are interpreted (see
 * ptrace_peek_user() and ptrace_poke_user()).  Presumably this mirrors the
 * Linux 'struct user' -- TODO confirm against the Linux headers.
 */
typedef struct lx_user {
	lx_user_regs_t lxu_regs;
	int lxu_fpvalid;
	lx_user_fpregs_t lxu_i387;
	ulong_t lxu_tsize;
	ulong_t lxu_dsize;
	ulong_t lxu_ssize;
	ulong_t lxu_start_code;
	ulong_t lxu_start_stack;
	long lxu_signal;
	int lxu_reserved;
	lx_user_regs_t *lxu_ar0;
	lx_user_fpregs_t *lxu_fpstate;
	ulong_t lxu_magic;
	char lxu_comm[32];
	int lxu_debugreg[8];
} lx_user_t;

/*
 * One entry per monitor child process created for an attached target; the
 * entries form a singly-linked list headed by ptrace_monitor_map.
 */
typedef struct ptrace_monitor_map {
	struct ptrace_monitor_map *pmm_next;	/* next pointer */
	pid_t pmm_monitor;			/* monitor child process */
	pid_t pmm_target;			/* traced Linux pid */
	pid_t pmm_pid;				/* Solaris pid */
	lwpid_t pmm_lwpid;			/* Solaris lwpid */
	uint_t pmm_exiting;			/* detached */
} ptrace_monitor_map_t;

/*
 * Per-process pseudo debug register state; the entries form a
 * singly-linked list headed by ptrace_state_map.
 */
typedef struct ptrace_state_map {
	struct ptrace_state_map *psm_next;	/* next pointer */
	pid_t psm_pid;				/* Solaris pid */
	uintptr_t psm_debugreg[8];		/* debug registers */
} ptrace_state_map_t;

/* Both lists are accessed with ptrace_map_mtx held. */
static ptrace_monitor_map_t *ptrace_monitor_map = NULL;
static ptrace_state_map_t *ptrace_state_map = NULL;
static mutex_t ptrace_map_mtx = DEFAULTMUTEX;

/* Its address is used by syscall_regs() when locating lx_emulate_done. */
extern void *_START_;

/* Every signal except SIGKILL and SIGSTOP; filled in by ptrace_init(). */
static sigset_t blockable_sigs;

/*
 * Initializer (run via #pragma init) that builds blockable_sigs.
 */
#pragma init(ptrace_init)
void
ptrace_init(void)
{
	(void) sigfillset(&blockable_sigs);
	(void) sigdelset(&blockable_sigs, SIGKILL);
	(void) sigdelset(&blockable_sigs, SIGSTOP);
}

/*
 * Given a pid, open the named file under /native/proc/<pid>/name using the
 * given mode.
+ */ +static int +open_procfile(pid_t pid, int mode, const char *name) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "/native/proc/%d/%s", pid, name); + + return (open(path, mode)); +} + +/* + * Given a pid and lwpid, open the named file under + * /native/proc/<pid>/<lwpid>/name using the given mode. + */ +static int +open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), "/native/proc/%d/lwp/%d/%s", + pid, lwpid, name); + + return (open(path, mode)); +} + +static int +get_status(pid_t pid, pstatus_t *psp) +{ + int fd; + + if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0) + return (-ESRCH); + + if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp) +{ + int fd; + + if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) + return (-ESRCH); + + if (read(fd, lsp, sizeof (lwpstatus_t)) != sizeof (lwpstatus_t)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static uintptr_t +syscall_regs(int fd, uintptr_t fp, pid_t pid) +{ + uintptr_t addr, done; + struct frame fr; + auxv_t auxv; + int afd; + Elf32_Phdr phdr; + + /* + * Try to walk the stack looking for a return address that corresponds + * to the traced process's lx_emulate_done symbol. This relies on the + * fact that the brand library in the traced process is the same as the + * brand library in this process (indeed, this is true of all processes + * in a given branded zone). + */ + + /* + * Find the base address for the brand library in the traced process + * by grabbing the AT_PHDR auxv entry, reading in the program header + * at that location and subtracting off the p_vaddr member. We use + * this to compute the location of lx_emulate done in the traced + * process. 
+ */ + if ((afd = open_procfile(pid, O_RDONLY, "auxv")) < 0) + return (0); + + do { + if (read(afd, &auxv, sizeof (auxv)) != sizeof (auxv)) { + (void) close(afd); + return (0); + } + } while (auxv.a_type != AT_PHDR); + + (void) close(afd); + + if (pread(fd, &phdr, sizeof (phdr), auxv.a_un.a_val) != sizeof (phdr)) { + lx_debug("failed to read brand library's phdr"); + return (0); + } + + addr = auxv.a_un.a_val - phdr.p_vaddr; + done = (uintptr_t)&lx_emulate_done - (uintptr_t)&_START_ + addr; + + fr.fr_savfp = fp; + + do { + addr = fr.fr_savfp; + if (pread(fd, &fr, sizeof (fr), addr) != sizeof (fr)) { + lx_debug("ptrace read failed for stack walk"); + return (0); + } + + if (addr >= fr.fr_savfp) { + lx_debug("ptrace stack not monotonically increasing " + "%p %p (%p)", addr, fr.fr_savfp, done); + return (0); + } + } while (fr.fr_savpc != done); + + /* + * The first argument to lx_emulate is known to be an lx_regs_t + * structure and the ABI specifies that it will be placed on the stack + * immediately preceeding the return address. + */ + addr += sizeof (fr); + if (pread(fd, &addr, sizeof (addr), addr) != sizeof (addr)) { + lx_debug("ptrace stack failed to read register set address"); + return (0); + } + + return (addr); +} + +static int +getregs(pid_t pid, lwpid_t lwpid, lx_user_regs_t *rp) +{ + lwpstatus_t status; + uintptr_t addr; + int fd, ret; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) + return (-ESRCH); + + /* + * If we find the syscall regs (and are therefore in an emulated + * syscall, use the register set at given address. Otherwise, use the + * registers as reported by /proc. 
+ */ + if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) { + lx_regs_t regs; + + if (pread(fd, ®s, sizeof (regs), addr) != sizeof (regs)) { + (void) close(fd); + lx_debug("ptrace failed to read register set"); + return (-EIO); + } + + (void) close(fd); + + rp->lxur_ebx = regs.lxr_ebx; + rp->lxur_ecx = regs.lxr_ecx; + rp->lxur_edx = regs.lxr_edx; + rp->lxur_esi = regs.lxr_esi; + rp->lxur_edi = regs.lxr_edi; + rp->lxur_ebp = regs.lxr_ebp; + rp->lxur_eax = regs.lxr_eax; + rp->lxur_xds = status.pr_reg[DS]; + rp->lxur_xes = status.pr_reg[ES]; + rp->lxur_xfs = status.pr_reg[FS]; + rp->lxur_xgs = regs.lxr_gs; + rp->lxur_orig_eax = regs.lxr_orig_eax; + rp->lxur_eip = regs.lxr_eip; + rp->lxur_xcs = status.pr_reg[CS]; + rp->lxur_eflags = status.pr_reg[EFL]; + rp->lxur_esp = regs.lxr_esp; + rp->lxur_xss = status.pr_reg[SS]; + + } else { + (void) close(fd); + + rp->lxur_ebx = status.pr_reg[EBX]; + rp->lxur_ecx = status.pr_reg[ECX]; + rp->lxur_edx = status.pr_reg[EDX]; + rp->lxur_esi = status.pr_reg[ESI]; + rp->lxur_edi = status.pr_reg[EDI]; + rp->lxur_ebp = status.pr_reg[EBP]; + rp->lxur_eax = status.pr_reg[EAX]; + rp->lxur_xds = status.pr_reg[DS]; + rp->lxur_xes = status.pr_reg[ES]; + rp->lxur_xfs = status.pr_reg[FS]; + rp->lxur_xgs = status.pr_reg[GS]; + rp->lxur_orig_eax = 0; + rp->lxur_eip = status.pr_reg[EIP]; + rp->lxur_xcs = status.pr_reg[CS]; + rp->lxur_eflags = status.pr_reg[EFL]; + rp->lxur_esp = status.pr_reg[UESP]; + rp->lxur_xss = status.pr_reg[SS]; + + /* + * If the target process has just returned from exec, it's not + * going to be sitting in the emulation function. In that case + * we need to manually fake up the values for %eax and orig_eax + * to indicate a successful return and that the traced process + * had called execve (respectively). 
+ */ + if (status.pr_why == PR_SYSEXIT && + (status.pr_what == SYS_exec || + status.pr_what == SYS_execve)) { + rp->lxur_eax = 0; + rp->lxur_orig_eax = LX_SYS_execve; + } + } + + return (0); +} + +static int +setregs(pid_t pid, lwpid_t lwpid, const lx_user_regs_t *rp) +{ + long ctl[1 + sizeof (prgregset_t) / sizeof (long)]; + lwpstatus_t status; + uintptr_t addr; + int fd, ret; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + if ((fd = open_procfile(pid, O_RDWR, "as")) < 0) + return (-ESRCH); + + /* + * If we find the syscall regs (and are therefore in an emulated + * syscall, modify the register set at given address and set the + * remaining registers through the /proc interface. Otherwise just use + * the /proc interface to set register values; + */ + if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) { + lx_regs_t regs; + + regs.lxr_ebx = rp->lxur_ebx; + regs.lxr_ecx = rp->lxur_ecx; + regs.lxr_edx = rp->lxur_edx; + regs.lxr_esi = rp->lxur_esi; + regs.lxr_edi = rp->lxur_edi; + regs.lxr_ebp = rp->lxur_ebp; + regs.lxr_eax = rp->lxur_eax; + regs.lxr_gs = rp->lxur_xgs; + regs.lxr_orig_eax = rp->lxur_orig_eax; + regs.lxr_eip = rp->lxur_eip; + regs.lxr_esp = rp->lxur_esp; + + if (pwrite(fd, ®s, sizeof (regs), addr) != sizeof (regs)) { + (void) close(fd); + lx_debug("ptrace failed to write register set"); + return (-EIO); + } + + (void) close(fd); + + status.pr_reg[DS] = rp->lxur_xds; + status.pr_reg[ES] = rp->lxur_xes; + status.pr_reg[FS] = rp->lxur_xfs; + status.pr_reg[CS] = rp->lxur_xcs; + status.pr_reg[EFL] = rp->lxur_eflags; + status.pr_reg[SS] = rp->lxur_xss; + + } else { + (void) close(fd); + + status.pr_reg[EBX] = rp->lxur_ebx; + status.pr_reg[ECX] = rp->lxur_ecx; + status.pr_reg[EDX] = rp->lxur_edx; + status.pr_reg[ESI] = rp->lxur_esi; + status.pr_reg[EDI] = rp->lxur_edi; + status.pr_reg[EBP] = rp->lxur_ebp; + status.pr_reg[EAX] = rp->lxur_eax; + status.pr_reg[DS] = rp->lxur_xds; + status.pr_reg[ES] = rp->lxur_xes; + 
status.pr_reg[FS] = rp->lxur_xfs; + status.pr_reg[GS] = rp->lxur_xgs; + status.pr_reg[EIP] = rp->lxur_eip; + status.pr_reg[CS] = rp->lxur_xcs; + status.pr_reg[EFL] = rp->lxur_eflags; + status.pr_reg[UESP] = rp->lxur_esp; + status.pr_reg[SS] = rp->lxur_xss; + status.pr_reg[SS] = rp->lxur_xss; + } + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl[0] = PCSREG; + bcopy(status.pr_reg, &ctl[1], sizeof (prgregset_t)); + + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +getfpregs(pid_t pid, lwpid_t lwpid, lx_user_fpregs_t *rp) +{ + lwpstatus_t status; + struct _fpstate *fp; + char *data; + int ret, i; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state; + + rp->lxuf_cwd = fp->cw; + rp->lxuf_swd = fp->sw; + rp->lxuf_twd = fp->tag; + rp->lxuf_fip = fp->ipoff; + rp->lxuf_fcs = fp->cssel; + rp->lxuf_foo = fp->dataoff; + rp->lxuf_fos = fp->datasel; + + /* + * The Linux structure uses 10 bytes per floating-point register. + */ + data = (char *)&rp->lxuf_st_space[0]; + for (i = 0; i < 8; i++) { + bcopy(&fp->_st[i], data, 10); + data += 10; + } + + return (0); +} + +static int +setfpregs(pid_t pid, lwpid_t lwpid, const lx_user_fpregs_t *rp) +{ + lwpstatus_t status; + struct { + long cmd; + prfpregset_t regs; + } ctl; + struct _fpstate *fp = (struct _fpstate *)&ctl.regs; + char *data; + int ret, i, fd; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs)); + + fp->cw = rp->lxuf_cwd; + fp->sw = rp->lxuf_swd; + fp->tag = rp->lxuf_twd; + fp->ipoff = rp->lxuf_fip; + fp->cssel = rp->lxuf_fcs; + fp->dataoff = rp->lxuf_foo; + fp->datasel = rp->lxuf_fos; + + /* + * The Linux structure uses 10 bytes per floating-point register. 
+ */ + data = (char *)&rp->lxuf_st_space[0]; + for (i = 0; i < 8; i++) { + bcopy(data, &fp->_st[i], 10); + data += 10; + } + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl.cmd = PCSFPREG; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + + +static int +getfpxregs(pid_t pid, lwpid_t lwpid, lx_user_fpxregs_t *rp) +{ + lwpstatus_t status; + struct _fpstate *fp; + int ret, i; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state; + + rp->lxux_cwd = (uint16_t)fp->cw; + rp->lxux_swd = (uint16_t)fp->sw; + rp->lxux_twd = (uint16_t)fp->tag; + rp->lxux_fop = (uint16_t)(fp->cssel >> 16); + rp->lxux_fip = fp->ipoff; + rp->lxux_fcs = (uint16_t)fp->cssel; + rp->lxux_foo = fp->dataoff; + rp->lxux_fos = fp->datasel; + rp->lxux_mxcsr = status.pr_fpreg.fp_reg_set.fpchip_state.mxcsr; + + bcopy(fp->xmm, rp->lxux_xmm_space, sizeof (rp->lxux_xmm_space)); + bzero(rp->lxux_st_space, sizeof (rp->lxux_st_space)); + for (i = 0; i < 8; i++) { + bcopy(&fp->_st[i], &rp->lxux_st_space[i * 4], + sizeof (fp->_st[i])); + } + + return (0); +} + +static int +setfpxregs(pid_t pid, lwpid_t lwpid, const lx_user_fpxregs_t *rp) +{ + lwpstatus_t status; + struct { + long cmd; + prfpregset_t regs; + } ctl; + struct _fpstate *fp = (struct _fpstate *)&ctl.regs; + int ret, i, fd; + + if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0) + return (ret); + + bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs)); + + fp->cw = rp->lxux_cwd; + fp->sw = rp->lxux_swd; + fp->tag = rp->lxux_twd; + fp->ipoff = rp->lxux_fip; + fp->cssel = rp->lxux_fcs | (rp->lxux_fop << 16); + fp->dataoff = rp->lxux_foo; + fp->datasel = rp->lxux_fos; + + bcopy(rp->lxux_xmm_space, fp->xmm, sizeof (rp->lxux_xmm_space)); + for (i = 0; i < 8; i++) { + bcopy(&rp->lxux_st_space[i * 4], &fp->_st[i], + sizeof (fp->_st[i])); + } + 
+ if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + ctl.cmd = PCSFPREG; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +/* + * Solaris does not allow a process to manipulate its own or some + * other process's debug registers. Linux ptrace(2) allows this + * and gdb manipulates them for its watchpoint implementation. + * + * We keep a pseudo set of debug registers for each traced process + * and map their contents into the appropriate PCWATCH /proc + * operations when they are activated by gdb. + * + * To understand how the debug registers work on x86 machines, + * see section 13.1 of the AMD x86-64 Architecture Programmer's + * Manual, Volume 2, System Programming. + */ +static uintptr_t * +debug_registers(pid_t pid) +{ + ptrace_state_map_t *p; + + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_state_map; p != NULL; p = p->psm_next) { + if (p->psm_pid == pid) + break; + } + if (p == NULL && (p = malloc(sizeof (*p))) != NULL) { + bzero(p, sizeof (*p)); + p->psm_pid = pid; + p->psm_next = ptrace_state_map; + p->psm_debugreg[6] = 0xffff0ff0; /* read as ones */ + ptrace_state_map = p; + } + (void) mutex_unlock(&ptrace_map_mtx); + return (p != NULL? 
p->psm_debugreg : NULL); +} + +static void +free_debug_registers(pid_t pid) +{ + ptrace_state_map_t **pp; + ptrace_state_map_t *p; + + /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */ + for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) { + if (p->psm_pid == pid) { + *pp = p->psm_next; + free(p); + break; + } + } +} + +static int +setup_watchpoints(pid_t pid, uintptr_t *debugreg) +{ + int dr7 = debugreg[7]; + int lrw; + int fd; + size_t size; + prwatch_t prwatch[4]; + int nwatch; + int i; + int wflags; + int error; + struct { + long req; + prwatch_t prwatch; + } ctl; + + /* find all watched areas */ + if ((fd = open_procfile(pid, O_RDONLY, "watch")) < 0) + return (-ESRCH); + nwatch = read(fd, prwatch, sizeof (prwatch)) / sizeof (prwatch_t); + (void) close(fd); + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) + return (-ESRCH); + /* clear all watched areas */ + for (i = 0; i < nwatch; i++) { + ctl.req = PCWATCH; + ctl.prwatch = prwatch[i]; + ctl.prwatch.pr_wflags = 0; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + error = -errno; + (void) close(fd); + return (error); + } + } + /* establish all new watched areas */ + for (i = 0; i < 4; i++) { + if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */ + continue; + lrw = (dr7 >> (16 + (4 * i))) & 0xf; + switch (lrw >> 2) { /* length */ + case 0: size = 1; break; + case 1: size = 2; break; + case 2: size = 8; break; + case 3: size = 4; break; + } + switch (lrw & 0x3) { /* mode */ + case 0: wflags = WA_EXEC; break; + case 1: wflags = WA_WRITE; break; + case 2: continue; + case 3: wflags = WA_READ | WA_WRITE; break; + } + ctl.req = PCWATCH; + ctl.prwatch.pr_vaddr = debugreg[i]; + ctl.prwatch.pr_size = size; + ctl.prwatch.pr_wflags = wflags | WA_TRAPAFTER; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + error = -errno; + (void) close(fd); + return (error); + } + } + (void) close(fd); + return (0); +} + +/* + * Returns TRUE if the process is traced, FALSE otherwise. 
This is only true + * if the process is currently stopped, and has been traced using PTRACE_TRACEME + * or PTRACE_ATTACH. + */ +static int +is_traced(pid_t pid) +{ + ptrace_monitor_map_t *p; + pstatus_t status; + + if (get_status(pid, &status) != 0) + return (0); + + if ((status.pr_flags & PR_PTRACE) && + (status.pr_ppid == getpid()) && + (status.pr_lwp.pr_flags & PR_ISTOP)) + return (1); + + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { + if (p->pmm_target == pid) { + (void) mutex_unlock(&ptrace_map_mtx); + return (1); + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +static int +ptrace_trace_common(int fd) +{ + struct { + long cmd; + union { + long flags; + sigset_t signals; + fltset_t faults; + } arg; + } ctl; + size_t size; + + ctl.cmd = PCSTRACE; + prfillset(&ctl.arg.signals); + size = sizeof (long) + sizeof (sigset_t); + if (write(fd, &ctl, size) != size) + return (-1); + + ctl.cmd = PCSFAULT; + premptyset(&ctl.arg.faults); + size = sizeof (long) + sizeof (fltset_t); + if (write(fd, &ctl, size) != size) + return (-1); + + ctl.cmd = PCUNSET; + ctl.arg.flags = PR_FORK; + size = sizeof (long) + sizeof (long); + if (write(fd, &ctl, size) != size) + return (-1); + + return (0); +} + +/* + * Notify that parent that we wish to be traced. This is the equivalent of: + * + * 1. Stop on all signals, and nothing else + * 2. Turn off inherit-on-fork flag + * 3. Set ptrace compatible flag + * + * If we are not the main thread, then the client is trying to request behavior + * by which one of its own thread is to be traced. We don't support this mode + * of operation. 
+ */ +static int +ptrace_traceme(void) +{ + int fd, ret; + int error; + long ctl[2]; + pstatus_t status; + pid_t pid = getpid(); + + if (_lwp_self() != 1) { + lx_unsupported(gettext( + "thread %d calling PTRACE_TRACEME is unsupported"), + _lwp_self()); + return (-ENOTSUP); + } + + if ((ret = get_status(pid, &status)) != 0) + return (ret); + + /* + * Why would a process try to do this twice? I'm not sure, but there's + * a conformance test which wants this to fail just so. + */ + if (status.pr_flags & PR_PTRACE) + return (-EPERM); + + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) + return (-errno); + + ctl[0] = PCSET; + ctl[1] = PR_PTRACE; + error = 0; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) || + ptrace_trace_common(fd) != 0) + error = -errno; + + (void) close(fd); + return (error); +} + +/* + * Read a word of data from the given address. Because this is a process-wide + * action, we don't need the lwpid. + */ +static int +ptrace_peek(pid_t pid, uintptr_t addr, int *ret) +{ + int fd, data; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0) + return (-ESRCH); + + if (pread(fd, &data, sizeof (data), addr) != sizeof (data)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + if (uucopy(&data, ret, sizeof (data)) != 0) + return (-errno); + + return (0); +} + +#define LX_USER_BOUND(m) \ +(offsetof(lx_user_t, m) + sizeof (((lx_user_t *)NULL)->m)) + +static int +ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret) +{ + int err, data; + uintptr_t *debugreg; + int dreg; + + if (!is_traced(pid)) + return (-ESRCH); + + /* + * The offset specified by the user is an offset into the Linux + * user structure (seriously). Rather than constructing a full + * user structure, we figure out which part of the user structure + * the offset is in, and fill in just that component. 
+ */ + if (off < LX_USER_BOUND(lxu_regs)) { + lx_user_regs_t regs; + + if ((err = getregs(pid, lwpid, ®s)) != 0) + return (err); + + data = *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_regs)); + + } else if (off < LX_USER_BOUND(lxu_fpvalid)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_i387)) { + lx_user_fpregs_t regs; + + if ((err = getfpregs(pid, lwpid, ®s)) != 0) + return (err); + + data = *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_i387)); + + } else if (off < LX_USER_BOUND(lxu_tsize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_dsize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_ssize)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_start_code)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_start_stack)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_signal)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_reserved)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_ar0)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_fpstate)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_magic)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_comm)) { + lx_err(gettext("offset = %lu\n"), off); + assert(0); + } else if (off < LX_USER_BOUND(lxu_debugreg)) { + dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int); + if (dreg == 4) /* aliased */ + dreg = 6; + else if (dreg == 5) /* aliased */ + dreg = 7; + if ((debugreg = debug_registers(pid)) != NULL) + data = debugreg[dreg]; + else + data = 0; + } else { + lx_unsupported(gettext( + 
"unsupported ptrace %s user offset: 0x%x\n"), "peek", off); + assert(0); + return (-ENOTSUP); + } + + if (uucopy(&data, ret, sizeof (data)) != 0) + return (-errno); + + return (0); +} + +/* + * Write a word of data to the given address. Because this is a process-wide + * action, we don't need the lwpid. Returns EINVAL if the address is not + * word-aligned. + */ +static int +ptrace_poke(pid_t pid, uintptr_t addr, int data) +{ + int fd; + + if (!is_traced(pid)) + return (-ESRCH); + + if (addr & 0x3) + return (-EINVAL); + + if ((fd = open_procfile(pid, O_WRONLY, "as")) < 0) + return (-ESRCH); + + if (pwrite(fd, &data, sizeof (data), addr) != sizeof (data)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + return (0); +} + +static int +ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data) +{ + lx_user_regs_t regs; + int err = 0; + uintptr_t *debugreg; + int dreg; + + if (!is_traced(pid)) + return (-ESRCH); + + if (off & 0x3) + return (-EINVAL); + + if (off < offsetof(lx_user_t, lxu_regs) + sizeof (lx_user_regs_t)) { + if ((err = getregs(pid, lwpid, ®s)) != 0) + return (err); + *(int *)((uintptr_t)®s + off - + offsetof(lx_user_t, lxu_regs)) = data; + return (setregs(pid, lwpid, ®s)); + } + + if (off >= offsetof(lx_user_t, lxu_debugreg) && + off < offsetof(lx_user_t, lxu_debugreg) + 8 * sizeof (int)) { + dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int); + if (dreg == 4) /* aliased */ + dreg = 6; + else if (dreg == 5) /* aliased */ + dreg = 7; + if ((debugreg = debug_registers(pid)) != NULL) { + debugreg[dreg] = data; + if (dreg == 7) + err = setup_watchpoints(pid, debugreg); + } + return (err); + } + + lx_unsupported(gettext("unsupported ptrace %s user offset: 0x%x\n"), + "poke", off); + assert(0); + return (-ENOTSUP); +} + +static int +ptrace_cont_common(int fd, int sig, int run, int step) +{ + long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2]; + long *ctlp = ctl; + size_t size; + + assert(0 <= sig && sig < LX_NSIG); 
+ assert(!step || run); + + /* + * Clear the current signal. + */ + *ctlp++ = PCCSIG; + + /* + * Send a signal if one was specified. + */ + if (sig != 0 && sig != LX_SIGSTOP) { + siginfo_t *infop; + + *ctlp++ = PCSSIG; + infop = (siginfo_t *)ctlp; + bzero(infop, sizeof (siginfo_t)); + infop->si_signo = ltos_signo[sig]; + + ctlp += sizeof (siginfo_t) / sizeof (long); + } + + /* + * If run is true, set the lwp running. + */ + if (run) { + *ctlp++ = PCRUN; + *ctlp++ = step ? PRSTEP : 0; + } + + size = (char *)ctlp - (char *)&ctl[0]; + assert(size <= sizeof (ctl)); + + if (write(fd, ctl, size) != size) { + lx_debug("failed to continue %s", strerror(errno)); + return (-EIO); + } + + return (0); +} + +static int +ptrace_cont_monitor(ptrace_monitor_map_t *p) +{ + long ctl[2]; + int fd; + + fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl"); + if (fd < 0) { + lx_debug("failed to open monitor ctl %d", + errno); + return (-EIO); + } + + ctl[0] = PCRUN; + ctl[1] = PRCSIG; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + (void) close(fd); + + return (0); +} + +static int +ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step) +{ + ptrace_monitor_map_t *p; + uintptr_t *debugreg; + int fd, ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if (sig < 0 || sig >= LX_NSIG) + return (-EINVAL); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) { + (void) close(fd); + return (ret); + } + + (void) close(fd); + + /* kludge: use debugreg[4] to remember the single-step flag */ + if ((debugreg = debug_registers(pid)) != NULL) + debugreg[4] = step; + + /* + * Check for a monitor and get it moving if we find it. If any of the + * /proc operations fail, we're kind of sunk so just return an error. 
+ */ + (void) mutex_lock(&ptrace_map_mtx); + for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) { + if (p->pmm_target == lxpid) { + if ((ret = ptrace_cont_monitor(p)) != 0) + return (ret); + break; + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +/* + * If a monitor exists for this traced process, dispose of it. + * First turn off its ptrace flag so we won't be notified of its + * impending demise. We ignore errors for this step since they + * indicate only that the monitor has been damaged due to pilot + * error. Then kill the monitor, and wait for it. If the wait + * succeeds we can dispose of the corpse, otherwise another thread's + * wait call has collected it and we need to set a flag in the + * structure so that if can be picked up in wait. + */ +static void +monitor_kill(pid_t lxpid, pid_t pid) +{ + ptrace_monitor_map_t *p, **pp; + pid_t mpid; + int fd; + long ctl[2]; + + (void) mutex_lock(&ptrace_map_mtx); + free_debug_registers(pid); + for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { + if (p->pmm_target == lxpid) { + mpid = p->pmm_monitor; + if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) { + ctl[0] = PCUNSET; + ctl[1] = PR_PTRACE; + (void) write(fd, ctl, sizeof (ctl)); + (void) close(fd); + } + + (void) kill(mpid, SIGKILL); + + if (waitpid(mpid, NULL, 0) == mpid) { + *pp = p->pmm_next; + free(p); + } else { + p->pmm_exiting = 1; + } + + break; + } + } + (void) mutex_unlock(&ptrace_map_mtx); +} + +static int +ptrace_kill(pid_t lxpid, pid_t pid) +{ + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + ret = kill(pid, SIGKILL); + + /* kill off the monitor process, if any */ + monitor_kill(lxpid, pid); + + return (ret); +} + +static int +ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + return (ptrace_cont(lxpid, pid, lwpid, sig, 1)); +} + +static int +ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_regs_t regs; + int ret; + + if (!is_traced(pid)) + return 
(-ESRCH); + + if ((ret = getregs(pid, lwpid, ®s)) != 0) + return (ret); + + if (uucopy(®s, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_regs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, ®s, sizeof (regs)) != 0) + return (-errno); + + return (setregs(pid, lwpid, ®s)); +} + +static int +ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpregs_t regs; + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = getfpregs(pid, lwpid, ®s)) != 0) + return (ret); + + if (uucopy(®s, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpregs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, ®s, sizeof (regs)) != 0) + return (-errno); + + return (setfpregs(pid, lwpid, ®s)); +} + +static int +ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpxregs_t regs; + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = getfpxregs(pid, lwpid, ®s)) != 0) + return (ret); + + if (uucopy(®s, (void *)addr, sizeof (regs)) != 0) + return (-errno); + + return (0); +} + +static int +ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr) +{ + lx_user_fpxregs_t regs; + + if (!is_traced(pid)) + return (-ESRCH); + + if (uucopy((void *)addr, ®s, sizeof (regs)) != 0) + return (-errno); + + return (setfpxregs(pid, lwpid, ®s)); +} + +static void __NORETURN +ptrace_monitor(int fd) +{ + struct { + long cmd; + union { + long flags; + sigset_t signals; + fltset_t faults; + } arg; + } ctl; + size_t size; + int monfd; + int rv; + + monfd = open_procfile(getpid(), O_WRONLY, "ctl"); + + ctl.cmd = PCSTRACE; /* trace only SIGTRAP */ + premptyset(&ctl.arg.signals); + praddset(&ctl.arg.signals, SIGTRAP); + size = sizeof (long) + sizeof (sigset_t); + (void) 
write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCSFAULT; + premptyset(&ctl.arg.faults); + size = sizeof (long) + sizeof (fltset_t); + (void) write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCUNSET; + ctl.arg.flags = PR_FORK; + size = sizeof (long) + sizeof (long); + (void) write(monfd, &ctl, size); /* can't fail */ + + ctl.cmd = PCSET; /* wait()able by the parent */ + ctl.arg.flags = PR_PTRACE; + size = sizeof (long) + sizeof (long); + (void) write(monfd, &ctl, size); /* can't fail */ + + (void) close(monfd); + + ctl.cmd = PCWSTOP; + size = sizeof (long); + + for (;;) { + /* + * Wait for the traced process to stop. + */ + if (write(fd, &ctl, size) != size) { + rv = (errno == ENOENT)? 0 : 1; + lx_debug("monitor failed to wait for LWP to stop: %s", + strerror(errno)); + _exit(rv); + } + + lx_debug("monitor caught traced LWP"); + + /* + * Pull the ptrace trigger by sending ourself a SIGTRAP. This + * will cause this, the monitor process, to stop which will + * cause the parent's waitid(2) call to return this process + * id. In lx_wait(), we remap the monitor process's pid and + * status to those of the traced LWP. When the parent process + * uses ptrace to resume the traced LWP, it will additionally + * restart this process. + */ + (void) _lwp_kill(_lwp_self(), SIGTRAP); + + lx_debug("monitor was resumed"); + } +} + +static int +ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run) +{ + pid_t child; + ptrace_monitor_map_t *p; + sigset_t unblock; + pstatus_t status; + long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2]; + long *ctlp = ctl; + size_t size; + sysset_t *sysp; + int ret; + + /* + * We're going to need this structure so better to fail now before its + * too late to turn back. + */ + if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL) + return (-EIO); + + if ((ret = get_status(pid, &status)) != 0) { + free(p); + return (ret); + } + + /* + * If this process is already traced, bail. 
+ */ + if (status.pr_flags & PR_PTRACE) { + free(p); + return (-EPERM); + } + + /* + * Turn on the appropriate tracing flags. It's exceedingly unlikely + * that this operation will fail; any failure would probably be due + * to another /proc consumer mucking around. + */ + if (ptrace_trace_common(fd) != 0) { + free(p); + return (-EIO); + } + + /* + * Native ptrace automatically catches processes when they exec so we + * have to do that explicitly here. + */ + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + praddset(sysp, SYS_exec); + praddset(sysp, SYS_execve); + if (run) { + *ctlp++ = PCRUN; + *ctlp++ = 0; + } + + size = (char *)ctlp - (char *)&ctl[0]; + + if (write(fd, ctl, size) != size) { + free(p); + return (-EIO); + } + + /* + * Spawn the monitor proceses to notify this process of events of + * interest in the traced process. We block signals here both so + * we're not interrupted during this operation and so that the + * monitor process doesn't accept signals. + */ + (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock); + if ((child = fork1()) == 0) + ptrace_monitor(fd); + (void) sigprocmask(SIG_SETMASK, &unblock, NULL); + + if (child == -1) { + lx_debug("failed to fork monitor process\n"); + free(p); + return (-EIO); + } + + p->pmm_monitor = child; + p->pmm_target = lxpid; + p->pmm_pid = pid; + p->pmm_lwpid = lwpid; + p->pmm_exiting = 0; + + (void) mutex_lock(&ptrace_map_mtx); + p->pmm_next = ptrace_monitor_map; + ptrace_monitor_map = p; + (void) mutex_unlock(&ptrace_map_mtx); + + return (0); +} + +static int +ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid) +{ + int fd, ret; + long ctl; + + /* + * Linux doesn't let you trace process 1 -- go figure. + */ + if (lxpid == 1) + return (-EPERM); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0) + return (errno == EBUSY ? 
-EPERM : -ESRCH); + + ctl = PCSTOP; + if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) { + lx_err(gettext("failed to stop %d/%d\n"), (int)pid, (int)lwpid); + assert(0); + } + + ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0); + + (void) close(fd); + + return (ret); +} + +static int +ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + long ctl[2]; + int fd, ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if (sig < 0 || sig >= LX_NSIG) + return (-EINVAL); + + if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0) + return (-ESRCH); + + /* + * The /proc ptrace flag may not be set, but we clear it + * unconditionally since doing so doesn't hurt anything. + */ + ctl[0] = PCUNSET; + ctl[1] = PR_PTRACE; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + /* + * Clear the brand-specific system call tracing flag to ensure that + * the target doesn't stop unexpectedly some time in the future. + */ + if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) { + (void) close(fd); + return (-ret); + } + + /* kill off the monitor process, if any */ + monitor_kill(lxpid, pid); + + /* + * Turn on the run-on-last-close flag so that all tracing flags will be + * cleared when we close the control file descriptor. + */ + ctl[0] = PCSET; + ctl[1] = PR_RLC; + if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) { + (void) close(fd); + return (-EIO); + } + + /* + * Clear the current signal (if any) and possibly send the traced + * process a new signal. 
+ */ + ret = ptrace_cont_common(fd, sig, 0, 0); + + (void) close(fd); + + return (ret); +} + +static int +ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig) +{ + int ret; + + if (!is_traced(pid)) + return (-ESRCH); + + if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0) + return (-ret); + + return (ptrace_cont(lxpid, pid, lwpid, sig, 0)); +} + +int +lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + pid_t pid, lxpid = (pid_t)p2; + lwpid_t lwpid; + + if ((p1 != LX_PTRACE_TRACEME) && + (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0)) + return (-ESRCH); + + switch (p1) { + case LX_PTRACE_TRACEME: + return (ptrace_traceme()); + + case LX_PTRACE_PEEKTEXT: + case LX_PTRACE_PEEKDATA: + return (ptrace_peek(pid, p3, (int *)p4)); + + case LX_PTRACE_PEEKUSER: + return (ptrace_peek_user(pid, lwpid, p3, (int *)p4)); + + case LX_PTRACE_POKETEXT: + case LX_PTRACE_POKEDATA: + return (ptrace_poke(pid, p3, (int)p4)); + + case LX_PTRACE_POKEUSER: + return (ptrace_poke_user(pid, lwpid, p3, (int)p4)); + + case LX_PTRACE_CONT: + return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0)); + + case LX_PTRACE_KILL: + return (ptrace_kill(lxpid, pid)); + + case LX_PTRACE_SINGLESTEP: + return (ptrace_step(lxpid, pid, lwpid, (int)p4)); + + case LX_PTRACE_GETREGS: + return (ptrace_getregs(pid, lwpid, p4)); + + case LX_PTRACE_SETREGS: + return (ptrace_setregs(pid, lwpid, p4)); + + case LX_PTRACE_GETFPREGS: + return (ptrace_getfpregs(pid, lwpid, p4)); + + case LX_PTRACE_SETFPREGS: + return (ptrace_setfpregs(pid, lwpid, p4)); + + case LX_PTRACE_ATTACH: + return (ptrace_attach(lxpid, pid, lwpid)); + + case LX_PTRACE_DETACH: + return (ptrace_detach(lxpid, pid, lwpid, (int)p4)); + + case LX_PTRACE_GETFPXREGS: + return (ptrace_getfpxregs(pid, lwpid, p4)); + + case LX_PTRACE_SETFPXREGS: + return (ptrace_setfpxregs(pid, lwpid, p4)); + + case LX_PTRACE_SYSCALL: + return (ptrace_syscall(lxpid, pid, lwpid, (int)p4)); + + default: + return (-EINVAL); + } +} + +void 
+lx_ptrace_fork(void) +{ + /* + * Send a special signal (that has no Linux equivalent) to indicate + * that we're in this particularly special case. The signal will be + * ignored by this process, but noticed by /proc consumers tracing + * this process. + */ + (void) _lwp_kill(_lwp_self(), SIGWAITING); +} + +static void +ptrace_catch_fork(pid_t pid, int monitor) +{ + long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)]; + long *ctlp; + sysset_t *sysp; + size_t size; + pstatus_t ps; + pid_t child; + int fd, err; + + /* + * If any of this fails, we're really sunk since the child + * will be stuck in the middle of lx_ptrace_fork(). + * Fortunately it's practically assured to succeed unless + * something is seriously wrong on the system. + */ + if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) { + lx_debug("lx_catch_fork: failed to control %d", + (int)pid); + return; + } + + /* + * Turn off the /proc PR_PTRACE flag so the parent doesn't get + * spurious wake ups while we're working our dark magic. Arrange to + * catch the process when it exits from fork, and turn on the /proc + * inherit-on-fork flag so we catcht the child as well. We then run + * the process, wait for it to stop on the fork1(2) call and reset + * the tracing flags to their original state. 
+ */ + ctlp = ctl; + *ctlp++ = PCCSIG; + if (!monitor) { + *ctlp++ = PCUNSET; + *ctlp++ = PR_PTRACE; + } + *ctlp++ = PCSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + praddset(sysp, SYS_fork1); + *ctlp++ = PCRUN; + *ctlp++ = 0; + *ctlp++ = PCWSTOP; + if (!monitor) { + *ctlp++ = PCSET; + *ctlp++ = PR_PTRACE; + } + *ctlp++ = PCUNSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + if (monitor) { + praddset(sysp, SYS_exec); + praddset(sysp, SYS_execve); + } + + size = (char *)ctlp - (char *)&ctl[0]; + assert(size <= sizeof (ctl)); + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to set %d running", + (int)pid); + return; + } + + /* + * Get the status so we can find the value returned from fork1() -- + * the child process's pid. + */ + if (get_status(pid, &ps) != 0) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to get status for %d", + (int)pid); + return; + } + + child = (pid_t)ps.pr_lwp.pr_reg[R_R0]; + + /* + * We're done with the parent -- off you go. + */ + ctl[0] = PCRUN; + ctl[1] = 0; + size = 2 * sizeof (long); + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to set %d running", + (int)pid); + return; + } + + (void) close(fd); + + /* + * If fork1(2) failed, we're done. + */ + if (child < 0) { + lx_debug("lx_catch_fork: fork1 failed"); + return; + } + + /* + * Now we need to screw with the child process. 
+ */ + if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) { + lx_debug("lx_catch_fork: failed to control %d", + (int)child); + return; + } + + ctlp = ctl; + *ctlp++ = PCUNSET; + *ctlp++ = PR_FORK; + *ctlp++ = PCSEXIT; + sysp = (sysset_t *)ctlp; + ctlp += sizeof (sysset_t) / sizeof (long); + premptyset(sysp); + size = (char *)ctlp - (char *)&ctl[0]; + + if (write(fd, ctl, size) != size) { + (void) close(fd); + lx_debug("lx_catch_fork: failed to clear trace flags for %d", + (int)child); + return; + } + + /* + * Now treat the child as though we had attached to it explicitly. + */ + err = ptrace_attach_common(fd, child, child, 1, 1); + assert(err == 0); + + (void) close(fd); +} + +static void +set_dr6(pid_t pid, siginfo_t *infop) +{ + uintptr_t *debugreg; + uintptr_t addr; + uintptr_t base; + size_t size; + int dr7; + int lrw; + int i; + + if ((debugreg = debug_registers(pid)) == NULL) + return; + + debugreg[6] = 0xffff0ff0; /* read as ones */ + switch (infop->si_code) { + case TRAP_TRACE: + debugreg[6] |= 0x4000; /* single-step */ + break; + case TRAP_RWATCH: + case TRAP_WWATCH: + case TRAP_XWATCH: + dr7 = debugreg[7]; + addr = (uintptr_t)infop->si_addr; + for (i = 0; i < 4; i++) { + if ((dr7 & (1 << (2 * i))) == 0) /* enabled? */ + continue; + lrw = (dr7 >> (16 + (4 * i))) & 0xf; + switch (lrw >> 2) { /* length */ + case 0: size = 1; break; + case 1: size = 2; break; + case 2: size = 8; break; + case 3: size = 4; break; + } + base = debugreg[i]; + if (addr >= base && addr < base + size) + debugreg[6] |= (1 << i); + } + /* + * Were we also attempting a single-step? + * (kludge: we use debugreg[4] for this flag.) + */ + if (debugreg[4]) + debugreg[6] |= 0x4000; + break; + default: + break; + } +} + +/* + * This is called from the emulation of the wait4 and waitpid system call to + * take into account the monitor processes which we spawn to observe other + * processes from ptrace_attach(). 
+ */ +int +lx_ptrace_wait(siginfo_t *infop) +{ + ptrace_monitor_map_t *p, **pp; + pid_t lxpid, pid = infop->si_pid; + lwpid_t lwpid; + int fd; + pstatus_t status; + + /* + * If the process observed by waitid(2) corresponds to the monitor + * process for a traced thread, we need to rewhack the siginfo_t to + * look like it came from the traced thread with the flags set + * according to the current state. + */ + (void) mutex_lock(&ptrace_map_mtx); + for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) { + if (p->pmm_monitor == pid) { + assert(infop->si_code == CLD_EXITED || + infop->si_code == CLD_KILLED || + infop->si_code == CLD_DUMPED || + infop->si_code == CLD_TRAPPED); + goto found; + } + } + (void) mutex_unlock(&ptrace_map_mtx); + + /* + * If the traced process got a SIGWAITING, we must be in the middle + * of a clone(2) with CLONE_PTRACE set. + */ + if (infop->si_code == CLD_TRAPPED && infop->si_status == SIGWAITING) { + ptrace_catch_fork(pid, 0); + return (-1); + } + + if (get_status(pid, &status) == 0 && + (status.pr_lwp.pr_flags & PR_STOPPED) && + status.pr_lwp.pr_why == PR_SIGNALLED && + status.pr_lwp.pr_info.si_signo == SIGTRAP) + set_dr6(pid, &status.pr_lwp.pr_info); + + return (0); + +found: + /* + * If the monitor is in the exiting state, ignore the event and free + * the monitor structure if the monitor has exited. By returning -1 we + * indicate to the caller that this was a spurious return from + * waitid(2) and that it should ignore the result and try again. + */ + if (p->pmm_exiting) { + if (infop->si_code == CLD_EXITED || + infop->si_code == CLD_KILLED || + infop->si_code == CLD_DUMPED) { + *pp = p->pmm_next; + (void) mutex_unlock(&ptrace_map_mtx); + free(p); + } + return (-1); + } + + lxpid = p->pmm_target; + pid = p->pmm_pid; + lwpid = p->pmm_lwpid; + (void) mutex_unlock(&ptrace_map_mtx); + + /* + * If we can't find the traced process, kill off its monitor. 
+ */ + if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) { + assert(errno == ENOENT); + monitor_kill(lxpid, pid); + infop->si_code = CLD_EXITED; + infop->si_status = 0; + infop->si_pid = lxpid; + return (0); + } + + if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) != + sizeof (status.pr_lwp)) { + lx_err(gettext("read lwpstatus failed %d %s"), + fd, strerror(errno)); + assert(0); + } + + (void) close(fd); + + /* + * If the traced process isn't stopped, this is a truly spurious + * event probably caused by another /proc consumer tracing the + * monitor. + */ + if (!(status.pr_lwp.pr_flags & PR_STOPPED)) { + (void) ptrace_cont_monitor(p); + return (-1); + } + + switch (status.pr_lwp.pr_why) { + case PR_SIGNALLED: + /* + * If the traced process got a SIGWAITING, we must be in the + * middle of a clone(2) with CLONE_PTRACE set. + */ + if (status.pr_lwp.pr_what == SIGWAITING) { + ptrace_catch_fork(lxpid, 1); + (void) ptrace_cont_monitor(p); + return (-1); + } + infop->si_code = CLD_TRAPPED; + infop->si_status = status.pr_lwp.pr_what; + if (status.pr_lwp.pr_info.si_signo == SIGTRAP) + set_dr6(pid, &status.pr_lwp.pr_info); + break; + + case PR_REQUESTED: + /* + * Make it look like the traced process stopped on an + * event of interest. + */ + infop->si_code = CLD_TRAPPED; + infop->si_status = SIGTRAP; + break; + + case PR_JOBCONTROL: + /* + * Ignore this as it was probably caused by another /proc + * consumer tracing the monitor. + */ + (void) ptrace_cont_monitor(p); + return (-1); + + case PR_SYSEXIT: + /* + * Processes traced via a monitor (rather than using the + * native Solaris ptrace support) explicitly trace returns + * from exec system calls since it's an implicit ptrace + * trace point. Accordingly we need to present a process + * in that state as though it had reached the ptrace trace + * point. 
+ */ + if (status.pr_lwp.pr_what == SYS_exec || + status.pr_lwp.pr_what == SYS_execve) { + infop->si_code = CLD_TRAPPED; + infop->si_status = SIGTRAP; + break; + } + + /*FALLTHROUGH*/ + + case PR_SYSENTRY: + case PR_FAULTED: + case PR_SUSPENDED: + default: + lx_err(gettext("didn't expect %d (%d %d)"), + status.pr_lwp.pr_why, + status.pr_lwp.pr_what, status.pr_lwp.pr_flags); + assert(0); + } + + infop->si_pid = lxpid; + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/rlimit.c b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c new file mode 100644 index 0000000000..97498c6d4a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/rlimit.c @@ -0,0 +1,233 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/resource.h> +#include <sys/sysconfig.h> +#include <sys/lx_types.h> +#include <sys/lx_misc.h> + +#define LX_RLIMIT_RSS 5 +#define LX_RLIMIT_NPROC 6 +#define LX_RLIMIT_MEMLOCK 8 +#define LX_RLIMIT_LOCKS 10 +#define LX_RLIMIT_NLIMITS 11 + +/* + * Linux supports many of the same resources that we do, but the numbering + * is slightly different. This table is used to translate Linux resource + * limit keys into their Solaris equivalents. + */ +static int ltos_resource[LX_RLIMIT_NLIMITS] = { + RLIMIT_CPU, + RLIMIT_FSIZE, + RLIMIT_DATA, + RLIMIT_STACK, + RLIMIT_CORE, + -1, /* RSS */ + -1, /* NPROC */ + RLIMIT_NOFILE, + -1, /* MEMLOCK */ + RLIMIT_AS, + -1 /* LOCKS */ +}; + +#define NLIMITS (sizeof (ltos_resource) / sizeof (int)) + +/* + * Magic values Linux uses to indicate infinity + */ +#define LX_RLIM_INFINITY_O (0x7fffffffUL) +#define LX_RLIM_INFINITY_N (0xffffffffUL) + +/* + * Array to store the rlimits that we track but do not enforce. 
+ */ +static struct rlimit fake_limits[NLIMITS] = { + 0, 0, + 0, 0, + 0, 0, + 0, 0, + 0, 0, + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_RSS */ + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_NPROC */ + 0, 0, + RLIM_INFINITY, RLIM_INFINITY, /* LX_RLIM_MEMLOCK */ + 0, 0, + RLIM_INFINITY, RLIM_INFINITY /* LX_RLIM_LOCKS */ +}; + +static int +lx_getrlimit_common(int resource, struct rlimit *rlp, int inf) +{ + int rv; + int sresource; + struct rlimit rl; + + if (resource < 0 || resource >= LX_RLIMIT_NLIMITS) + return (-EINVAL); + + sresource = ltos_resource[resource]; + + if (sresource == -1) { + switch (resource) { + case LX_RLIMIT_MEMLOCK: + case LX_RLIMIT_RSS: + case LX_RLIMIT_LOCKS: + case LX_RLIMIT_NPROC: + rl.rlim_max = fake_limits[resource].rlim_max; + rl.rlim_cur = fake_limits[resource].rlim_cur; + if (rl.rlim_cur == RLIM_INFINITY) + rl.rlim_cur = inf; + if (rl.rlim_max == RLIM_INFINITY) + rl.rlim_max = inf; + if ((uucopy(&rl, rlp, sizeof (rl))) != 0) + return (-errno); + return (0); + default: + lx_unsupported("Unsupported resource type %d\n", + resource); + return (-ENOTSUP); + } + } else { + rv = getrlimit(sresource, rlp); + } + + if (rv < 0) + return (-errno); + + if (rlp->rlim_cur == RLIM_INFINITY) + rlp->rlim_cur = inf; + + if (rlp->rlim_max == RLIM_INFINITY) + rlp->rlim_max = inf; + + return (0); +} + +/* + * This is the 'new' getrlimit, variously called getrlimit or ugetrlimit + * in Linux headers and code. The only difference between this and the old + * getrlimit (variously called getrlimit or old_getrlimit) is the value of + * RLIM_INFINITY, which is smaller for the older version. Modern code will + * use this version by default. + */ +int +lx_getrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + + return (lx_getrlimit_common(resource, rlp, LX_RLIM_INFINITY_N)); +} + +/* + * This is the 'old' getrlimit, variously called getrlimit or old_getrlimit + * in Linux headers and code. 
The only difference between this and the new + * getrlimit (variously called getrlimit or ugetrlimit) is the value of + * RLIM_INFINITY, which is smaller for the older version. + */ +int +lx_oldgetrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + + return (lx_getrlimit_common(resource, rlp, LX_RLIM_INFINITY_O)); +} + +int +lx_setrlimit(uintptr_t p1, uintptr_t p2) +{ + int resource = (int)p1; + struct rlimit *rlp = (struct rlimit *)p2; + struct rlimit rl; + int rv, sresource; + + if (resource < 0 || resource >= LX_RLIMIT_NLIMITS) + return (-EINVAL); + + sresource = ltos_resource[resource]; + + if (sresource == -1) { + if (uucopy((void *)p2, &rl, sizeof (rl)) != 0) + return (-errno); + + switch (resource) { + case LX_RLIMIT_MEMLOCK: + case LX_RLIMIT_RSS: + case LX_RLIMIT_LOCKS: + case LX_RLIMIT_NPROC: + if (rl.rlim_max != LX_RLIM_INFINITY_N && + (rl.rlim_cur == LX_RLIM_INFINITY_N || + rl.rlim_cur > rl.rlim_max)) + return (-EINVAL); + if (rl.rlim_max == LX_RLIM_INFINITY_N) + fake_limits[resource].rlim_max = RLIM_INFINITY; + else + fake_limits[resource].rlim_max = rl.rlim_max; + if (rl.rlim_cur == LX_RLIM_INFINITY_N) + fake_limits[resource].rlim_cur = RLIM_INFINITY; + else + fake_limits[resource].rlim_cur = rl.rlim_cur; + return (0); + } + + lx_unsupported("Unsupported resource type %d\n", resource); + return (-ENOTSUP); + } + + rv = setrlimit(sresource, rlp); + + return (rv < 0 ? -errno : 0); +} + +/* + * We lucked out here. Linux and Solaris have exactly the same + * rusage structures. + */ +int +lx_getrusage(uintptr_t p1, uintptr_t p2) +{ + int who = (int)p1; + struct rusage *rup = (struct rusage *)p2; + int rv, swho; + + if (who == LX_RUSAGE_SELF) + swho = _RUSAGESYS_GETRUSAGE; + else if (who == LX_RUSAGE_CHILDREN) + swho = _RUSAGESYS_GETRUSAGE_CHLD; + else + return (-EINVAL); + + rv = getrusage(swho, rup); + + return (rv < 0 ? 
-errno : 0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c new file mode 100644 index 0000000000..52ac15e5f0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c @@ -0,0 +1,610 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/cred_impl.h> +#include <sys/ucred.h> +#include <ucred.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <sched.h> +#include <strings.h> +#include <pthread.h> +#include <time.h> +#include <thread.h> +#include <alloca.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/lx_syscall.h> +#include <sys/lx_debug.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/lx_sched.h> + +/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */ +static int +validate_policy(int policy) +{ + switch (policy) { + case LX_SCHED_FIFO: + return (SCHED_FIFO); + + case LX_SCHED_RR: + return (SCHED_RR); + + case LX_SCHED_OTHER: + return (SCHED_OTHER); + + default: + lx_debug("validate_policy: illegal policy: %d", policy); + return (-EINVAL); + } +} + +/* + * Check to see if we have the permissions to set scheduler parameters and + * policy, based on Linux' demand that such commands fail with errno set to + * EPERM if the current euid is not the euid or ruid of the process in + * question. + */ +static int +check_schedperms(pid_t pid) +{ + size_t sz; + ucred_t *cr; + uid_t euid; + + euid = geteuid(); + + if (pid == getpid()) { + /* + * If we're the process to be checked, simply check the euid + * against our ruid. + */ + if (euid != getuid()) + return (-EPERM); + + return (0); + } + + /* + * We allocate a ucred_t ourselves rather than call ucred_get(3C) + * because ucred_get() calls malloc(3C), which the brand library cannot + * use. Because we allocate the space with SAFE_ALLOCA(), there's + * no need to free it when we're done. + */ + sz = ucred_size(); + cr = (ucred_t *)SAFE_ALLOCA(sz); + + if (cr == NULL) + return (-ENOMEM); + + /* + * If we can't access the process' credentials, fail with errno EPERM + * as the call would not have succeeded anyway. 
+ */ + if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0) + return ((errno == EACCES) ? -EPERM : -errno); + + if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr))) + return (-EPERM); + + return (0); +} + +static int +ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp) +{ + struct lx_sched_param ls; + int smin = sched_get_priority_min(policy); + int smax = sched_get_priority_max(policy); + + if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0) + return (-errno); + + bzero(sp, sizeof (struct sched_param)); + + /* + * Linux has a fixed priority range, 0 - 99, which we need to convert to + * Solaris's dynamic range. Linux considers lower numbers to be + * higher priority, so we'll invert the priority within Solaris's range. + * + * The formula to convert between ranges is: + * + * L * (smax - smin) + * S = ----------------- + smin + * (lmax - lmin) + * + * where S is the Solaris equivalent of the linux priority L. + * + * To invert the priority, we use: + * S' = smax - S + smin + * + * Together, these two formulas become: + * + * L * (smax - smin) + * S = smax - ----------------- + 2smin + * 99 + */ + sp->sched_priority = smax - + ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin; + + lx_debug("ltos_sparam: linux prio %d = Solaris prio %d " + "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority, + smin, smax); + + return (0); +} + +static int +stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp) +{ + struct lx_sched_param ls; + int smin = sched_get_priority_min(policy); + int smax = sched_get_priority_max(policy); + + if (policy == SCHED_OTHER) { + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + ls.lx_sched_prio = 0; + } else { + /* + * Convert Solaris's dynamic, inverted priority range to the + * fixed Linux range of 1 - 99. 
+ * + * The formula is (see above): + * + * (smax - s + 2smin) * 99 + * l = ----------------------- + * smax - smin + */ + ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) * + LX_PRI_MAX) / (smax - smin); + } + + lx_debug("stol_sparam: Solaris prio %d = linux prio %d " + "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio, + smin, smax); + + return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0) + ? -errno : 0); +} + +#define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8)) +#define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8))) + +/* ARGSUSED */ +int +lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) +{ + int sz; + ulong_t *lmask, *zmask; + int i; + + sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp); + if (sz == -1) + return (-errno); + + /* + * If the target LWP hasn't ever had an affinity mask set, the kernel + * will return a mask of all 0's. If that is the case we must build a + * default mask that has all valid bits turned on. + */ + lmask = SAFE_ALLOCA(sz); + zmask = SAFE_ALLOCA(sz); + if (lmask == NULL || zmask == NULL) + return (-ENOMEM); + + bzero(zmask, sz); + + if (uucopy((void *)maskp, lmask, sz) != 0) + return (-EFAULT); + + if (bcmp(lmask, zmask, sz) != 0) + return (sz); + + for (i = 0; i < sz * 8; i++) { + if (p_online(i, P_STATUS) != -1) { + lmask[BITINDEX(i)] |= BITSHIFT(i); + } + } + + if (uucopy(lmask, (void *)maskp, sz) != 0) + return (-EFAULT); + + return (sz); +} + +/* ARGSUSED */ +int +lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) +{ + int ret; + int sz; + int i; + int found; + ulong_t *lmask; + pid_t s_pid; + lwpid_t s_tid; + processorid_t cpuid; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * We only support setting affinity masks for threads in + * the calling process. + */ + if (s_pid != getpid()) + return (-EPERM); + + /* + * First, get the minimum bitmask size from the kernel. 
+ */ + sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0); + if (sz == -1) + return (-errno); + + lmask = SAFE_ALLOCA(sz); + if (lmask == NULL) + return (-ENOMEM); + + if (uucopy((void *)maskp, lmask, sz) != 0) + return (-EFAULT); + + /* + * Make sure the mask contains at least one processor that is + * physically on the system. Reduce the user's mask to the set of + * physically present CPUs. Keep track of how many valid + * bits are set in the user's mask. + */ + + for (found = 0, i = 0; i < sz * 8; i++) { + if (p_online(i, P_STATUS) == -1) { + /* + * This CPU doesn't exist, so clear this bit from + * the user's mask. + */ + lmask[BITINDEX(i)] &= ~BITSHIFT(i); + continue; + } + + if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) { + found++; + cpuid = i; + } + } + + if (found == 0) { + lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n"); + return (-EINVAL); + } + + /* + * If only one bit is set, bind the thread to that procesor; + * otherwise, clear the binding. + */ + if (found == 1) { + lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n", + s_tid, cpuid); + if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0) + /* + * It could be that the requested processor is offline, + * so we'll just abandon our good-natured attempt to + * bind to it. + */ + lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid, + cpuid, strerror(errno)); + } else { + lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n", + s_tid); + if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) { + lx_debug("couldn't clear CPU binding for LWP %d: %s\n", + s_tid, strerror(errno)); + } + } + + /* + * Finally, ask the kernel to make a note of our current (though fairly + * meaningless) affinity mask. + */ + ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask); + + return ((ret == 0) ? 
0 : -errno); +} + +int +lx_sched_getparam(uintptr_t pid, uintptr_t param) +{ + int policy, ret; + pid_t s_pid; + lwpid_t s_tid; + + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * If we're attempting to get information on our own process, we can + * get data on a per-thread basis; if not, punt and use the specified + * pid. + */ + if (s_pid == getpid()) { + if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0) + return (-ret); + } else { + if (sched_getparam(s_pid, &sp) == -1) + return (-errno); + + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + } + + return (stol_sparam(policy, &sp, (struct lx_sched_param *)param)); +} + +int +lx_sched_setparam(uintptr_t pid, uintptr_t param) +{ + int err, policy; + pid_t s_pid; + lwpid_t s_tid; + struct lx_sched_param lp; + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + if (s_pid == getpid()) { + struct sched_param dummy; + + if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) + return (-err); + } else + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + + lx_debug("sched_setparam(): current policy %d", policy); + + if (uucopy((void *)param, &lp, sizeof (lp)) != 0) + return (-errno); + + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0)) + return (-EINVAL); + + if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp, + &sp)) != 0) + return (err); + + /* + * Check if we're allowed to change the scheduler for the process. 
+ * + * If we're operating on a thread, we can't just call + * pthread_setschedparam() because as all threads reside within a + * single Solaris process, Solaris will allow the modification + * + * If we're operating on a process, we can't just call sched_setparam() + * because Solaris will allow the call to succeed if the scheduler + * parameters do not differ from those being installed, but Linux wants + * the call to fail. + */ + if ((err = check_schedperms(s_pid)) != 0) + return (err); + + if (s_pid == getpid()) + return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0) + ? -err : 0); + + return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0); +} + +int +lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec) +{ + struct timespec ts; + pid_t s_pid; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0) + return (-ESRCH); + + if (uucopy((struct timespec *)timespec, &ts, + sizeof (struct timespec)) != 0) + return (-errno); + + return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0); +} + +int +lx_sched_getscheduler(uintptr_t pid) +{ + int policy, rv; + pid_t s_pid; + lwpid_t s_tid; + + if ((pid_t)pid < 0) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + if (s_pid == getpid()) { + struct sched_param dummy; + + if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) + return (-rv); + } else + if ((policy = sched_getscheduler(s_pid)) < 0) + return (-errno); + + /* + * Linux only supports certain policies; avoid confusing apps with + * alien policies. 
+ */ + switch (policy) { + case SCHED_FIFO: + return (LX_SCHED_FIFO); + case SCHED_OTHER: + return (LX_SCHED_OTHER); + case SCHED_RR: + return (LX_SCHED_RR); + default: + break; + } + + return (LX_SCHED_OTHER); +} + +int +lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param) +{ + int rt_pol; + int rv; + pid_t s_pid; + lwpid_t s_tid; + struct lx_sched_param lp; + + struct sched_param sp; + + if (((pid_t)pid < 0) || (param == NULL)) + return (-EINVAL); + + if ((rt_pol = validate_policy((int)policy)) < 0) + return (rt_pol); + + if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param, + &sp)) != 0) + return (rv); + + if (uucopy((void *)param, &lp, sizeof (lp)) != 0) + return (-errno); + + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + */ + if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0)) + return (-EINVAL); + + if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) + return (-ESRCH); + + /* + * Check if we're allowed to change the scheduler for the process. + * + * If we're operating on a thread, we can't just call + * pthread_setschedparam() because as all threads reside within a + * single Solaris process, Solaris will allow the modification. + * + * If we're operating on a process, we can't just call + * sched_setscheduler() because Solaris will allow the call to succeed + * if the scheduler and scheduler parameters do not differ from those + * being installed, but Linux wants the call to fail. + */ + if ((rv = check_schedperms(s_pid)) != 0) + return (rv); + + if (s_pid == getpid()) { + struct sched_param param; + int pol; + + if ((pol = sched_getscheduler(s_pid)) != 0) + return (-errno); + + /* + * sched_setscheduler() returns the previous scheduling policy + * on success, so call pthread_getschedparam() to get the + * current thread's scheduling policy and return that if the + * call to pthread_setschedparam() succeeds. 
+ */ + if ((rv = pthread_getschedparam(s_tid, &pol, ¶m)) != 0) + return (-rv); + + return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0) + ? -rv : pol); + } + + return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1) + ? -errno : rv); +} + +int +lx_sched_get_priority_min(uintptr_t policy) +{ + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0. + * Linux scheduling priorities are not alterable, so there is no + * Solaris translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MIN_RRFIFO); + case LX_SCHED_OTHER: + return (LX_SCHED_PRIORITY_MIN_OTHER); + default: + break; + } + return (-EINVAL); +} + +int +lx_sched_get_priority_max(uintptr_t policy) +{ + /* + * In Linux, the only valid SCHED_OTHER scheduler priority is 0 + * Linux scheduling priorities are not alterable, so there is no + * Solaris translation necessary. + */ + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + return (LX_SCHED_PRIORITY_MAX_RRFIFO); + case LX_SCHED_OTHER: + return (LX_SCHED_PRIORITY_MAX_OTHER); + default: + break; + } + return (-EINVAL); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sendfile.c b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c new file mode 100644 index 0000000000..1c4af9bf74 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c @@ -0,0 +1,97 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lx_sendfile() and lx_sendfile64() are just branded versions of the + * library calls available in the Solaris libsendfile (see sendfile(3EXT)). + */ + +#include <sys/types.h> +#include <sys/syscall.h> +#include <sys/sendfile.h> +#include <string.h> +#include <errno.h> +#include <sys/lx_misc.h> + +int +lx_sendfile(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + sysret_t rval; + off_t off = 0; + off_t *offp = (off_t *)p3; + int error; + struct sendfilevec sfv; + size_t xferred; + size_t sz = (size_t)p4; + + if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0) + return (-errno); + + sfv.sfv_fd = p2; + sfv.sfv_flag = 0; + sfv.sfv_off = off; + sfv.sfv_len = sz; + error = __systemcall(&rval, SYS_sendfilev, SENDFILEV, p1, &sfv, + 1, &xferred); + + if (error == 0 && xferred > 0) { + off += xferred; + error = uucopy(&off, offp, sizeof (off)); + } + + return (error ? -error : (int)rval.sys_rval1); +} + +int +lx_sendfile64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + sysret_t rval; + off64_t off = 0; + off64_t *offp = (off64_t *)p3; + size_t sz = (size_t)p4; + int error; + struct sendfilevec64 sfv; + size_t xferred; + + if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0) + return (-errno); + + sfv.sfv_fd = p2; + sfv.sfv_flag = 0; + sfv.sfv_off = off; + sfv.sfv_len = sz; + error = __systemcall(&rval, SYS_sendfilev, SENDFILEV64, p1, &sfv, + 1, &xferred); + + if (error == 0 && xferred > 0) { + off += xferred; + error = uucopy(&off, offp, sizeof (off)); + } + + return (error ? 
-error : (int)rval.sys_rval1); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c new file mode 100644 index 0000000000..3a1bc0175a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -0,0 +1,1676 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/segments.h> +#include <sys/lx_types.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#include <sys/lx_thread.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <thread.h> +#include <ucontext.h> +#include <unistd.h> +#include <stdio.h> +#include <libintl.h> +#include <ieeefp.h> + +/* + * Delivering signals to a Linux process is complicated by differences in + * signal numbering, stack structure and contents, and the action taken when a + * signal handler exits. 
In addition, many signal-related structures, such as + * sigset_ts, vary between Solaris and Linux. + * + * To support user-level signal handlers, the brand uses a double layer of + * indirection to process and deliver signals to branded threads. + * + * When a Linux process sends a signal using the kill(2) system call, we must + * translate the signal into the Solaris equivalent before handing control off + * to the standard signalling mechanism. When a signal is delivered to a Linux + * process, we translate the signal number from Solaris back to Linux. + * Translating signals both at generation and delivery time ensures both that + * Solaris signals are sent properly to Linux applications and that signals' + * default behavior works as expected. + * + * In a normal Solaris process, signal delivery is interposed on for any thread + * registering a signal handler by libc. Libc needs to do various bits of magic + * to provide thread-safe critical regions, so it registers its own handler, + * named sigacthandler(), using the sigaction(2) system call. When a signal is + * received, sigacthandler() is called, and after some processing, libc turns + * around and calls the user's signal handler via a routine named + * call_user_handler(). + * + * Adding a Linux branded thread to the mix complicates things somewhat. + * + * First, when a thread receives a signal, it may be running with a Linux value + * in the x86 %gs segment register as opposed to the value Solaris threads + * expect; if control were passed directly to Solaris code, such as libc's + * sigacthandler(), that code would experience a segmentation fault the first + * time it tried to dereference a memory location using %gs. + * + * Second, the signal number translation referenced above must take place. + * Further, as was the case with Solaris libc, before the Linux signal handler + * is called, the value of the %gs segment register MUST be restored to the + * value Linux code expects.
+ * + * This need to translate signal numbers and manipulate the %gs register means + * that while with standard Solaris libc, following a signal from generation to + * delivery looks something like: + * + * kernel -> + * sigacthandler() -> + * call_user_handler() -> + * user signal handler + * + * while for the brand's Linux threads, this would look like: + * + * kernel -> + * lx_sigacthandler() -> + * sigacthandler() -> + * call_user_handler() -> + * lx_call_user_handler() -> + * Linux user signal handler + * + * The new additions are: + * + * lx_sigacthandler + * ================ + * This routine is responsible for setting the %gs segment register to the + * value Solaris code expects, and jumping to Solaris' libc signal + * interposition handler, sigacthandler(). + * + * lx_call_user_handler + * ==================== + * This routine is responsible for translating Solaris signal numbers to + * their Linux equivalents, building a Linux signal stack based on the + * information Solaris has provided, and passing the stack to the + * registered Linux signal handler. It is, in effect, the Linux thread + * equivalent to libc's call_user_handler(). + * + * Installing lx_sigacthandler() is a bit tricky, as normally libc's + * sigacthandler() routine is hidden from user programs. To facilitate this, a + * new private function was added to libc, setsigacthandler(): + * + * void setsigacthandler(void (*new_handler)(int, siginfo_t *, void *), + * void (**old_handler)(int, siginfo_t *, void *)) + * + * The routine works by modifying the per-thread data structure libc already + * keeps to track the address of its own interposition handler with + * the address passed in; the old handler's address is set in the pointer + * pointed to by the second argument, if it is non-NULL, mimicking the behavior + * of sigaction() itself.
Once setsigacthandler() has been executed, all + * future branded threads this thread may create will automatically have the + * proper interposition handler installed as the result of a normal + * sigaction() call. + * + * Note that none of this interposition is necessary unless a Linux thread + * registers a user signal handler, as the default action for all signals is the + * same between Solaris and Linux save for one signal, SIGPWR. For this reason, + * the brand ALWAYS installs its own internal signal handler for SIGPWR that + * translates the action to the Linux default, to terminate the process. + * (Solaris' default action is to ignore SIGPWR.) + * + * It is also important to note that when signals are not translated, the brand + * relies upon code interposing upon the wait(2) system call to translate + * signals to their proper values for any Linux threads retrieving the status + * of others. So while the Solaris signal number for a particular signal is set + * in a process' data structures (and would be returned as the result of say, + * WTERMSIG()), the brand's interposition upon wait(2) is responsible for + * translating the value WTERMSIG() would return from a Solaris signal number + * to the appropriate Linux value. + * + * The process of returning to an interrupted thread of execution from a user + * signal handler is entirely different between Solaris and Linux. While + * Solaris generally expects to set the context to the interrupted one on a + * normal return from a signal handler, in the normal case Linux instead calls + * code that calls a specific Linux system call, sigreturn(2). Thus when a + * Linux signal handler completes execution, instead of returning through what + * would in libc be a call to setcontext(2), the sigreturn(2) Linux system call + * is responsible for accomplishing much the same thing.
+ * + * This trampoline code looks something like this: + * + * pop %eax + * mov LX_SYS_rt_sigreturn, %eax + * int $0x80 + * + * so when the Linux user signal handler is eventually called, the stack looks + * like this (in the case of an "lx_sigstack" stack): + * + * ========================================================= + * | Pointer to actual trampoline code (in code segment) | + * ========================================================= + * | Linux signal number | + * ========================================================= + * | Pointer to Linux siginfo_t (or NULL) | + * ========================================================= + * | Pointer to Linux ucontext_t (or NULL) | + * ========================================================= + * | Linux siginfo_t | + * ========================================================= + * | Linux ucontext_t | + * ========================================================= + * | Linux struct _fpstate | + * ========================================================= + * | Trampoline code (marker for gdb, not really executed) | + * ========================================================= + * + * The brand takes the approach of intercepting the Linux sigreturn(2) system + * call in order to turn it into the return through the libc call stack that + * Solaris expects. This is done by the lx_sigreturn() and lx_rt_sigreturn() + * routines, which remove the Linux signal frame from the stack and pass the + * resulting stack pointer to another routine, lx_sigreturn_tolibc(), which + * makes libc believe the user signal handler it had called returned. + * + * (Note that the trampoline code actually lives in a proper executable segment + * and not on the stack, but gdb checks for the exact code sequence of the + * trampoline code on the stack to determine whether it is in a signal stack + * frame or not. Really.)
+ * + * When control then returns to libc's call_user_handler() routine, a + * setcontext(2) will be done that (in most cases) returns the thread executing + * the code back to the location originally interrupted by receipt of the + * signal. + */ + +/* + * Two flavors of Linux signal stacks: + * + * lx_sigstack - used for "modern" signal handlers, in practice those + * that have the sigaction(2) flag SA_SIGINFO set + * + * lx_oldsigstack - used for legacy signal handlers, those that do not have + * the sigaction(2) flag SA_SIGINFO set or that were setup via + * the signal(2) call. + * + * NOTE: Since these structures will be placed on the stack and stack math will + * be done with their sizes, they must be word aligned in size (32 bits) + * so the stack remains word aligned per the i386 ABI. + */ +struct lx_sigstack { + void (*retaddr)(); /* address of real lx_rt_sigreturn code */ + int sig; /* signal number */ + lx_siginfo_t *sip; /* points to "si" if valid, NULL if not */ + lx_ucontext_t *ucp; /* points to "uc" if valid, NULL if not */ + lx_siginfo_t si; /* saved signal information */ + lx_ucontext_t uc; /* saved user context */ + lx_fpstate_t fpstate; /* saved FP state */ + char trampoline[8]; /* code for trampoline to lx_rt_sigreturn() */ +}; + +struct lx_oldsigstack { + void (*retaddr)(); /* address of real lx_sigreturn code */ + int sig; /* signal number */ + lx_sigcontext_t sigc; /* saved user context */ + lx_fpstate_t fpstate; /* saved FP state */ + int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */ + char trampoline[8]; /* code for trampoline to lx_sigreturn() */ +}; + +/* + * libc_sigacthandler is set to the address of the libc signal interposition + * routine, sigacthandler(). + */ +void (*libc_sigacthandler)(int, siginfo_t *, void*); + +/* + * The lx_sighandlers structure needs to be a global due to the semantics of + * clone(). 
+ * + * If CLONE_SIGHAND is set, the calling process and child share signal + * handlers, and if either calls sigaction(2) it should change the behavior + * in the other thread. Each thread does, however, have its own signal mask + * and set of pending signals. + * + * If CLONE_SIGHAND is not set, the child process should inherit a copy of + * the signal handlers at the time of the clone() but later calls to + * sigaction(2) should only affect the individual thread calling it. + * + * This maps perfectly to a thr_create(3C) thread semantic in the first + * case and a fork(2)-type semantic in the second case. By making + * lx_sighandlers global, we automatically get the correct behavior. + */ +static lx_sighandlers_t lx_sighandlers; + +/* + * stol_stack() and ltos_stack() convert between Solaris and Linux stack_t + * structures. + * + * These routines are needed because although the two structures have the same + * contents, their contents are declared in a different order, so the content + * of the structures cannot be copied with a simple bcopy(). 
+ */ +static void +stol_stack(stack_t *fr, lx_stack_t *to) +{ + to->ss_sp = fr->ss_sp; + to->ss_flags = fr->ss_flags; + to->ss_size = fr->ss_size; +} + +static void +ltos_stack(lx_stack_t *fr, stack_t *to) +{ + to->ss_sp = fr->ss_sp; + to->ss_flags = fr->ss_flags; + to->ss_size = fr->ss_size; +} + +static int +ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp) +{ + lx_sigset_t l; + int lx_sig, sig; + + if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0) + return (-errno); + + (void) sigemptyset(s_sigsetp); + + for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) { + if (lx_sigismember(&l, lx_sig) && + ((sig = ltos_signo[lx_sig]) > 0)) + (void) sigaddset(s_sigsetp, sig); + } + + return (0); +} + +static int +stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp) +{ + lx_sigset_t l; + int sig, lx_sig; + + bzero(&l, sizeof (lx_sigset_t)); + + for (sig = 1; sig < NSIG; sig++) { + if (sigismember(s_sigsetp, sig) && + ((lx_sig = stol_signo[sig]) > 0)) + lx_sigaddset(&l, lx_sig); + } + + return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0) + ? -errno : 0); +} + +static int +ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp) +{ + lx_osigset_t lo; + int lx_sig, sig; + + if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0) + return (-errno); + + (void) sigemptyset(s_sigsetp); + + for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++) + if ((lo & OSIGSET_BITSET(lx_sig)) && + ((sig = ltos_signo[lx_sig]) > 0)) + (void) sigaddset(s_sigsetp, sig); + + return (0); +} + +static int +stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp) +{ + lx_osigset_t lo = 0; + int lx_sig, sig; + + /* + * Note that an lx_osigset_t can only represent the signals from + * [1 .. OSIGSET_NBITS], so even though a signal may be present in the + * Solaris sigset_t, it may not be representable as a bit in the + * lx_osigset_t. 
+ */ + for (sig = 1; sig < NSIG; sig++) + if (sigismember(s_sigsetp, sig) && + ((lx_sig = stol_signo[sig]) > 0) && + (lx_sig <= OSIGSET_NBITS)) + lo |= OSIGSET_BITSET(lx_sig); + + return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0) + ? -errno : 0); +} + +static int +stol_sigcode(int si_code) +{ + switch (si_code) { + case SI_USER: + return (LX_SI_USER); + case SI_LWP: + return (LX_SI_TKILL); + case SI_QUEUE: + return (LX_SI_QUEUE); + case SI_TIMER: + return (LX_SI_TIMER); + case SI_ASYNCIO: + return (LX_SI_ASYNCIO); + case SI_MESGQ: + return (LX_SI_MESGQ); + default: + return (si_code); + } +} + +static int +stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) +{ + lx_siginfo_t lx_siginfo; + + bzero(&lx_siginfo, sizeof (*lx_siginfop)); + + if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) { + errno = EINVAL; + return (-1); + } + + lx_siginfo.lsi_code = stol_sigcode(siginfop->si_code); + lx_siginfo.lsi_errno = siginfop->si_errno; + + switch (lx_siginfo.lsi_signo) { + /* + * Semantics ARE defined for SIGKILL, but since + * we can't catch it, we can't translate it. :-( + */ + case LX_SIGPOLL: + lx_siginfo.lsi_band = siginfop->si_band; + lx_siginfo.lsi_fd = siginfop->si_fd; + break; + + case LX_SIGCHLD: + lx_siginfo.lsi_pid = siginfop->si_pid; + lx_siginfo.lsi_status = siginfop->si_status; + lx_siginfo.lsi_utime = siginfop->si_utime; + lx_siginfo.lsi_stime = siginfop->si_stime; + + break; + + case LX_SIGILL: + case LX_SIGBUS: + case LX_SIGFPE: + lx_siginfo.lsi_addr = siginfop->si_addr; + break; + + default: + lx_siginfo.lsi_pid = siginfop->si_pid; + lx_siginfo.lsi_uid = + LX_UID32_TO_UID16(siginfop->si_uid); + break; + } + + return ((uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0) + ? 
-errno : 0); +} + +static void +stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr) +{ + struct _fpstate *fpsp = (struct _fpstate *)fpr; + size_t copy_len; + + /* + * The Solaris struct _fpstate and lx_fpstate_t are identical from the + * beginning of the structure to the lx_fpstate_t "magic" field, so + * just bcopy() those entries. + */ + copy_len = (size_t)&(((lx_fpstate_t *)0)->magic); + bcopy(fpsp, lfpr, copy_len); + + /* + * These fields are all only significant for the first 16 bits. + */ + lfpr->cw &= 0xffff; /* x87 control word */ + lfpr->sw &= 0xffff; /* x87 status word */ + lfpr->tag &= 0xffff; /* x87 tag word */ + lfpr->cssel &= 0xffff; /* cs selector */ + lfpr->datasel &= 0xffff; /* ds selector */ + + lfpr->mxcsr = fpsp->mxcsr; + + if (fpsp->mxcsr != 0) { + /* + * Linux uses the "magic" field to denote whether the XMM + * registers contain legal data or not. Since we can't get to + * %cr4 from userland to check the status of the OSFXSR bit, + * check the mxcsr field to see if it's 0, which it should + * never be on a system with the OXFXSR bit enabled. + */ + lfpr->magic = LX_X86_FXSR_MAGIC; + bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm)); + } else { + lfpr->magic = LX_X86_FXSR_NONE; + } +} + +static void +ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr) +{ + struct _fpstate *fpsp = (struct _fpstate *)fpr; + size_t copy_len; + + /* + * The lx_fpstate_t and Solaris struct _fpstate are identical from the + * beginning of the structure to the struct _fpstate "mxcsr" field, so + * just bcopy() those entries. + */ + copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr); + bcopy(lfpr, fpsp, copy_len); + + /* + * These fields are all only significant for the first 16 bits. 
+ */ + fpsp->cw &= 0xffff; /* x87 control word */ + fpsp->sw &= 0xffff; /* x87 status word */ + fpsp->tag &= 0xffff; /* x87 tag word */ + fpsp->cssel &= 0xffff; /* cs selector */ + fpsp->datasel &= 0xffff; /* ds selector */ + fpsp->status &= 0xffff; /* saved status */ + + fpsp->mxcsr = lfpr->mxcsr; + + if (lfpr->magic == LX_X86_FXSR_MAGIC) + bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm)); +} + +/* + * The brand needs a lx version of this because the format of the lx stack_t + * differs from the Solaris stack_t not really in content but in ORDER, + * so we can't simply pass pointers and expect things to work (sigh...) + */ +int +lx_sigaltstack(uintptr_t nsp, uintptr_t osp) +{ + lx_stack_t ls; + stack_t newsstack, oldsstack; + stack_t *nssp = (nsp ? &newsstack : NULL); + stack_t *ossp = (osp ? &oldsstack : NULL); + + if (nsp) { + if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0) + return (-errno); + + if ((ls.ss_flags & LX_SS_DISABLE) == 0 && + ls.ss_size < LX_MINSIGSTKSZ) + return (-ENOMEM); + + newsstack.ss_sp = (int *)ls.ss_sp; + newsstack.ss_size = (long)ls.ss_size; + newsstack.ss_flags = ls.ss_flags; + } + + if (sigaltstack(nssp, ossp) != 0) + return (-errno); + + if (osp) { + ls.ss_sp = (void *)oldsstack.ss_sp; + ls.ss_size = (size_t)oldsstack.ss_size; + ls.ss_flags = oldsstack.ss_flags; + + if (uucopy(&ls, (void *)osp, sizeof (lx_stack_t)) != 0) + return (-errno); + } + + return (0); +} + +/* + * The following routines are needed because sigset_ts and siginfo_ts are + * different in format between Linux and Solaris. + * + * Note that there are two different lx_sigset structures, lx_sigset_ts and + * lx_osigset_ts: + * + * + An lx_sigset_t is the equivalent of a Solaris sigset_t and supports + * more than 32 signals. + * + * + An lx_osigset_t is simply a uint32_t, so it by definition only supports + * 32 signals. 
+ * + * When there are two versions of a routine, one prefixed with lx_rt_ and + * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with + * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately, + * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn()) + */ +int +lx_sigpending(uintptr_t sigpend) +{ + sigset_t sigpendset; + + if (sigpending(&sigpendset) != 0) + return (-errno); + + return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend)); +} + +int +lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize) +{ + sigset_t sigpendset; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (sigpending(&sigpendset) != 0) + return (-errno); + + return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend)); +} + +/* + * Create a common routine to encapsulate all of the sigprocmask code, + * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask() + * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by + * the setting of the "sigset_type" flag. + */ +static int +lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp, + uintptr_t sigset_type) +{ + int err; + sigset_t set, oset; + sigset_t *s_setp = NULL; + sigset_t *s_osetp; + + if (l_setp) { + switch (how) { + case LX_SIG_BLOCK: + how = SIG_BLOCK; + break; + + case LX_SIG_UNBLOCK: + how = SIG_UNBLOCK; + break; + + case LX_SIG_SETMASK: + how = SIG_SETMASK; + break; + + default: + return (-EINVAL); + } + + s_setp = &set; + + if (sigset_type == USE_SIGSET) + err = ltos_sigset((lx_sigset_t *)l_setp, s_setp); + else + err = ltos_osigset((lx_osigset_t *)l_setp, s_setp); + + if (err != 0) + return (err); + } + + s_osetp = (l_osetp ? &oset : NULL); + + /* + * In a multithreaded environment, a call to sigprocmask(2) should + * only affect the current thread's signal mask so we don't need to + * explicitly call thr_sigsetmask(3C) here. 
+ */ + if (sigprocmask(how, s_setp, s_osetp) != 0) + return (-errno); + + if (l_osetp) { + if (sigset_type == USE_SIGSET) + err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp); + else + err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp); + + if (err != 0) { + /* + * Encountered a fault while writing to the old signal + * mask buffer, so unwind the signal mask change made + * above. + */ + (void) sigprocmask(how, s_osetp, (sigset_t *)NULL); + return (err); + } + } + + return (0); +} + +int +lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp) +{ + return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET)); +} + +int +lx_sgetmask(void) +{ + lx_osigset_t oldmask; + + return ((lx_sigprocmask_common(SIG_SETMASK, NULL, (uintptr_t)&oldmask, + USE_OSIGSET) != 0) ? -errno : (int)oldmask); +} + +int +lx_ssetmask(uintptr_t sigmask) +{ + lx_osigset_t newmask, oldmask; + + newmask = (lx_osigset_t)sigmask; + + return ((lx_sigprocmask_common(SIG_SETMASK, (uintptr_t)&newmask, + (uintptr_t)&oldmask, USE_OSIGSET) != 0) ? -errno : (int)oldmask); +} + +int +lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp, + uintptr_t setsize) +{ + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET)); +} + +int +lx_sigsuspend(uintptr_t set) +{ + sigset_t s_set; + + if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0) + return (-errno); + + return ((sigsuspend(&s_set) == -1) ? -errno : 0); +} + +int +lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize) +{ + sigset_t s_set; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0) + return (-errno); + + return ((sigsuspend(&s_set) == -1) ? 
-errno : 0); +} + +int +lx_sigwaitinfo(uintptr_t set, uintptr_t sinfo) +{ + lx_osigset_t *setp = (lx_osigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + if (ltos_osigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +int +lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_sigset_t *setp = (lx_sigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +int +lx_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_osigset_t *setp = (lx_osigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if (ltos_osigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigtimedwait(&s_set, s_sinfop, + (struct timespec *)toutp)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? 
-errno : rc); +} + +int +lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp, + uintptr_t setsize) +{ + sigset_t s_set; + siginfo_t s_sinfo, *s_sinfop; + int rc; + + lx_sigset_t *setp = (lx_sigset_t *)set; + lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; + + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + if (ltos_sigset(setp, &s_set) != 0) + return (-errno); + + s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; + + if ((rc = sigtimedwait(&s_set, s_sinfop, + (struct timespec *)toutp)) == -1) + return (-errno); + + if (s_sinfop == NULL) + return (rc); + + return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc); +} + +/* + * Intercept the Linux sigreturn() syscall to turn it into the return through + * the libc call stack that Solaris expects. + * + * When control returns to libc's call_user_handler() routine, a setcontext(2) + * will be done that returns thread execution to the point originally + * interrupted by receipt of the signal. + */ +int +lx_sigreturn(void) +{ + struct lx_oldsigstack *lx_ossp; + lx_sigset_t lx_sigset; + lx_regs_t *rp; + ucontext_t *ucp; + uintptr_t sp; + + rp = lx_syscall_regs(); + + /* + * NOTE: The sp saved in the context is eight bytes off of where we + * need it to be. + */ + sp = (uintptr_t)rp->lxr_esp - 8; + + /* + * At this point, the stack pointer should point to the struct + * lx_oldsigstack that lx_build_old_signal_frame() constructed and + * placed on the stack. We need to reference it a bit later, so + * save a pointer to it before incrementing our copy of the sp. + */ + lx_ossp = (struct lx_oldsigstack *)sp; + sp += sizeof (struct lx_oldsigstack); + + /* + * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it + * creates the struct lx_oldsigstack. + * + * If we don't find it here, the stack's been corrupted and we need to + * kill ourselves. 
+ */ + if (*(uint32_t *)sp != LX_SIGRT_MAGIC) + lx_err_fatal(gettext( + "sp @ 0x%p, expected 0x%x, found 0x%x!"), + sp, LX_SIGRT_MAGIC, *(uint32_t *)sp); + + sp += sizeof (uint32_t); + + /* + * For signal mask handling to be done properly, this call needs to + * return to the libc routine that originally called the signal handler + * rather than directly set the context back to the place the signal + * interrupted execution as the original Linux code would do. + * + * Here *sp points to the Solaris ucontext_t, so we need to copy + * machine registers the Linux signal handler may have modified + * back to the Solaris version. + */ + ucp = (ucontext_t *)(*(uint32_t *)sp); + + /* general registers copy back as-is */ + bcopy(&lx_ossp->sigc, &ucp->uc_mcontext, sizeof (gregset_t)); + + /* copy back FP regs if present */ + if (lx_ossp->sigc.sc_fpstate != NULL) + ltos_fpstate(&lx_ossp->fpstate, &ucp->uc_mcontext.fpregs); + + /* convert Linux signal mask back to its Solaris equivalent */ + bzero(&lx_sigset, sizeof (lx_sigset_t)); + lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask; + lx_sigset.__bits[1] = lx_ossp->sig_extra; + (void) ltos_sigset(&lx_sigset, &ucp->uc_sigmask); + + /* + * At this point sp contains the value of the stack pointer when + * lx_call_user_handler() was called. + * + * Pop one more value off the stack and pass the new sp to + * lx_sigreturn_tolibc(), which will in turn manipulate the x86 + * registers to make it appear to libc's call_user_handler() as if the + * handler it had called returned. 
+ */ + sp += sizeof (uint32_t); + lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp); + lx_sigreturn_tolibc(sp); + + /*NOTREACHED*/ + return (0); +} + +int +lx_rt_sigreturn(void) +{ + struct lx_sigstack *lx_ssp; + lx_regs_t *rp; + lx_ucontext_t *lx_ucp; + ucontext_t *ucp; + uintptr_t sp; + + rp = lx_syscall_regs(); + + /* + * NOTE: Because of some silly compatibility measures done in the + * signal trampoline code to make sure it uses the _exact same_ + * instruction sequence Linux does, we have to manually "pop" + * one extra four byte instruction off the stack here before + * passing the stack address to the syscall because the + * trampoline code isn't allowed to do it. + * + * No, I'm not kidding. + * + * The sp saved in the context is eight bytes off of where we + * need it to be, so the need to pop the extra four byte + * instruction means we need to subtract a net four bytes from + * the sp before "popping" the struct lx_sigstack off the stack. + * This will yield the value the stack pointer had before + * lx_sigdeliver() created the stack frame for the Linux signal + * handler. + */ + sp = (uintptr_t)rp->lxr_esp - 4; + + /* + * At this point, the stack pointer should point to the struct + * lx_sigstack that lx_build_signal_frame() constructed and + * placed on the stack. We need to reference it a bit later, so + * save a pointer to it before incrementing our copy of the sp. + */ + lx_ssp = (struct lx_sigstack *)sp; + sp += sizeof (struct lx_sigstack); + + /* + * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it + * creates the struct lx_sigstack (and possibly struct lx_fpstate_t). + * + * If we don't find it here, the stack's been corrupted and we need to + * kill ourselves. 
+ */ + if (*(uint32_t *)sp != LX_SIGRT_MAGIC) + lx_err_fatal(gettext("sp @ 0x%p, expected 0x%x, found 0x%x!"), + sp, LX_SIGRT_MAGIC, *(uint32_t *)sp); + + sp += sizeof (uint32_t); + + /* + * For signal mask handling to be done properly, this call needs to + * return to the libc routine that originally called the signal handler + * rather than directly set the context back to the place the signal + * interrupted execution as the original Linux code would do. + * + * Here *sp points to the Solaris ucontext_t, so we need to copy + * machine registers the Linux signal handler may have modified + * back to the Solaris version. + */ + ucp = (ucontext_t *)(*(uint32_t *)sp); + + /* general registers copy back as-is */ + lx_ucp = lx_ssp->ucp; + + if (lx_ucp != NULL) { + bcopy(&lx_ucp->uc_sigcontext, &ucp->uc_mcontext.gregs, + sizeof (gregset_t)); + + if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) + ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate, + &ucp->uc_mcontext.fpregs); + + /* + * Convert the Linux signal mask and stack back to their + * Solaris equivalents. + */ + (void) ltos_sigset(&lx_ucp->uc_sigmask, &ucp->uc_sigmask); + ltos_stack(&lx_ucp->uc_stack, &ucp->uc_stack); + } + + /* + * At this point sp contains the value of the stack pointer when + * lx_call_user_handler() was called. + * + * Pop one more value off the stack and pass the new sp to + * lx_sigreturn_tolibc(), which will in turn manipulate the x86 + * registers to make it appear to libc's call_user_handler() as if the + * handler it had called returned. 
+ */ + sp += sizeof (uint32_t); + lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp); + lx_sigreturn_tolibc(sp); + + /*NOTREACHED*/ + return (0); +} + +/* + * Build signal frame for processing for "old" (legacy) Linux signals + */ +static void +lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp) +{ + extern void lx_sigreturn_tramp(); + + lx_sigset_t lx_sigset; + ucontext_t *ucp = (ucontext_t *)p; + struct lx_sigaction *lxsap; + struct lx_oldsigstack *lx_ossp = sp; + + lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp); + + lx_ossp->sig = lx_sig; + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && + lxsap->lxsa_restorer) { + lx_ossp->retaddr = lxsap->lxsa_restorer; + lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr); + } else { + lx_ossp->retaddr = lx_sigreturn_tramp; + lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp); + } + + lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr); + + /* convert Solaris signal mask and stack to their Linux equivalents */ + (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset); + lx_ossp->sigc.sc_mask = lx_sigset.__bits[0]; + lx_ossp->sig_extra = lx_sigset.__bits[1]; + + /* general registers copy across as-is */ + bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t)); + + /* + * cr2 contains the faulting address, and Linux only sets cr2 for a + * a segmentation fault. + */ + lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ? 
+ (uintptr_t)sip->si_addr : 0); + + /* convert FP regs if present */ + if (ucp->uc_flags & UC_FPU) { + stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate); + lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate; + } else + lx_ossp->sigc.sc_fpstate = NULL; + + /* + * Believe it or not, gdb wants to SEE the trampoline code on the + * bottom of the stack to determine whether the stack frame belongs to + * a signal handler, even though this code is no longer actually + * called. + * + * You can't make this stuff up. + */ + bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline, + sizeof (lx_ossp->trampoline)); +} + +/* + * Build signal frame for processing for modern Linux signals + */ +static void +lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp) +{ + extern void lx_rt_sigreturn_tramp(); + + lx_ucontext_t *lx_ucp; + ucontext_t *ucp = (ucontext_t *)p; + struct lx_sigstack *lx_ssp = sp; + struct lx_sigaction *lxsap; + + lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp); + + lx_ucp = &lx_ssp->uc; + lx_ssp->ucp = lx_ucp; + lx_ssp->sig = lx_sig; + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && + lxsap->lxsa_restorer) { + lx_ssp->retaddr = lxsap->lxsa_restorer; + lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr); + } else { + lx_ssp->retaddr = lx_rt_sigreturn_tramp; + lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp); + } + + /* Linux has these fields but always clears them to 0 */ + lx_ucp->uc_flags = 0; + lx_ucp->uc_link = NULL; + + /* convert Solaris signal mask and stack to their Linux equivalents */ + (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask); + stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack); + + /* general registers copy across as-is */ + bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t)); + + /* + * cr2 contains the faulting address, which Linux only sets for a + * a segmentation fault. 
+ */ + lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ? + (uintptr_t)sip->si_addr : 0; + + /* + * Point the lx_siginfo_t pointer to the signal stack's lx_siginfo_t + * if there was a Solaris siginfo_t to convert, otherwise set it to + * NULL. + */ + if ((sip) && (stol_siginfo(sip, &lx_ssp->si) == 0)) + lx_ssp->sip = &lx_ssp->si; + else + lx_ssp->sip = NULL; + + /* convert FP regs if present */ + if (ucp->uc_flags & UC_FPU) { + /* + * Copy FP regs to the appropriate place in the the lx_sigstack + * structure. + */ + stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate); + lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate; + } else + lx_ucp->uc_sigcontext.sc_fpstate = NULL; + + /* + * Believe it or not, gdb wants to SEE the trampoline code on the + * bottom of the stack to determine whether the stack frame belongs to + * a signal handler, even though this code is no longer actually + * called. + * + * You can't make this stuff up. + */ + bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline, + sizeof (lx_ssp->trampoline)); +} + +/* + * This is the second level interposition handler for Linux signals. + */ +static void +lx_call_user_handler(int sig, siginfo_t *sip, void *p) +{ + void (*user_handler)(); + void (*stk_builder)(); + + lx_tsd_t *lx_tsd; + struct lx_sigaction *lxsap; + ucontext_t *ucp = (ucontext_t *)p; + uintptr_t gs; + size_t stksize; + int err, lx_sig; + + /* + * If Solaris signal has no Linux equivalent, effectively + * ignore it. 
+ */ + if ((lx_sig = stol_signo[sig]) == -1) { + lx_debug("caught solaris signal %d, no Linux equivalent", sig); + return; + } + + lx_debug("interpose caught solaris signal %d, translating to Linux " + "signal %d", sig, lx_sig); + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("lxsap @ 0x%p", lxsap); + + if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) { + /* Linux SIG_DFL for SIGPWR is to terminate */ + exit(LX_SIGPWR | 0x80); + } + + if ((lxsap->lxsa_handler == SIG_DFL) || + (lxsap->lxsa_handler == SIG_IGN)) + lx_err_fatal(gettext("%s set to %s? How?!?!?"), + "lxsa_handler", + ((lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"), + lxsap->lxsa_handler); + + if ((err = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "lx_call_user_handler", strerror(err)); + + assert(lx_tsd != 0); + + gs = lx_tsd->lxtsd_gs & 0xffff; /* gs is only 16 bits */ + + /* + * Any zero %gs value should be caught when a save is attempted in + * lx_emulate(), but this extra check will catch any zero values due to + * bugs in the library. + */ + assert(gs != 0); + + if (lxsap->lxsa_flags & LX_SA_SIGINFO) { + stksize = sizeof (struct lx_sigstack); + stk_builder = lx_build_signal_frame; + } else { + stksize = sizeof (struct lx_oldsigstack); + stk_builder = lx_build_old_signal_frame; + } + + user_handler = lxsap->lxsa_handler; + + lx_debug("delivering %d (lx %d) to handler at 0x%p with gs 0x%x", sig, + lx_sig, lxsap->lxsa_handler, gs); + + if (lxsap->lxsa_flags & LX_SA_RESETHAND) + lxsap->lxsa_handler = SIG_DFL; + + /* + * lx_sigdeliver() doesn't return, so it relies on the Linux + * signal handlers to clean up the stack, reset the current + * signal mask and return to the code interrupted by the signal. + */ + lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, gs); +} + +/* + * Common routine to modify sigaction characteristics of a thread. 
+ * + * We shouldn't need any special locking code here as we actually use + * libc's sigaction() to do all the real work, so its thread locking should + * take care of any issues for us. + */ +static int +lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, + struct lx_sigaction *olxsp) +{ + struct lx_sigaction *lxsap; + struct sigaction sa; + + if (lx_sig <= 0 || lx_sig >= LX_NSIG) + return (-EINVAL); + + lxsap = &lx_sighandlers.lx_sa[lx_sig]; + lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap); + + if ((olxsp != NULL) && + ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0)) + return (-errno); + + if (lxsp != NULL) { + int err, sig; + struct lx_sigaction lxsa; + sigset_t new_set, oset; + + if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0) + return (-errno); + + if ((sig = ltos_signo[lx_sig]) != -1) { + /* + * Block this signal while messing with its disposition + */ + (void) sigemptyset(&new_set); + (void) sigaddset(&new_set, sig); + + if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) { + err = errno; + lx_debug("unable to block signal %d: %s", sig, + strerror(err)); + return (-err); + } + + /* + * We don't really need the old signal disposition at + * this point, but this weeds out signals that would + * cause sigaction() to return an error before we change + * anything other than the current signal mask. + */ + if (sigaction(sig, NULL, &sa) < 0) { + err = errno; + lx_debug("sigaction() to get old " + "disposition for signal %d failed: " + "%s", sig, strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + return (-err); + } + + if ((lxsa.lxsa_handler != SIG_DFL) && + (lxsa.lxsa_handler != SIG_IGN)) { + sa.sa_handler = lx_call_user_handler; + + /* + * The interposition signal handler needs the + * information provided via the SA_SIGINFO flag. 
+ */ + sa.sa_flags = SA_SIGINFO; + + if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP) + sa.sa_flags |= SA_NOCLDSTOP; + if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT) + sa.sa_flags |= SA_NOCLDWAIT; + if (lxsa.lxsa_flags & LX_SA_ONSTACK) + sa.sa_flags |= SA_ONSTACK; + if (lxsa.lxsa_flags & LX_SA_RESTART) + sa.sa_flags |= SA_RESTART; + if (lxsa.lxsa_flags & LX_SA_NODEFER) + sa.sa_flags |= SA_NODEFER; + + /* + * Can't use RESETHAND with SIGPWR due to + * different default actions between Linux + * and Solaris. + */ + if ((sig != SIGPWR) && + (lxsa.lxsa_flags & LX_SA_RESETHAND)) + sa.sa_flags |= SA_RESETHAND; + + if (ltos_sigset(&lxsa.lxsa_mask, + &sa.sa_mask) != 0) { + err = errno; + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + + lx_debug("interposing handler @ 0x%p for " + "signal %d (lx %d), flags 0x%x", + lxsa.lxsa_handler, sig, lx_sig, + lxsa.lxsa_flags); + + if (sigaction(sig, &sa, NULL) < 0) { + err = errno; + lx_debug("sigaction() to set new " + "disposition for signal %d failed: " + "%s", sig, strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + } else if ((sig != SIGPWR) || + ((sig == SIGPWR) && + (lxsa.lxsa_handler == SIG_IGN))) { + /* + * There's no need to interpose for SIG_DFL or + * SIG_IGN so just call libc's sigaction(), but + * don't allow SIG_DFL for SIGPWR due to + * differing default actions between Linux and + * Solaris. + * + * Get the previous disposition first so things + * like sa_mask and sa_flags are preserved over + * a transition to SIG_DFL or SIG_IGN, which is + * what Linux expects. + */ + + sa.sa_handler = lxsa.lxsa_handler; + + if (sigaction(sig, &sa, NULL) < 0) { + err = errno; + lx_debug("sigaction(%d, %s) failed: %s", + sig, ((sa.sa_handler == SIG_DFL) ? 
+ "SIG_DFL" : "SIG_IGN"), + strerror(err)); + (void) sigprocmask(SIG_SETMASK, &oset, + NULL); + return (-err); + } + } + } else { + lx_debug("Linux signal with no kill support " + "specified: %d", lx_sig); + } + + /* + * Save the new disposition for the signal in the global + * lx_sighandlers structure. + */ + bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction)); + + /* + * Reset the signal mask to what we came in with if + * we were modifying a kill-supported signal. + */ + if (sig != -1) + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + } + + return (0); +} + +int +lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp) +{ + int val; + struct lx_sigaction sa, osa; + struct lx_sigaction *sap, *osap; + struct lx_osigaction *osp; + + sap = (actp ? &sa : NULL); + osap = (oactp ? &osa : NULL); + + /* + * If we have a source pointer, convert source lxsa_mask from + * lx_osigset_t to lx_sigset_t format. + */ + if (sap) { + osp = (struct lx_osigaction *)actp; + sap->lxsa_handler = osp->lxsa_handler; + + bzero(&sap->lxsa_mask, sizeof (lx_sigset_t)); + + for (val = 1; val <= OSIGSET_NBITS; val++) + if (osp->lxsa_mask & OSIGSET_BITSET(val)) + (void) lx_sigaddset(&sap->lxsa_mask, val); + + sap->lxsa_flags = osp->lxsa_flags; + sap->lxsa_restorer = osp->lxsa_restorer; + } + + if (val = lx_sigaction_common(lx_sig, sap, osap)) + return (val); + + /* + * If we have a save pointer, convert the old lxsa_mask from + * lx_sigset_t to lx_osigset_t format. 
+ */ + if (osap) { + osp = (struct lx_osigaction *)oactp; + + osp->lxsa_handler = osap->lxsa_handler; + + bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask)); + for (val = 1; val <= OSIGSET_NBITS; val++) + if (lx_sigismember(&osap->lxsa_mask, val)) + osp->lxsa_mask |= OSIGSET_BITSET(val); + + osp->lxsa_flags = osap->lxsa_flags; + osp->lxsa_restorer = osap->lxsa_restorer; + } + + return (0); +} + +int +lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp, + uintptr_t setsize) +{ + /* + * The "new" rt_sigaction call checks the setsize + * parameter. + */ + if ((size_t)setsize != sizeof (lx_sigset_t)) + return (-EINVAL); + + return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp, + (struct lx_sigaction *)oactp)); +} + +/* + * Convert signal syscall to a call to lx_sigaction_common(). + * + * Returns the previously installed handler on success or a negated errno + * value on failure. + */ +int +lx_signal(uintptr_t lx_sig, uintptr_t handler) +{ + struct lx_sigaction act; + struct lx_sigaction oact; + int rc; + + /* + * Use sigaction to mimic SYSV signal() behavior; glibc will + * actually call sigaction(2) itself, so we're really reaching + * back for signal(2) semantics here. + * + * The request must be expressed as a Linux-format struct lx_sigaction + * carrying LX_SA_* flag values, since that is what + * lx_sigaction_common() interprets and translates to the Solaris + * SA_* flags; a Solaris struct sigaction is not interchangeable + * with it. + */ + bzero(&act, sizeof (act)); + act.lxsa_handler = (void (*)())handler; + act.lxsa_flags = LX_SA_RESETHAND | LX_SA_NODEFER; + + rc = lx_sigaction_common(lx_sig, &act, &oact); + return ((rc == 0) ? ((int)oact.lxsa_handler) : rc); +} + +int +lx_tgkill(uintptr_t tgid, uintptr_t pid, uintptr_t sig) +{ + if (((pid_t)tgid <= 0) || ((pid_t)pid <= 0)) + return (-EINVAL); + + /* Only the case where the thread group id equals the pid is handled */ + if (tgid != pid) { + lx_unsupported(gettext( + "BrandZ tgkill(2) does not support tgid != pid\n")); + return (-ENOTSUP); + } + + /* + * Pad the lx_tkill() call with NULLs to match the IN_KERNEL_SYSCALL + * prototype generated for it by IN_KERNEL_SYSCALL in lx_brand.c. + */ + return (lx_tkill(pid, sig, NULL, NULL, NULL, NULL)); +} + +/* + * This C routine to save the passed %gs value into the thread-specific save + * area is called by the assembly routine lx_sigacthandler. 
+ */ +void +lx_sigsavegs(uintptr_t signalled_gs) +{ + lx_tsd_t *lx_tsd; + int err; + + signalled_gs &= 0xffff; /* gs is only 16 bits */ + + /* + * While a %gs of 0 is technically legal (as long as the application + * never dereferences memory using %gs), Solaris has its own ideas as + * to how a zero %gs should be handled in _update_sregs(), such that + * any 32-bit user process with a %gs of zero running on a system with + * a 64-bit kernel will have its %gs hidden base register stomped on on + * return from a system call, leaving an incorrect base address in + * place until the next time %gs is actually reloaded (forcing a reload + * of the base address from the appropriate descriptor table.) + * + * Of course the kernel will once again stomp on THAT base address when + * returning from a system call, resulting in an application + * segmentation fault. + * + * To avoid this situation, disallow a save of a zero %gs here in order + * to try and capture any Linux process that takes a signal with a zero + * %gs installed. + */ + assert(signalled_gs != 0); + + if (signalled_gs != LWPGS_SEL) { + if ((err = thr_getspecific(lx_tsd_key, + (void **)&lx_tsd)) != 0) + lx_err_fatal(gettext( + "%s: unable to read thread-specific data: %s"), + "sigsavegs", strerror(err)); + + assert(lx_tsd != 0); + + lx_tsd->lxtsd_gs = signalled_gs; + + lx_debug("lx_sigsavegs(): gsp 0x%p, saved gs: 0x%x\n", + lx_tsd, signalled_gs); + } +} + +int +lx_siginit(void) +{ + extern void set_setcontext_enforcement(int); + extern void lx_sigacthandler(int, siginfo_t *, void *); + + struct sigaction sa; + sigset_t new_set, oset; + int lx_sig, sig; + + /* + * Block all signals possible while setting up the signal imposition + * mechanism. 
+ */ + (void) sigfillset(&new_set); + + if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) + lx_err_fatal(gettext("unable to block signals while setting up " + "imposition mechanism: %s"), strerror(errno)); + + /* + * Ignore any signals that have no Linux analog so that those + * signals cannot be sent to Linux processes from the global zone + */ + for (sig = 1; sig < NSIG; sig++) + if (stol_signo[sig] < 0) + (void) sigignore(sig); + + /* + * As mentioned previously, when a user signal handler is installed + * via sigaction(), libc interposes on the mechanism by actually + * installing an internal routine sigacthandler() as the signal + * handler. On receipt of the signal, libc does some thread-related + * processing via sigacthandler(), then calls the registered user + * signal handler on behalf of the user. + * + * We need to interpose on that mechanism to make sure the correct + * %gs segment register value is installed before the libc routine + * is called, otherwise the libc code will die with a segmentation + * fault. + * + * The private libc routine setsigacthandler() will set our + * interposition routine, lx_sigacthandler(), as the default + * "sigacthandler" routine for all new signal handlers for this + * thread. 
+ */ + setsigacthandler(lx_sigacthandler, &libc_sigacthandler); + lx_debug("lx_sigacthandler installed, libc_sigacthandler = 0x%p", + libc_sigacthandler); + + /* + * Mark any signals that are ignored as ignored in our interposition + * handler array. + * + * Linux signals with no Solaris equivalent are skipped: sigaction() + * is never queried for them, so sa would hold no valid disposition + * for that signal (only stale or uninitialized data). + */ + for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) { + if ((sig = ltos_signo[lx_sig]) == -1) + continue; + + if (sigaction(sig, NULL, &sa) < 0) + lx_err_fatal(gettext("unable to determine previous " + "disposition for signal %d: %s"), + sig, strerror(errno)); + + if (sa.sa_handler == SIG_IGN) { + lx_debug("marking signal %d (lx %d) as SIG_IGN", + sig, lx_sig); + lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN; + } + } + + /* + * Have our interposition handler handle SIGPWR to start with, + * as it has a default action of terminating the process in Linux + * but its default is to be ignored in Solaris. + */ + (void) sigemptyset(&sa.sa_mask); + sa.sa_sigaction = lx_call_user_handler; + sa.sa_flags = SA_SIGINFO; + + if (sigaction(SIGPWR, &sa, NULL) < 0) + lx_err_fatal(gettext("%s failed: %s"), "sigaction(SIGPWR)", + strerror(errno)); + + /* + * Solaris' libc forces certain register values in the ucontext_t + * used to restore a post-signal user context to be those Solaris + * expects; however that is not what we want to happen if the signal + * was taken while branded code was executing, so we must disable + * that behavior. 
+ */ + set_setcontext_enforcement(0); + + /* + * Reset the signal mask to what we came in with + */ + (void) sigprocmask(SIG_SETMASK, &oset, NULL); + + lx_debug("interposition handler setup for SIGPWR"); + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c new file mode 100644 index 0000000000..203c92c9f7 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c @@ -0,0 +1,1248 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <libintl.h> +#include <strings.h> +#include <alloca.h> + +#include <sys/param.h> +#include <sys/brand.h> +#include <sys/syscall.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/un.h> +#include <netinet/tcp.h> +#include <netinet/igmp.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/lx_debug.h> +#include <sys/lx_syscall.h> +#include <sys/lx_socket.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> + +static int lx_socket(ulong_t *); +static int lx_bind(ulong_t *); +static int lx_connect(ulong_t *); +static int lx_listen(ulong_t *); +static int lx_accept(ulong_t *); +static int lx_getsockname(ulong_t *); +static int lx_getpeername(ulong_t *); +static int lx_socketpair(ulong_t *); +static int lx_send(ulong_t *); +static int lx_recv(ulong_t *); +static int lx_sendto(ulong_t *); +static int lx_recvfrom(ulong_t *); +static int lx_shutdown(ulong_t *); +static int lx_setsockopt(ulong_t *); +static int lx_getsockopt(ulong_t *); +static int lx_sendmsg(ulong_t *); +static int lx_recvmsg(ulong_t *); + +typedef int (*sockfn_t)(ulong_t *); + +static struct { + sockfn_t s_fn; /* Function implementing the subcommand */ + int s_nargs; /* Number of arguments the function takes */ +} sockfns[] = { + lx_socket, 3, + lx_bind, 3, + lx_connect, 3, + lx_listen, 2, + lx_accept, 3, + lx_getsockname, 3, + lx_getpeername, 3, + lx_socketpair, 4, + lx_send, 4, + lx_recv, 4, + lx_sendto, 6, + lx_recvfrom, 6, + lx_shutdown, 2, + lx_setsockopt, 5, + lx_getsockopt, 5, + lx_sendmsg, 3, + lx_recvmsg, 3 +}; + +/* + * What follows are a series of tables we use to translate Linux constants + * into equivalent Solaris constants and back again. I wish this were + * cleaner, more programmatic, and generally nicer. Sadly, life is messy, + * and Unix networking even more so. 
+ */ +static const int ltos_family[LX_AF_MAX + 1] = { + AF_UNSPEC, AF_UNIX, AF_INET, AF_CCITT, AF_IPX, + AF_APPLETALK, AF_NOTSUPPORTED, AF_OSI, AF_NOTSUPPORTED, + AF_X25, AF_INET6, AF_CCITT, AF_DECnet, + AF_802, AF_POLICY, AF_KEY, AF_ROUTE, + AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, + AF_NOTSUPPORTED, AF_SNA, AF_NOTSUPPORTED, AF_NOTSUPPORTED, + AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, + AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED +}; + +#define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL) + +static const int ltos_socktype[LX_SOCK_PACKET + 1] = { + SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, + SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, + SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED +}; + +#define LTOS_SOCKTYPE(t) \ + ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL) + +/* + * Linux socket option type definitions + * + * The protocol `levels` are well defined (see in.h) The option values are + * not so well defined. Linux often uses different values to Solaris + * although they mean the same thing. For example, IP_TOS in Linux is + * defined as value 1 but in Solaris it is defined as value 3. This table + * maps all the Protocol levels to their options and maps them between + * Linux and Solaris and vice versa. Hence the reason for the complexity. 
+ */ + +typedef struct lx_proto_opts { + const int *proto; /* Linux to Solaris mapping table */ + int maxentries; /* max entries in this table */ +} lx_proto_opts_t; + +#define OPTNOTSUP -1 /* we don't support it */ + +static const int ltos_ip_sockopts[LX_IP_DROP_MEMBERSHIP + 1] = { + OPTNOTSUP, IP_TOS, IP_TTL, IP_HDRINCL, + IP_OPTIONS, OPTNOTSUP, IP_RECVOPTS, IP_RETOPTS, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + IP_RECVTTL, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + IP_MULTICAST_IF, IP_MULTICAST_TTL, IP_MULTICAST_LOOP, + IP_ADD_MEMBERSHIP, IP_DROP_MEMBERSHIP +}; + +static const int ltos_tcp_sockopts[LX_TCP_QUICKACK + 1] = { + OPTNOTSUP, TCP_NODELAY, TCP_MAXSEG, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + TCP_KEEPALIVE, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP +}; + +static const int ltos_igmp_sockopts[IGMP_MTRACE + 1] = { + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + IGMP_MINLEN, OPTNOTSUP, OPTNOTSUP, /* XXX: was IGMP_TIMER_SCALE */ + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, IGMP_MEMBERSHIP_QUERY, + IGMP_V1_MEMBERSHIP_REPORT, IGMP_DVMRP, + IGMP_PIM, OPTNOTSUP, IGMP_V2_MEMBERSHIP_REPORT, + IGMP_V2_LEAVE_GROUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + IGMP_MTRACE_RESP, IGMP_MTRACE +}; + +static const int ltos_socket_sockopts[LX_SO_ACCEPTCONN + 1] = { + OPTNOTSUP, SO_DEBUG, SO_REUSEADDR, SO_TYPE, + SO_ERROR, SO_DONTROUTE, SO_BROADCAST, SO_SNDBUF, + SO_RCVBUF, SO_KEEPALIVE, SO_OOBINLINE, OPTNOTSUP, + OPTNOTSUP, SO_LINGER, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, SO_RCVLOWAT, SO_SNDLOWAT, + SO_RCVTIMEO, SO_SNDTIMEO, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, + OPTNOTSUP, OPTNOTSUP, SO_ACCEPTCONN +}; + +#define PROTO_SOCKOPTS(opts) \ + { 
(opts), sizeof ((opts)) / sizeof ((opts)[0]) } + +/* + * The main Linux to Solaris protocol to options mapping table + * IPPROTO_TAB_SIZE can be set up to IPPROTO_MAX. All entries above + * IPPROTO_TAB_SIZE are in effect not implemented, + */ + +#define IPPROTO_TAB_SIZE 8 + +static const lx_proto_opts_t ltos_proto_opts[IPPROTO_TAB_SIZE] = { + /* IPPROTO_IP 0 */ + PROTO_SOCKOPTS(ltos_ip_sockopts), + /* SOL_SOCKET 1 */ + PROTO_SOCKOPTS(ltos_socket_sockopts), + /* IPPROTO_IGMP 2 */ + PROTO_SOCKOPTS(ltos_igmp_sockopts), + /* NOT IMPLEMENTED 3 */ + { NULL, 0 }, + /* NOT IMPLEMENTED 4 */ + { NULL, 0 }, + /* NOT IMPLEMENTED 5 */ + { NULL, 0 }, + /* IPPROTO_TCP 6 */ + PROTO_SOCKOPTS(ltos_tcp_sockopts), + /* NOT IMPLEMENTED 7 */ + { NULL, 0 } +}; + +/* + * Lifted from socket.h, since these definitions are contained within + * _KERNEL guards. + */ +#define _CMSG_HDR_ALIGNMENT 4 +#define _CMSG_HDR_ALIGN(x) (((uintptr_t)(x) + _CMSG_HDR_ALIGNMENT - 1) & \ + ~(_CMSG_HDR_ALIGNMENT - 1)) +#define CMSG_FIRSTHDR(m) \ + (((m)->msg_controllen < sizeof (struct cmsghdr)) ? \ + (struct cmsghdr *)0 : (struct cmsghdr *)((m)->msg_control)) + +#define CMSG_NXTHDR(m, c) \ + (((c) == 0) ? CMSG_FIRSTHDR(m) : \ + ((((uintptr_t)_CMSG_HDR_ALIGN((char *)(c) + \ + ((struct cmsghdr *)(c))->cmsg_len) + sizeof (struct cmsghdr)) > \ + (((uintptr_t)((struct lx_msghdr *)(m))->msg_control) + \ + ((uintptr_t)((struct lx_msghdr *)(m))->msg_controllen))) ? 
\ + ((struct cmsghdr *)0) : \ + ((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \ + ((struct cmsghdr *)(c))->cmsg_len)))) + +#define LX_TO_SOL 1 +#define SOL_TO_LX 2 + +static int +convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller) +{ + struct cmsghdr *cmsg, *last; + int err = 0; + + cmsg = CMSG_FIRSTHDR(msg); + while (cmsg != NULL && err == 0) { + if (direction == LX_TO_SOL) { + if (cmsg->cmsg_level == LX_SOL_SOCKET) { + cmsg->cmsg_level = SOL_SOCKET; + if (cmsg->cmsg_type == LX_SCM_RIGHTS) + cmsg->cmsg_type = SCM_RIGHTS; + else if (cmsg->cmsg_type == LX_SCM_CRED) + cmsg->cmsg_type = SCM_UCRED; + else + err = ENOTSUP; + } else { + err = ENOTSUP; + } + } else { + if (cmsg->cmsg_level == SOL_SOCKET) { + cmsg->cmsg_level = LX_SOL_SOCKET; + if (cmsg->cmsg_type == SCM_RIGHTS) + cmsg->cmsg_type = LX_SCM_RIGHTS; + else if (cmsg->cmsg_type == SCM_UCRED) + cmsg->cmsg_type = LX_SCM_CRED; + else + err = ENOTSUP; + } else { + err = ENOTSUP; + } + } + + last = cmsg; + cmsg = CMSG_NXTHDR(msg, last); + } + if (err) + lx_unsupported("Unsupported socket control message in %s\n.", + caller); + + return (err); +} + +static int +convert_sockaddr(struct sockaddr *addr, socklen_t *len, + struct sockaddr *inaddr, socklen_t inlen) +{ + sa_family_t family; + int lx_in6_len; + int size; + + /* + * Note that if the buffer at inaddr is ever smaller than inlen bytes, + * we may erroneously return EFAULT rather than a possible EINVAL + * as the copy comes before the various checks as to whether inlen + * is of the proper length for the socket type. + * + * This isn't an issue at present because all callers to this routine + * do meet that constraint. 
+ */ + if ((ssize_t)inlen < 0) + return (-EINVAL); + if (uucopy(inaddr, addr, inlen) != 0) + return (-errno); + + family = LTOS_FAMILY(addr->sa_family); + + switch (family) { + case (sa_family_t)AF_NOTSUPPORTED: + return (-EPROTONOSUPPORT); + case (sa_family_t)AF_INVAL: + return (-EAFNOSUPPORT); + case AF_INET: + size = sizeof (struct sockaddr); + + if (inlen < size) + return (-EINVAL); + + *len = size; + break; + + case AF_INET6: + /* + * The Solaris sockaddr_in6 has one more 32-bit + * field than the Linux version. + */ + size = sizeof (struct sockaddr_in6); + lx_in6_len = size - sizeof (uint32_t); + + if (inlen != lx_in6_len) + return (-EINVAL); + + *len = (sizeof (struct sockaddr_in6)); + bzero((char *)addr + lx_in6_len, sizeof (uint32_t)); + break; + + case AF_UNIX: + if (inlen > sizeof (struct sockaddr_un)) + return (-EINVAL); + + *len = inlen; + break; + + default: + *len = inlen; + } + + addr->sa_family = family; + return (0); +} + +static int +convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom, + int *out_type) +{ + int domain, type; + + if (in_dom < 0 || in_type < 0 || in_protocol < 0) + return (-EINVAL); + + domain = LTOS_FAMILY(in_dom); + if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC) + return (-EAFNOSUPPORT); + if (domain == AF_INVAL) + return (-EINVAL); + + type = LTOS_SOCKTYPE(in_type); + if (type == SOCK_NOTSUPPORTED) + return (-ESOCKTNOSUPPORT); + if (type == SOCK_INVAL) + return (-EINVAL); + + /* + * Linux does not allow the app to specify IP Protocol for raw + * sockets. Solaris does, so bail out here. 
+ */ + if (type == SOCK_RAW && in_protocol == IPPROTO_IP) + return (-ESOCKTNOSUPPORT); + + *out_dom = domain; + *out_type = type; + return (0); +} + +static int +convert_sockflags(int lx_flags) +{ + int solaris_flags = 0; + + if (lx_flags & LX_MSG_OOB) + solaris_flags |= MSG_OOB; + + if (lx_flags & LX_MSG_PEEK) + solaris_flags |= MSG_PEEK; + + if (lx_flags & LX_MSG_DONTROUTE) + solaris_flags |= MSG_DONTROUTE; + + if (lx_flags & LX_MSG_CTRUNC) + solaris_flags |= MSG_CTRUNC; + + if (lx_flags & LX_MSG_TRUNC) + solaris_flags |= MSG_TRUNC; + + if (lx_flags & LX_MSG_WAITALL) + solaris_flags |= MSG_WAITALL; + + if (lx_flags & LX_MSG_DONTWAIT) + solaris_flags |= MSG_DONTWAIT; + + if (lx_flags & LX_MSG_EOR) + solaris_flags |= MSG_EOR; + + if (lx_flags & LX_MSG_PROXY) + lx_unsupported("socket operation with MSG_PROXY flag set"); + + if (lx_flags & LX_MSG_FIN) + lx_unsupported("socket operation with MSG_FIN flag set"); + + if (lx_flags & LX_MSG_SYN) + lx_unsupported("socket operation with MSG_SYN flag set"); + + if (lx_flags & LX_MSG_CONFIRM) + lx_unsupported("socket operation with MSG_CONFIRM set"); + + if (lx_flags & LX_MSG_RST) + lx_unsupported("socket operation with MSG_RST flag set"); + + if (lx_flags & LX_MSG_MORE) + lx_unsupported("socket operation with MSG_MORE flag set"); + + return (solaris_flags); +} + +static int +lx_socket(ulong_t *args) +{ + int domain; + int type; + int protocol = (int)args[2]; + int fd; + int err; + + err = convert_sock_args((int)args[0], (int)args[1], protocol, + &domain, &type); + if (err != 0) + return (err); + + lx_debug("\tsocket(%d, %d, %d)", domain, type, protocol); + fd = socket(domain, type, protocol); + if (fd >= 0) + return (fd); + + if (errno == EPROTONOSUPPORT) + return (-ESOCKTNOSUPPORT); + + return (-errno); +} + +static int +lx_bind(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct stat64 statbuf; + struct sockaddr *name; + socklen_t len; + int r; + + if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL) + return 
(-EINVAL); + if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], + (socklen_t)args[2])) < 0) + return (r); + + lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len); + + if (name->sa_family == AF_UNIX) + lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); + + r = bind(sockfd, name, len); + + /* + * Linux returns EADDRINUSE for attempts to bind to UNIX domain + * sockets that aren't sockets. + */ + if ((r < 0) && (errno == EINVAL) && (name->sa_family == AF_UNIX) && + ((stat64(name->sa_data, &statbuf) == 0) && + (!S_ISSOCK(statbuf.st_mode)))) + return (-EADDRINUSE); + + return ((r < 0) ? -errno : r); +} + +static int +lx_connect(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name; + socklen_t len; + int r; + + if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL) + return (-EINVAL); + + if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], + (socklen_t)args[2])) < 0) + return (r); + + lx_debug("\tconnect(%d, 0x%p, %d)", sockfd, name, len); + + if (name->sa_family == AF_UNIX) + lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); + + r = connect(sockfd, name, len); + + return ((r < 0) ? -errno : r); +} + +static int +lx_listen(ulong_t *args) +{ + int sockfd = (int)args[0]; + int backlog = (int)args[1]; + int r; + + lx_debug("\tlisten(%d, %d)", sockfd, backlog); + r = listen(sockfd, backlog); + + return ((r < 0) ? -errno : r); +} + +static int +lx_accept(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name = (struct sockaddr *)args[1]; + socklen_t namelen = 0; + int r; + + lx_debug("\taccept(%d, 0x%p, 0x%p", sockfd, args[1], args[2]); + + /* + * The Linux man page says that -1 is returned and errno is set to + * EFAULT if the "name" address is bad, but it is silent on what to + * set errno to if the "namelen" address is bad. Experimentation + * shows that Linux (at least the 2.4.21 kernel in CentOS) actually + * sets errno to EINVAL in both cases. 
+ * + * Note that we must first check the name pointer, as the Linux + * docs state nothing is copied out if the "name" pointer is NULL. + * If it is NULL, we don't care about the namelen pointer's value + * or about dereferencing it. + * + * Happily, Solaris' accept(3SOCKET) treats NULL name pointers and + * zero namelens the same way. + */ + if ((name != NULL) && + (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0)) + return ((errno == EFAULT) ? -EINVAL : -errno); + + lx_debug("\taccept namelen = %d", namelen); + + if ((r = accept(sockfd, name, &namelen)) < 0) + return ((errno == EFAULT) ? -EINVAL : -errno); + + lx_debug("\taccept namelen returned %d bytes", namelen); + + /* + * In Linux, accept()ed sockets do not inherit anything set by + * fcntl(), so filter those out. + */ + if (fcntl(r, F_SETFL, 0) < 0) + return (-errno); + + /* + * Once again, a bad "namelen" address sets errno to EINVAL, not + * EFAULT. If namelen was zero, there's no need to copy a zero back + * out. + * + * Logic might dictate that we should check if we can write to + * the namelen pointer earlier so we don't accept a pending connection + * only to fail the call because we can't write the namelen value back + * out. However, testing shows Linux does indeed fail the call after + * accepting the connection so we must behave in a compatible manner. + */ + if ((name != NULL) && (namelen != 0) && + (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0)) + return ((errno == EFAULT) ? 
-EINVAL : -errno); + + return (r); +} + +static int +lx_getsockname(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name = NULL; + socklen_t namelen, namelen_orig; + + if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) + return (-errno); + namelen_orig = namelen; + + lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))", + sockfd, args[1], args[2], namelen); + + if (namelen > 0) { + if ((name = SAFE_ALLOCA(namelen)) == NULL) + return (-EINVAL); + bzero(name, namelen); + } + + if ((getsockname(sockfd, name, &namelen)) < 0) + return (-errno); + + /* + * If the name that getsockname() want's to return is larger + * than namelen, getsockname() will copy out the maximum amount + * of data possible and then update namelen to indicate the + * actually size of all the data that it wanted to copy out. + */ + if (uucopy(name, (void *)args[1], namelen_orig) != 0) + return (-errno); + if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) + return (-errno); + + return (0); +} + +static int +lx_getpeername(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct sockaddr *name; + socklen_t namelen; + + if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) + return (-errno); + + lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))", + sockfd, args[1], args[2], namelen); + + /* + * Linux returns EFAULT in this case, even if the namelen parameter + * is 0. This check will not catch other illegal addresses, but + * the benefit catching a non-null illegal address here is not + * worth the cost of another system call. 
+ */ + if ((void *)args[1] == NULL) + return (-EFAULT); + + if ((name = SAFE_ALLOCA(namelen)) == NULL) + return (-EINVAL); + if ((getpeername(sockfd, name, &namelen)) < 0) + return (-errno); + + if (uucopy(name, (void *)args[1], namelen) != 0) + return (-errno); + + if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) + return (-errno); + + return (0); +} + +static int +lx_socketpair(ulong_t *args) +{ + int domain; + int type; + int protocol = (int)args[2]; + int *sv = (int *)args[3]; + int fds[2]; + int r; + + r = convert_sock_args((int)args[0], (int)args[1], protocol, + &domain, &type); + if (r != 0) + return (r); + + lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv); + + r = socketpair(domain, type, protocol, fds); + + if (r == 0) { + if (uucopy(fds, sv, sizeof (fds)) != 0) { + r = errno; + (void) close(fds[0]); + (void) close(fds[1]); + return (-r); + } + return (0); + } + + if (errno == EPROTONOSUPPORT) + return (-ESOCKTNOSUPPORT); + + return (-errno); +} + +static ssize_t +lx_send(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\tsend(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. 
+ */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "send()"); + } + + r = send(sockfd, buf, len, flags); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "send()"); + + return ((r < 0) ? -errno : r); +} + +static ssize_t +lx_recv(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecv(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recv()"); + } + + r = recv(sockfd, buf, len, flags); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recv()"); + + return ((r < 0) ? 
-errno : r); +} + +static ssize_t +lx_sendto(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + struct sockaddr *to; + socklen_t tolen; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + if ((to = SAFE_ALLOCA((socklen_t)args[5])) == NULL) + return (-EINVAL); + + if ((r = convert_sockaddr(to, &tolen, (struct sockaddr *)args[4], + (socklen_t)args[5])) < 0) + return (r); + + lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len, + flags, to, tolen); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "sendto()"); + } + + r = sendto(sockfd, buf, len, flags, to, tolen); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "sendto()"); + + if (r < 0) { + /* + * according to the man page and LTP, the expected error in + * this case is EPIPE. 
+ */ + if (errno == ENOTCONN) + return (-EPIPE); + else + return (-errno); + } + return (r); +} + +static ssize_t +lx_recvfrom(ulong_t *args) +{ + int sockfd = (int)args[0]; + void *buf = (void *)args[1]; + size_t len = (size_t)args[2]; + int flags = (int)args[3]; + struct sockaddr *from = (struct sockaddr *)args[4]; + socklen_t *from_lenp = (socklen_t *)args[5]; + ssize_t r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecvfrom(%d, 0x%p, 0x%d, 0x%x, 0x%x, 0x%p)", sockfd, buf, + len, flags, from, from_lenp); + + flags = convert_sockflags(flags); + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recvfrom()"); + } + + r = recvfrom(sockfd, buf, len, flags, from, from_lenp); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recvfrom()"); + + return ((r < 0) ? -errno : r); +} + +static int +lx_shutdown(ulong_t *args) +{ + int sockfd = (int)args[0]; + int how = (int)args[1]; + int r; + + lx_debug("\tshutdown(%d, %d)", sockfd, how); + r = shutdown(sockfd, how); + + return ((r < 0) ? 
-errno : r); +} + +static int +lx_setsockopt(ulong_t *args) +{ + int sockfd = (int)args[0]; + int level = (int)args[1]; + int optname = (int)args[2]; + void *optval = (void *)args[3]; + int optlen = (int)args[4]; + int internal_opt; + int r; + + lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname, + optval, optlen); + + /* + * The kernel returns EFAULT for all invalid addresses except NULL, + * for which it returns EINVAL. Linux wants EFAULT for NULL too. + */ + if (optval == NULL) + return (-EFAULT); + + /* + * Do a table lookup of the Solaris equivalent of the given option + */ + if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) + return (-ENOPROTOOPT); + + if (ltos_proto_opts[level].maxentries == 0 || + optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) + return (-ENOPROTOOPT); + + if (optname == LX_TCP_CORK) { + /* + * TCP_CORK is a Linux-only option that instructs the TCP + * stack not to send out partial frames. Solaris doesn't + * include this option but some apps require it. So, we do + * our best to emulate the option by disabling TCP_NODELAY. + * If the app requests that we disable TCP_CORK, we just + * ignore it since enabling TCP_NODELAY may be + * overcompensating. + */ + optname = TCP_NODELAY; + if (optlen != sizeof (int)) + return (-EINVAL); + if (uucopy(optval, &internal_opt, sizeof (int)) != 0) + return (-errno); + if (internal_opt == 0) + return (0); + internal_opt = 1; + optval = &internal_opt; + } else { + optname = ltos_proto_opts[level].proto[optname]; + + if (optname == OPTNOTSUP) + return (-ENOPROTOOPT); + } + + if (level == LX_SOL_SOCKET) + level = SOL_SOCKET; + + r = setsockopt(sockfd, level, optname, optval, optlen); + + return ((r < 0) ? 
-errno : r); +} + +static int +lx_getsockopt(ulong_t *args) +{ + int sockfd = (int)args[0]; + int level = (int)args[1]; + int optname = (int)args[2]; + void *optval = (void *)args[3]; + int *optlen = (int *)args[4]; + int r; + + lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname, + optval, optlen); + + /* + * According to the Linux man page, a NULL optval should indicate + * (as in Solaris) that no return value is expected. Instead, it + * actually triggers an EFAULT error. + */ + if (optval == NULL) + return (-EFAULT); + + /* + * Do a table lookup of the Solaris equivalent of the given option + */ + if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) + return (-EOPNOTSUPP); + + if (ltos_proto_opts[level].maxentries == 0 || + optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) + return (-ENOPROTOOPT); + + if (optname == LX_TCP_CORK) { + /* + * We don't support TCP_CORK but some apps rely on it. So, + * rather than return an error we just return 0. This + * isn't exactly a lie, since this option really isn't set, + * but it's not the whole truth either. Fortunately, we + * aren't under oath. + */ + r = 0; + if (uucopy(&r, optval, sizeof (int)) != 0) + return (-errno); + r = sizeof (int); + if (uucopy(&r, optlen, sizeof (int)) != 0) + return (-errno); + return (0); + } + + optname = ltos_proto_opts[level].proto[optname]; + + if (optname == OPTNOTSUP) + return (-ENOPROTOOPT); + + if (level == LX_SOL_SOCKET) + level = SOL_SOCKET; + + r = getsockopt(sockfd, level, optname, optval, optlen); + + return ((r < 0) ? -errno : r); +} + +/* + * libc routines that issue these system calls. We bypass the libsocket + * wrappers since they explicitly turn off the MSG_XPG_2 flag we need for + * Linux compatibility. 
+ */ +extern int _so_sendmsg(); +extern int _so_recvmsg(); + +static int +lx_sendmsg(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct lx_msghdr msg; + struct cmsghdr *cmsg; + int flags = (int)args[2]; + int r; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\tsendmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); + + flags = convert_sockflags(flags); + + if ((uucopy((void *)args[1], &msg, sizeof (msg))) != 0) + return (-errno); + + /* + * If there are control messages bundled in this message, we need + * to convert them from Linux to Solaris. + */ + if (msg.msg_control != NULL) { + if (msg.msg_controllen == 0) { + cmsg = NULL; + } else { + cmsg = SAFE_ALLOCA(msg.msg_controllen); + if (cmsg == NULL) + return (-EINVAL); + } + if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) + return (-errno); + msg.msg_control = cmsg; + if ((r = convert_cmsgs(LX_TO_SOL, &msg, "sendmsg()")) != 0) + return (-r); + } + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. + * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. 
+ */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "sendmsg()"); + } + + r = _so_sendmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "sendmsg()"); + + if (r < 0) { + /* + * according to the man page and LTP, the expected error in + * this case is EPIPE. + */ + if (errno == ENOTCONN) + return (-EPIPE); + else + return (-errno); + } + + return (r); +} + +static int +lx_recvmsg(ulong_t *args) +{ + int sockfd = (int)args[0]; + struct lx_msghdr msg; + struct lx_msghdr *msgp = (struct lx_msghdr *)args[1]; + struct cmsghdr *cmsg; + int flags = (int)args[2]; + int r, err; + + int nosigpipe = flags & LX_MSG_NOSIGNAL; + struct sigaction newact, oact; + + lx_debug("\trecvmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); + + flags = convert_sockflags(flags); + + if ((uucopy(msgp, &msg, sizeof (msg))) != 0) + return (-errno); + + /* + * If we are expecting to have to convert any control messages, + * then we should receive them into our address space instead of + * the app's. + */ + if (msg.msg_control != NULL) { + cmsg = msg.msg_control; + if (msg.msg_controllen == 0) { + msg.msg_control = NULL; + } else { + msg.msg_control = SAFE_ALLOCA(msg.msg_controllen); + if (msg.msg_control == NULL) + return (-EINVAL); + } + } + + /* + * If nosigpipe is set, we want to emulate the Linux action of + * not sending a SIGPIPE to the caller if the remote socket has + * already been closed. 
+ * + * As SIGPIPE is a directed signal sent only to the thread that + * performed the action, we can emulate this behavior by momentarily + * resetting the action for SIGPIPE to SIG_IGN, performing the socket + * call, and resetting the action back to its previous value. + */ + if (nosigpipe) { + newact.sa_handler = SIG_IGN; + newact.sa_flags = 0; + (void) sigemptyset(&newact.sa_mask); + + if (sigaction(SIGPIPE, &newact, &oact) < 0) + lx_err_fatal(gettext( + "%s: could not ignore SIGPIPE to emulate " + "LX_MSG_NOSIGNAL"), "recvmsg()"); + } + + r = _so_recvmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); + + if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) + lx_err_fatal( + gettext("%s: could not reset SIGPIPE handler to " + "emulate LX_MSG_NOSIGNAL"), "recvmsg()"); + + if (r >= 0 && msg.msg_control != NULL) { + /* + * If there are control messages bundled in this message, + * we need to convert them from Linux to Solaris. + */ + if ((err = convert_cmsgs(SOL_TO_LX, &msg, "recvmsg()")) != 0) + return (-err); + + if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) + return (-errno); + } + + /* + * A handful of the values in the msghdr are set by the recvmsg() + * call, so copy their values back to the caller. Rather than iterate, + * just copy the whole structure back. + */ + if (uucopy(&msg, msgp, sizeof (msg)) != 0) + return (-errno); + + return ((r < 0) ? -errno : r); +} + +int +lx_socketcall(uintptr_t p1, uintptr_t p2) +{ + int subcmd = (int)p1 - 1; /* subcommands start at 1 - not 0 */ + ulong_t args[6]; + int r; + + if (subcmd < 0 || subcmd >= LX_RECVMSG) + return (-EINVAL); + + /* + * Copy the arguments to the subcommand in from the app's address + * space, returning EFAULT if we get a bogus pointer. 
+ */ + if (uucopy((void *)p2, args, + sockfns[subcmd].s_nargs * sizeof (ulong_t))) + return (-errno); + + r = (sockfns[subcmd].s_fn)(args); + + return (r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/stat.c b/usr/src/lib/brand/lx/lx_brand/common/stat.c new file mode 100644 index 0000000000..d89a5b3c35 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/stat.c @@ -0,0 +1,531 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * when a stat() is done for a non-device file, the devt returned + * via the stat is the devt of the device backing the filesystem which + * contains the file the stat was performed on. these devts are currently + * untranslated. if this turns out to cause problems in the future then + * we might want to add more devt translators to convert sd and cmdk + * devts into linux devts that normally represent disks. + * + * XXX this may not be the best place to have the devt translation code. + * devt translation will also be needed for /proc fs support, which will + * probably be done in the kernel. 
we may need to move this code into + * the kernel and add a brand syscall to do the translation for us. this + * will need to be worked out before putback. + */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <strings.h> +#include <unistd.h> +#include <libintl.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/lx_types.h> +#include <sys/lx_stat.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> +#include <sys/lx_ptm.h> +#include <sys/lx_audio.h> +#include <sys/modctl.h> + +/* define _KERNEL to get the devt manipulation macros */ +#define _KERNEL +#include <sys/sysmacros.h> +#undef _KERNEL + + +#define LX_PTS_MAJOR_MIN 136 +#define LX_PTS_MAJOR_MAX 143 +#define LX_PTS_MAX \ + ((LX_PTS_MAJOR_MAX - LX_PTS_MAJOR_MIN + 1) * LX_MINORMASK) + +#define LX_PTM_MAJOR 5 +#define LX_PTM_MINOR 2 + +/* values for dt_type */ +#define DTT_INVALID 0 +#define DTT_LIST 1 +#define DTT_CUSTOM 2 + +/* convience macros for access the dt_minor union */ +#define dt_list dt_minor.dtm_list +#define dt_custom dt_minor.dtm_custom + +/* + * structure used to define devt translators + */ +typedef struct minor_translator { + char *mt_path; /* solaris minor node path */ + minor_t mt_minor; /* solaris minor node number */ + int mt_lx_major; /* linux major node number */ + int mt_lx_minor; /* linux minor node number */ +} minor_translator_t; + +typedef struct devt_translator { + char *dt_driver; /* solaris driver name */ + major_t dt_major; /* solaris driver number */ + + /* dt_type dictates how we intrepret dt_minor */ + int dt_type; + union { + uintptr_t dtm_foo; /* required to compile */ + minor_translator_t *dtm_list; + int (*dtm_custom)(dev_t, lx_dev_t *, int); + } dt_minor; +} devt_translator_t; + + +/* + * forward declerations + */ +static devt_translator_t devt_translators[]; + +/* + * called to initialize the devt translation subsystem + */ +int +lx_stat_init() +{ + minor_translator_t *mt; + struct stat st; + major_t major; 
+ char *driver; + int i, j, ret; + + for (i = 0; devt_translators[i].dt_driver != NULL; i++) { + + assert(devt_translators[i].dt_type != DTT_INVALID); + + /* figure out the major numbers for our devt translators */ + driver = devt_translators[i].dt_driver; + ret = modctl(MODGETMAJBIND, + driver, strlen(driver) + 1, &major); + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_stat_init(): modctl(MODGETMAJBIND, ", + driver, strerror(errno)); + lx_err(gettext("%s: %s translator disabled for: %s\n"), + "lx_stat_init()", "devt", driver); + devt_translators[i].dt_major = (major_t)-1; + continue; + } + + /* save the major node value */ + devt_translators[i].dt_major = major; + + /* if this translator doesn't use a list mapping we're done. */ + if (devt_translators[i].dt_type != DTT_LIST) + continue; + + /* for each device listed, lookup the minor node number */ + mt = devt_translators[i].dt_list; + for (j = 0; mt[j].mt_path != NULL; j++) { + + /* stat the device */ + ret = stat(mt[j].mt_path, &st); + if (ret != 0) { + lx_err(gettext("%s%s) failed: %s\n"), + "lx_stat_init(): stat(", + mt[j].mt_path, strerror(errno)); + lx_err(gettext( + "%s: %s translator disabled for: %s\n"), + "lx_stat_init()", "devt", + mt[j].mt_path); + st.st_rdev = NODEV; + } else { + /* make sure the major node matches */ + assert(getmajor(st.st_rdev) == major); + assert(mt[j].mt_minor < LX_MINORMASK); + } + + /* save the minor node value */ + mt[j].mt_minor = getminor(st.st_rdev); + } + } + return (0); +} + +static int +/*ARGSUSED*/ +pts_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + minor_t min = getminor(dev); + int lx_maj; + int lx_min; + + /* + * linux has a really small minor number name space (8 bits). + * so if pts devices are limited to one major number you could + * only have 256 of them. linux addresses this issue by using + * multiple major numbers for pts devices. 
+ */ + if (min >= LX_PTS_MAX) + return (EOVERFLOW); + + lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MINORMASK); + lx_min = min % LX_MINORMASK; + + *jdev = LX_MAKEDEVICE(lx_maj, lx_min); + return (0); +} + + +static int +/*ARGSUSED*/ +ptm_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + *jdev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR); + return (0); +} + +static int +audio_devt_translator(dev_t dev, lx_dev_t *jdev, int fd) +{ + int s_minor, l_minor; + + if (fd == -1) { + s_minor = getminor(dev); + } else { + /* + * this is a cloning device so we have to ask the driver + * what kind of minor node this is + */ + if (ioctl(fd, LXA_IOC_GETMINORNUM, &s_minor) < 0) + return (-EINVAL); + } + + switch (s_minor) { + case LXA_MINORNUM_DSP: + l_minor = 3; + break; + case LXA_MINORNUM_MIXER: + l_minor = 0; + break; + default: + return (-EINVAL); + } + + *jdev = LX_MAKEDEVICE(14, l_minor); + return (0); +} + +static void +s2l_dev_report(dev_t dev, lx_dev_t jdev) +{ + major_t maj; + minor_t min; + int lx_maj, lx_min; + + if (lx_debug_enabled == 0) + return; + + maj = getmajor(dev); + min = getminor(dev); + + lx_maj = LX_GETMAJOR(jdev); + lx_min = LX_GETMINOR(jdev); + + lx_debug("\ttranslated devt [%d, %d] -> [%d, %d]", + maj, min, lx_maj, lx_min); +} + +static int +s2l_devt(dev_t dev, lx_dev_t *jdev, int fd) +{ + minor_translator_t *mt; + int i, j, err; + major_t maj = getmajor(dev); + minor_t min = getminor(dev); + + /* look for a devt translator for this major number */ + for (i = 0; devt_translators[i].dt_driver != NULL; i++) { + if (devt_translators[i].dt_major == maj) + break; + } + if (devt_translators[i].dt_driver != NULL) { + + /* try to translate the solaris devt to a linux devt */ + switch (devt_translators[i].dt_type) { + case DTT_LIST: + mt = devt_translators[i].dt_list; + for (j = 0; mt[j].mt_path != NULL; j++) { + if (mt[j].mt_minor == min) { + assert(mt[j].mt_minor < LX_MINORMASK); + + /* found a translation */ + *jdev = LX_MAKEDEVICE( + mt[j].mt_lx_major, + 
mt[j].mt_lx_minor); + s2l_dev_report(dev, *jdev); + return (0); + } + } + break; + + case DTT_CUSTOM: + err = devt_translators[i].dt_custom(dev, jdev, fd); + if (err == 0) + s2l_dev_report(dev, *jdev); + return (err); + break; + } + } + + /* we don't have a translator for this device */ + *jdev = LX_MAKEDEVICE(maj, min); + return (0); +} + +static int +stat_convert(uintptr_t lx_statp, struct stat *s, int fd) +{ + struct lx_stat buf; + lx_dev_t st_dev, st_rdev; + int err; + + if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0) + return (err); + if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0) + return (err); + + if ((st_dev > USHRT_MAX) || (st_rdev > USHRT_MAX) || + (s->st_nlink > USHRT_MAX) || (s->st_size > ULONG_MAX)) + return (-EOVERFLOW); + + /* Linux seems to report a 0 st_size for all block devices */ + if ((s->st_mode & S_IFMT) == S_IFBLK) + s->st_size = 0; + + bzero(&buf, sizeof (buf)); + buf.st_dev = st_dev; + buf.st_rdev = st_rdev; + buf.st_ino = s->st_ino; + buf.st_mode = s->st_mode; + buf.st_nlink = s->st_nlink; + buf.st_uid = LX_UID32_TO_UID16(s->st_uid); + buf.st_gid = LX_GID32_TO_GID16(s->st_gid); + buf.st_size = s->st_size; + buf.st_blksize = s->st_blksize; + buf.st_blocks = s->st_blocks; + buf.st_atime.ts_sec = s->st_atim.tv_sec; + buf.st_atime.ts_nsec = s->st_atim.tv_nsec; + buf.st_ctime.ts_sec = s->st_ctim.tv_sec; + buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec; + buf.st_mtime.ts_sec = s->st_mtim.tv_sec; + buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec; + + if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0) + return (-errno); + + return (0); +} + +static int +stat64_convert(uintptr_t lx_statp, struct stat64 *s, int fd) +{ + struct lx_stat64 buf; + lx_dev_t st_dev, st_rdev; + int err; + + if ((err = s2l_devt(s->st_dev, &st_dev, fd)) != 0) + return (err); + if ((err = s2l_devt(s->st_rdev, &st_rdev, fd)) != 0) + return (err); + + /* Linux seems to report a 0 st_size for all block devices */ + if ((s->st_mode & S_IFMT) == S_IFBLK) + s->st_size = 0; + + 
bzero(&buf, sizeof (buf)); + buf.st_dev = st_dev; + buf.st_rdev = st_rdev; + buf.st_small_ino = (lx_ino_t)(s->st_ino & UINT_MAX); + buf.st_ino = (lx_ino64_t)s->st_ino; + buf.st_mode = s->st_mode; + buf.st_nlink = s->st_nlink; + buf.st_uid = s->st_uid; + buf.st_gid = s->st_gid; + buf.st_size = s->st_size; + buf.st_blksize = s->st_blksize; + buf.st_blocks = s->st_blocks; + buf.st_atime.ts_sec = s->st_atim.tv_sec; + buf.st_atime.ts_nsec = s->st_atim.tv_nsec; + buf.st_ctime.ts_sec = s->st_ctim.tv_sec; + buf.st_ctime.ts_nsec = s->st_ctim.tv_nsec; + buf.st_mtime.ts_sec = s->st_mtim.tv_sec; + buf.st_mtime.ts_nsec = s->st_mtim.tv_nsec; + + if (uucopy(&buf, (void *)lx_statp, sizeof (buf)) != 0) + return (-errno); + + return (0); +} + +int +lx_stat(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat sbuf; + + lx_debug("\tstat(%s, ...)", path); + if (stat(path, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, -1)); +} + + +int +lx_fstat(uintptr_t p1, uintptr_t p2) +{ + int fd = (int)p1; + struct stat sbuf; + char *path, path_buf[MAXPATHLEN]; + + if (lx_debug_enabled != 0) { + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tfstat(%d - %s, ...)", fd, path); + } + if (fstat(fd, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, fd)); +} + + +int +lx_lstat(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat sbuf; + + lx_debug("\tlstat(%s, ...)", path); + if (lstat(path, &sbuf)) + return (-errno); + + return (stat_convert(p2, &sbuf, -1)); +} + +int +lx_stat64(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat64 sbuf; + + lx_debug("\tstat64(%s, ...)", path); + if (stat64(path, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, -1)); +} + + +int +lx_fstat64(uintptr_t p1, uintptr_t p2) +{ + int fd = (int)p1; + struct stat64 sbuf; + char *path, path_buf[MAXPATHLEN]; + + if (lx_debug_enabled != 0) { + path = lx_fd_to_path(fd, 
path_buf, sizeof (path_buf)); + if (path == NULL) + path = "?"; + + lx_debug("\tfstat64(%d - %s, ...)", fd, path); + } + if (fstat64(fd, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, fd)); +} + + +int +lx_lstat64(uintptr_t p1, uintptr_t p2) +{ + char *path = (char *)p1; + struct stat64 sbuf; + + lx_debug("\tlstat64(%s, ...)", path); + if (lstat64(path, &sbuf)) + return (-errno); + + return (stat64_convert(p2, &sbuf, -1)); +} + +/* + * devt translator definitions + */ +#define MINOR_TRANSLATOR(path, lx_major, lx_minor) \ + { path, 0, lx_major, lx_minor } + +#define MINOR_TRANSLATOR_END \ + { NULL, 0, 0, 0 } + +#define DEVT_TRANSLATOR(drv, flags, i) \ + { drv, 0, flags, (uintptr_t)i } + +/* + * translators for devts + */ +static minor_translator_t mtranslator_mm[] = { + MINOR_TRANSLATOR("/dev/null", 1, 3), + MINOR_TRANSLATOR("/dev/zero", 1, 5), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_random[] = { + MINOR_TRANSLATOR("/dev/random", 1, 8), + MINOR_TRANSLATOR("/dev/urandom", 1, 9), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_sy[] = { + MINOR_TRANSLATOR("/dev/tty", 5, 0), + MINOR_TRANSLATOR_END +}; +static minor_translator_t mtranslator_zcons[] = { + MINOR_TRANSLATOR("/dev/console", 5, 1), + MINOR_TRANSLATOR_END +}; +static devt_translator_t devt_translators[] = { + DEVT_TRANSLATOR("mm", DTT_LIST, &mtranslator_mm), + DEVT_TRANSLATOR("random", DTT_LIST, &mtranslator_random), + DEVT_TRANSLATOR("sy", DTT_LIST, &mtranslator_sy), + DEVT_TRANSLATOR("zcons", DTT_LIST, &mtranslator_zcons), + DEVT_TRANSLATOR(LX_AUDIO_DRV, DTT_CUSTOM, audio_devt_translator), + DEVT_TRANSLATOR(LX_PTM_DRV, DTT_CUSTOM, ptm_devt_translator), + DEVT_TRANSLATOR("pts", DTT_CUSTOM, pts_devt_translator), + DEVT_TRANSLATOR(NULL, 0, 0) +}; diff --git a/usr/src/lib/brand/lx/lx_brand/common/statfs.c b/usr/src/lib/brand/lx/lx_brand/common/statfs.c new file mode 100644 index 0000000000..03e2563d70 --- /dev/null +++ 
b/usr/src/lib/brand/lx/lx_brand/common/statfs.c @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <string.h> +#include <strings.h> +#include <sys/types.h> +#include <sys/statvfs.h> +#include <sys/param.h> + +#include <sys/lx_debug.h> +#include <sys/lx_misc.h> +#include <sys/lx_statfs.h> + +/* + * these defines must exist before we include regexp.h, see regexp(5) + */ +#define RE_SIZE 1024 +#define INIT char *sp = instring; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) return (NULL); +#define ERROR(c) return ((char *)c); + +/* + * for regular expressions we're using regexp(5). + * + * we'd really prefer to use some other nicer regular expressions + * interfaces (like regcmp(3c), regcomp(3c), or re_comp(3c)) but we + * can't because all these other interfaces rely on the ability + * to allocate memory via libc malloc()/calloc() calls, which + * we can't really do here. 
+ * + * we could optionally use regexpr(3gen) but we don't since the + * interfaces there are incredibly similar to the regexp(5) + * interfaces we're already using and we'd have the added + * requirement of linking against libgen. + * + * another option that was considered is fnmatch(3c) but the + * limited pattern expansion capability of this interface would + * force us to include more patterns to check against. + */ +#include <regexp.h> + +static struct lx_ftype_path { + char *lfp_path; + char lfp_re[RE_SIZE]; + int lfp_magic; + char *lfp_magic_str; +} ftype_path_list[] = { + { "^/dev/pts$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { "^/dev/pts/$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { "^/dev/pts/[0-9][0-9]*$", "", + LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" }, + { NULL, "", + 0, NULL } +}; + +/* + * For lack of linux equivalents, we present lofs and zfs as being ufs. + */ +static struct lx_ftype_name { + const char *lfn_name; + int lfn_magic; + char *lfn_magic_str; +} ftype_name_list[] = { + { "hsfs", LX_ISOFS_SUPER_MAGIC, "LX_ISOFS_SUPER_MAGIC" }, + { "nfs", LX_NFS_SUPER_MAGIC, "LX_NFS_SUPER_MAGIC" }, + { "pcfs", LX_MSDOS_SUPER_MAGIC, "LX_MSDOS_SUPER_MAGIC" }, + { "lx_proc", LX_PROC_SUPER_MAGIC, "LX_PROC_SUPER_MAGIC" }, + { "ufs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { "lofs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { "zfs", LX_UFS_MAGIC, "LX_UFS_MAGIC" }, + { NULL, 0, NULL } +}; + +int +lx_statfs_init() +{ + int i; + char *rv; + + for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) { + rv = compile( + ftype_path_list[i].lfp_path, + ftype_path_list[i].lfp_re, + ftype_path_list[i].lfp_re + RE_SIZE, '\0'); + if (rv == NULL) + continue; + + lx_debug("lx_statfs_init compile(\"%s\") failed", + ftype_path_list[i].lfp_path); + return (1); + } + return (0); +} + +static int +stol_type(const char *path, const char *name) +{ + int i; + lx_debug("\tstol_type(\"%s\", \"%s\")\n", path == NULL ? "NULL" : path, + name == NULL ? 
"NULL" : name); + + if (path != NULL) { + char userpath[MAXPATHLEN]; + + if (uucopystr(path, userpath, MAXPATHLEN) == -1) + return (-errno); + + for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) { + if (step(userpath, ftype_path_list[i].lfp_re) == 0) + continue; + + /* got a match on the fs path */ + lx_debug("\ttranslated f_type to 0x%x - %s", + ftype_path_list[i].lfp_magic, + ftype_path_list[i].lfp_magic_str); + return (ftype_path_list[i].lfp_magic); + } + } + + assert(name != NULL); + for (i = 0; ftype_name_list[i].lfn_name != NULL; i++) { + if (strcmp(name, ftype_name_list[i].lfn_name) == 0) { + + /* got a match on the fs name */ + lx_debug("\ttranslated f_type to 0x%x - %s", + ftype_name_list[i].lfn_magic, + ftype_name_list[i].lfn_magic_str); + return (ftype_name_list[i].lfn_magic); + } + } + + /* we don't know what the fs type is so just set it to 0 */ + return (0); +} + +/* + * The Linux statfs() is similar to the Solaris statvfs() call, the main + * difference being the use of a numeric 'f_type' identifier instead of the + * 'f_basetype' string. 
+ */ +static int +stol_statfs(const char *path, struct lx_statfs *l, struct statvfs *s) +{ + int type; + + if ((type = stol_type(path, s->f_basetype)) < 0) + return (type); + + l->f_type = type; + l->f_bsize = s->f_bsize; + l->f_blocks = s->f_blocks; + l->f_bfree = s->f_bfree; + l->f_bavail = s->f_bavail; + l->f_files = s->f_files; + l->f_ffree = s->f_ffree; + l->f_fsid = s->f_fsid; + l->f_namelen = s->f_namemax; + l->f_frsize = s->f_frsize; + bzero(&(l->f_spare), sizeof (l->f_spare)); + + return (0); +} + +static int +stol_statfs64(const char *path, struct lx_statfs64 *l, struct statvfs64 *s) +{ + int type; + + if ((type = stol_type(path, s->f_basetype)) < 0) + return (type); + + l->f_type = type; + l->f_bsize = s->f_bsize; + l->f_blocks = s->f_blocks; + l->f_bfree = s->f_bfree; + l->f_bavail = s->f_bavail; + l->f_files = s->f_files; + l->f_ffree = s->f_ffree; + l->f_fsid = s->f_fsid; + l->f_namelen = s->f_namemax; + l->f_frsize = s->f_frsize; + bzero(&(l->f_spare), sizeof (l->f_spare)); + + return (0); +} + +int +lx_statfs(uintptr_t p1, uintptr_t p2) +{ + const char *path = (const char *)p1; + struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2; + struct statvfs vfs; + int err; + + lx_debug("\tfstatvfs(%s, 0x%p)", path, fs); + if (statvfs(path, &vfs) != 0) + return (-errno); + + if ((err = stol_statfs(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0) + return (-errno); + + return (0); +} + +int +lx_fstatfs(uintptr_t p1, uintptr_t p2) +{ + struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2; + struct statvfs vfs; + char *path, path_buf[MAXPATHLEN]; + int fd = (int)p1; + int err; + + lx_debug("\tfstatvfs(%d, 0x%p)", fd, fs); + if (fstatvfs(fd, &vfs) != 0) + return (-errno); + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + + if ((err = stol_statfs(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0) + return (-errno); + + return (0); +} + +/* ARGSUSED */ 
+int +lx_statfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + const char *path = (const char *)p1; + struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3; + struct statvfs64 vfs; + int err; + + lx_debug("\tstatvfs64(%s, %d, 0x%p)", path, p2, fs); + if (statvfs64(path, &vfs) != 0) + return (-errno); + + if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0) + return (-errno); + + return (0); +} + +/* ARGSUSED */ +int +lx_fstatfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3; + struct statvfs64 vfs; + char *path, path_buf[MAXPATHLEN]; + int fd = (int)p1; + int err; + + lx_debug("\tfstatvfs64(%d, %d, 0x%p)", fd, p2, fs); + if (fstatvfs64(fd, &vfs) != 0) + return (-errno); + + path = lx_fd_to_path(fd, path_buf, sizeof (path_buf)); + + if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0) + return (err); + + if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0) + return (-errno); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c new file mode 100644 index 0000000000..03fcce4ef0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c @@ -0,0 +1,138 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <alloca.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> +#include <sys/lx_debug.h> + +/* + * sysctl() implementation. The full set of possible values is incredibly + * large; we only implement the bare minimum here, namely basic kernel + * information. + * + * For the moment, we also print out debugging messages if the application + * attempts to write or access any other values, so we can tell if we are not + * supporting something we should be. + */ + +struct lx_sysctl_args { + int *name; + int nlen; + void *oldval; + size_t *oldlenp; + void *newval; + size_t newlen; +}; + +#define LX_CTL_KERN 1 + +#define LX_KERN_OSTYPE 1 +#define LX_KERN_OSRELEASE 2 +#define LX_KERN_OSREV 3 +#define LX_KERN_VERSION 4 + +int +lx_sysctl(uintptr_t raw) +{ + struct lx_sysctl_args args; + int name[2]; + size_t oldlen; + char *namebuf; + + if (uucopy((void *)raw, &args, sizeof (args)) < 0) + return (-EFAULT); + + /* + * We only allow [ CTL_KERN, KERN_* ] pairs, so reject anything that + * doesn't have exactly two values starting with LX_CTL_KERN. + */ + if (args.nlen != 2) + return (-ENOTDIR); + + if (uucopy(args.name, name, sizeof (name)) < 0) + return (-EFAULT); + + if (name[0] != LX_CTL_KERN) { + lx_debug("sysctl: read of [%d, %d] unsupported", + name[0], name[1]); + return (-ENOTDIR); + } + + /* We don't support writing new sysctl values. 
*/ + if ((args.newval != NULL) || (args.newlen != 0)) { + lx_debug("sysctl: write of [%d, %d] unsupported", + name[0], name[1]); + return (-EPERM); + } + + /* + * It may seem silly, but passing in a NULL oldval pointer and not + * writing any new values is a perfectly legal thing to do and should + * succeed. + */ + if (args.oldval == NULL) + return (0); + + /* + * Likewise, Linux specifies that setting a non-NULL oldval but a + * zero *oldlenp should result in an errno of EFAULT. + */ + if ((uucopy(args.oldlenp, &oldlen, sizeof (oldlen)) < 0) || + (oldlen == 0)) + return (-EFAULT); + + namebuf = SAFE_ALLOCA(oldlen); + if (namebuf == NULL) + return (-ENOMEM); + + switch (name[1]) { + case LX_KERN_OSTYPE: + (void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen); + break; + case LX_KERN_OSRELEASE: + (void) strlcpy(namebuf, LX_UNAME_RELEASE, oldlen); + break; + case LX_KERN_VERSION: + (void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen); + break; + default: + lx_debug("sysctl: read of [CTL_KERN, %d] unsupported", name[1]); + return (-ENOTDIR); + } + + oldlen = strlen(namebuf); + + if ((uucopy(namebuf, args.oldval, oldlen) < 0) || + (uucopy(&oldlen, args.oldlenp, sizeof (oldlen)) < 0)) + return (-EFAULT); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c new file mode 100644 index 0000000000..9a3d76e6ca --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c @@ -0,0 +1,893 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <strings.h> +#include <rctl.h> +#include <alloca.h> +#include <values.h> +#include <sys/syscall.h> +#include <sys/msg.h> +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/lx_debug.h> +#include <sys/lx_types.h> +#include <sys/lx_sysv_ipc.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> + +#define SLOT_SEM 0 +#define SLOT_SHM 1 +#define SLOT_MSG 2 + +static int +get_rctlval(rctlblk_t *rblk, char *name) +{ + rctl_qty_t r; + + if (getrctl(name, NULL, rblk, RCTL_FIRST) == -1) + return (-errno); + + r = rctlblk_get_value(rblk); + if (r > MAXINT) + return (-EOVERFLOW); + return (r); +} + +/* + * Given a slot number and a maximum number of ids to extract from the + * kernel, return the msgid in the provided slot. + */ +static int +slot_to_id(int type, int slot) +{ + uint_t nids, max; + int *idbuf = NULL; + int r; + + nids = 0; + for (;;) { + switch (type) { + case SLOT_SEM: + r = semids(idbuf, nids, &max); + break; + case SLOT_SHM: + r = shmids(idbuf, nids, &max); + break; + case SLOT_MSG: + r = msgids(idbuf, nids, &max); + break; + } + + if (r < 0) + return (-errno); + + if (max == 0) + return (-EINVAL); + + if (max <= nids) + return (idbuf[slot]); + + nids = max; + if ((idbuf = (int *)SAFE_ALLOCA(sizeof (int) * nids)) == NULL) + return (-ENOMEM); + } +} + +/* + * Semaphore operations. 
+ */ +static int +lx_semget(key_t key, int nsems, int semflg) +{ + int sol_flag; + int r; + + lx_debug("\nsemget(%d, %d, %d)\n", key, nsems, semflg); + sol_flag = semflg & S_IAMB; + if (semflg & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (semflg & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = semget(key, nsems, sol_flag); + return ((r < 0) ? -errno : r); +} + +static int +lx_semop(int semid, struct sembuf *sops, size_t nsops) +{ + int r; + + lx_debug("\nsemop(%d, 0x%p, %u)\n", semid, sops, nsops); + if (nsops == 0) + return (-EINVAL); + + r = semop(semid, sops, nsops); + return ((r < 0) ? -errno : r); +} + +static int +lx_semctl_ipcset(int semid, void *buf) +{ + struct lx_semid_ds semds; + struct semid_ds sol_semds; + int r; + + if (uucopy(buf, &semds, sizeof (semds))) + return (-errno); + + bzero(&sol_semds, sizeof (sol_semds)); + sol_semds.sem_perm.uid = semds.sem_perm.uid; + sol_semds.sem_perm.gid = semds.sem_perm.gid; + sol_semds.sem_perm.mode = semds.sem_perm.mode; + + r = semctl(semid, 0, IPC_SET, &sol_semds); + return ((r < 0) ? 
-errno : r); +} + +static int +lx_semctl_ipcstat(int semid, void *buf) +{ + struct lx_semid_ds semds; + struct semid_ds sol_semds; + + if (semctl(semid, 0, IPC_STAT, &sol_semds) != 0) + return (-errno); + + bzero(&semds, sizeof (semds)); + semds.sem_perm.key = sol_semds.sem_perm.key; + semds.sem_perm.seq = sol_semds.sem_perm.seq; + semds.sem_perm.uid = sol_semds.sem_perm.uid; + semds.sem_perm.gid = sol_semds.sem_perm.gid; + semds.sem_perm.cuid = sol_semds.sem_perm.cuid; + semds.sem_perm.cgid = sol_semds.sem_perm.cgid; + + /* Linux only uses the bottom 9 bits */ + semds.sem_perm.mode = sol_semds.sem_perm.mode & S_IAMB; + semds.sem_otime = sol_semds.sem_otime; + semds.sem_ctime = sol_semds.sem_ctime; + semds.sem_nsems = sol_semds.sem_nsems; + + if (uucopy(&semds, buf, sizeof (semds))) + return (-errno); + + return (0); +} + +static int +lx_semctl_ipcinfo(void *buf) +{ + struct lx_seminfo i; + rctlblk_t *rblk; + int rblksz; + uint_t nids; + int idbuf; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&i, sizeof (i)); + if ((i.semmni = get_rctlval(rblk, "project.max-sem-ids")) < 0) + return (i.semmni); + if ((i.semmsl = get_rctlval(rblk, "process.max-sem-nsems")) < 0) + return (i.semmsl); + if ((i.semopm = get_rctlval(rblk, "process.max-sem-ops")) < 0) + return (i.semopm); + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. 
+ */ + i.semmap = i.semmni; + i.semmns = i.semmni * i.semmsl; + i.semmnu = INT_MAX; + i.semume = INT_MAX; + i.semvmx = LX_SEMVMX; + if (semids(&idbuf, 0, &nids) < 0) + return (-errno); + i.semusz = nids; + i.semaem = INT_MAX; + + if (uucopy(&i, buf, sizeof (i)) != 0) + return (-errno); + + return (nids); +} + +static int +lx_semctl_semstat(int slot, void *buf) +{ + int r, semid; + + semid = slot_to_id(SLOT_SEM, slot); + if (semid < 0) + return (semid); + + r = lx_semctl_ipcstat(semid, buf); + return (r < 0 ? r : semid); +} + +/* + * For the SETALL operation, we have to examine each of the semaphore + * values to be sure it is legal. + */ +static int +lx_semctl_setall(int semid, union lx_semun *arg) +{ + struct semid_ds semds; + ushort_t *vals; + int i, sz, r; + + /* + * Find out how many semaphores are involved, reserve enough + * memory for an internal copy of the array, and then copy it in + * from the process. + */ + if (semctl(semid, 0, IPC_STAT, &semds) != 0) + return (-errno); + sz = semds.sem_nsems * sizeof (ushort_t); + if ((vals = SAFE_ALLOCA(sz)) == NULL) + return (-ENOMEM); + if (uucopy(arg->sems, vals, sz)) + return (-errno); + + /* Validate each of the values. */ + for (i = 0; i < semds.sem_nsems; i++) + if (vals[i] > LX_SEMVMX) + return (-ERANGE); + + r = semctl(semid, 0, SETALL, arg->sems); + + return ((r < 0) ? -errno : r); +} + +static int +lx_semctl(int semid, int semnum, int cmd, void *ptr) +{ + union lx_semun arg; + int rval; + int opt = cmd & ~LX_IPC_64; + int use_errno = 0; + + lx_debug("\nsemctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, ptr); + + /* + * The final arg to semctl() is a pointer to a union. For some + * commands we can hand that pointer directly to the kernel. For + * these commands, we need to extract an argument from the union + * before calling into the kernel. 
+ */ + if (opt == LX_SETVAL || opt == LX_SETALL || opt == LX_GETALL || + opt == LX_IPC_SET || opt == LX_IPC_STAT || opt == LX_SEM_STAT || + opt == LX_IPC_INFO || opt == LX_SEM_INFO) + if (uucopy(ptr, &arg, sizeof (arg))) + return (-errno); + + switch (opt) { + case LX_GETVAL: + use_errno = 1; + rval = semctl(semid, semnum, GETVAL, NULL); + break; + case LX_SETVAL: + if (arg.val > LX_SEMVMX) { + rval = -ERANGE; + break; + } + use_errno = 1; + rval = semctl(semid, semnum, SETVAL, arg.val); + break; + case LX_GETPID: + use_errno = 1; + rval = semctl(semid, semnum, GETPID, NULL); + break; + case LX_GETNCNT: + use_errno = 1; + rval = semctl(semid, semnum, GETNCNT, NULL); + break; + case LX_GETZCNT: + use_errno = 1; + rval = semctl(semid, semnum, GETZCNT, NULL); + break; + case LX_GETALL: + use_errno = 1; + rval = semctl(semid, semnum, GETALL, arg.sems); + break; + case LX_SETALL: + rval = lx_semctl_setall(semid, &arg); + break; + case LX_IPC_RMID: + use_errno = 1; + rval = semctl(semid, semnum, IPC_RMID, NULL); + break; + case LX_SEM_STAT: + rval = lx_semctl_semstat(semid, arg.semds); + break; + case LX_IPC_STAT: + rval = lx_semctl_ipcstat(semid, arg.semds); + break; + + case LX_IPC_SET: + rval = lx_semctl_ipcset(semid, arg.semds); + break; + + case LX_IPC_INFO: + case LX_SEM_INFO: + rval = lx_semctl_ipcinfo(arg.semds); + break; + + default: + rval = -EINVAL; + } + + if (use_errno == 1 && rval < 0) + return (-errno); + return (rval); +} + +/* + * msg operations. + */ +static int +lx_msgget(key_t key, int flag) +{ + int sol_flag; + int r; + + lx_debug("\tlx_msgget(%d, %d)\n", key, flag); + + sol_flag = flag & S_IAMB; + if (flag & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (flag & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = msgget(key, sol_flag); + return (r < 0 ? 
-errno : r); +} + +static int +lx_msgsnd(int id, struct msgbuf *buf, size_t sz, int flag) +{ + int sol_flag = 0; + int r; + + lx_debug("\tlx_msgsnd(%d, 0x%p, %d, %d)\n", id, buf, sz, flag); + + if (flag & LX_IPC_NOWAIT) + sol_flag |= IPC_NOWAIT; + + if (((ssize_t)sz < 0) || (sz > LX_MSGMAX)) + return (-EINVAL); + + r = msgsnd(id, buf, sz, sol_flag); + return (r < 0 ? -errno : r); +} + +static int +lx_msgrcv(int id, struct msgbuf *buf, size_t sz, int flag) +{ + int sol_flag = 0; + struct { + void *msgp; + long msgtype; + } args; + int r; + + /* + * Rather than passing 5 args into ipc(2) directly, glibc passes 4 + * args and uses the buf argument to point to a structure + * containing two args: a pointer to the message and the message + * type. + */ + if (uucopy(buf, &args, sizeof (args))) + return (-errno); + + lx_debug("\tlx_msgrcv(%d, 0x%p, %d, %d, %ld, %d)\n", + id, args.msgp, sz, args.msgtype, flag); + + /* + * Check for a negative sz parameter. + * + * Unlike msgsnd(2), the Linux man page does not specify that + * msgrcv(2) should return EINVAL if (sz > MSGMAX), only if (sz < 0). + */ + if ((ssize_t)sz < 0) + return (-EINVAL); + + if (flag & LX_MSG_NOERROR) + sol_flag |= MSG_NOERROR; + if (flag & LX_IPC_NOWAIT) + sol_flag |= IPC_NOWAIT; + + r = msgrcv(id, args.msgp, sz, args.msgtype, sol_flag); + return (r < 0 ? 
-errno : r); +} + +static int +lx_msgctl_ipcstat(int msgid, void *buf) +{ + struct lx_msqid_ds msgids; + struct msqid_ds sol_msgids; + int r; + + r = msgctl(msgid, IPC_STAT, &sol_msgids); + if (r < 0) + return (-errno); + + bzero(&msgids, sizeof (msgids)); + msgids.msg_perm.key = sol_msgids.msg_perm.key; + msgids.msg_perm.seq = sol_msgids.msg_perm.seq; + msgids.msg_perm.uid = sol_msgids.msg_perm.uid; + msgids.msg_perm.gid = sol_msgids.msg_perm.gid; + msgids.msg_perm.cuid = sol_msgids.msg_perm.cuid; + msgids.msg_perm.cgid = sol_msgids.msg_perm.cgid; + + /* Linux only uses the bottom 9 bits */ + msgids.msg_perm.mode = sol_msgids.msg_perm.mode & S_IAMB; + + msgids.msg_stime = sol_msgids.msg_stime; + msgids.msg_rtime = sol_msgids.msg_rtime; + msgids.msg_ctime = sol_msgids.msg_ctime; + msgids.msg_qbytes = sol_msgids.msg_qbytes; + msgids.msg_cbytes = sol_msgids.msg_cbytes; + msgids.msg_qnum = sol_msgids.msg_qnum; + msgids.msg_lspid = sol_msgids.msg_lspid; + msgids.msg_lrpid = sol_msgids.msg_lrpid; + + if (uucopy(&msgids, buf, sizeof (msgids))) + return (-errno); + + return (0); +} + +static int +lx_msgctl_ipcinfo(int cmd, void *buf) +{ + struct lx_msginfo m; + rctlblk_t *rblk; + int idbuf, rblksz, msgseg, maxmsgs; + uint_t nids; + int rval; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&m, sizeof (m)); + if ((m.msgmni = get_rctlval(rblk, "project.max-msg-ids")) < 0) + return (m.msgmni); + if ((m.msgmnb = get_rctlval(rblk, "process.max-msg-qbytes")) < 0) + return (m.msgmnb); + + if (cmd == LX_IPC_INFO) { + if ((maxmsgs = get_rctlval(rblk, + "process.max-msg-messages")) < 0) + return (maxmsgs); + m.msgtql = maxmsgs * m.msgmni; + m.msgmap = m.msgmnb; + m.msgpool = m.msgmax * m.msgmnb; + rval = 0; + } else { + if (msgids(&idbuf, 0, &nids) < 0) + return (-errno); + m.msgpool = nids; + + /* + * For these fields, we can't even come up with a good fake + * approximation. 
These are listed as 'obsolete' or + * 'unused' in the header files, so hopefully nobody is + * relying on them anyway. + */ + m.msgtql = INT_MAX; + m.msgmap = INT_MAX; + rval = nids; + } + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. + */ + m.msgmax = m.msgmnb; + m.msgssz = 16; + msgseg = (m.msgpool * 1024) / m.msgssz; + m.msgseg = (msgseg > 0xffff) ? 0xffff : msgseg; + + if (uucopy(&m, buf, sizeof (m))) + return (-errno); + return (rval); +} + +static int +lx_msgctl_ipcset(int msgid, void *buf) +{ + struct lx_msqid_ds msgids; + struct msqid_ds sol_msgids; + int r; + + if (uucopy(buf, &msgids, sizeof (msgids))) + return (-errno); + + bzero(&sol_msgids, sizeof (sol_msgids)); + sol_msgids.msg_perm.uid = LX_UID16_TO_UID32(msgids.msg_perm.uid); + sol_msgids.msg_perm.gid = LX_UID16_TO_UID32(msgids.msg_perm.gid); + + /* Linux only uses the bottom 9 bits */ + sol_msgids.msg_perm.mode = msgids.msg_perm.mode & S_IAMB; + sol_msgids.msg_qbytes = msgids.msg_qbytes; + + r = msgctl(msgid, IPC_SET, &sol_msgids); + return (r < 0 ? -errno : r); +} + +static int +lx_msgctl_msgstat(int slot, void *buf) +{ + int r, msgid; + + lx_debug("msgstat(%d, 0x%p)\n", slot, buf); + + msgid = slot_to_id(SLOT_MSG, slot); + + if (msgid < 0) + return (msgid); + + r = lx_msgctl_ipcstat(msgid, buf); + return (r < 0 ? 
r : msgid); +} + +/* + * Split off the various msgctl's here + */ +static int +lx_msgctl(int msgid, int cmd, void *buf) +{ + int r; + + lx_debug("\tlx_msgctl(%d, %d, 0x%p)\n", msgid, cmd, buf); + switch (cmd & ~LX_IPC_64) { + case LX_IPC_RMID: + r = msgctl(msgid, IPC_RMID, NULL); + if (r < 0) + r = -errno; + break; + case LX_IPC_SET: + r = lx_msgctl_ipcset(msgid, buf); + break; + case LX_IPC_STAT: + r = lx_msgctl_ipcstat(msgid, buf); + break; + case LX_MSG_STAT: + r = lx_msgctl_msgstat(msgid, buf); + break; + + case LX_IPC_INFO: + case LX_MSG_INFO: + r = lx_msgctl_ipcinfo(cmd, buf); + break; + + default: + r = -EINVAL; + break; + } + + return (r); +} + +/* + * shm-related operations. + */ +static int +lx_shmget(key_t key, size_t size, int flag) +{ + int sol_flag; + int r; + + lx_debug("\tlx_shmget(%d, %d, %d)\n", key, size, flag); + + sol_flag = flag & S_IAMB; + if (flag & LX_IPC_CREAT) + sol_flag |= IPC_CREAT; + if (flag & LX_IPC_EXCL) + sol_flag |= IPC_EXCL; + + r = shmget(key, size, sol_flag); + return (r < 0 ? 
-errno : r); +} + +/* + * Attach shared memory segment shmid at addr on behalf of a Linux caller. + * LX_SHM_RDONLY and LX_SHM_RND are translated to their SHM_* equivalents; + * LX_SHM_REMAP is rejected with -EINVAL when addr is NULL and is otherwise + * not passed on to shmat(). On success the attach address is copied out + * to *rval with uucopy() and 0 is returned; on failure the negated errno + * is returned. + */ +static int +lx_shmat(int shmid, void *addr, int flags, void **rval) +{ + int sol_flags; + void *ptr; + + lx_debug("\tlx_shmat(%d, 0x%p, %d, 0x%p)\n", shmid, addr, flags, rval); + + sol_flags = 0; + if (flags & LX_SHM_RDONLY) + sol_flags |= SHM_RDONLY; + if (flags & LX_SHM_RND) + sol_flags |= SHM_RND; + if ((flags & LX_SHM_REMAP) && (addr == NULL)) + return (-EINVAL); + + ptr = shmat(shmid, addr, sol_flags); + if (ptr == (void *)-1) + return (-errno); + if (uucopy(&ptr, rval, sizeof (ptr)) != 0) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcinfo(void *buf) +{ + struct lx_shminfo s; + rctlblk_t *rblk; + int rblksz; + + rblksz = rctlblk_size(); + if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL) + return (-ENOMEM); + + bzero(&s, sizeof (s)); + if ((s.shmmni = get_rctlval(rblk, "project.max-shm-ids")) < 0) + return (s.shmmni); + if ((s.shmmax = get_rctlval(rblk, "project.max-shm-memory")) < 0) + return (s.shmmax); + + /* + * We don't have corresponding rctls for these fields. The values + * are taken from the formulas used to derive the defaults listed + * in the Linux header file. We're lying, but trying to be + * coherent about it. 
+ */ + s.shmmin = 1; + s.shmseg = INT_MAX; + s.shmall = s.shmmax / getpagesize(); + + if (uucopy(&s, buf, sizeof (s))) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcstat(int shmid, void *buf) +{ + struct lx_shmid_ds shmds; + struct shmid_ds sol_shmds; + + if (shmctl(shmid, IPC_STAT, &sol_shmds) != 0) + return (-errno); + + bzero(&shmds, sizeof (shmds)); + shmds.shm_perm.key = sol_shmds.shm_perm.key; + shmds.shm_perm.seq = sol_shmds.shm_perm.seq; + shmds.shm_perm.uid = sol_shmds.shm_perm.uid; + shmds.shm_perm.gid = sol_shmds.shm_perm.gid; + shmds.shm_perm.cuid = sol_shmds.shm_perm.cuid; + shmds.shm_perm.cgid = sol_shmds.shm_perm.cgid; + shmds.shm_perm.mode = sol_shmds.shm_perm.mode & S_IAMB; + if (sol_shmds.shm_lkcnt > 0) + shmds.shm_perm.mode |= LX_SHM_LOCKED; + shmds.shm_segsz = sol_shmds.shm_segsz; + shmds.shm_atime = sol_shmds.shm_atime; + shmds.shm_dtime = sol_shmds.shm_dtime; + shmds.shm_ctime = sol_shmds.shm_ctime; + shmds.shm_cpid = sol_shmds.shm_cpid; + shmds.shm_lpid = sol_shmds.shm_lpid; + shmds.shm_nattch = (ushort_t)sol_shmds.shm_nattch; + + if (uucopy(&shmds, buf, sizeof (shmds))) + return (-errno); + + return (0); +} + +static int +lx_shmctl_ipcset(int shmid, void *buf) +{ + struct lx_shmid_ds shmds; + struct shmid_ds sol_shmds; + int r; + + if (uucopy(buf, &shmds, sizeof (shmds))) + return (-errno); + + bzero(&sol_shmds, sizeof (sol_shmds)); + sol_shmds.shm_perm.uid = shmds.shm_perm.uid; + sol_shmds.shm_perm.gid = shmds.shm_perm.gid; + sol_shmds.shm_perm.mode = shmds.shm_perm.mode & S_IAMB; + + r = shmctl(shmid, IPC_SET, &sol_shmds); + return (r < 0 ? -errno : r); +} + +/* + * Build and return a shm_info structure. We only return the bare + * essentials required by ipcs. The rest of the info is not readily + * available. 
+ */ +static int +lx_shmctl_shminfo(void *buf) +{ + struct lx_shm_info shminfo; + uint_t nids; + int idbuf; + + bzero(&shminfo, sizeof (shminfo)); + + if (shmids(&idbuf, 0, &nids) < 0) + return (-errno); + + shminfo.used_ids = nids; + if (uucopy(&shminfo, buf, sizeof (shminfo)) != 0) + return (-errno); + + return (nids); +} + +static int +lx_shmctl_shmstat(int slot, void *buf) +{ + int r, shmid; + + lx_debug("shmctl_shmstat(%d, 0x%p)\n", slot, buf); + shmid = slot_to_id(SLOT_SHM, slot); + if (shmid < 0) + return (shmid); + + r = lx_shmctl_ipcstat(shmid, buf); + return (r < 0 ? r : shmid); +} + +static int +lx_shmctl(int shmid, int cmd, void *buf) +{ + int r; + int use_errno = 0; + + lx_debug("\tlx_shmctl(%d, %d, 0x%p)\n", shmid, cmd, buf); + switch (cmd & ~LX_IPC_64) { + case LX_IPC_RMID: + use_errno = 1; + r = shmctl(shmid, IPC_RMID, NULL); + break; + + case LX_IPC_SET: + r = lx_shmctl_ipcset(shmid, buf); + break; + + case LX_IPC_STAT: + r = lx_shmctl_ipcstat(shmid, buf); + break; + + case LX_IPC_INFO: + r = lx_shmctl_ipcinfo(buf); + break; + + case LX_SHM_LOCK: + use_errno = 1; + r = shmctl(shmid, SHM_LOCK, NULL); + break; + + case LX_SHM_UNLOCK: + use_errno = 1; + r = shmctl(shmid, SHM_UNLOCK, NULL); + break; + + case LX_SHM_INFO: + r = lx_shmctl_shminfo(buf); + break; + + case LX_SHM_STAT: + r = lx_shmctl_shmstat(shmid, buf); + break; + default: + r = -EINVAL; + break; + } + + if (use_errno == 1 && r < 0) + return (-errno); + + return (r); +} + +/* + * Under Linux, glibc funnels all of the sysv IPC operations into this + * single ipc(2) system call. We need to blow that up and filter the + * remnants into the proper Solaris system calls. 
+ */ +int +lx_ipc(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, + uintptr_t arg4) +{ + int r; + void *bufptr = (void *)arg4; + + lx_debug("lx_ipc(%d, %d, %d, %d, 0x%p, %d)\n", + cmd, arg1, arg2, arg3, bufptr, arg4); + + switch (cmd) { + case LX_MSGGET: + r = lx_msgget((key_t)arg1, (int)arg2); + break; + case LX_MSGSND: + r = lx_msgsnd((int)arg1, bufptr, (size_t)arg2, (int)arg3); + break; + case LX_MSGRCV: + r = lx_msgrcv((int)arg1, bufptr, (size_t)arg2, (int)arg3); + break; + case LX_MSGCTL: + r = lx_msgctl((int)arg1, (int)arg2, bufptr); + break; + case LX_SEMCTL: + r = lx_semctl((int)arg1, (size_t)arg2, (int)arg3, bufptr); + break; + case LX_SEMOP: + /* + * 'struct sembuf' is the same on Linux and Solaris, so we + * pass bufptr straight through. + */ + r = lx_semop((int)arg1, bufptr, (size_t)arg2); + break; + case LX_SEMGET: + r = lx_semget((int)arg1, (size_t)arg2, (int)arg3); + break; + case LX_SHMAT: + r = lx_shmat((int)arg1, bufptr, (size_t)arg2, (void *)arg3); + break; + case LX_SHMDT: + r = shmdt(bufptr); + if (r < 0) + r = -errno; + break; + case LX_SHMGET: + r = lx_shmget((int)arg1, (size_t)arg2, (int)arg3); + break; + case LX_SHMCTL: + r = lx_shmctl((int)arg1, (int)arg2, bufptr); + break; + + default: + r = -EINVAL; + } + + return (r); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/time.c b/usr/src/lib/brand/lx/lx_brand/common/time.c new file mode 100644 index 0000000000..16b883ec0a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/time.c @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <time.h> +#include <string.h> +#include <strings.h> +#include <sys/times.h> +#include <sys/lx_syscall.h> +#include <sys/lx_misc.h> + +/* + * time() - This cannot be passthrough because on Linux a bad buffer will + * set errno to EFAULT, and on Solaris the failure mode is documented + * as "undefined." + * + * (At present, Solaris' time(2) will segmentation fault, as the call + * is simply a libc wrapper atop the time() syscall that will + * dereference the passed pointer if it is non-zero.) + */ +int +lx_time(uintptr_t p1) +{ + time_t ret = time((time_t *)0); + + if ((ret == (time_t)-1) || + ((p1 != 0) && (uucopy(&ret, (time_t *)p1, sizeof (ret)) != 0))) + return (-errno); + + return (ret); +} + +/* + * times() - The Linux implementation avoids writing to NULL, while Solaris + * returns EFAULT. + */ +int +lx_times(uintptr_t p1) +{ + clock_t ret; + struct tms buf, *tp = (struct tms *)p1; + + ret = times(&buf); + + if ((ret == -1) || + ((tp != NULL) && uucopy((void *)&buf, tp, sizeof (buf)) != 0)) + return (-errno); + + return ((ret == -1) ? -errno : ret); +} + +/* + * setitimer() - the Linux implementation can handle tv_usec values greater + * than 1,000,000 where Solaris would return EINVAL. 
+ * + * There's still an issue here where Linux can handle a + * tv_sec value greater than 100,000,000 but Solaris cannot, + * but that would also mean setting an interval timer to fire + * over _three years_ in the future so it's unlikely anything + * other than Linux test suites will trip over it. + */ +int +lx_setitimer(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + struct itimerval itv; + struct itimerval *itp = (struct itimerval *)p2; + + if (itp != NULL) { + if (uucopy(itp, &itv, sizeof (itv)) != 0) + return (-errno); + + /* + * Adjust any tv_usec fields >= 1,000,000 by adding any whole + * seconds so indicated to tv_sec and leaving tv_usec as the + * remainder. + */ + if (itv.it_interval.tv_usec >= MICROSEC) { + itv.it_interval.tv_sec += + itv.it_interval.tv_usec / MICROSEC; + + itv.it_interval.tv_usec %= MICROSEC; + } + if (itv.it_value.tv_usec >= MICROSEC) { + itv.it_value.tv_sec += + itv.it_value.tv_usec / MICROSEC; + + itv.it_value.tv_usec %= MICROSEC; + } + + itp = &itv; + } + + return ((setitimer((int)p1, itp, (struct itimerval *)p3) != 0) ? + -errno : 0); +} + +/* + * NOTE: The Linux man pages state this structure is obsolete and is + * unsupported, so it is declared here for sizing purposes only. + */ +struct lx_timezone { + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +/* + * lx_gettimeofday() and lx_settimeofday() are implemented here rather than + * as pass-through calls to Solaris' libc due to the need to return EFAULT + * for a bad buffer rather than die with a segmentation fault. 
+ */ +int +lx_gettimeofday(uintptr_t p1, uintptr_t p2) +{ + struct timeval tv; + struct lx_timezone tz; + + bzero(&tz, sizeof (tz)); + (void) gettimeofday(&tv, NULL); + + if ((p1 != NULL) && + (uucopy(&tv, (struct timeval *)p1, sizeof (tv)) < 0)) + return (-errno); + + /* + * The Linux man page states use of the second parameter is obsolete, + * but gettimeofday(2) should still return EFAULT if it is set + * to a bad non-NULL pointer (sigh...) + */ + if ((p2 != NULL) && + (uucopy(&tz, (struct lx_timezone *)p2, sizeof (tz)) < 0)) + return (-errno); + + return (0); +} + +int +lx_settimeofday(uintptr_t p1, uintptr_t p2) +{ + struct timeval tv; + struct lx_timezone tz; + + if ((p1 != NULL) && + (uucopy((struct timeval *)p1, &tv, sizeof (tv)) < 0)) + return (-errno); + + /* + * The Linux man page states use of the second parameter is obsolete, + * but settimeofday(2) should still return EFAULT if it is set + * to a bad non-NULL pointer (sigh...) + */ + if ((p2 != NULL) && + (uucopy((struct lx_timezone *)p2, &tz, sizeof (tz)) < 0)) + return (-errno); + + if ((p1 != NULL) && (settimeofday(&tv, NULL) < 0)) + return (-errno); + + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/truncate.c b/usr/src/lib/brand/lx/lx_brand/common/truncate.c new file mode 100644 index 0000000000..81fdfbac35 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/truncate.c @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <errno.h> +#include <unistd.h> +#include <sys/lx_types.h> +#include <sys/lx_misc.h> + +/* + * On Solaris, truncate() and ftruncate() are implemented in libc, so these are + * layered on those interfaces. + */ + +int +lx_truncate(uintptr_t path, uintptr_t length) +{ + return (truncate((const char *)path, (off_t)length) == 0 ? 0 : -errno); +} + +int +lx_ftruncate(uintptr_t fd, uintptr_t length) +{ + return (ftruncate((int)fd, (off_t)length) == 0 ? 0 : -errno); +} + +int +lx_truncate64(uintptr_t path, uintptr_t length_lo, uintptr_t length_hi) +{ + return (truncate64((const char *)path, + LX_32TO64(length_lo, length_hi)) == 0 ? 0 : -errno); +} + +int +lx_ftruncate64(uintptr_t fd, uintptr_t length_lo, uintptr_t length_hi) +{ + return (ftruncate64((int)fd, + LX_32TO64(length_lo, length_hi)) == 0 ? 0 : -errno); +} diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c new file mode 100644 index 0000000000..0895e76bc0 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -0,0 +1,244 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * wait() family of functions. + * + * The first minor difference between the Linux and Solaris family of wait() + * calls is that the values for WNOHANG and WUNTRACED are different. Solaris + * also has additional options (WCONTINUED, WNOWAIT) which should be flagged as + * invalid on Linux. Thankfully, the exit status values are identical between + * the two implementations. + * + * Things get very different and very complicated when we introduce the Linux + * threading model. Under linux, both threads and child processes are + * represented as processes. However, the behavior of wait() with respect to + * each child varies according to the flags given to clone() + * + * SIGCHLD The SIGCHLD signal should be sent on termination + * CLONE_THREAD The child shares the same thread group as the parent + * CLONE_DETACHED The parent receives no notification when the child exits + * + * The following flags control the Linux behavior w.r.t. 
the above attributes: + * + * __WALL Wait on all children, regardless of type + * __WCLONE Wait only on non-SIGCHLD children + * __WNOTHREAD Don't wait on children of other threads in this group + * + * The following chart shows whether wait() returns when the child exits: + * + * default __WCLONE __WALL + * no SIGCHLD - X X + * SIGCHLD X - X + * + * The following chart shows whether wait() returns when the grandchild exits: + * + * default __WNOTHREAD + * no CLONE_THREAD - - + * CLONE_THREAD X - + * + * The CLONE_DETACHED flag is universal - when the child exits, no state is + * stored and wait() has no effect. + * + * XXX Support the above combination of options, or some reasonable subset that + * covers at least fork() and pthread_create(). + */ + +#include <errno.h> +#include <sys/wait.h> +#include <sys/lx_types.h> +#include <sys/lx_signal.h> +#include <sys/lx_misc.h> +#include <sys/lx_syscall.h> +#include <sys/times.h> +#include <strings.h> +#include <unistd.h> +#include <assert.h> + +/* + * Convert between Linux options and Solaris options, returning -1 if any + * invalid flags are found. 
+ */ +#define LX_WNOHANG 0x1 +#define LX_WUNTRACED 0x2 + +#define LX_WNOTHREAD 0x20000000 +#define LX_WALL 0x40000000 +#define LX_WCLONE 0x80000000 + +static int +ltos_options(uintptr_t options) +{ + int newoptions = 0; + + if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WNOTHREAD | + LX_WALL | LX_WCLONE)) != 0) { + return (-1); + } + /* XXX implement LX_WNOTHREAD, LX_WALL, LX_WCLONE */ + + if (options & LX_WNOHANG) + newoptions |= WNOHANG; + if (options & LX_WUNTRACED) + newoptions |= WUNTRACED; + + return (newoptions); +} + +static int +lx_wstat(int code, int status) +{ + int stat = 0; + + switch (code) { + case CLD_EXITED: + stat = status << 8; + break; + case CLD_DUMPED: + stat = stol_signo[status]; + assert(stat != -1); + stat |= WCOREFLG; + break; + case CLD_KILLED: + stat = stol_signo[status]; + assert(stat != -1); + break; + case CLD_TRAPPED: + case CLD_STOPPED: + stat = stol_signo[status]; + assert(stat != -1); + stat <<= 8; + stat |= WSTOPFLG; + break; + case CLD_CONTINUED: + stat = WCONTFLG; + break; + } + + return (stat); +} + +int +lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) +{ + siginfo_t info = { 0 }; + struct rusage ru = { 0 }; + idtype_t idtype; + id_t id; + int options, status = 0; + pid_t pid = (pid_t)p1; + int rval; + + if ((options = ltos_options(p3)) == -1) + return (-EINVAL); + + /* + * While not listed as a valid return code, Linux's wait4(2) does, + * in fact, get an EFAULT if either the status pointer or rusage + * pointer is invalid. Since a failed waitpid should leave child + * process in a state where a future wait4(2) will succeed, we + * check them by copying out the values their buffers originally + * contained. (We need to do this as a failed system call should + * never affect the contents of a passed buffer.) + * + * This will fail if the buffers in question are write-only. 
+ */ + if ((void *)p2 != NULL && + ((uucopy((void *)p2, &status, sizeof (status)) != 0) || + (uucopy(&status, (void *)p2, sizeof (status)) != 0))) + return (-EFAULT); + + if ((void *)p4 != NULL) { + if ((uucopy((void *)p4, &ru, sizeof (ru)) != 0) || + (uucopy(&ru, (void *)p4, sizeof (ru)) != 0)) + return (-EFAULT); + } + + /* + * Map the wait4() pid argument onto a waitid() idtype/id pair: + * < -1 is the process group -pid, -1 is any child, 0 is the caller's + * own process group, and > 0 is that specific pid. + */ + if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = getpgrp(); + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED | WTRAPPED; + +again: + /* + * It's possible that we return EINVAL here if the idtype is P_PID or + * P_PGID and id is out of bounds for a valid pid or pgid, but Linux + * expects to see ECHILD. No good way occurs to handle this so we'll + * punt for now. + */ + if (waitid(idtype, id, &info, options) < 0) + return (-errno); + + /* + * If the WNOHANG flag was specified and no child was found return 0. + */ + if ((options & WNOHANG) && info.si_pid == 0) + return (0); + + /* + * It's possible that we may have a spurious return for one of the + * child processes created by the ptrace subsystem. If that's the case, + * we simply try again. + */ + if (lx_ptrace_wait(&info) == -1) + goto again; + + status = lx_wstat(info.si_code, info.si_status); + + /* + * Unfortunately if this attempt to copy out either the status or the + * rusage fails, the process will be in an inconsistent state as + * subsequent calls to wait for the same child will fail where they + * should succeed on a Linux system. This, however, is rather + * unlikely since we tested the validity of both above. 
+ */ + if (p2 != NULL && uucopy(&status, (void *)p2, sizeof (status)) != 0) + return (-EFAULT); + + if (p4 != NULL && (rval = lx_getrusage(LX_RUSAGE_CHILDREN, p4)) != 0) + return (rval); + + return (info.si_pid); +} + +int +lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3) +{ + return (lx_wait4(p1, p2, p3, NULL)); +} diff --git a/usr/src/lib/brand/lx/lx_brand/i386/Makefile b/usr/src/lib/brand/lx/lx_brand/i386/Makefile new file mode 100644 index 0000000000..fd38a056f6 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/Makefile @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/lx/i386/Makefile + +ISASRCDIR=. 
+ +ASFLAGS += -P -D_ASM + +include ../Makefile.com + +POFILE= lx_brand.po +MSGFILES= $(CSRCS) + +ASSYMDEP_OBJS = lx_handler.o + +$(ASSYMDEP_OBJS:%=pics/%): assym.h + +OFFSETS = ../$(MACH)/offsets.in + +assym.h: $(OFFSETS) + $(OFFSETS_CREATE) $(CTF_FLAGS) < $(OFFSETS) > $@ + +CLOBBERFILES += assym.h + +install: all $(ROOTLIBS) + +$(POFILE): $(MSGFILES) + $(BUILDPO.msgfiles) + +_msg: $(MSGDOMAINPOFILE) + +include $(SRC)/Makefile.msg.targ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s new file mode 100644 index 0000000000..c457c1c209 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> + +#if defined(lint) + +void +_start(void) +{ +} + +#else /* lint */ + + /* + * C language startup routine for the lx brand shared library. + */ + ENTRY_NP(_start) + pushl $0 / Build a stack frame. 
retpc = NULL + pushl $0 / fp = NULL + movl %esp, %ebp / first stack frame + + /* + * Calculate the location of the envp array by adding the size of + * the argv array to the start of the argv array. + */ + movl 8(%ebp), %eax / argc in %eax + leal 16(%ebp,%eax,4), %edx / envp in %edx + andl $-16, %esp + pushl %edx / push envp + leal 12(%ebp),%edx / compute &argv[0] + pushl %edx / push argv + pushl %eax / push argc + call lx_init + /* + * lx_init will never return. + */ + SET_SIZE(_start) + +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s new file mode 100644 index 0000000000..c9b6a5a761 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s @@ -0,0 +1,374 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sys/regset.h> +#include <sys/segments.h> +#include <sys/syscall.h> +#include <sys/lx_brand.h> + +#if defined(_ASM) +#include <sys/lx_signal.h> +#include <sys/lx_syscall.h> +#endif /* _ASM */ + +#include "assym.h" + +#define PIC_SETUP(r) \ + call 9f; \ +9: popl r; \ + addl $_GLOBAL_OFFSET_TABLE_ + [. - 9b], r + +/* + * Each JMP must occupy 16 bytes + */ +#define JMP \ + pushl $_CONST(. - lx_handler_table); \ + jmp lx_handler_notrace; \ + .align 16; + +#define JMP4 JMP; JMP; JMP; JMP +#define JMP16 JMP4; JMP4; JMP4; JMP4 +#define JMP64 JMP16; JMP16; JMP16; JMP16 +#define JMP256 JMP64; JMP64; JMP64; JMP64 + +/* + * Alternate jump table which turns on lx_traceflag before emulating the + * system call. + */ +#define TJMP \ + pushl $_CONST(. - lx_handler_trace_table); \ + jmp lx_handler; \ + .align 16; + +#define TJMP4 TJMP; TJMP; TJMP; TJMP +#define TJMP16 TJMP4; TJMP4; TJMP4; TJMP4 +#define TJMP64 TJMP16; TJMP16; TJMP16; TJMP16 +#define TJMP256 TJMP64; TJMP64; TJMP64; TJMP64 + + +#if defined(lint) + +#include <sys/types.h> +#include <sys/regset.h> +#include <sys/signal.h> + +void +lx_handler_table(void) +{} + +void +lx_handler(void) +{} + +/* ARGSUSED */ +void +lx_setup_clone(uintptr_t gs, void *retaddr, void *stk) +{} + +/* ARGSUSED */ +void +lx_sigdeliver(int sig, siginfo_t *sip, void *p, size_t stacksz, + void (*stack_frame_builder)(void), void (*lx_sighandler)(void), + uintptr_t gs) +{} + +/* ARGSUSED */ +void +lx_sigacthandler(int sig, siginfo_t *s, void *p) +{} + +void +lx_sigreturn_tramp(void) +{} + +void +lx_rt_sigreturn_tramp(void) +{} + +/* ARGSUSED */ +void +lx_sigreturn_tolibc(uintptr_t sp) +{} + +#else /* lint */ + + /* + * On entry to this table, %eax will hold the return address. The + * location where we enter the table is a function of the system + * call number. The table needs the same alignment as the individual + * entries. 
+ */ + .align 16 + ENTRY_NP(lx_handler_trace_table) + TJMP256 + TJMP16 + SET_SIZE(lx_handler_trace_table) + + .align 16 + ENTRY_NP(lx_handler_table) + JMP256 + JMP16 + SET_SIZE(lx_handler_table) + + ENTRY_NP(lx_handler) + pushl %esi + PIC_SETUP(%esi) + movl lx_traceflag@GOT(%esi), %esi + movl $1, (%esi) + popl %esi + + ALTENTRY(lx_handler_notrace) + /* + * %ebp isn't always going to be a frame pointer on Linux, but when + * it is, saving it here lets us have a coherent stack backtrace. + */ + pushl %ebp + + /* + * Fill in a lx_regs_t structure on the stack. + */ + subl $SIZEOF_LX_REGS_T, %esp + + /* + * Save %ebp and then fill it with what would be its usual value as + * the frame pointer. The value we save for %esp needs to be the + * stack pointer at the time of the interrupt so we need to skip the + * saved %ebp and (what will be) the return address. + */ + movl %ebp, LXR_EBP(%esp) + movl %esp, %ebp + addl $_CONST(SIZEOF_LX_REGS_T), %ebp + movl %ebp, LXR_ESP(%esp) + addl $_CONST(_MUL(CPTRSIZE, 2)), LXR_ESP(%esp) + + movl $0, LXR_GS(%esp) + movw %gs, LXR_GS(%esp) + movl %edi, LXR_EDI(%esp) + movl %esi, LXR_ESI(%esp) + movl %ebx, LXR_EBX(%esp) + movl %edx, LXR_EDX(%esp) + movl %ecx, LXR_ECX(%esp) + + movl %eax, LXR_EIP(%esp) + + /* + * We enter this routine part-way into the table above, and make a + * call to lx_handler -- this both redirects control and pushes the + * address where we entered the table onto the stack. That position + * indicates the system call number while %eax holds what would + * normally be the return address. We replace the value on the stack + * with the return address and use the value to compute the system + * call number. + * + * sysnum = (trampoline_address - lx_handler_table) / 16 + */ + xchgl CPTRSIZE(%ebp), %eax + shrl $4, %eax + movl %eax, LXR_EAX(%esp) + + /* + * Switch to the Solaris libc's %gs. 
+ */ + movl $LWPGS_SEL, %ebx + movw %ebx, %gs + + /* + * Call lx_emulate() whose only argument is a pointer to the + * lx_regs_t structure we've placed on the stack. + */ + pushl %esp + call lx_emulate + + /* + * We use this global symbol to identify this return site when + * walking the stack backtrace. It needs to remain immediately + * after the call to lx_emulate(). + */ + ALTENTRY(lx_emulate_done) + + /* + * Clean up the argument to lx_emulate(). + */ + addl $4, %esp + + /* + * Restore the saved register state; we get %ebp, %esp and %eip from + * the ordinary locations rather than the saved state. + */ + movl LXR_EDI(%esp), %edi + movl LXR_ESI(%esp), %esi + movl LXR_EBX(%esp), %ebx + movl LXR_EDX(%esp), %edx + movl LXR_ECX(%esp), %ecx + movl LXR_EAX(%esp), %eax + movw LXR_GS(%esp), %gs + + addl $SIZEOF_LX_REGS_T, %esp + + movl %ebp, %esp + popl %ebp + ret + SET_SIZE(lx_handler) + + ENTRY_NP(lx_swap_gs) + push %eax /* save the current eax value */ + movl 0xc(%esp),%eax /* 2nd param is a pointer */ + movw %gs,(%eax) /* use the pointer to save current gs */ + movl 0x8(%esp),%eax /* first parameter is the new gs value */ + movw %eax, %gs /* switch to the new gs value */ + pop %eax /* restore eax */ + ret + SET_SIZE(lx_swap_gs) + + ENTRY_NP(lx_setup_clone) + xorl %ebp, %ebp /* terminating stack */ + popl %edx /* eat the start_clone() return address */ + popl %gs /* Switch back to the Linux libc's %gs */ + popl %edx /* Linux clone() return address */ + popl %esp /* New stack pointer */ + xorl %eax, %eax /* child returns 0 to SYS_clone() */ + jmp *%edx /* return to Linux app. */ + SET_SIZE(lx_setup_clone) + + /* + * lx_sigdeliver(sig, siginfo_t *, ucontext_t *, stack_size, + * stack_build_routine, signal_handler, glibc_gs) + * + * This routine allocates stack space for the Linux signal stack, + * calls a routine to build the signal stack and then calls the Linux + * signal handler. 
This is written in assembly because of the way + * we need to directly manipulate the stack and pass the resulting + * stack to the signal handler with the Linux signal stack on top. + * + * When the Linux signal handler is called, the stack will look + * like this: + * + * ================================================= + * | Linux signal frame built by lx_stackbuilder() | + * ================================================= + * | LX_SIGRT_MAGIC | + * ================================================= + * | pointer to Solaris ucontext_t | + * ================================================= + * | %ebp | + * ================================================= + */ + ENTRY_NP(lx_sigdeliver) + pushl %ebp + movl %esp, %ebp + movl 16(%ebp), %edx /* pointer to Solaris ucontext_t */ + pushl %edx /* save ucontext_t ptr for later */ + pushl $LX_SIGRT_MAGIC /* marker value for lx_(rt)_sigreturn */ + + subl 20(%ebp), %esp /* create stack buffer */ + pushl %esp /* push stack pointer */ + pushl %edx /* push pointer to ucontext_t */ + pushl 12(%ebp) /* push pointer to siginfo_t */ + pushl 8(%ebp) /* push signal number */ + call *24(%ebp) /* lx_stackbuilder(sig, sip, ucp, sp) */ + add $16, %esp /* remove args from stack */ + movw 32(%ebp), %gs /* only low 16 bits are used */ + + /* + * NOTE(review): with the pushl %ebp / movl %esp, %ebp prologue above, + * the %ebp saved at entry is at 0(%ebp) and 4(%ebp) is the caller's + * return address, so the load below does not match its comment -- + * confirm which value is intended before relying on either. + */ + mov 4(%ebp),%eax /* fetch old %ebp from stack */ + mov 28(%ebp), %edx /* get address of Linux handler */ + mov %eax, %ebp /* restore old %ebp */ + jmp *%edx /* jmp to the Linux signal handler */ + SET_SIZE(lx_sigdeliver) + + /* + * Due to the nature of signals, we need to be able to force the %gs + * value to that used by Solaris before running any Solaris code. + * + * This routine does that, then calls a C routine that will save the + * %gs value at the time of the signal off into a thread-specific data + * structure. Finally, we trampoline to the libc code that would + * normally interpose itself before calling a signal handler. + * + * The libc routine that calls user signal handlers ends with a + * setcontext, so we would never return here even if we used a call + * rather than a jmp. 
+ * + * %esi is used for the PIC as it is guaranteed by the 386 ABI to + * survive the call to lx_sigsavegs. The downside is we must also + * preserve its value for our caller. + * + * Note that because lx_sigsavegs and libc_sigacthandler are externs, + * they need to be dereferenced via the GOT. + * + * IMPORTANT: Because libc apparently gets upset if extra data is + * left on its stack, this routine needs to be crafted + * in assembly so that the jmp to the libc interposer + * doesn't leave any cruft lying around. + */ + ENTRY_NP(lx_sigacthandler) + pushl %esi /* save %esi */ + pushl %gs /* push the Linux %gs */ + pushl $LWPGS_SEL + popl %gs /* install the Solaris %gs */ + + PIC_SETUP(%esi) + movl lx_sigsavegs@GOT(%esi), %eax + call *%eax /* save the Linux %gs */ + movl libc_sigacthandler@GOT(%esi), %eax + add $4, %esp /* clear Linux %gs from stack */ + popl %esi /* restore %esi */ + jmp *(%eax) /* jmp to libc's interposer */ + SET_SIZE(lx_sigacthandler) + + /* + * Trampoline code is called by the return at the end of a Linux + * signal handler to return control to the interrupted application + * via the lx_sigreturn() or lx_rt_sigreturn() syscalls. + * + * (lx_sigreturn() is called for legacy signal handling, and + * lx_rt_sigreturn() is called for "new"-style signals.) + * + * These two routines must consist of the EXACT code sequences below + * as gdb looks at the sequence of instructions a routine will return + * to determine whether it is in a signal handler or not. + */ + ENTRY_NP(lx_sigreturn_tramp) + popl %eax + movl $LX_SYS_sigreturn, %eax + int $0x80 + SET_SIZE(lx_sigreturn_tramp) + + ENTRY_NP(lx_rt_sigreturn_tramp) + movl $LX_SYS_rt_sigreturn, %eax + int $0x80 + SET_SIZE(lx_rt_sigreturn_tramp) + + /* + * Manipulate the stack in the way necessary for it to appear to libc + * that the signal handler it invoked via call_user_handler() is + * returning. 
+ */ + ENTRY_NP(lx_sigreturn_tolibc) + movl 4(%esp), %esp /* set %esp to passed value */ + popl %ebp /* restore proper %ebp */ + ret /* return to libc interposer */ + SET_SIZE(lx_sigreturn_tolibc) +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s new file mode 100644 index 0000000000..28ec39938e --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_runexe.s @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> + +#if defined(lint) + +/*ARGSUSED*/ +void +lx_runexe(void *argv, int32_t entry) +{ +} + +#else /* lint */ + + /* + * Set our stack pointer, clear the general registers, + * and jump to the brand linker's entry point. 
+ */ + ENTRY_NP(lx_runexe) + movl 4(%esp), %eax / %eax = &argv[0] + movl 8(%esp), %ebx / Brand linker's entry point in %ebx + subl $4, %eax / Top of stack - must point at argc + movl %eax, %esp / Set %esp to what linkers expect + + movl $0, %eax + movl $0, %ecx + movl $0, %edx + movl $0, %esi + movl $0, %edi + movl $0, %ebp + + jmp *%ebx / And away we go... + SET_SIZE(lx_runexe) + +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_brand/i386/offsets.in b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in new file mode 100644 index 0000000000..ac934ee76c --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/i386/offsets.in @@ -0,0 +1,40 @@ +\ +\ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +\ Use is subject to license terms. +\ +\ CDDL HEADER START +\ +\ The contents of this file are subject to the terms of the +\ Common Development and Distribution License (the "License"). +\ You may not use this file except in compliance with the License. +\ +\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +\ or http://www.opensolaris.org/os/licensing. +\ See the License for the specific language governing permissions +\ and limitations under the License. +\ +\ When distributing Covered Code, include this CDDL HEADER in each +\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+\ If applicable, add the following below this CDDL HEADER, with the +\ fields enclosed by brackets "[]" replaced with your own identifying +\ information: Portions Copyright [yyyy] [name of copyright owner] +\ +\ CDDL HEADER END +\ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/lx_brand.h> + +lx_regs_t SIZEOF_LX_REGS_T + lxr_gs + lxr_edi + lxr_esi + lxr_ebp + lxr_esp + lxr_ebx + lxr_edx + lxr_ecx + lxr_eax + lxr_eip + lxr_orig_eax diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h new file mode 100644 index 0000000000..80fb579665 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_DEBUG_H +#define _LX_DEBUG_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* initialize the debugging subsystem */ +extern void lx_debug_init(void); + +/* printf() style debug message functionality */ +extern void lx_debug(const char *, ...); + +/* set non-zero if the debugging subsystem is enabled */ +extern int lx_debug_enabled; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_DEBUG_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h new file mode 100644 index 0000000000..72fe0aa7ac --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_fcntl.h @@ -0,0 +1,110 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_FCNTL_H +#define _SYS_LX_FCNTL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Lx open/fcntl flags + */ +#define LX_O_RDONLY 00 +#define LX_O_WRONLY 01 +#define LX_O_RDWR 02 +#define LX_O_CREAT 0100 +#define LX_O_EXCL 0200 +#define LX_O_NOCTTY 0400 +#define LX_O_TRUNC 01000 +#define LX_O_APPEND 02000 +#define LX_O_NONBLOCK 04000 +#define LX_O_NDELAY LX_O_NONBLOCK +#define LX_O_SYNC 010000 +#define LX_O_FSYNC LX_O_SYNC +#define LX_O_ASYNC 020000 +#define LX_O_DIRECT 040000 +#define LX_O_LARGEFILE 0100000 +#define LX_O_DIRECTORY 0200000 +#define LX_O_NOFOLLOW 0400000 + +#define LX_F_DUPFD 0 +#define LX_F_GETFD 1 +#define LX_F_SETFD 2 +#define LX_F_GETFL 3 +#define LX_F_SETFL 4 +#define LX_F_GETLK 5 +#define LX_F_SETLK 6 +#define LX_F_SETLKW 7 +#define LX_F_SETOWN 8 +#define LX_F_GETOWN 9 +#define LX_F_SETSIG 10 +#define LX_F_GETSIG 11 + +#define LX_F_GETLK64 12 +#define LX_F_SETLK64 13 +#define LX_F_SETLKW64 14 + +#define LX_F_SETLEASE 1024 +#define LX_F_GETLEASE 1025 +#define LX_F_NOTIFY 1026 + +#define LX_F_RDLCK 0 +#define LX_F_WRLCK 1 +#define LX_F_UNLCK 2 + +/* + * Lx flock codes. 
+ */ +#define LX_NAME_MAX 255 +#define LX_LOCK_SH 1 /* shared */ +#define LX_LOCK_EX 2 /* exclusive */ +#define LX_LOCK_NB 4 /* non-blocking */ +#define LX_LOCK_UN 8 /* unlock */ + +struct lx_flock { + short l_type; + short l_whence; + long l_start; + long l_len; + int l_pid; +}; + +struct lx_flock64 { + short l_type; + short l_whence; + long long l_start; + long long l_len; + int l_pid; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_FCNTL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h new file mode 100644 index 0000000000..77e784808a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_ioctl.h @@ -0,0 +1,370 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_IOCTL_H +#define _SYS_LX_IOCTL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_ioctl_init(void); + +#define LX_NCC 8 +struct lx_termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[LX_NCC]; /* control characters */ +}; + +#define LX_NCCS 19 +struct lx_termios { + uint32_t c_iflag; /* input mode flags */ + uint32_t c_oflag; /* output mode flags */ + uint32_t c_cflag; /* control mode flags */ + uint32_t c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[LX_NCCS]; /* control characters */ +}; + +/* c_cc characters */ +#define LX_VINTR 0 +#define LX_VQUIT 1 +#define LX_VERASE 2 +#define LX_VKILL 3 +#define LX_VEOF 4 +#define LX_VTIME 5 +#define LX_VMIN 6 +#define LX_VSWTC 7 +#define LX_VSTART 8 +#define LX_VSTOP 9 +#define LX_VSUSP 10 +#define LX_VEOL 11 +#define LX_VREPRINT 12 +#define LX_VDISCARD 13 +#define LX_VWERASE 14 +#define LX_VLNEXT 15 +#define LX_VEOL2 16 + +/* + * Sound formats + */ +#define LX_AFMT_QUERY 0x00000000 +#define LX_AFMT_MU_LAW 0x00000001 +#define LX_AFMT_A_LAW 0x00000002 +#define LX_AFMT_IMA_ADPCM 0x00000004 +#define LX_AFMT_U8 0x00000008 +#define LX_AFMT_S16_LE 0x00000010 +#define LX_AFMT_S16_BE 0x00000020 +#define LX_AFMT_S8 0x00000040 +#define LX_AFMT_U16_LE 0x00000080 +#define LX_AFMT_U16_BE 0x00000100 +#define LX_AFMT_MPEG 0x00000200 +#define LX_AFMT_AC3 0x00000400 + +/* + * Supported ioctls + */ +#define LX_TCGETS 0x5401 +#define LX_TCSETS 0x5402 +#define LX_TCSETSW 0x5403 +#define LX_TCSETSF 0x5404 +#define LX_TCGETA 0x5405 +#define LX_TCSETA 0x5406 +#define LX_TCSETAW 0x5407 +#define LX_TCSETAF 0x5408 +#define LX_TCSBRK 0x5409 +#define LX_TCXONC 0x540a +#define LX_TCFLSH 0x540b +#define 
LX_TIOCEXCL 0x540c +#define LX_TIOCNXCL 0x540d +#define LX_TIOCSCTTY 0x540e +#define LX_TIOCGPGRP 0x540f +#define LX_TIOCSPGRP 0x5410 +#define LX_TIOCOUTQ 0x5411 +#define LX_TIOCSTI 0x5412 +#define LX_TIOCGWINSZ 0x5413 +#define LX_TIOCSWINSZ 0x5414 +#define LX_TIOCMGET 0x5415 +#define LX_TIOCMBIS 0x5416 +#define LX_TIOCMBIC 0x5417 +#define LX_TIOCMSET 0x5418 +#define LX_TIOCGSOFTCAR 0x5419 +#define LX_TIOCSSOFTCAR 0x541a +#define LX_FIONREAD 0x541b +#define LX_TIOCPKT 0x5420 +#define LX_FIONBIO 0x5421 +#define LX_TIOCNOTTY 0x5422 +#define LX_TIOCSETD 0x5423 +#define LX_TIOCGETD 0x5424 +#define LX_TCSBRKP 0x5425 +#define LX_TIOCGSID 0x5429 +#define LX_TIOCGPTN 0x80045430 +#define LX_TIOCSPTLCK 0x40045431 +#define LX_FIONCLEX 0x5450 +#define LX_FIOCLEX 0x5451 +#define LX_FIOASYNC 0x5452 +#define LX_FIOSETOWN 0x8901 +#define LX_SIOCSPGRP 0x8902 +#define LX_FIOGETOWN 0x8903 +#define LX_SIOCGPGRP 0x8904 +#define LX_SIOCATMARK 0x8905 +#define LX_SIOCGIFCONF 0x8912 +#define LX_SIOCGIFFLAGS 0x8913 +#define LX_SIOCSIFFLAGS 0x8914 +#define LX_SIOCGIFADDR 0x8915 +#define LX_SIOCSIFADDR 0x8916 +#define LX_SIOCGIFDSTADDR 0x8917 +#define LX_SIOCSIFDSTADDR 0x8918 +#define LX_SIOCGIFBRDADDR 0x8919 +#define LX_SIOCSIFBRDADDR 0x891a +#define LX_SIOCGIFNETMASK 0x891b +#define LX_SIOCSIFNETMASK 0x891c +#define LX_SIOCGIFMETRIC 0x891d +#define LX_SIOCSIFMETRIC 0x891e +#define LX_SIOCGIFMEM 0x891f +#define LX_SIOCSIFMEM 0x8920 +#define LX_SIOCGIFMTU 0x8921 +#define LX_SIOCSIFMTU 0x8922 +#define LX_SIOCSIFHWADDR 0x8924 +#define LX_SIOCGIFHWADDR 0x8927 + +/* + * /dev/dsp ioctls - supported + */ +#define LX_OSS_SNDCTL_DSP_RESET 0x5000 +#define LX_OSS_SNDCTL_DSP_SYNC 0x5001 +#define LX_OSS_SNDCTL_DSP_SPEED 0xc0045002 +#define LX_OSS_SNDCTL_DSP_STEREO 0xc0045003 +#define LX_OSS_SNDCTL_DSP_GETBLKSIZE 0xc0045004 +#define LX_OSS_SNDCTL_DSP_SETFMTS 0xc0045005 +#define LX_OSS_SNDCTL_DSP_CHANNELS 0xc0045006 +#define LX_OSS_SNDCTL_DSP_SETFRAGMENT 0xc004500a +#define LX_OSS_SNDCTL_DSP_GETFMTS 
0x8004500b +#define LX_OSS_SNDCTL_DSP_GETOSPACE 0x8010500c +#define LX_OSS_SNDCTL_DSP_GETCAPS 0x8004500f +#define LX_OSS_SNDCTL_DSP_SETTRIGGER 0x40045010 +#define LX_OSS_SNDCTL_DSP_GETOPTR 0x800c5012 +#define LX_OSS_SNDCTL_DSP_GETISPACE 0x8010500d + +/* + * support for /dev/dsp SNDCTL_DSP_GETFMTS and SNDCTL_DSP_SETFMTS + */ +#define LX_OSS_AFMT_QUERY 0x0000 +#define LX_OSS_AFMT_MU_LAW 0x0001 +#define LX_OSS_AFMT_A_LAW 0x0002 +#define LX_OSS_AFMT_IMA_ADPCM 0x0004 +#define LX_OSS_AFMT_U8 0x0008 +#define LX_OSS_AFMT_S16_LE 0x0010 +#define LX_OSS_AFMT_S16_BE 0x0020 +#define LX_OSS_AFMT_S8 0x0040 +#define LX_OSS_AFMT_U16_LE 0x0080 +#define LX_OSS_AFMT_U16_BE 0x0100 +#define LX_OSS_AFMT_MPEG 0x0200 + +#ifdef _LITTLE_ENDIAN +#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_LE +#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_LE +#elif defined(_BIG_ENDIAN) +#define LX_OSS_AFMT_S16_NE LX_OSS_AFMT_S16_BE +#define LX_OSS_AFMT_U16_NE LX_OSS_AFMT_U16_BE +#else /* _LITTLE_ENDIAN */ +#error NO ENDIAN defined. +#endif /* _LITTLE_ENDIAN */ + +/* + * support for /dev/dsp SNDCTL_DSP_GETISPACE and SNDCTL_DSP_GETOSPACE + */ +typedef struct lx_oss_audio_buf_info { + int fragments; /* fragments that can be rd/wr without blocking */ + int fragstotal; /* total number of fragments allocated for buffering */ + int fragsize; /* size of fragments, same as SNDCTL_DSP_GETBLKSIZE */ + int bytes; /* what can be rd/wr immediately without blocking */ +} lx_oss_audio_buf_info_t; + +/* + * support for /dev/dsp SNDCTL_DSP_GETOPTR + */ +typedef struct lx_oss_count_info { + /* # of bytes processed since opening the device */ + int bytes; + + /* + * # of fragment transitions since last call to this function. + * only valid for mmap access mode. + */ + int blocks; + + /* + * byte offset of the current recording/playback position from + * the beginning of the audio buffer. only valid for mmap access + * mode. 
+ */ + int ptr; +} lx_oss_count_info_t; + +/* + * support for /dev/dsp SNDCTL_DSP_GETCAPS + */ +#define LX_OSS_DSP_CAP_TRIGGER 0x1000 +#define LX_OSS_DSP_CAP_MMAP 0x2000 + +/* + * support for /dev/dsp/ SNDCTL_DSP_SETTRIGGER + */ +#define LX_OSS_PCM_DISABLE_OUTPUT 0 +#define LX_OSS_PCM_ENABLE_OUTPUT 2 + +/* + * /dev/mixer ioctl macros + */ +#define LX_OSS_SM_NRDEVICES 25 +#define LX_OSS_SM_READ(x) (0x80044d00 | (x)) +#define LX_OSS_SM_WRITE(x) (0xc0044d00 | (x)) + +/* + * /dev/mixer ioctls - supported + */ +#define LX_OSS_SOUND_MIXER_READ_VOLUME LX_OSS_SM_READ(LX_OSS_SM_VOLUME) +#define LX_OSS_SOUND_MIXER_READ_PCM LX_OSS_SM_READ(LX_OSS_SM_PCM) +#define LX_OSS_SOUND_MIXER_READ_MIC LX_OSS_SM_READ(LX_OSS_SM_MIC) +#define LX_OSS_SOUND_MIXER_READ_IGAIN LX_OSS_SM_READ(LX_OSS_SM_IGAIN) +#define LX_OSS_SOUND_MIXER_WRITE_VOLUME LX_OSS_SM_WRITE(LX_OSS_SM_VOLUME) +#define LX_OSS_SOUND_MIXER_WRITE_PCM LX_OSS_SM_WRITE(LX_OSS_SM_PCM) +#define LX_OSS_SOUND_MIXER_WRITE_MIC LX_OSS_SM_WRITE(LX_OSS_SM_MIC) +#define LX_OSS_SOUND_MIXER_WRITE_IGAIN LX_OSS_SM_WRITE(LX_OSS_SM_IGAIN) +#define LX_OSS_SOUND_MIXER_READ_STEREODEVS LX_OSS_SM_READ(LX_OSS_SM_STEREODEVS) +#define LX_OSS_SOUND_MIXER_READ_RECMASK LX_OSS_SM_READ(LX_OSS_SM_RECMASK) +#define LX_OSS_SOUND_MIXER_READ_DEVMASK LX_OSS_SM_READ(LX_OSS_SM_DEVMASK) +#define LX_OSS_SOUND_MIXER_READ_RECSRC LX_OSS_SM_READ(LX_OSS_SM_RECSRC) + +/* + * /dev/mixer channels + */ +#define LX_OSS_SM_VOLUME 0 +#define LX_OSS_SM_BASS 1 +#define LX_OSS_SM_TREBLE 2 +#define LX_OSS_SM_SYNTH 3 +#define LX_OSS_SM_PCM 4 +#define LX_OSS_SM_SPEAKER 5 +#define LX_OSS_SM_LINE 6 +#define LX_OSS_SM_MIC 7 +#define LX_OSS_SM_CD 8 +#define LX_OSS_SM_MIX 9 +#define LX_OSS_SM_PCM2 10 +#define LX_OSS_SM_REC 11 +#define LX_OSS_SM_IGAIN 12 +#define LX_OSS_SM_OGAIN 13 +#define LX_OSS_SM_LINE1 14 +#define LX_OSS_SM_LINE2 15 +#define LX_OSS_SM_LINE3 16 +#define LX_OSS_SM_DIGITAL1 17 +#define LX_OSS_SM_DIGITAL2 18 +#define LX_OSS_SM_DIGITAL3 19 +#define LX_OSS_SM_PHONEIN 20 
+#define LX_OSS_SM_PHONEOUT 21 +#define LX_OSS_SM_VIDEO 22 +#define LX_OSS_SM_RADIO 23 +#define LX_OSS_SM_MONITOR 24 + +/* + * /dev/mixer operations + */ +#define LX_OSS_SM_STEREODEVS 251 +#define LX_OSS_SM_CAPS 252 +#define LX_OSS_SM_RECMASK 253 +#define LX_OSS_SM_DEVMASK 254 +#define LX_OSS_SM_RECSRC 255 + +/* + * /dev/mixer value conversion macros + * + * solaris expects gain level on a scale of 0 - 255 + * oss expects gain level on a scale of 0 - 100 + * + * oss also encodes multiple channels volume values in a single int, + * one channel value per byte. + */ +#define LX_OSS_S2L_GAIN(v) (((v) * 100) / 255) +#define LX_OSS_L2S_GAIN(v) (((v) * 255) / 100) +#define LX_OSS_MIXER_DEC1(v) ((v) & 0xff) +#define LX_OSS_MIXER_DEC2(v) (((v) >> 8) & 0xff) +#define LX_OSS_MIXER_ENC2(v1, v2) (((v2) << 8) | (v1)) + +/* + * /dev/mixer value verification macros + */ +#define LX_OSS_MIXER_VCHECK(x) (((int)(x) >= 0) && ((int)(x) <= 100)) +#define LX_OSS_MIXER_1CH_OK(x) ((((x) & ~0xff) == 0) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x))) +#define LX_OSS_MIXER_2CH_OK(x) ((((x) & ~0xffff) == 0) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC1(x)) && \ + LX_OSS_MIXER_VCHECK(LX_OSS_MIXER_DEC2(x))) + +/* + * Unsupported ioctls (NOT a comprehensive list) + */ +#define LX_TIOCLINUX 0x541c +#define LX_TIOCCONS 0x541d +#define LX_TIOCGSERIAL 0x541e +#define LX_TIOCSSERIAL 0x541f +#define LX_TIOCTTYGSTRUCT 0x5426 +#define LX_TIOCSERCONFIG 0x5453 +#define LX_TIOCSERGWILD 0x5454 +#define LX_TIOCSERSWILD 0x5455 +#define LX_TIOCGLCKTRMIOS 0x5456 +#define LX_TIOCSLCKTRMIOS 0x5457 +#define LX_TIOCSERGSTRUCT 0x5458 +#define LX_TIOCSERGETLSR 0x5459 +#define LX_TIOCSERGETMULTI 0x545a +#define LX_TIOCSERSETMULTI 0x545b +#define LX_OLD_SIOCGIFHWADDR 0x8923 +#define LX_SIOCSIFENCAP 0x8926 +#define LX_SIOCGIFSLAVE 0x8929 +#define LX_SIOCSIFSLAVE 0x8930 +#define LX_SIOCADDMULTI 0x8931 +#define LX_SIOCDELMULTI 0x8932 +#define LX_SIOCADDRTOLD 0x8940 +#define LX_SIOCDELRTOLD 0x8941 +#define LX_SIOCGIFTXQLEN 
0x8942 +#define LX_SIOCDARP 0x8950 +#define LX_SIOCGARP 0x8951 +#define LX_SIOCSARP 0x8952 +#define LX_SIOCDRARP 0x8960 +#define LX_SIOCGRARP 0x8961 +#define LX_SIOCSRARP 0x8962 +#define LX_SIOCGIFMAP 0x8970 +#define LX_SIOCSIFMAP 0x8971 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_IOCTL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h new file mode 100644 index 0000000000..2f54724f17 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_H +#define _SYS_LX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <alloca.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/lwp.h> + +#include <sys/lx_brand.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern char lx_release[128]; +extern pid_t zoneinit_pid; + +/* + * Values Linux expects for init + */ +#define LX_INIT_PGID 0 +#define LX_INIT_SID 0 +#define LX_INIT_PID 1 + +/* + * Codes to reboot(2). 
+ */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 672274793 +#define LINUX_REBOOT_MAGIC2A 85072278 +#define LINUX_REBOOT_MAGIC2B 369367448 +#define LINUX_REBOOT_MAGIC2C 537993216 + +/* + * This was observed as coming from Red Hat's init process, but it's not in + * their reboot(2) man page. + */ +#define LINUX_REBOOT_MAGIC2D 0x28121969 + +#define LINUX_REBOOT_CMD_RESTART 0x1234567 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART2 0xa1b2c3d4 +#define LINUX_REBOOT_CMD_CAD_ON 0x89abcdef +#define LINUX_REBOOT_CMD_CAD_OFF 0 + +/* + * the maximum length of messages to be output with lx_msg(), lx_err(), + * lx_debug(), or lx_unsupported(). + */ +#define LX_MSG_MAXLEN (128 + MAXPATHLEN) + +/* + * Linux scheduler priority ranges. + */ +#define LX_SCHED_PRIORITY_MIN_OTHER 0 +#define LX_SCHED_PRIORITY_MAX_OTHER 0 +#define LX_SCHED_PRIORITY_MIN_RRFIFO 1 +#define LX_SCHED_PRIORITY_MAX_RRFIFO 99 + +/* + * Constants to indicate who getrusage() should return information about. + */ +#define LX_RUSAGE_SELF 0 +#define LX_RUSAGE_CHILDREN (-1) + +/* + * normally we never want to write to stderr or stdout because it's unsafe + * to make assumptions about the underlying file descriptors. to protect + * against writes to these file descriptors we go ahead and close them + * in our brand process initialization code. but there are still occasions + * where we are willing to make assumptions about our file descriptors + * and write to them. 
at these times we should use one of lx_msg() or + * lx_err() + */ +extern void lx_msg(char *, ...); +extern void lx_err(char *, ...); +extern void lx_err_fatal(char *, ...); +extern void lx_unsupported(char *, ...); + +struct ucontext; + +extern void lx_handler_table(void); +extern void lx_handler_trace_table(void); +extern void lx_emulate_done(void); +extern lx_regs_t *lx_syscall_regs(void); + +extern char *lx_fd_to_path(int fd, char *buf, int buf_size); +extern int lx_lpid_to_spair(pid_t, pid_t *, lwpid_t *); +extern int lx_lpid_to_spid(pid_t, pid_t *); + +extern int lx_ptrace_wait(siginfo_t *); +extern void lx_ptrace_fork(void); + +extern int lx_check_alloca(size_t); +#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) + +/* + * NO_UUCOPY disables calls to the uucopy* system calls to help with + * debugging brand library accesses to linux application memory. + */ +#ifdef NO_UUCOPY + +int uucopy_unsafe(const void *src, void *dst, size_t n); +int uucopystr_unsafe(const void *src, void *dst, size_t n); + +#define uucopy(src, dst, n) uucopy_unsafe((src), (dst), (n)) +#define uucopystr(src, dst, n) uucopystr_unsafe((src), (dst), (n)) + +#endif /* NO_UUCOPY */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h new file mode 100644 index 0000000000..300ed340b1 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_MOUNT_H +#define _LX_MOUNT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rpc/rpc.h> +#include <nfs/nfs.h> + +/* + * mount() is significantly different between Linux and Solaris. The main + * difference is between the set of flags. Some flags on Linux can be + * translated to a Solaris equivalent, some are converted to a + * filesystem-specific option, while others have no equivalent whatsoever. + */ +#define LX_MS_MGC_VAL 0xC0ED0000 +#define LX_MS_RDONLY 0x00000001 +#define LX_MS_NOSUID 0x00000002 +#define LX_MS_NODEV 0x00000004 +#define LX_MS_NOEXEC 0x00000008 +#define LX_MS_SYNCHRONOUS 0x00000010 +#define LX_MS_REMOUNT 0x00000020 +#define LX_MS_MANDLOCK 0x00000040 +#define LX_MS_NOATIME 0x00000400 +#define LX_MS_NODIRATIME 0x00000800 +#define LX_MS_BIND 0x00001000 +#define LX_MS_SUPPORTED (LX_MS_MGC_VAL | \ + LX_MS_RDONLY | LX_MS_NOSUID | \ + LX_MS_NODEV | LX_MS_NOEXEC | \ + LX_MS_REMOUNT | LX_MS_NOATIME | \ + LX_MS_BIND) + +/* + * support for nfs mounts + */ +#define LX_NMD_MAXHOSTNAMELEN 256 + +#define LX_NFS_MOUNT_SOFT 0x00000001 +#define LX_NFS_MOUNT_INTR 0x00000002 +#define LX_NFS_MOUNT_SECURE 0x00000004 +#define LX_NFS_MOUNT_POSIX 0x00000008 +#define LX_NFS_MOUNT_NOCTO 0x00000010 +#define LX_NFS_MOUNT_NOAC 0x00000020 +#define LX_NFS_MOUNT_TCP 0x00000040 +#define LX_NFS_MOUNT_VER3 0x00000080 +#define LX_NFS_MOUNT_KERBEROS 0x00000100 +#define LX_NFS_MOUNT_NONLM 0x00000200 +#define LX_NFS_MOUNT_BROKEN_SUID 
0x00000400 +#define LX_NFS_MOUNT_SUPPORTED (LX_NFS_MOUNT_INTR | \ + LX_NFS_MOUNT_NOAC | \ + LX_NFS_MOUNT_TCP | \ + LX_NFS_MOUNT_VER3) + +#define LX_NMD_DEFAULT_RSIZE 0 +#define LX_NMD_DEFAULT_WSIZE 0 + +/* + * the nfs v3 file handle structure definitions are _almost_ the same + * on linux and solaris. the key differences are: + * + * 1) on linux fh3_length is an unsigned short whereas on solaris it's + * an int. + * + * 2) on linux the file handle data doesn't contain 32 bit members, so the + * structure is not 32 bit aligned. (whereas on solaris it is.) + * + * so rather than defining a structure that would allow us to interpret + * all the contents of the nfs v3 file handle here, we decide to treat + * the file handle as an array of chars. this works just fine since it + * avoids the alignment issues and the actual file handle contents + * are defined by the nfs specification so they are common across solaris + * and linux. we do the same thing for nfs v2 file handles. + */ +struct lx_nfs_fh2 { + unsigned char lx_fh_data[NFS_FHSIZE]; +} lx_nfs_fh2; + +struct lx_nfs_fh3 { + unsigned short lx_fh3_length; + unsigned char lx_fh3_data[NFS3_FHSIZE]; +} lx_nfs_fh3; + +typedef struct lx_nfs_mount_data { + int nmd_version; + int nmd_fd; + struct lx_nfs_fh2 nmd_old_root; + int nmd_flags; + int nmd_rsize; + int nmd_wsize; + int nmd_timeo; + int nmd_retrans; + int nmd_acregmin; + int nmd_acregmax; + int nmd_acdirmin; + int nmd_acdirmax; + struct sockaddr_in nmd_addr; + char nmd_hostname[LX_NMD_MAXHOSTNAMELEN]; + int nmd_namlen; + uint_t nmd_bsize; + struct lx_nfs_fh3 nmd_root; +} lx_nfs_mount_data_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_MOUNT_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h new file mode 100644 index 0000000000..cb5706fab2 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h @@ -0,0 +1,63 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms 
of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_POLL_H +#define _SYS_LX_POLL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * These events are identical between Linux and Solaris + */ +#define LX_POLLIN 0x001 +#define LX_POLLPRI 0x002 +#define LX_POLLOUT 0x004 +#define LX_POLLERR 0x008 +#define LX_POLLHUP 0x010 +#define LX_POLLNVAL 0x020 +#define LX_POLLRDNORM 0x040 +#define LX_POLLRDBAND 0x080 + +#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \ + LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND) + +/* + * These events differ between Linux and Solaris + */ +#define LX_POLLWRNORM 0x100 +#define LX_POLLWRBAND 0x200 + +#define LX_POLL_SUPPORTED_EVENTS \ + (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_POLL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h new file mode 100644 index 0000000000..981c7d1ad2 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this 
file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_SIGNAL_H +#define _SYS_LX_SIGNAL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if !defined(_ASM) +#include <sys/lx_types.h> +#include <lx_signum.h> + +#endif /* !defined(_ASM) */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Linux sigaction flags + */ +#define LX_SA_NOCLDSTOP 0x00000001 +#define LX_SA_NOCLDWAIT 0x00000002 +#define LX_SA_SIGINFO 0x00000004 +#define LX_SA_RESTORER 0x04000000 +#define LX_SA_ONSTACK 0x08000000 +#define LX_SA_RESTART 0x10000000 +#define LX_SA_NODEFER 0x40000000 +#define LX_SA_RESETHAND 0x80000000 +#define LX_SA_NOMASK LX_SA_NODEFER +#define LX_SA_ONESHOT LX_SA_RESETHAND + +#define LX_SIG_BLOCK 0 +#define LX_SIG_UNBLOCK 1 +#define LX_SIG_SETMASK 2 + +#define LX_MINSIGSTKSZ 2048 +#define LX_SS_ONSTACK 1 +#define LX_SS_DISABLE 2 + +#define LX_SIGRT_MAGIC 0xdeadf00d + +#if !defined(_ASM) + +/* + * NOTE: Linux uses different definitions for sigset_ts and sigaction_ts + * depending on whether the definition is for user space or the kernel. 
+ * + * The definitions below MUST correspond to the Linux kernel versions, + * as glibc will do the necessary translation from the Linux user + * versions. + */ +typedef struct { + ulong_t __bits[LX_NSIG_WORDS]; +} lx_sigset_t; + +/* + * Signal numbers are 1-based: signal n is stored in bit ((n) - 1) % LX_NBITS + * of word ((n) - 1) / LX_NBITS. Both the bit and the word index are derived + * from LX_NBITS so that they always agree with the width of ulong_t. + */ +#define LX_NBITS (sizeof (ulong_t) * NBBY) +#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS)) +#define lx_sigword(n) (((ulong_t)((n) - 1)) / LX_NBITS) +#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)]) +#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n)) + +typedef struct lx_sigaction { + void (*lxsa_handler)(); + int lxsa_flags; + void (*lxsa_restorer)(void); + lx_sigset_t lxsa_mask; +} lx_sigaction_t; + +typedef uint32_t lx_osigset_t; + +#define OSIGSET_NBITS (sizeof (lx_osigset_t) * NBBY) +#define OSIGSET_BITSET(sig) (1U << (((sig) - 1) % OSIGSET_NBITS)) + +/* + * Flag settings to determine whether common routines should operate on + * lx_sigset_ts or lx_osigset_ts. + */ +#define USE_OSIGSET 0 +#define USE_SIGSET 1 + +typedef struct lx_osigaction { + void (*lxsa_handler)(); + lx_osigset_t lxsa_mask; + int lxsa_flags; + void (*lxsa_restorer)(void); +} lx_osigaction_t; + +#define LX_SI_MAX_SIZE 128 +#define LX_SI_PAD_SIZE ((LX_SI_MAX_SIZE/sizeof (int)) - 3) + +typedef struct lx_siginfo { + int lsi_signo; + int lsi_errno; + int lsi_code; + union { + int _pad[LX_SI_PAD_SIZE]; + + struct { + pid_t _pid; + lx_uid16_t _uid; + } _kill; + + struct { + uint_t _timer1; + uint_t _timer2; + } _timer; + + struct { + pid_t _pid; /* sender's pid */ + lx_uid16_t _uid; /* sender's uid */ + union sigval _sigval; + } _rt; + + struct { + pid_t _pid; /* which child */ + lx_uid16_t _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + struct { + void *_addr; /* faulting insn/memory ref.
*/ + } _sigfault; + + struct { + int _band; /* POLL_IN,POLL_OUT,POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} lx_siginfo_t; + +/* + * lx_siginfo_t lsi_code values + * + * LX_SI_ASYNCNL: Sent by asynch name lookup completion + * LX_SI_TKILL: Sent by tkill + * LX_SI_SIGIO: Sent by queued SIGIO + * LX_SI_ASYNCIO: Sent by asynchronous I/O completion + * LX_SI_MESGQ: Sent by real time message queue state change + * LX_SI_TIMER: Sent by timer expiration + * LX_SI_QUEUE: Sent by sigqueue + * LX_SI_USER: Sent by kill, sigsend, raise, etc. + * LX_SI_KERNEL: Sent by kernel + * + * At present, LX_SI_ASYNCNL and LX_SI_SIGIO are unused by BrandZ. + */ +#define LX_SI_ASYNCNL (-60) +#define LX_SI_TKILL (-6) +#define LX_SI_SIGIO (-5) +#define LX_SI_ASYNCIO (-4) +#define LX_SI_MESGQ (-3) +#define LX_SI_TIMER (-2) +#define LX_SI_QUEUE (-1) +#define LX_SI_USER (0) +#define LX_SI_KERNEL (0x80) + +typedef struct lx_sighandlers { + struct lx_sigaction lx_sa[LX_NSIG]; +} lx_sighandlers_t; + +typedef struct lx_sigaltstack { + void *ss_sp; + int ss_flags; + size_t ss_size; +} lx_stack_t; + +struct lx_fpreg { + ushort_t significand[4]; + ushort_t exponent; +}; + +struct lx_fpxreg { + ushort_t significand[4]; + ushort_t exponent; + ushort_t padding[3]; +}; + +struct lx_xmmreg { + uint32_t element[4]; +}; + +#define LX_X86_FXSR_MAGIC 0x0000 +#define LX_X86_FXSR_NONE 0xffff + +typedef struct lx_fpstate { + /* Regular FPU environment */ + ulong_t cw; + ulong_t sw; + ulong_t tag; + ulong_t ipoff; + ulong_t cssel; + ulong_t dataoff; + ulong_t datasel; + struct lx_fpreg _st[8]; + ushort_t status; + ushort_t magic; /* 0xffff = regular FPU data */ + + /* FXSR FPU environment */ + ulong_t _fxsr_env[6]; /* env is ignored */ + ulong_t mxcsr; + ulong_t reserved; + struct lx_fpxreg _fxsr_st[8]; /* reg data is ignored */ + struct lx_xmmreg _xmm[8]; + ulong_t padding[56]; +} lx_fpstate_t; + +typedef struct lx_sigcontext { + ulong_t sc_gs; + ulong_t sc_fs; + ulong_t sc_es; + ulong_t sc_ds; + 
ulong_t sc_edi; + ulong_t sc_esi; + ulong_t sc_ebp; + ulong_t sc_esp; + ulong_t sc_ebx; + ulong_t sc_edx; + ulong_t sc_ecx; + ulong_t sc_eax; + ulong_t sc_trapno; + ulong_t sc_err; + ulong_t sc_eip; + ulong_t sc_cs; + ulong_t sc_eflags; + ulong_t sc_esp_at_signal; + ulong_t sc_ss; + lx_fpstate_t *sc_fpstate; + ulong_t sc_mask; + ulong_t sc_cr2; +} lx_sigcontext_t; + +typedef struct lx_ucontext { + ulong_t uc_flags; + struct lx_ucontext *uc_link; + lx_stack_t uc_stack; + lx_sigcontext_t uc_sigcontext; + lx_sigset_t uc_sigmask; +} lx_ucontext_t; + +/* + * Shorthand accessors for members of the _sifields union in lx_siginfo_t. + * + * NOTE(review): lsi_int and lsi_ptr expand to sivalx_int / sivalx_ptr; + * confirm these match the member names of union sigval before using them. + */ +#define lsi_pid _sifields._kill._pid +#define lsi_uid _sifields._kill._uid +#define lsi_status _sifields._sigchld._status +#define lsi_utime _sifields._sigchld._utime +#define lsi_stime _sifields._sigchld._stime +#define lsi_value _sifields._rt._sigval +#define lsi_int _sifields._rt._sigval.sivalx_int +#define lsi_ptr _sifields._rt._sigval.sivalx_ptr +#define lsi_addr _sifields._sigfault._addr +#define lsi_band _sifields._sigpoll._band +#define lsi_fd _sifields._sigpoll._fd + +extern const int ltos_signo[]; +extern const int stol_signo[]; + +extern void setsigacthandler(void (*)(int, siginfo_t *, void *), + void (**)(int, siginfo_t *, void *)); + +extern int lx_siginit(void); + +extern void lx_sigreturn_tolibc(uintptr_t); +extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(), + void (*)(), uintptr_t); + +#endif /* !defined(_ASM) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SIGNAL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h new file mode 100644 index 0000000000..04d2a50bb2 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h @@ -0,0 +1,250 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_SOCKET_H +#define _SYS_LX_SOCKET_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Linux address family definitions + * Some of these are not supported + */ +#define LX_AF_UNSPEC 0 /* Unspecified */ +#define LX_AF_UNIX 1 /* local file/pipe name */ +#define LX_AF_INET 2 /* IP protocol family */ +#define LX_AF_AX25 3 /* Amateur Radio AX.25 */ +#define LX_AF_IPX 4 /* Novell Internet Protocol */ +#define LX_AF_APPLETALK 5 /* Appletalk */ +#define LX_AF_NETROM 6 /* Amateur radio */ +#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */ +#define LX_AF_ATMPVC 8 /* ATM PVCs */ +#define LX_AF_X25 9 /* X.25 */ +#define LX_AF_INET6 10 /* IPV 6 */ +#define LX_AF_ROSE 11 /* Amateur Radio X.25 */ +#define LX_AF_DECnet 12 /* DECnet */ +#define LX_AF_NETBEUI 13 /* 802.2LLC */ +#define LX_AF_SECURITY 14 /* Security callback */ +#define LX_AF_KEY 15 /* key management */ +#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */ +#define LX_AF_PACKET 17 /* Packet family */ +#define LX_AF_ASH 18 /* Ash ? 
*/ +#define LX_AF_ECONET 19 /* Acorn Econet */ +#define LX_AF_ATMSVC 20 /* ATM SVCs */ +#define LX_AF_SNA 22 /* Linux SNA */ +#define LX_AF_IRDA 23 /* IRDA sockets */ +#define LX_AF_PPPOX 24 /* PPPoX sockets */ +#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */ +#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */ +#define LX_AF_MAX 32 /* MAX socket type */ + +#define AF_NOTSUPPORTED -1 +#define AF_INVAL -2 + +/* + * Linux ARP protocol hardware identifiers + */ +#define LX_ARPHRD_ETHER 1 /* Ethernet */ +#define LX_ARPHRD_LOOPBACK 772 /* Loopback */ +#define LX_ARPHRD_VOID 0xffff /* Unknown */ + +/* + * Linux socket type definitions + */ +#define LX_SOCK_STREAM 1 /* Connection-based byte streams */ +#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */ +#define LX_SOCK_RAW 3 /* Raw protocol interface */ +#define LX_SOCK_RDM 4 /* Reliably-delivered message */ +#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */ +#define LX_SOCK_PACKET 10 /* Linux specific */ +#define LX_SOCK_MAX 11 + +#define SOCK_NOTSUPPORTED -1 +#define SOCK_INVAL -2 + +/* + * Options for use with [gs]etsockopt at the IP level. + * IPPROTO_IP + */ +#define LX_IP_TOS 1 +#define LX_IP_TTL 2 +#define LX_IP_HDRINCL 3 +#define LX_IP_OPTIONS 4 +#define LX_IP_ROUTER_ALERT 5 +#define LX_IP_RECVOPTS 6 +#define LX_IP_RETOPTS 7 +#define LX_IP_PKTINFO 8 +#define LX_IP_PKTOPTIONS 9 +#define LX_IP_MTU_DISCOVER 10 +#define LX_IP_RECVERR 11 +#define LX_IP_RECVTTL 12 +#define LX_IP_RECVTOS 13 +#define LX_IP_MTU 14 +#define LX_IP_FREEBIND 15 +#define LX_IP_MULTICAST_IF 32 +#define LX_IP_MULTICAST_TTL 33 +#define LX_IP_MULTICAST_LOOP 34 +#define LX_IP_ADD_MEMBERSHIP 35 +#define LX_IP_DROP_MEMBERSHIP 36 + +/* + * Options for use with [gs]etsockopt at the TCP level. 
+ * IPPROTO_TCP + */ +#define LX_TCP_NODELAY 1 /* Don't delay send to coalesce packets */ +#define LX_TCP_MAXSEG 2 /* Set maximum segment size */ +#define LX_TCP_CORK 3 /* Control sending of partial frames */ +#define LX_TCP_KEEPIDLE 4 /* Start keepalives after this period */ +#define LX_TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define LX_TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define LX_TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define LX_TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define LX_TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define LX_TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define LX_TCP_INFO 11 /* Information about this connection. */ +#define LX_TCP_QUICKACK 12 /* Block/re-enable quick ACKs. */ + +/* + * Options for use with [gs]etsockopt at the IGMP level. + * IPPROTO_IGMP + */ +#define LX_IGMP_MINLEN 8 +#define LX_IGMP_MAX_HOST_REPORT_DELAY 10 +#define LX_IGMP_HOST_MEMBERSHIP_QUERY 0x11 +#define LX_IGMP_HOST_MEMBERSHIP_REPORT 0x12 +#define LX_IGMP_DVMRP 0x13 +#define LX_IGMP_PIM 0x14 +#define LX_IGMP_TRACE 0x15 +#define LX_IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 +#define LX_IGMP_HOST_LEAVE_MESSAGE 0x17 +#define LX_IGMP_MTRACE_RESP 0x1e +#define LX_IGMP_MTRACE 0x1f + +/* + * Options for use with [gs]etsockopt at the SOL_SOCKET level.
+ */ +#define LX_SOL_SOCKET 1 + +#define LX_SCM_RIGHTS 1 +#define LX_SCM_CRED 2 + +#define LX_SO_DEBUG 1 +#define LX_SO_REUSEADDR 2 +#define LX_SO_TYPE 3 +#define LX_SO_ERROR 4 +#define LX_SO_DONTROUTE 5 +#define LX_SO_BROADCAST 6 +#define LX_SO_SNDBUF 7 +#define LX_SO_RCVBUF 8 +#define LX_SO_KEEPALIVE 9 +#define LX_SO_OOBINLINE 10 +#define LX_SO_NO_CHECK 11 +#define LX_SO_PRIORITY 12 +#define LX_SO_LINGER 13 +#define LX_SO_BSDCOMPAT 14 +/* To add :#define LX_SO_REUSEPORT 15 */ +#define LX_SO_PASSCRED 16 +#define LX_SO_PEERCRED 17 +#define LX_SO_RCVLOWAT 18 +#define LX_SO_SNDLOWAT 19 +#define LX_SO_RCVTIMEO 20 +#define LX_SO_SNDTIMEO 21 +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define LX_SO_SECURITY_AUTHENTICATION 22 +#define LX_SO_SECURITY_ENCRYPTION_TRANSPORT 23 +#define LX_SO_SECURITY_ENCRYPTION_NETWORK 24 +#define LX_SO_BINDTODEVICE 25 +/* Socket filtering */ +#define LX_SO_ATTACH_FILTER 26 +#define LX_SO_DETACH_FILTER 27 +#define LX_SO_PEERNAME 28 +#define LX_SO_TIMESTAMP 29 +#define LX_SCM_TIMESTAMP LX_SO_TIMESTAMP +#define LX_SO_ACCEPTCONN 30 + +/* + * Linux socketcall indices. + * These constitute all 17 socket related system calls + * + * These system calls are called via a single system call socketcall(). 
+ * The first arg is the index of the system call type + */ +#define LX_SOCKET 1 +#define LX_BIND 2 +#define LX_CONNECT 3 +#define LX_LISTEN 4 +#define LX_ACCEPT 5 +#define LX_GETSOCKNAME 6 +#define LX_GETPEERNAME 7 +#define LX_SOCKETPAIR 8 +#define LX_SEND 9 +#define LX_RECV 10 +#define LX_SENDTO 11 +#define LX_RECVFROM 12 +#define LX_SHUTDOWN 13 +#define LX_SETSOCKOPT 14 +#define LX_GETSOCKOPT 15 +#define LX_SENDMSG 16 +#define LX_RECVMSG 17 + +/* + * Linux socket flags for use with recv(2)/send(2)/recvmsg(2)/sendmsg(2) + */ +#define LX_MSG_OOB 1 +#define LX_MSG_PEEK 2 +#define LX_MSG_DONTROUTE 4 +#define LX_MSG_CTRUNC 8 +#define LX_MSG_PROXY 0x10 +#define LX_MSG_TRUNC 0x20 +#define LX_MSG_DONTWAIT 0x40 +#define LX_MSG_EOR 0x80 +#define LX_MSG_WAITALL 0x100 +#define LX_MSG_FIN 0x200 +#define LX_MSG_SYN 0x400 +#define LX_MSG_CONFIRM 0x800 +#define LX_MSG_RST 0x1000 +#define LX_MSG_ERRQUEUE 0x2000 +#define LX_MSG_NOSIGNAL 0x4000 +#define LX_MSG_MORE 0x8000 + +struct lx_msghdr { + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_control; /* ancillary data */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SOCKET_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h new file mode 100644 index 0000000000..67aa86bb1c --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_stat.h @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_STAT_H +#define _SYS_LX_STAT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/lx_types.h> +#include <sys/stat.h> + +#define LX_MAJORSHIFT 8 +#define LX_MINORMASK ((1 << LX_MAJORSHIFT) - 1) +#define LX_MAKEDEVICE(lx_maj, lx_min) \ + ((lx_dev_t)((lx_maj) << LX_MAJORSHIFT | ((lx_min) & LX_MINORMASK))) + +#define LX_GETMAJOR(lx_dev) ((lx_dev) >> LX_MAJORSHIFT) +#define LX_GETMINOR(lx_dev) ((lx_dev) & LX_MINORMASK) + +#undef st_atime +#undef st_mtime +#undef st_ctime + +struct lx_stat { + lx_dev16_t st_dev; + uint16_t st_pad1; + lx_ino_t st_ino; + lx_mode16_t st_mode; + uint16_t st_nlink; + lx_uid16_t st_uid; + lx_gid16_t st_gid; + lx_dev16_t st_rdev; + uint16_t st_pad2; + lx_off_t st_size; + lx_blksize_t st_blksize; + lx_blkcnt_t st_blocks; + struct lx_timespec st_atime; + struct lx_timespec st_mtime; + struct lx_timespec st_ctime; + uint32_t st_pad3; + uint32_t st_pad4; +}; + +struct lx_stat64 { + lx_dev_t st_dev; + uint32_t st_pad1; + lx_ino_t st_small_ino; + lx_mode_t st_mode; + uint_t st_nlink; + lx_uid_t st_uid; + lx_gid_t st_gid; + lx_dev_t st_rdev; + uint32_t st_pad2; + lx_off64_t st_size; + lx_blksize_t st_blksize; + lx_blkcnt64_t st_blocks; + struct lx_timespec st_atime; + struct lx_timespec st_mtime; + struct lx_timespec st_ctime; + lx_ino64_t st_ino; +}; + +extern int lx_stat_init(void); + +#ifdef 
__cplusplus +} +#endif + +#endif /* _SYS_LX_STAT_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h new file mode 100644 index 0000000000..839d36d7ae --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_STATFS_H +#define _LX_STATFS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_statfs_init(void); + +struct lx_statfs { + int f_type; + int f_bsize; + ulong_t f_blocks; + ulong_t f_bfree; + ulong_t f_bavail; + ulong_t f_files; + ulong_t f_ffree; + u_longlong_t f_fsid; + int f_namelen; + int f_frsize; + int f_spare[5]; +}; + +struct lx_statfs64 { + int f_type; + int f_bsize; + u_longlong_t f_blocks; + u_longlong_t f_bfree; + u_longlong_t f_bavail; + u_longlong_t f_files; + u_longlong_t f_ffree; + u_longlong_t f_fsid; + int f_namelen; + int f_frsize; + int f_spare[5]; +}; + +/* + * These magic values are taken mostly from statfs(2). 
+ */ +#define LX_ISOFS_SUPER_MAGIC 0x9660 +#define LX_NFS_SUPER_MAGIC 0x6969 +#define LX_MSDOS_SUPER_MAGIC 0x4d44 +#define LX_PROC_SUPER_MAGIC 0x9fa0 +#define LX_UFS_MAGIC 0x00011954 +#define LX_DEVPTS_SUPER_MAGIC 0x1cd1 + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_STATFS_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h new file mode 100644 index 0000000000..241faf7c5b --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h @@ -0,0 +1,489 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_SYSCALL_H +#define _SYS_LX_SYSCALL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if !defined(_ASM) + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int lx_install; + +extern int lx_stat(uintptr_t, uintptr_t); +extern int lx_fstat(uintptr_t, uintptr_t); +extern int lx_lstat(uintptr_t, uintptr_t); +extern int lx_stat64(uintptr_t, uintptr_t); +extern int lx_fstat64(uintptr_t, uintptr_t); +extern int lx_lstat64(uintptr_t, uintptr_t); +extern int lx_fcntl(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fcntl64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_flock(uintptr_t, uintptr_t); +extern int lx_open(uintptr_t, uintptr_t, uintptr_t); +extern int lx_readdir(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getdents64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getpid(void); +extern int lx_execve(uintptr_t, uintptr_t, uintptr_t); +extern int lx_dup2(uintptr_t, uintptr_t); +extern int lx_ioctl(uintptr_t, uintptr_t, uintptr_t); +extern int lx_vhangup(void); + +extern int lx_read(uintptr_t, uintptr_t, uintptr_t); +extern int lx_readv(uintptr_t, uintptr_t, uintptr_t); +extern int lx_writev(uintptr_t, uintptr_t, uintptr_t); +extern int lx_pread64(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_socketcall(uintptr_t, uintptr_t); +extern int lx_select(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_poll(uintptr_t, uintptr_t, uintptr_t); +extern int lx_oldgetrlimit(uintptr_t, uintptr_t); +extern int lx_getrlimit(uintptr_t, uintptr_t); +extern int lx_setrlimit(uintptr_t, uintptr_t); +extern int lx_gettimeofday(uintptr_t, uintptr_t); +extern int lx_settimeofday(uintptr_t, uintptr_t); +extern int lx_getrusage(uintptr_t, uintptr_t); +extern int lx_mknod(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getpgrp(void); +extern int lx_getpgid(uintptr_t); +extern int lx_setpgid(uintptr_t, uintptr_t); +extern int lx_getsid(uintptr_t); +extern int lx_setsid(void); +extern int 
lx_setgroups(uintptr_t, uintptr_t); + + +extern int lx_waitpid(uintptr_t, uintptr_t, uintptr_t); +extern int lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getuid16(void); +extern int lx_getgid16(void); +extern int lx_geteuid16(void); +extern int lx_getegid16(void); +extern int lx_geteuid(void); +extern int lx_getegid(void); +extern int lx_getresuid16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresgid16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresuid(uintptr_t, uintptr_t, uintptr_t); +extern int lx_getresgid(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_setuid16(uintptr_t); +extern int lx_setreuid16(uintptr_t, uintptr_t); +extern int lx_setregid16(uintptr_t, uintptr_t); +extern int lx_setgid16(uintptr_t); +extern int lx_setfsuid16(uintptr_t); +extern int lx_setfsgid16(uintptr_t); + +extern int lx_setfsuid(uintptr_t); +extern int lx_setfsgid(uintptr_t); + +extern int lx_clock_settime(int, struct timespec *); +extern int lx_clock_gettime(int, struct timespec *); +extern int lx_clock_getres(int, struct timespec *); +extern int lx_clock_nanosleep(int, int flags, struct timespec *, + struct timespec *); + +extern int lx_truncate(uintptr_t, uintptr_t); +extern int lx_ftruncate(uintptr_t, uintptr_t); +extern int lx_truncate64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_ftruncate64(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sysctl(uintptr_t); +extern int lx_fsync(uintptr_t); +extern int lx_fdatasync(uintptr_t); +extern int lx_pipe(uintptr_t); +extern int lx_link(uintptr_t, uintptr_t); +extern int lx_unlink(uintptr_t); +extern int lx_rmdir(uintptr_t); +extern int lx_chown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_lchown16(uintptr_t, uintptr_t, uintptr_t); +extern int lx_chown(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fchown(uintptr_t, uintptr_t, uintptr_t); +extern int lx_chmod(uintptr_t, uintptr_t); +extern int lx_rename(uintptr_t, 
uintptr_t); +extern int lx_utime(uintptr_t, uintptr_t); +extern int lx_llseek(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_lseek(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sysfs(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getcwd(uintptr_t, uintptr_t); +extern int lx_uname(uintptr_t); +extern int lx_reboot(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_getgroups16(uintptr_t, uintptr_t); +extern int lx_setgroups16(uintptr_t, uintptr_t); +extern int lx_personality(uintptr_t); + +extern int lx_query_module(uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); + +extern int lx_time(uintptr_t); +extern int lx_times(uintptr_t); +extern int lx_setitimer(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_clone(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_exit(uintptr_t); +extern int lx_group_exit(uintptr_t); + +extern int lx_mlock(uintptr_t, uintptr_t); +extern int lx_mlockall(uintptr_t); +extern int lx_munlock(uintptr_t, uintptr_t); +extern int lx_munlockall(void); +extern int lx_msync(uintptr_t, uintptr_t, uintptr_t); +extern int lx_madvise(uintptr_t, uintptr_t, uintptr_t); +extern int lx_mprotect(uintptr_t, uintptr_t, uintptr_t); +extern int lx_mmap(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +extern int lx_mmap2(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); + +extern int lx_mount(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_umount(uintptr_t); +extern int lx_umount2(uintptr_t, uintptr_t); + +extern int lx_statfs(uintptr_t, uintptr_t); +extern int lx_fstatfs(uintptr_t, uintptr_t); +extern int lx_statfs64(uintptr_t, uintptr_t, uintptr_t); +extern int lx_fstatfs64(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sigreturn(void); +extern int lx_rt_sigreturn(void); +extern int lx_signal(uintptr_t, uintptr_t); +extern int lx_sigaction(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigaction(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t); +extern int lx_sigaltstack(uintptr_t, uintptr_t); +extern int lx_sigpending(uintptr_t); +extern int lx_rt_sigpending(uintptr_t, uintptr_t); +extern int lx_sigprocmask(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigprocmask(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_sigsuspend(uintptr_t); +extern int lx_rt_sigsuspend(uintptr_t, uintptr_t); +extern int lx_sigwaitinfo(uintptr_t, uintptr_t); +extern int lx_rt_sigwaitinfo(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sigtimedwait(uintptr_t, uintptr_t, uintptr_t); +extern int lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sync(void); + +extern int lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); +extern int lx_tgkill(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sethostname(uintptr_t, uintptr_t); +extern int lx_setdomainname(uintptr_t, uintptr_t); + +extern int lx_sendfile(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_sendfile64(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_fork(void); +extern int lx_vfork(void); +extern int lx_exec(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_getpriority(uintptr_t, uintptr_t); +extern int lx_setpriority(uintptr_t, uintptr_t, uintptr_t); + +extern int lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +extern int lx_sched_getaffinity(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_setaffinity(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_getparam(uintptr_t, uintptr_t); +extern int lx_sched_setparam(uintptr_t, uintptr_t); +extern int lx_sched_rr_get_interval(uintptr_t pid, uintptr_t); +extern int lx_sched_getscheduler(uintptr_t); +extern int lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t); +extern int lx_sched_get_priority_min(uintptr_t); +extern int lx_sched_get_priority_max(uintptr_t); + +extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); + +#endif /* !defined(_ASM) */ + +#define 
EBP_HAS_ARG6 0x01 +#define LINUX_MAX_SYSCALL 270 + +/* + * Linux syscall numbers + */ +#define LX_SYS_exit 1 +#define LX_SYS_fork 2 +#define LX_SYS_read 3 +#define LX_SYS_write 4 +#define LX_SYS_open 5 +#define LX_SYS_close 6 +#define LX_SYS_waitpid 7 +#define LX_SYS_creat 8 +#define LX_SYS_link 9 +#define LX_SYS_unlink 10 +#define LX_SYS_execve 11 +#define LX_SYS_chdir 12 +#define LX_SYS_time 13 +#define LX_SYS_mknod 14 +#define LX_SYS_chmod 15 +#define LX_SYS_lchown 16 +#define LX_SYS_break 17 +#define LX_SYS_oldstat 18 +#define LX_SYS_lseek 19 +#define LX_SYS_getpid 20 +#define LX_SYS_mount 21 +#define LX_SYS_umount 22 +#define LX_SYS_setuid 23 +#define LX_SYS_getuid 24 +#define LX_SYS_stime 25 +#define LX_SYS_ptrace 26 +#define LX_SYS_alarm 27 +#define LX_SYS_oldfstat 28 +#define LX_SYS_pause 29 +#define LX_SYS_utime 30 +#define LX_SYS_stty 31 +#define LX_SYS_gtty 32 +#define LX_SYS_access 33 +#define LX_SYS_nice 34 +#define LX_SYS_ftime 35 +#define LX_SYS_sync 36 +#define LX_SYS_kill 37 +#define LX_SYS_rename 38 +#define LX_SYS_mkdir 39 +#define LX_SYS_rmdir 40 +#define LX_SYS_dup 41 +#define LX_SYS_pipe 42 +#define LX_SYS_times 43 +#define LX_SYS_prof 44 +#define LX_SYS_brk 45 +#define LX_SYS_setgid 46 +#define LX_SYS_getgid 47 +#define LX_SYS_signal 48 +#define LX_SYS_geteuid 49 +#define LX_SYS_getegid 50 +#define LX_SYS_acct 51 +#define LX_SYS_umount2 52 +#define LX_SYS_lock 53 +#define LX_SYS_ioctl 54 +#define LX_SYS_fcntl 55 +#define LX_SYS_mpx 56 +#define LX_SYS_setpgid 57 +#define LX_SYS_ulimit 58 +#define LX_SYS_oldolduname 59 +#define LX_SYS_umask 60 +#define LX_SYS_chroot 61 +#define LX_SYS_ustat 62 +#define LX_SYS_dup2 63 +#define LX_SYS_getppid 64 +#define LX_SYS_getpgrp 65 +#define LX_SYS_setsid 66 +#define LX_SYS_sigaction 67 +#define LX_SYS_sgetmask 68 +#define LX_SYS_ssetmask 69 +#define LX_SYS_setreuid 70 +#define LX_SYS_setregid 71 +#define LX_SYS_sigsuspend 72 +#define LX_SYS_sigpending 73 +#define LX_SYS_sethostname 74 +#define 
LX_SYS_setrlimit 75 +#define LX_SYS_getrlimit 76 +#define LX_SYS_getrusage 77 +#define LX_SYS_gettimeofday 78 +#define LX_SYS_settimeofday 79 +#define LX_SYS_getgroups 80 +#define LX_SYS_setgroups 81 +#define LX_SYS_select 82 +#define LX_SYS_symlink 83 +#define LX_SYS_oldlstat 84 +#define LX_SYS_readlink 85 +#define LX_SYS_uselib 86 +#define LX_SYS_swapon 87 +#define LX_SYS_reboot 88 +#define LX_SYS_readdir 89 +#define LX_SYS_mmap 90 +#define LX_SYS_munmap 91 +#define LX_SYS_truncate 92 +#define LX_SYS_ftruncate 93 +#define LX_SYS_fchmod 94 +#define LX_SYS_fchown 95 +#define LX_SYS_getpriority 96 +#define LX_SYS_setpriority 97 +#define LX_SYS_profil 98 +#define LX_SYS_statfs 99 +#define LX_SYS_fstatfs 100 +#define LX_SYS_ioperm 101 +#define LX_SYS_socketcall 102 +#define LX_SYS_syslog 103 +#define LX_SYS_setitimer 104 +#define LX_SYS_getitimer 105 +#define LX_SYS_stat 106 +#define LX_SYS_lstat 107 +#define LX_SYS_fstat 108 +#define LX_SYS_olduname 109 +#define LX_SYS_iopl 110 +#define LX_SYS_vhangup 111 +#define LX_SYS_idle 112 +#define LX_SYS_vm86old 113 +#define LX_SYS_wait4 114 +#define LX_SYS_swapoff 115 +#define LX_SYS_sysinfo 116 +#define LX_SYS_ipc 117 +#define LX_SYS_fsync 118 +#define LX_SYS_sigreturn 119 +#define LX_SYS_clone 120 +#define LX_SYS_setdomainname 121 +#define LX_SYS_uname 122 +#define LX_SYS_modify_ldt 123 +#define LX_SYS_adjtimex 124 +#define LX_SYS_mprotect 125 +#define LX_SYS_sigprocmask 126 +#define LX_SYS_create_module 127 +#define LX_SYS_init_module 128 +#define LX_SYS_delete_module 129 +#define LX_SYS_get_kernel_syms 130 +#define LX_SYS_quotactl 131 +#define LX_SYS_getpgid 132 +#define LX_SYS_fchdir 133 +#define LX_SYS_sysfs 135 +#define LX_SYS_setfsuid 138 +#define LX_SYS_setfsgid 139 +#define LX_SYS_llseek 140 +#define LX_SYS_getdents 141 +#define LX_SYS_newselect 142 +#define LX_SYS_flock 143 +#define LX_SYS_msync 144 +#define LX_SYS_readv 145 +#define LX_SYS_writev 146 +#define LX_SYS_getsid 147 +#define LX_SYS_fdatasync 148 
+#define LX_SYS_sysctl 149 +#define LX_SYS_mlock 150 +#define LX_SYS_munlock 151 +#define LX_SYS_mlockall 152 +#define LX_SYS_munlockall 153 +#define LX_SYS_sched_setparam 154 +#define LX_SYS_sched_getparam 155 +#define LX_SYS_sched_setscheduler 156 +#define LX_SYS_sched_getscheduler 157 +#define LX_SYS_sched_yield 158 +#define LX_SYS_sched_get_priority_max 159 +#define LX_SYS_sched_get_priority_min 160 +#define LX_SYS_sched_rr_get_interval 161 +#define LX_SYS_nanosleep 162 +#define LX_SYS_mremap 163 +#define LX_SYS_setresuid 164 +#define LX_SYS_getresuid 165 +#define LX_SYS_poll 168 +#define LX_SYS_setresgid 170 +#define LX_SYS_getresgid 171 +#define LX_SYS_prctl 172 +#define LX_SYS_rt_sigreturn 173 +#define LX_SYS_rt_sigaction 174 +#define LX_SYS_rt_sigprocmask 175 +#define LX_SYS_rt_sigpending 176 +#define LX_SYS_rt_sigtimedwait 177 +#define LX_SYS_rt_sigqueueinfo 178 +#define LX_SYS_rt_sigsuspend 179 +#define LX_SYS_pread 180 +#define LX_SYS_pwrite 181 +#define LX_SYS_chown 182 +#define LX_SYS_getcwd 183 +#define LX_SYS_capget 184 +#define LX_SYS_capset 185 +#define LX_SYS_sigaltstack 186 +#define LX_SYS_sendfile 187 +#define LX_SYS_getpmsg 188 +#define LX_SYS_putpmsg 189 +#define LX_SYS_vfork 190 +#define LX_SYS_ugetrlimit 191 +#define LX_SYS_mmap2 192 +#define LX_SYS_truncate64 193 +#define LX_SYS_ftruncate64 194 +#define LX_SYS_stat64 195 +#define LX_SYS_lstat64 196 +#define LX_SYS_fstat64 197 +#define LX_SYS_lchown32 198 +#define LX_SYS_getuid32 199 +#define LX_SYS_getgid32 200 +#define LX_SYS_geteuid32 201 +#define LX_SYS_getegid32 202 +#define LX_SYS_setreuid32 203 +#define LX_SYS_setregid32 204 +#define LX_SYS_getgroups32 205 +#define LX_SYS_setgroups32 206 +#define LX_SYS_fchown32 207 +#define LX_SYS_setresuid32 208 +#define LX_SYS_getresuid32 209 +#define LX_SYS_setresgid32 210 +#define LX_SYS_getresgid32 211 +#define LX_SYS_chown32 212 +#define LX_SYS_setuid32 213 +#define LX_SYS_setgid32 214 +#define LX_SYS_setfsuid32 215 +#define LX_SYS_setfsgid32 
216 +#define LX_SYS_mincore 218 +#define LX_SYS_madvise 219 +#define LX_SYS_getdents64 220 +#define LX_SYS_fcntl64 221 +#define LX_SYS_gettid 224 +#define LX_SYS_readahead 225 +#define LX_SYS_setxattr 226 +#define LX_SYS_lsetxattr 227 +#define LX_SYS_fsetxattr 228 +#define LX_SYS_getxattr 229 +#define LX_SYS_lgetxattr 230 +#define LX_SYS_fgetxattr 231 +#define LX_SYS_listxattr 232 +#define LX_SYS_llistxattr 233 +#define LX_SYS_flistxattr 234 +#define LX_SYS_removexattr 235 +#define LX_SYS_lremovexattr 236 +#define LX_SYS_fremovexattr 237 +#define LX_SYS_tkill 238 +#define LX_SYS_sendfile64 239 +#define LX_SYS_futex 240 +#define LX_SYS_sched_setaffinity 241 +#define LX_SYS_sched_getaffinity 242 +#define LX_SYS_set_thread_area 243 +#define LX_SYS_get_thread_area 244 +#define LX_SYS_fadvise64 250 +#define LX_SYS_exit_group 252 +#define LX_SYS_remap_file_pages 257 +#define LX_SYS_set_tid_address 258 +#define LX_SYS_timer_create 259 +#define LX_SYS_timer_settime 260 +#define LX_SYS_timer_gettime 261 +#define LX_SYS_timer_getoverrun 262 +#define LX_SYS_timer_delete 263 +#define LX_SYS_clock_settime 264 +#define LX_SYS_clock_gettime 265 +#define LX_SYS_clock_getres 266 +#define LX_SYS_clock_nanosleep 267 +#define LX_SYS_tgkill 270 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_SYSCALL_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h new file mode 100644 index 0000000000..08c824d5b4 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h @@ -0,0 +1,211 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_SYSV_IPC_H +#define _LX_SYSV_IPC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * msg-related definitions. + */ +#define LX_IPC_CREAT 00001000 +#define LX_IPC_EXCL 00002000 +#define LX_IPC_NOWAIT 00004000 + +#define LX_IPC_RMID 0 +#define LX_IPC_SET 1 +#define LX_IPC_STAT 2 +#define LX_IPC_INFO 3 + +#define LX_IPC_64 0x0100 + +#define LX_SEMOP 1 +#define LX_SEMGET 2 +#define LX_SEMCTL 3 +#define LX_MSGSND 11 +#define LX_MSGRCV 12 +#define LX_MSGGET 13 +#define LX_MSGCTL 14 +#define LX_SHMAT 21 +#define LX_SHMDT 22 +#define LX_SHMGET 23 +#define LX_SHMCTL 24 + +#define LX_MSG_STAT 11 +#define LX_MSG_INFO 12 + +#define LX_MSG_NOERROR 010000 + +/* + * Linux hard codes the maximum msgbuf length to be 8192 bytes. Really. 
+ */ +#define LX_MSGMAX 8192 + +struct lx_ipc_perm { + key_t key; + uid_t uid; + uid_t gid; + uid_t cuid; + uid_t cgid; + ushort_t mode; + ushort_t _pad1; + ushort_t seq; + ushort_t _pad2; + ulong_t _unused1; + ulong_t _unused2; +}; + +struct lx_msqid_ds { + struct lx_ipc_perm msg_perm; + time_t msg_stime; + ulong_t _unused1; + time_t msg_rtime; + ulong_t _unused2; + time_t msg_ctime; + ulong_t _unused3; + ulong_t msg_cbytes; + ulong_t msg_qnum; + ulong_t msg_qbytes; + pid_t msg_lspid; + pid_t msg_lrpid; + ulong_t _unused4; + ulong_t _unused5; +}; + +struct lx_msginfo { + int msgpool; + int msgmap; + int msgmax; + int msgmnb; + int msgmni; + int msgssz; + int msgtql; + ushort_t msgseg; +}; + +/* + * semaphore-related definitions. + */ +#define LX_GETPID 11 +#define LX_GETVAL 12 +#define LX_GETALL 13 +#define LX_GETNCNT 14 +#define LX_GETZCNT 15 +#define LX_SETVAL 16 +#define LX_SETALL 17 +#define LX_SEM_STAT 18 +#define LX_SEM_INFO 19 +#define LX_SEM_UNDO 0x1000 +#define LX_SEMVMX 32767 + +struct lx_semid_ds { + struct lx_ipc_perm sem_perm; + time_t sem_otime; + ulong_t _unused1; + time_t sem_ctime; + ulong_t _unused2; + ulong_t sem_nsems; + ulong_t _unused3; + ulong_t _unused4; +}; + +struct lx_seminfo { + int semmap; + int semmni; + int semmns; + int semmnu; + int semmsl; + int semopm; + int semume; + int semusz; + int semvmx; + int semaem; +}; + +union lx_semun { + int val; + struct lx_semid_ds *semds; + ushort_t *sems; + struct lx_seminfo *info; + uintptr_t dummy; +}; + +/* + * shm-related definitions + */ +#define LX_SHM_LOCKED 02000 +#define LX_SHM_RDONLY 010000 +#define LX_SHM_RND 020000 +#define LX_SHM_REMAP 040000 + +#define LX_SHM_LOCK 11 +#define LX_SHM_UNLOCK 12 +#define LX_SHM_STAT 13 +#define LX_SHM_INFO 14 + +struct lx_shmid_ds { + struct lx_ipc_perm shm_perm; + size_t shm_segsz; + time_t shm_atime; + ulong_t _unused1; + time_t shm_dtime; + ulong_t _unused2; + time_t shm_ctime; + ulong_t _unused3; + pid_t shm_cpid; + pid_t shm_lpid; + ushort_t 
shm_nattch; + ulong_t _unused4; + ulong_t _unused5; +}; + +struct lx_shm_info { + int used_ids; + ulong_t shm_tot; + ulong_t shm_rss; + ulong_t shm_swp; + ulong_t swap_attempts; + ulong_t swap_successes; +}; + +struct lx_shminfo { + int shmmax; + int shmmin; + int shmmni; + int shmseg; + int shmall; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SYSV_IPC_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h new file mode 100644 index 0000000000..9c1952bfe7 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LX_THREAD_H +#define _SYS_LX_THREAD_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <thread.h> + +typedef struct lx_tsd { + uintptr_t lxtsd_gs; + int lxtsd_exit; + int lxtsd_exit_status; + ucontext_t lxtsd_exit_context; +} lx_tsd_t; + +extern thread_key_t lx_tsd_key; /* thread-specific Linux %gs value */ + +extern void lx_swap_gs(long, long *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_THREAD_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h new file mode 100644 index 0000000000..a56fe8eeb3 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thunk_server.h @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_THUNK_SERVER_H +#define _LX_THUNK_SERVER_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <netdb.h> +#include <procfs.h> + +/* + * Binary that should be exec'd to start up the thunking server + */ +#define LXT_SERVER_BINARY "/native/usr/lib/brand/lx/lx_thunk" + +/* + * When the thunking server is started it will need to communicate + * with the client via two fifos. These fifos will be passed to the + * thunking server via the following file descriptors: + */ +#define LXT_SERVER_FIFO_RD_FD 3 +#define LXT_SERVER_FIFO_WR_FD 4 + +/* + * Operations supported by the thunking server + */ +#define LXT_SERVER_OP_MIN 0 +#define LXT_SERVER_OP_PING 0 +#define LXT_SERVER_OP_NAME2HOST 1 +#define LXT_SERVER_OP_ADDR2HOST 2 +#define LXT_SERVER_OP_NAME2SERV 3 +#define LXT_SERVER_OP_PORT2SERV 4 +#define LXT_SERVER_OP_OPENLOG 5 +#define LXT_SERVER_OP_SYSLOG 6 +#define LXT_SERVER_OP_CLOSELOG 7 +#define LXT_SERVER_OP_MAX 8 + +/* + * Macros used to translate pointers into offsets for when they are + * being transmitted between the client and server processes. + * + * NOTE: We're going to add 1 to every offset value. The reason + * for this is that some of the pointers we're converting to offsets are + * stored in NULL terminated arrays, and if one of the members of + * one of these arrays happened to be at the beginning of the storage + * buffer it would have an offset of 0 and when the client tries to + * translate the offsets back into pointers it wouldn't be able + * to differentiate between the 0 offset and the end of the array.
+ */ +#define LXT_PTR_TO_OFFSET(ptr, base) \ + ((void *)((uintptr_t)(ptr) - (uintptr_t)(base) + 1)) +#define LXT_OFFSET_TO_PTR(offset, base) \ + ((void *)((uintptr_t)(offset) + (uintptr_t)(base) - 1)) + +/* + * Structures passed to the thunking server via door calls + */ +typedef struct lxt_server_arg { + int lxt_sa_op; + int lxt_sa_success; + int lxt_sa_errno; + char lxt_sa_data[1]; +} lxt_server_arg_t; + +typedef struct lxt_gethost_arg { + struct hostent lxt_gh_result; + + int lxt_gh_h_errno; + + int lxt_gh_type; + int lxt_gh_token_len; + int lxt_gh_buf_len; + + int lxt_gh_storage_len; + char lxt_gh_storage[1]; +} lxt_gethost_arg_t; + +typedef struct lxt_getserv_arg { + struct servent lxt_gs_result; + + int lxt_gs_token_len; + int lxt_gs_buf_len; + char lxt_gs_proto[5]; + + int lxt_gs_storage_len; + char lxt_gs_storage[1]; +} lxt_getserv_arg_t; + +typedef struct lxt_openlog_arg { + int lxt_ol_logopt; + int lxt_ol_facility; + char lxt_ol_ident[128]; +} lxt_openlog_arg_t; + +typedef struct lxt_syslog_arg { + int lxt_sl_priority; + pid_t lxt_sl_pid; + char lxt_sl_progname[PRFNSZ]; + char lxt_sl_message[1024]; +} lxt_syslog_arg_t; + + +/* + * Functions called by the brand library to manage startup of the + * thunk server process. + */ +void lxt_server_init(int, char *[]); +int lxt_server_pid(int *pid); +void lxt_server_exec_check(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_THUNK_SERVER_H */ diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h new file mode 100644 index 0000000000..6152634459 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_types.h @@ -0,0 +1,108 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_TYPES_H +#define _SYS_LX_TYPES_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SHRT_MIN (-32768) /* min value of a "short int" */ +#define SHRT_MAX 32767 /* max value of a "short int" */ +#define USHRT_MAX 65535 /* max of "unsigned short int" */ +#define INT_MIN (-2147483647-1) /* min value of an "int" */ +#define INT_MAX 2147483647 /* max value of an "int" */ +#define UINT_MAX 4294967295U /* max value of an "unsigned int" */ +#define LONG_MIN (-2147483647L-1L) + /* min value of a "long int" */ +#define LONG_MAX 2147483647L /* max value of a "long int" */ +#define ULONG_MAX 4294967295UL /* max of "unsigned long int" */ + +#define LX_SYS_UTS_LN 65 + +struct lx_utsname { + char sysname[LX_SYS_UTS_LN]; + char nodename[LX_SYS_UTS_LN]; + char release[LX_SYS_UTS_LN]; + char version[LX_SYS_UTS_LN]; + char machine[LX_SYS_UTS_LN]; + char domainname[LX_SYS_UTS_LN]; +}; + +typedef uint64_t lx_dev_t; +typedef uint16_t lx_dev16_t; +typedef uint32_t lx_ino_t; +typedef uint64_t lx_ino64_t; +typedef uint32_t lx_uid_t; +typedef uint16_t lx_uid16_t; +typedef uint32_t lx_gid_t; +typedef uint16_t lx_gid16_t; +typedef uint32_t lx_off_t; +typedef uint64_t lx_off64_t; +typedef uint32_t lx_blksize_t; +typedef 
uint32_t lx_blkcnt_t; +typedef uint64_t lx_blkcnt64_t; +typedef ulong_t lx_mode_t; +typedef uint16_t lx_mode16_t; + +#define LX_UID16_TO_UID32(uid16) \ + (((uid16) == (lx_uid16_t)-1) ? ((lx_uid_t)-1) : (lx_uid_t)(uid16)) + +#define LX_GID16_TO_GID32(gid16) \ + (((gid16) == (lx_gid16_t)-1) ? ((lx_gid_t)-1) : (lx_gid_t)(gid16)) + +/* Overflow values default to NFS nobody. */ + +#define UID16_OVERFLOW ((lx_uid16_t)65534) +#define GID16_OVERFLOW ((lx_gid16_t)65534) + +/* + * All IDs with high word non-zero are converted to default overflow values to + * avoid inadvertent truncation to zero (root) (!). + */ +#define LX_UID32_TO_UID16(uid32) \ + ((((uid32) & 0xffff0000) == 0) ? ((lx_uid16_t)(uid32)) : \ + (((uid32) == ((lx_uid_t)-1)) ? ((lx_uid16_t)-1) : UID16_OVERFLOW)) + +#define LX_GID32_TO_GID16(gid32) \ + ((((gid32) & 0xffff0000) == 0) ? ((lx_gid16_t)(gid32)) : \ + (((gid32) == ((lx_gid_t)-1)) ? ((lx_gid16_t)-1) : GID16_OVERFLOW)) + +struct lx_timespec { + time_t ts_sec; + long ts_nsec; +}; + +#define LX_32TO64(lo, hi) \ + ((uint64_t)((uint64_t)(lo) | ((uint64_t)(hi) << 32))) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_TYPES_H */ diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile new file mode 100644 index 0000000000..9142ef2986 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../../Makefile.lib + +SUBDIRS = $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +LINT_SUBDIRS = $(MACH) +$(BUILD64)LINT_SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber: $(ROOTBRANDDIR) $(ROOTBRANDDIR64) $(SUBDIRS) + +lint: $(LINT_SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com new file mode 100644 index 0000000000..a0fd9da3fe --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/Makefile.com @@ -0,0 +1,68 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = lx_nametoaddr.a +VERS = .1 + +COBJS = lx_nametoaddr.o +OBJECTS = $(COBJS) + +include ../../../../Makefile.lib +include ../../Makefile.lx + +MAPFILES = ../common/mapfile-vers +MAPOPTS = $(MAPFILES:%=-M%) + +CSRCS = $(COBJS:%o=../common/%c) +SRCS = $(CSRCS) + +SRCDIR = ../common +LX_THUNK = ../../lx_thunk + +ASFLAGS += -P -D_ASM +LDLIBS += -lc -lnsl +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../ -I$(LX_THUNK) +DYNFLAGS += $(MAPOPTS) '-R$$ORIGIN' + +LIBS = $(DYNLIB) + +LINTFLAGS += $(LX_THUNK)/$(MACH)/llib-llx_thunk.ln +LINTFLAGS64 += $(LX_THUNK)/$(MACH64)/llib-llx_thunk.ln + +CLEANFILES = $(DYNLIB) +ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx +ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64) + +.KEEP_STATE: + +all: $(DYNLIB) + +lint: lintcheck + +include ../../../../Makefile.targ diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile new file mode 100644 index 0000000000..5945014469 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/amd64/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com +include $(SRC)/lib/Makefile.lib.64 + +DYNFLAGS += $(LX_THUNK)/$(MACH64)/lx_thunk.so.1 +CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB) + +install: $(ROOTLIBDIR64) all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c new file mode 100644 index 0000000000..4cd6e67679 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/lx_nametoaddr.c @@ -0,0 +1,478 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * BrandZ lx name services translation library. + * + * This library is specified as the default name services translation + * library in a custom netconfig(4) file that is only used when running + * native solaris processes in a Linux branded zone. 
+ * + * What this means is that when a native solaris process runs in a + * Linux branded zone and issues a name service request to libnsl.so + * (either directly or indirectly via any libraries the program may + * be linked against) libnsl.so will dlopen(3C) this library and call + * into it to service these requests. + * + * This library is in turn linked against lx_thunk.so and will attempt + * to call interfaces in lx_thunk.so to resolve these requests. The + * functions that are called in lx_thunk.so are designed to have the + * same signature and behavior as the existing solaris name service + * interfaces. The name services interfaces we call are: + * + * Native Interface -> lx_thunk.so Interface + * ---------------- -> --------------------- + * gethostbyname_r -> lxt_gethostbyname_r + * gethostbyaddr_r -> lxt_gethostbyaddr_r + * getservbyname_r -> lxt_getservbyname_r + * getservbyport_r -> lxt_getservbyport_r + * + * This library also uses one additional interface from lx_thunk.so: + * lxt_debug + * Informational debugging messages are sent to lx_thunk.so via this + * interface and that library can decide if it wants to drop the + * messages or output them somewhere. + */ + +#include <assert.h> +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netdir.h> +#include <nss_dbdefs.h> +#include <rpc/clnt.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/varargs.h> +#include <sys/wait.h> +#include <thread.h> +#include <tiuser.h> +#include <unistd.h> +#include <sys/lx_thunk.h> + + +/* + * Private nametoaddr library interfaces.
+ */ +static int +netconfig_is_ipv4(struct netconfig *config) +{ + int i; + /* + * If we look at the rpc services registered on a Linux system + * (this can be done via rpcinfo(1M)) for both on the loopback + * interface and on any remote interfaces we only see services + * registered for tcp and udp. So here we'll limit our support + * to these transports. + */ + char *ipv4_netids[] = { + "tcp", + "udp", + NULL + }; + + for (i = 0; ipv4_netids[i] != NULL; i++) { + if (strcmp(ipv4_netids[i], config->nc_netid) == 0) + return (1); + } + return (0); +} + +/* + * Public nametoaddr library interfaces. + * + * These are the functional entry points that libnsl will lookup (via + * the symbol names) when it loads this nametoaddr translation library. + */ + +/* + * _netdir_getbyname() returns all of the addresses for + * a specified host and service. + */ +struct nd_addrlist * +_netdir_getbyname(struct netconfig *netconfigp, + struct nd_hostserv *nd_hostservp) +{ + struct nd_addrlist *rp = NULL; + struct netbuf *nbp = NULL; + struct sockaddr_in *sap = NULL; + struct hostent n2h_result; + struct servent n2s_result; + char *n2h_buf = NULL, *n2s_buf = NULL; + int h_errno, i, host_self = 0, r_count; + int n2h_count = 0, n2s_count = 0; + + lxt_debug("_netdir_getbyname: request recieved\n"); + + /* Make sure this is an ipv4 request. */ + if (!netconfig_is_ipv4(netconfigp)) { + _nderror = ND_BADARG; + goto fail; + } + + /* Allocate memory for the queries. */ + if (((n2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) || + ((n2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL)) + goto malloc_fail; + + /* Check if the host name specified is HOST_SELF. */ + if (strcmp(nd_hostservp->h_host, HOST_SELF) == 0) + host_self = 1; + + /* + * If the hostname specified is HOST_SELF, the we're just + * just doing a service lookup so don't bother with trying + * to lookup the host name. + */ + if (!host_self) { + /* Resolve the hostname. 
*/ + lxt_debug("_netdir_getbyname: " + "resolving host name: %s\n", nd_hostservp->h_host); + if (lxt_gethostbyname_r(nd_hostservp->h_host, &n2h_result, + n2h_buf, NSS_BUFLEN_HOSTS, &h_errno) == NULL) { + if (errno == ERANGE) { + _nderror = ND_SYSTEM; + } else if (h_errno == HOST_NOT_FOUND) { + _nderror = ND_NOHOST; + } else if (h_errno == TRY_AGAIN) { + _nderror = ND_TRY_AGAIN; + } else if (h_errno == NO_RECOVERY) { + _nderror = ND_NO_RECOVERY; + } else if (h_errno == NO_DATA) { + _nderror = ND_NO_DATA; + } else { + _nderror = ND_SYSTEM; + } + goto fail; + } + while (n2h_result.h_addr_list[n2h_count++] != NULL); + n2h_count--; + } + + if (nd_hostservp->h_serv != NULL) { + /* Resolve the service name */ + lxt_debug("_netdir_getbyname: " + "resolving service name: %s\n", nd_hostservp->h_serv); + if (lxt_getservbyname_r(nd_hostservp->h_serv, + netconfigp->nc_proto, &n2s_result, + n2s_buf, NSS_BUFLEN_SERVICES) == NULL) { + _nderror = ND_SYSTEM; + goto fail; + } + n2s_count = 1; + } + + /* Make sure we got some results. */ + if ((n2h_count + n2s_count) == 0) { + lxt_debug("_netdir_getbyname: no results!\n"); + goto exit; + } + r_count = (n2h_count != 0) ? n2h_count : 1; + + /* + * Allocate the return buffers. These buffers will be free'd + * by libnsl`netdir_free(), so we need to allocate them in the + * way that libnsl`netdir_free() expects. + */ + if (((rp = calloc(1, sizeof (struct nd_addrlist))) == NULL) || + ((nbp = calloc(1, sizeof (struct netbuf) * r_count)) == NULL) || + ((sap = calloc(1, sizeof (struct sockaddr_in) * r_count)) == NULL)) + goto malloc_fail; + + /* Initialize the structures we're going to return. */ + rp->n_cnt = r_count; + rp->n_addrs = nbp; + for (i = 0; i < r_count; i++) { + + /* Initialize the netbuf. */ + nbp[i].maxlen = nbp[i].len = sizeof (struct sockaddr_in); + nbp[i].buf = (char *)&sap[i]; + + /* Initialize the sockaddr_in. */ + sap[i].sin_family = AF_INET; + + /* If we looked up any host address copy them out. 
*/ + if (!host_self) + bcopy(n2h_result.h_addr_list[i], &sap[i].sin_addr, + sizeof (sap[i].sin_addr)); + + /* If we looked up any service ports copy them out. */ + if (nd_hostservp->h_serv != NULL) + sap[i].sin_port = n2s_result.s_port; + } + + /* We're finally done. */ + lxt_debug("_netdir_getbyname: success\n"); + return (rp); + +malloc_fail: + _nderror = ND_NOMEM; + +fail: + lxt_debug("_netdir_getbyname: failed!\n"); + +exit: + if (n2h_buf == NULL) + free(n2h_buf); + if (n2s_buf == NULL) + free(n2s_buf); + if (rp == NULL) + free(rp); + if (nbp == NULL) + free(nbp); + if (sap == NULL) + free(sap); + return (NULL); +} + +/* + * _netdir_getbyaddr() takes an address (hopefully obtained from + * someone doing a _netdir_getbyname()) and returns all hosts with + * that address. + */ +struct nd_hostservlist * +/*ARGSUSED*/ +_netdir_getbyaddr(struct netconfig *netconfigp, struct netbuf *nbp) +{ + struct nd_hostservlist *rp = NULL; + struct nd_hostserv *hsp = NULL; + struct sockaddr_in *sap; + struct servent p2s_result; + struct hostent a2h_result; + char *a2h_buf = NULL, *p2s_buf = NULL; + int h_errno, r_count, i; + int a2h_count = 0, p2s_count = 0; + + lxt_debug("_netdir_getbyaddr: request recieved\n"); + + /* Make sure this is an ipv4 request. */ + if (!netconfig_is_ipv4(netconfigp)) { + _nderror = ND_BADARG; + goto fail; + } + + /* + * Make sure the netbuf contains one struct sockaddr_in of + * type AF_INET. + */ + if ((nbp->len != sizeof (struct sockaddr_in)) || + (nbp->len < nbp->maxlen)) { + _nderror = ND_BADARG; + goto fail; + } + /*LINTED*/ + sap = (struct sockaddr_in *)nbp->buf; + if (sap->sin_family != AF_INET) { + _nderror = ND_BADARG; + goto fail; + } + + /* Allocate memory for the queries. 
*/ + if (((a2h_buf = malloc(NSS_BUFLEN_HOSTS)) == NULL) || + ((p2s_buf = malloc(NSS_BUFLEN_SERVICES)) == NULL)) + goto malloc_fail; + + if (sap->sin_addr.s_addr != INADDR_ANY) { + lxt_debug("_netdir_getbyaddr: " + "resolving host address: 0x%x\n", sap->sin_addr.s_addr); + if (lxt_gethostbyaddr_r((char *)&sap->sin_addr.s_addr, + sizeof (sap->sin_addr.s_addr), AF_INET, + &a2h_result, a2h_buf, NSS_BUFLEN_HOSTS, + &h_errno) == NULL) { + if (errno == ERANGE) { + _nderror = ND_SYSTEM; + } else if (h_errno == HOST_NOT_FOUND) { + _nderror = ND_NOHOST; + } else if (h_errno == TRY_AGAIN) { + _nderror = ND_TRY_AGAIN; + } else if (h_errno == NO_RECOVERY) { + _nderror = ND_NO_RECOVERY; + } else if (h_errno == NO_DATA) { + _nderror = ND_NO_DATA; + } else { + _nderror = ND_SYSTEM; + } + goto fail; + } + while (a2h_result.h_aliases[a2h_count++] != NULL); + /* + * We need to count a2h_result.h_name as a valid name for + * for the address we just looked up. Of course a2h_count + * is actually over estimated by one, so instead of + * decrementing it here we'll just leave it as it to + * account for a2h_result.h_name. + */ + } + + if (sap->sin_port != 0) { + lxt_debug("_netdir_getbyaddr: " + "resolving service port: 0x%x\n", sap->sin_port); + if (lxt_getservbyport_r(sap->sin_port, + netconfigp->nc_proto, &p2s_result, + p2s_buf, NSS_BUFLEN_SERVICES) == NULL) { + _nderror = ND_SYSTEM; + goto fail; + } + p2s_count = 1; + } + + /* Make sure we got some results. */ + if ((a2h_count + p2s_count) == 0) { + lxt_debug("_netdir_getbyaddr: no results!\n"); + goto exit; + } + r_count = (a2h_count != 0) ? a2h_count : 1; + + /* + * Allocate the return buffers. These buffers will be free'd + * by libnsl`netdir_free(), so we need to allocate them in the + * way that libnsl`netdir_free() expects. 
+ */ + if (((rp = calloc(1, sizeof (struct nd_hostservlist))) == NULL) || + ((hsp = calloc(1, sizeof (struct nd_hostserv) * r_count)) == NULL)) + goto malloc_fail; + + lxt_debug("_netdir_getbyaddr: hahaha0 - %d\n", r_count); + rp->h_cnt = r_count; + rp->h_hostservs = hsp; + for (i = 0; i < r_count; i++) { + /* If we looked up any host names copy them out. */ + lxt_debug("_netdir_getbyaddr: hahaha1 - %d\n", r_count); + if ((a2h_count > 0) && (i == 0) && + ((hsp[i].h_host = strdup(a2h_result.h_name)) == NULL)) + goto malloc_fail; + + if ((a2h_count > 0) && (i > 0) && + ((hsp[i].h_host = + strdup(a2h_result.h_aliases[i - 1])) == NULL)) + goto malloc_fail; + + lxt_debug("_netdir_getbyaddr: hahaha2 - %d\n", r_count); + /* If we looked up any service names copy them out. */ + if ((p2s_count > 0) && + ((hsp[i].h_serv = strdup(p2s_result.s_name)) == NULL)) + goto malloc_fail; + lxt_debug("_netdir_getbyaddr: hahaha3 - %d\n", r_count); + } + + /* We're finally done. */ + lxt_debug("_netdir_getbyaddr: success\n"); + return (rp); + +malloc_fail: + _nderror = ND_NOMEM; + +fail: + lxt_debug("_netdir_getbyaddr: failed!\n"); + +exit: + if (a2h_buf == NULL) + free(a2h_buf); + if (p2s_buf == NULL) + free(p2s_buf); + if (rp == NULL) + free(rp); + if (hsp != NULL) { + for (i = 0; i < r_count; i++) { + if (hsp[i].h_host != NULL) + free(hsp[i].h_host); + if (hsp[i].h_serv != NULL) + free(hsp[i].h_serv); + } + free(hsp); + } + return (NULL); +} + +char * +/* ARGSUSED */ +_taddr2uaddr(struct netconfig *netconfigp, struct netbuf *nbp) +{ + extern char *inet_ntoa_r(); + + struct sockaddr_in *sa; + char tmp[RPC_INET6_MAXUADDRSIZE]; + unsigned short myport; + + if (netconfigp == NULL || nbp == NULL || nbp->buf == NULL) { + _nderror = ND_BADARG; + return (NULL); + } + + if (strcmp(netconfigp->nc_protofmly, NC_INET) != 0) { + /* we only support inet address translation */ + assert(0); + _nderror = ND_SYSTEM; + return (NULL); + } + + /* LINTED pointer cast */ + sa = (struct sockaddr_in 
*)(nbp->buf); + myport = ntohs(sa->sin_port); + (void) inet_ntoa_r(sa->sin_addr, tmp); + + (void) sprintf(tmp + strlen(tmp), ".%d.%d", + myport >> 8, myport & 255); + return (strdup(tmp)); /* Doesn't return static data ! */ +} + +/* + * _uaddr2taddr() translates a universal address back into a + * netaddr structure. Since the universal address is a string, + * put that into the TLI buffer (making sure to change all \ddd + * characters back and strip off the trailing \0 character). + */ +struct netbuf * +/* ARGSUSED */ +_uaddr2taddr(struct netconfig *netconfigp, char *uaddr) +{ + assert(0); + _nderror = ND_SYSTEM; + return (NULL); +} + +/* + * _netdir_options() is a "catch-all" routine that does + * transport specific things. The only thing that these + * routines have to worry about is ND_MERGEADDR. + */ +int +/* ARGSUSED */ +_netdir_options(struct netconfig *netconfigp, int option, int fd, void *par) +{ + assert(0); + _nderror = ND_SYSTEM; + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers new file mode 100644 index 0000000000..5289915a27 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_nametoaddr/common/mapfile-vers @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+#
+# Export only the five netdir entry points listed below; every other
+# symbol in the object is reduced to local scope.
+#
+SUNWprivate_1.1 {
+	global:
+		_netdir_getbyname;
+		_netdir_getbyaddr;
+		_taddr2uaddr;
+		_uaddr2taddr;
+		_netdir_options;
+
+	local:
+		*;
+};
diff --git a/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
new file mode 100644
index 0000000000..9dc82eb64f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_nametoaddr/i386/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+
+# Put the $(MACH) build of lx_thunk.so.1 on the link line of this library.
+DYNFLAGS += $(LX_THUNK)/$(MACH)/lx_thunk.so.1
+# clobber also removes the installed copies of the library and lint library.
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+
+install: $(ROOTLIBDIR) all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_support/Makefile b/usr/src/lib/brand/lx/lx_support/Makefile
new file mode 100644
index 0000000000..aead8b6762
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/Makefile
@@ -0,0 +1,55 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+# lx_support is a single program; OBJS names the same file as PROG.
+PROG = lx_support
+PROGS = $(PROG)
+OBJS = lx_support
+
+all: $(PROG)
+
+include ../Makefile.lx
+include ../../../../cmd/Makefile.cmd
+
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(OBJS) $(ROOTPROGS)
+
+UTSBASE = ../../../../uts
+
+# The lx brand headers come from the kernel tree under $(UTSBASE).
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx
+LDLIBS += -lzonecfg
+
+.KEEP_STATE:
+
+install: all $(ROOTBRANDDIR) $(ROOTPROGS)
+
+clean:
+	$(RM) $(PROG) $(OBJS)
+
+lint: lint_PROG
+
+include ../../../../cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c
new file mode 100644
index 0000000000..560b76a685
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/lx_support.c
@@ -0,0 +1,537 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * lx_support is a small cli utility used to perform some brand-specific
+ * tasks when booting, halting, or verifying a zone.  This utility is not
+ * intended to be called by users - it is intended to be invoked by the
+ * zones utilities.
+ */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <stropts.h> +#include <sys/ioccom.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/varargs.h> +#include <unistd.h> +#include <libintl.h> +#include <locale.h> + +#include <libzonecfg.h> +#include <sys/lx_audio.h> +#include <sys/lx_brand.h> + +#define CP_CMD "/usr/bin/cp" +#define MOUNT_CMD "/sbin/mount" + +#define LXA_AUDIO_DEV "/dev/brand/lx/audio_devctl" +#define INTSTRLEN 32 + +static char *bname = NULL; +static char *zonename = NULL; +static char *zoneroot = NULL; + +#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ +#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ +#endif + +static void +lxs_err(char *msg, ...) +{ + char buf[1024]; + va_list ap; + + va_start(ap, msg); + /*LINTED*/ + (void) vsnprintf(buf, sizeof (buf), msg, ap); + va_end(ap); + + (void) printf("%s error: %s\n", bname, buf); + + exit(1); + /*NOTREACHED*/ +} + +/* + * The Linux init(1M) command requires communication over the /dev/initctl + * FIFO. Since any attempt to create a file in /dev will fail, we must + * create it here. 
+ */ +static void +lxs_make_initctl() +{ + char cmdbuf[ARG_MAX]; + char path[MAXPATHLEN]; + char special[MAXPATHLEN]; + struct stat buf; + int err; + + if (snprintf(special, sizeof (special), "%s/dev/initctl", zoneroot) >= + sizeof (special)) + lxs_err("%s: %s", gettext("Failed to create /dev/initctl"), + gettext("zoneroot is too long")); + + if (snprintf(path, sizeof (path), "%s/root/dev/initctl", zoneroot) >= + sizeof (path)) + lxs_err("%s: %s", gettext("Failed to create /dev/initctl"), + gettext("zoneroot is too long")); + + /* create the actual fifo as <zoneroot>/dev/initctl */ + if (stat(special, &buf) != 0) { + err = errno; + if (err != ENOENT) + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + strerror(err)); + if (mkfifo(special, 0644) < 0) { + err = errno; + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + strerror(err)); + } + } else { + if ((buf.st_mode & S_IFIFO) == 0) + lxs_err("%s: %s", + gettext("Failed to create /dev/initctl"), + gettext("It already exists, and is not a FIFO.")); + } + + /* + * now lofs mount the <zoneroot>/dev/initctl fifo onto + * <zoneroot>/root/dev/initctl + */ + if (snprintf(cmdbuf, sizeof (cmdbuf), "%s -F lofs %s %s", MOUNT_CMD, + special, path) >= sizeof (cmdbuf)) + lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"), + gettext("zoneroot is too long")); + + if (system(cmdbuf) < 0) { + err = errno; + lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"), + strerror(err)); + } +} + +/* + * fsck gets really confused when run inside a zone. 
Removing this file + * prevents it from running + */ +static void +lxs_remove_autofsck() +{ + char path[MAXPATHLEN]; + int err; + + if (snprintf(path, MAXPATHLEN, "%s/root/.autofsck", zoneroot) >= + MAXPATHLEN) + lxs_err("%s: %s", gettext("Failed to remove /.autofsck"), + gettext("zoneroot is too long")); + + if (unlink(path) < 0) { + err = errno; + if (err != ENOENT) + lxs_err("%s: %s", + gettext("Failed to remove /.autofsck"), + strerror(err)); + } +} + +/* + * Extract any lx-supported attributes from the zone configuration file. + */ +static void +lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio, + char **idev, char **odev) +{ + struct zone_attrtab attrtab; + int err; + + /* initialize the attribute iterator */ + if (zonecfg_setattrent(zdh) != Z_OK) { + zonecfg_fini_handle(zdh); + lxs_err(gettext("error accessing zone configuration")); + } + + *idev = (char *)malloc(INTSTRLEN); + *odev = (char *)malloc(INTSTRLEN); + if (*idev == NULL || *odev == NULL) + lxs_err(gettext("out of memory")); + + *audio = B_FALSE; + *restart = B_FALSE; + bzero(*idev, INTSTRLEN); + bzero(*odev, INTSTRLEN); + while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) { + if ((strcmp(attrtab.zone_attr_name, "init-restart") == 0) && + (zonecfg_get_attr_boolean(&attrtab, restart) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio") == 0) && + (zonecfg_get_attr_boolean(&attrtab, audio) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio-inputdev") == 0) && + (zonecfg_get_attr_string(&attrtab, *idev, + INTSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "audio-outputdev") == 0) && + (zonecfg_get_attr_string(&attrtab, *odev, + INTSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + 
attrtab.zone_attr_name); + } + + /* some kind of error while looking up attributes */ + if (err != Z_NO_ENTRY) + lxs_err(gettext("error accessing zone configuration")); +} + +static int +lxs_iodev_ok(char *dev) +{ + int i, j; + + if ((j = strlen(dev)) == 0) + return (1); + if (strcmp(dev, "default") == 0) + return (1); + if (strcmp(dev, "none") == 0) + return (1); + for (i = 0; i < j; i++) { + if (!isdigit(dev[i])) + return (0); + } + return (1); +} + +/* + * The audio configuration settings are read from the zone configuration + * file. Audio configuration is specified via the following attributes + * (settable via zonecfg): + * attr name: audio + * attr type: boolean + * + * attr name: audio-inputdev + * attr type: string + * attr values: "none" | [0-9]+ + * + * attr name: audio-outputdev + * attr type: string + * attr values: "none" | [0-9]+ + * + * The user can enable linux brand audio device (ie /dev/dsp and /dev/mixer) + * for a zone by setting the "audio" attribute to true. (The absence of + * this attribute leads to an assumed value of false.) + * + * If the "audio" attribute is set to true and "audio-inputdev" and + * "audio-outputdev" are not set, then when a linux applications access + * audio devices these access will be mapped to the system default audio + * device, ie /dev/audio and/dev/audioctl. + * + * If "audio-inputdev" is set to none, then audio input will be disabled. + * If "audio-inputdev" is set to an integer, then when a Linux application + * attempts to access audio devices these access will be mapped to + * /dev/sound/<audio-inputdev attribute value>. The same behavior will + * apply to the "audio-outputdev" attribute for linux audio output + * device accesses. + * + * If "audio-inputdev" or "audio-outputdev" exist but the audio attribute + * is missing (or set to false) audio will not be enabled for the zone. 
+ */ +static void +lxs_init_audio(char *idev, char *odev) +{ + int err, fd; + lxa_zone_reg_t lxa_zr; + + /* sanity check the input and output device properties */ + if (!lxs_iodev_ok(idev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-inputdev"); + + if (!lxs_iodev_ok(odev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-outputdev"); + + /* initialize the zone name in the ioctl request */ + bzero(&lxa_zr, sizeof (lxa_zr)); + (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename, + sizeof (lxa_zr.lxa_zr_zone_name)); + + /* initialize the input device property in the ioctl request */ + (void) strlcpy(lxa_zr.lxa_zr_inputdev, idev, + sizeof (lxa_zr.lxa_zr_inputdev)); + if (lxa_zr.lxa_zr_inputdev[0] == '\0') { + /* + * if no input device was specified, set the input device + * to "default" + */ + (void) strlcpy(lxa_zr.lxa_zr_inputdev, "default", + sizeof (lxa_zr.lxa_zr_inputdev)); + } + + /* initialize the output device property in the ioctl request */ + (void) strlcpy(lxa_zr.lxa_zr_outputdev, odev, + sizeof (lxa_zr.lxa_zr_outputdev)); + if (lxa_zr.lxa_zr_outputdev[0] == '\0') { + /* + * if no output device was specified, set the output device + * to "default" + */ + (void) strlcpy(lxa_zr.lxa_zr_outputdev, "default", + sizeof (lxa_zr.lxa_zr_outputdev)); + } + + /* open the audio device control node */ + if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0) + lxs_err(gettext("error accessing lx_audio device")); + + /* enable audio for this zone */ + err = ioctl(fd, LXA_IOC_ZONE_REG, &lxa_zr); + (void) close(fd); + if (err != 0) + lxs_err(gettext("error configuring lx_audio device")); +} + +static int +lxs_boot() +{ + zoneid_t zoneid; + zone_dochandle_t zdh; + boolean_t audio, restart; + char *idev, *odev; + + lxs_make_initctl(); + lxs_remove_autofsck(); + + if ((zdh = zonecfg_init_handle()) == NULL) + lxs_err(gettext("unable to initialize zone handle")); + + if (zonecfg_get_handle((char *)zonename, zdh) != Z_OK) { + zonecfg_fini_handle(zdh); 
+ lxs_err(gettext("unable to load zone configuration")); + } + + /* Extract any relevant attributes from the config file. */ + lxs_getattrs(zdh, &restart, &audio, &idev, &odev); + zonecfg_fini_handle(zdh); + + /* Configure the zone's audio support (if any). */ + if (audio == B_TRUE) + lxs_init_audio(idev, odev); + + /* + * Let the kernel know whether or not this zone's init process + * should be automatically restarted on its death. + */ + if ((zoneid = getzoneidbyname(zonename)) < 0) + lxs_err(gettext("unable to get zoneid")); + if (zone_setattr(zoneid, LX_ATTR_RESTART_INIT, &restart, + sizeof (boolean_t)) == -1) + lxs_err(gettext("error setting zone's restart_init property")); + + return (0); +} + +static int +lxs_halt() +{ + lxa_zone_reg_t lxa_zr; + int fd, rv; + + /* + * We don't bother to check if audio is configured for this zone + * before issuing a request to unconfigure it. There's no real + * reason to do this, it would require looking up the xml zone and + * brand configuration information (which could have been changed + * since the zone was booted), and it would involve more library + * calls there by increasing chances for failure. + */ + + /* initialize the zone name in the ioctl request */ + bzero(&lxa_zr, sizeof (lxa_zr)); + (void) strlcpy(lxa_zr.lxa_zr_zone_name, zonename, + sizeof (lxa_zr.lxa_zr_zone_name)); + + /* open the audio device control node */ + if ((fd = open(LXA_AUDIO_DEV, O_RDWR)) < 0) + lxs_err(gettext("error accessing lx_audio device")); + + /* + * disable audio for this zone + * + * we ignore ENOENT errors here because it's possible that + * audio is not configured for this zone. (either it was + * already unconfigured or someone could have added the + * audio resource to this zone after it was booted.) 
+ */ + rv = ioctl(fd, LXA_IOC_ZONE_UNREG, &lxa_zr); + (void) close(fd); + if ((rv == 0) || (errno == ENOENT)) + return (0); + lxs_err(gettext("error unconfiguring lx_audio device: %s"), + strerror(errno)); + /*NOTREACHED*/ +} + +static int +lxs_verify(char *xmlfile) +{ + zone_dochandle_t handle; + struct zone_fstab fstab; + struct zone_dstab dstab; + struct zone_devtab devtab; + boolean_t audio, restart; + char *idev, *odev; + + if ((handle = zonecfg_init_handle()) == NULL) + lxs_err(gettext("internal libzonecfg.so.1 error"), 0); + + if (zonecfg_get_xml_handle(xmlfile, handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + /* + * Check to see whether the zone has any inherit-pkg-dirs + * configured. + */ + if (zonecfg_setipdent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (zonecfg_getipdent(handle, &fstab) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support inherit-pkg-dirs")); + } + + /* + * Check to see whether the zone has any ZFS datasets configured. + */ + if (zonecfg_setdsent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (zonecfg_getdsent(handle, &dstab) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support ZFS datasets")); + } + + /* + * Check to see whether the zone has any devices configured. + */ + if (zonecfg_setdevent(handle) != Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("zonecfg provided an invalid XML file")); + } + + if (zonecfg_getdevent(handle, &devtab) == Z_OK) { + zonecfg_fini_handle(handle); + lxs_err(gettext("lx zones do not support added devices")); + } + + /* Extract any relevant attributes from the config file. 
*/ + lxs_getattrs(handle, &restart, &audio, &idev, &odev); + zonecfg_fini_handle(handle); + + if (audio) { + /* sanity check the input and output device properties */ + if (!lxs_iodev_ok(idev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-inputdev"); + + if (!lxs_iodev_ok(odev)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "audio-outputdev"); + } + return (0); +} + +static void +usage() +{ + + (void) fprintf(stderr, + gettext("usage:\t%s boot <zoneroot> <zonename>\n"), bname); + (void) fprintf(stderr, + gettext(" \t%s halt <zoneroot> <zonename>\n"), bname); + (void) fprintf(stderr, + gettext(" \t%s verify <xml file>\n\n"), bname); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); + + bname = basename(argv[0]); + + if (argc < 3) + usage(); + + if (strcmp(argv[1], "boot") == 0) { + if (argc != 4) + lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"), + bname, argv[1]); + zoneroot = argv[2]; + zonename = argv[3]; + return (lxs_boot()); + } + + if (strcmp(argv[1], "halt") == 0) { + if (argc != 4) + lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"), + bname, argv[1]); + zoneroot = argv[2]; + zonename = argv[3]; + return (lxs_halt()); + } + + if (strcmp(argv[1], "verify") == 0) { + if (argc != 3) + lxs_err(gettext("usage: %s verify <xml file>"), + bname); + return (lxs_verify(argv[2])); + } + + usage(); + /*NOTREACHED*/ +} diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile b/usr/src/lib/brand/lx/lx_thunk/Makefile new file mode 100644 index 0000000000..9142ef2986 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/Makefile @@ -0,0 +1,52 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../../../Makefile.lib
+
+#
+# $(MACH) is always built; $(MACH64) is added through the $(BUILD64)
+# prefix.  NOTE(review): $(BUILD64) is defined outside this file and
+# presumably disables the line when 64-bit builds are off - confirm.
+#
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+LINT_SUBDIRS = $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
+
+# Each top-level target selects the target passed to the sub-makes below.
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(ROOTBRANDDIR) $(ROOTBRANDDIR64) $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+# Recurse into each subdirectory; FRC forces the rule to run every time.
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_thunk/Makefile.com b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
new file mode 100644
index 0000000000..2a537b0535
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/Makefile.com
@@ -0,0 +1,72 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+LIBRARY = lx_thunk.a
+VERS = .1
+
+COBJS = lx_thunk.o
+OBJECTS = $(COBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+#
+# Since our name doesn't start with "lib", Makefile.lib incorrectly
+# calculates LIBNAME.  Therefore, we set it here.
+#
+LIBNAME = lx_thunk
+
+# Turn each mapfile into a -M<file> option for DYNFLAGS below.
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+# Map each object name to its C source under ../common.
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+ASFLAGS += -P -D_ASM
+LDLIBS += -lc
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I ../../lx_brand \
+	-I$(UTSBASE)/common/brand/lx
+DYNFLAGS += $(MAPOPTS)
+
+# Only the shared object is built.
+LIBS = $(DYNLIB)
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+.KEEP_STATE:
+
+all: $(DYNLIB)
+
+lint: $(LINTLIB) lintcheck
+
+include ../../../../Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
new file mode 100644
index 0000000000..086a3c821a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/amd64/Makefile
@@ -0,0 +1,34 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+# The 64-bit variant installs into, and clobbers from, $(ROOTLIBDIR64).
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: $(ROOTLIBDIR64) all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
new file mode 100644
index 0000000000..8900e3a6d1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_thunk/common/lx_thunk.c
@@ -0,0 +1,1122 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The BrandZ Linux thunking library.
+ *
+ * The interfaces defined in this file form the client side of a bridge
+ * to allow native Solaris processes to access Linux services.  Currently
+ * the Linux services made accessible by these interfaces are:
+ *	- Linux host <-> address naming services
+ *	- Linux service <-> port naming services
+ *	- Linux syslog
+ *
+ * Currently, to use this library it must be LD_PRELOADed into the
+ * application that needs to access Linux services.  Once loaded,
+ * Linux services are accessed by the client application in two
+ * different ways:
+ *
+ * - Direct library calls:
+ *	lxt_gethostbyname_r
+ *	lxt_gethostbyaddr_r
+ *	lxt_getservbyname_r
+ *	lxt_getservbyport_r
+ *	lxt_debug
+ *
+ *   These library functions are used by the BrandZ lx name services
+ *   translation library (lx_nametoaddr.so) to handle libnsl.so name
+ *   service requests.
+ *
+ * - Intercepted library calls:
+ *	openlog(3c)
+ *	syslog(3c)
+ *	vsyslog(3c)
+ *	closelog(3c)
+ *
+ *   Via the LD_PRELOAD mechanism this library interposes itself on
+ *   these interfaces and when the application calls these interfaces
+ *   (either directly or indirectly via any libraries the program may
+ *   be linked against) this library intercepts the request and passes
+ *   it on to a Linux process to handle the request.
+ *
+ * Once this library receives a request that needs to be serviced by a
+ * Linux process, it packs up that request and attempts to send it
+ * to a doors server.  The door server interfaces are defined in
+ * lx_thunk_server.h.
If the doors server is not running or not + * responding, this library will attempt to spawn a new doors server + * by forking and executing the following shell script (which runs as + * a native /bin/sh Linux process): + * /native/usr/lib/brand/lx/lx_thunk + * + * Notes: + * - This library also intercepts the following system calls: + * close(2) - We intercept close(2) to prevent the caller from + * accidentally closing any of the file descriptors we + * need to do our work. + * + * setppriv(2) - We intercept setppriv(2) to prevent a process + * from dropping any of the privileges we'll need to create + * a new lx_thunk server process and to deal with service + * requests. + * + * - To facilitate the running of native Solaris programs and libraries + * when this library is preloaded into an application it will chroot() + * into /native. This way the Solaris application and libraries can + * access files via their expected paths and we can avoid having to + * either do path mapping or modifying all libraries to make them + * aware of "/native" so that they can pre-pend it to all their + * filesystem operations. + * + * - This library can only be used with processes that are initially + * run by root in a zone. The reason is that we use the chroot() + * system call and this requires the PRIV_PROC_CHROOT privilege, + * which non-root users don't have. 
+ */ + +#include <alloca.h> +#include <assert.h> +#include <dlfcn.h> +#include <door.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netdir.h> +#include <priv.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <synch.h> +#include <sys/brand.h> +#include <sys/fcntl.h> +#include <sys/lx_thunk_server.h> +#include <sys/lx_thunk.h> +#include <sys/mman.h> +#include <sys/priv_impl.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <thread.h> +#include <unistd.h> +#include <sys/varargs.h> + +#define LXT_DOOR_DIR "/tmp" +#define LXT_DOOR_PREFIX "lxt" +#define LXT_MSG_MAXLEN (128 + MAXPATHLEN) + +#pragma init(init) + +typedef uintptr_t (*fp1_t)(uintptr_t); +typedef uintptr_t (*fp3_t)(uintptr_t, uintptr_t, uintptr_t); + +static char *lxt_debug_path = NULL; /* debug output file path */ +static char lxt_debug_path_buf[MAXPATHLEN]; +static int root_fd; +static int debug_fd = -1; + +void lxt_debug(const char *msg, ...); + +void +init(void) +{ + if (getenv("LX_DEBUG") != NULL) { + + /* check if there's a debug log file specified */ + lxt_debug_path = getenv("LX_DEBUG_FILE"); + if (lxt_debug_path == NULL) { + /* send all debugging output to /dev/tty */ + lxt_debug_path = "/dev/tty"; + } + + (void) strlcpy(lxt_debug_path_buf, lxt_debug_path, + sizeof (lxt_debug_path_buf)); + lxt_debug_path = lxt_debug_path_buf; + + /* + * Open the debugging output file. We need to open it + * and hold it open because we're going to call chroot() + * in just a second, so we won't be able to open it later. 
+ */ + if ((debug_fd = open(lxt_debug_path, + O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, + 0666)) != -1) { + (void) fchmod(debug_fd, 0666); + } + } + lxt_debug("lxt_init: executing native process"); + + /* Get a fd that points to the root directory */ + if ((root_fd = open("/", O_RDONLY)) < 0) { + lxt_debug("lxt_init(): " + "failed to open root directory: %s", strerror(errno)); + exit(-1); + } + + /* + * Now, so that we can avoid having to do path mapping, + * just chdir() and chroot() into /native. + */ + if (chdir("/native") != 0) { + lxt_debug("lxt_init(): " + "failed to chdir to /native: %s", strerror(errno)); + exit(-1); + } + if (chroot("/native") != 0) { + lxt_debug("lxt_init(): " + "failed to chroot to /native: %s", strerror(errno)); + exit(-1); + } +} + +/* + * Linux Thunking Interfaces - Client Side + */ +static mutex_t lxt_door_lock = DEFAULTMUTEX; +static int lxt_door_fd = -1; + +static void +lxt_server_exec(int fifo_wr, int fifo_rd) +{ + extern const char **environ; + char *nullist[] = { NULL }; + + lxt_debug("lxt_server_exec: server starting"); + + /* + * First we need to dup our fifos to the file descriptors + * the brand library is expecting them to be at. + */ + + /* Check if the write fifo needs to be moved aside */ + if ((fifo_wr == LXT_SERVER_FIFO_RD_FD) && + ((fifo_wr = dup(fifo_wr)) < 0)) + return; + + /* Check if the read fifo needs to be moved aside */ + if ((fifo_rd == LXT_SERVER_FIFO_WR_FD) && + ((fifo_rd = dup(fifo_rd)) < 0)) + return; + + if ((fifo_wr != LXT_SERVER_FIFO_WR_FD) && + (dup2(fifo_wr, LXT_SERVER_FIFO_WR_FD) < 0)) + return; + if ((fifo_rd != LXT_SERVER_FIFO_RD_FD) && + (dup2(fifo_rd, LXT_SERVER_FIFO_RD_FD) < 0)) + return; + + /* + * We're about to execute a native Linux process. + * Since we've been loaded into a Solaris process with + * LD_PRELOAD and LD_LIBRARY_PATH we should clear these + * variables from the environment before calling exec. 
+ */ + (void) unsetenv("LD_PRELOAD"); + (void) unsetenv("LD_LIBRARY_PATH"); + + /* + * Now we need to exec the thunk server process. This is a + * branded Linux process that will act as a doors server and + * service our requests to perform native Linux operations. + * Since we're currently running as a native Solaris process + * to start up the server we'll use the brand system call to + * tell the kernel that the target of the exec will be a branded + * process. + */ + lxt_debug("lxt_server_exec: execing as Linux process"); + (void) syscall(SYS_brand, B_EXEC_BRAND, + LXT_SERVER_BINARY, nullist, environ); +} + + +static void * +lxt_door_waitpid(void *arg) +{ + pid_t child_pid = (pid_t)(uintptr_t)arg; + int stat; + + (void) waitpid(child_pid, &stat, 0); + return (NULL); +} + +static char * +lxt_door_mkfifo() +{ + char *path; + + for (;;) { + path = tempnam(LXT_DOOR_DIR, LXT_DOOR_PREFIX); + if (path == NULL) + return (NULL); + if (mkfifo(path, S_IWUSR | S_IRUSR) != 0) { + if (errno != EEXIST) { + free(path); + return (NULL); + } + /* This file path exists, pick a new name. */ + free(path); + continue; + } + /* We successfully created the fifo */ + break; + } + return (path); +} + +static void +lxt_door_init() +{ + /* + * Both fifo paths must start out NULL: if creating the first fifo + * fails, the second assignment below is short-circuited and the + * fail: cleanup would otherwise test an uninitialized pointer. + */ + char *fifo1_path = NULL, *fifo2_path = NULL; + char fifo1_path_native[MAXPATHLEN]; + int fifo1_rd = -1, fifo1_wr = -1; + int fifo2_rd = -1, fifo2_wr = -1; + int junk; + pid_t child_pid; + thread_t tid; + + lxt_debug("lxt_door_init: preparint to start server"); + + /* Create two new fifos. */ + if (((fifo1_path = lxt_door_mkfifo()) == NULL) || + ((fifo2_path = lxt_door_mkfifo()) == NULL)) + goto fail; + + (void) snprintf(fifo1_path_native, sizeof (fifo1_path_native), + "/native%s", fifo1_path); + + /* + * Open both fifos for reading and writing. We have to open + * the read side of the fifo first (because the write side will + * fail to open if there is no reader) and we have to use the + * O_NONBLOCK flag (because the read open will hang without it). 
+ */ + if (((fifo1_rd = open(fifo1_path, O_RDONLY | O_NONBLOCK)) < 0) || + ((fifo1_wr = open(fifo1_path, O_WRONLY)) < 0) || + ((fifo2_rd = open(fifo2_path, O_RDONLY | O_NONBLOCK)) < 0) || + ((fifo2_wr = open(fifo2_path, O_WRONLY)) < 0)) + goto fail; + + /* + * Now we have to close the read side of fifo1 and fifo2 and re-open + * them without the O_NONBLOCK flag. This is because we're using + * the fifos for synchronization and when we actually try to read + * from them we want to block. + */ + (void) close(fifo1_rd); + if ((fifo1_rd = open(fifo1_path, O_RDONLY)) < 0) + goto fail; + (void) close(fifo2_rd); + if ((fifo2_rd = open(fifo2_path, O_RDONLY)) < 0) + goto fail; + + /* + * Once fifo2 is opened no one will ever need to open it again + * so delete it now. + */ + (void) unlink(fifo2_path); + free(fifo2_path); + fifo2_path = NULL; + + /* Attempt to fork and start the door server */ + lxt_debug("lxt_door_init: starting server"); + switch (child_pid = fork1()) { + case -1: + /* fork1() failed. */ + goto fail; + case 0: + /* Child process - new door server. */ + (void) close(fifo1_rd); + (void) close(fifo2_wr); + + /* Need to chroot back to the real root directory */ + if (fchroot(root_fd) != 0) { + lxt_debug("lxt_server_exec: " + "failed fchroot(\"/\"): %s", strerror(errno)); + exit(-1); + } + (void) close(root_fd); + + /* Start the server */ + lxt_server_exec(fifo1_wr, fifo2_rd); + lxt_debug("lxt_server_exec: server init failed"); + exit(-1); + /*NOTREACHED*/ + } + /* Parent process - door client. */ + + /* + * fifo2 is used to send the door path to the child. + * (We can't simply pass it via the address space since the + * child will need to exec.) We'll write the name of the door + * file to fifo2 before we close the read end of the fifo2 so + * that if the child has exited for some reason we won't get + * a SIGPIPE. Note that we're reusing the name of fifo1 as + * the door path. Also note that we've pre-pended /native + * to the fifo/door path. 
The reason is that we're chroot'ed + * to /native, but when the thunking server executes it will + * be chroot'ed back to the real root directory. + */ + (void) write(fifo2_wr, + fifo1_path_native, strlen(fifo1_path_native) + 1); + (void) close(fifo2_wr); + (void) close(fifo2_rd); + + /* + * Start up a thread that will perform a waitpid() on the child + * door server process. We do this because if the calling + * application that is using our interfaces is forking its own + * children and using wait(), then it won't expect to see our + * children. We take advantage of the fact that if there are + * wait() and waitpid() calls in progress at the same time + * when a child exits, preference will be given to any + * waitpid() calls that are explicitly waiting for that child. + * There is of course a window of time where the child could + * exit after we've forked it but before we've called waitpid() + * where another wait() in this process could collect the result. + * There's nothing we can really do to prevent this short of + * stopping all the other threads in this process. + */ + (void) thr_create(NULL, 0, + lxt_door_waitpid, (void *)(uintptr_t)child_pid, THR_DAEMON, &tid); + + /* + * fifo1 is used for the child process to signal us that the + * door server is ready to take requests. + */ + (void) close(fifo1_wr); + (void) read(fifo1_rd, &junk, 1); + (void) close(fifo1_rd); + + /* If there was a door that was open, close it now. */ + + if (lxt_door_fd >= 0) + (void) close(lxt_door_fd); + /* + * The server should be started up by now and fattach()ed the door + * server to the fifo/door path, so if we re-open that path now we + * should get a fd to the door server. + */ + lxt_door_fd = open(fifo1_path, O_RDWR); + + lxt_debug("lxt_door_init: new server door = %d", lxt_door_fd); + + /* We don't need the fifo/door anymore so delete it. 
*/ + (void) unlink(fifo1_path); + free(fifo1_path); + return; + +fail: + if (fifo1_path != NULL) + (void) unlink(fifo1_path); + if (fifo2_path != NULL) + (void) unlink(fifo2_path); + if (fifo1_rd != -1) + (void) close(fifo1_rd); + if (fifo1_wr != -1) + (void) close(fifo1_wr); + if (fifo2_rd != -1) + (void) close(fifo2_rd); + if (fifo2_wr != -1) + (void) close(fifo2_wr); +} + +static int +lxt_door_call(door_arg_t *door_arg, int lock_held) +{ + int fd; + + if (!lock_held) + (void) mutex_lock(&lxt_door_lock); + + /* Get a copy of lxt_door_fd */ + fd = lxt_door_fd; + + if (!lock_held) + (void) mutex_unlock(&lxt_door_lock); + + if (fd == -1) { + lxt_debug("lxt_door_call: no door available"); + return (-1); + } + + if (door_call(fd, door_arg) != 0) { + lxt_debug("lxt_door_call: call failed"); + return (-1); + } + if (door_arg->rbuf == NULL) { + lxt_debug("lxt_door_call: call returned NULL"); + return (-1); + } + return (0); +} + +static int +lxt_door_request(door_arg_t *door_arg) +{ + door_arg_t door_ping; + lxt_server_arg_t ping_request, *ping_result; + int rv, ping_success = 0; + + /* First just try the door call. */ + lxt_debug("lxt_door_request: calling server"); + if (lxt_door_call(door_arg, 0) == 0) + return (0); + + /* Prepare a door server ping request. */ + bzero(&door_ping, sizeof (door_ping)); + bzero(&ping_request, sizeof (ping_request)); + door_ping.data_ptr = (char *)&ping_request; + door_ping.data_size = sizeof (ping_request); + ping_request.lxt_sa_op = LXT_SERVER_OP_PING; + + (void) mutex_lock(&lxt_door_lock); + + /* Ping the doors server. */ + lxt_debug("lxt_door_request: pinging server"); + if (lxt_door_call(&door_ping, 1) == 0) { + /*LINTED*/ + ping_result = (lxt_server_arg_t *)door_ping.rbuf; + ping_success = ping_result->lxt_sa_success; + (void) munmap(door_ping.rbuf, door_ping.rsize); + } + + if (!ping_success) { + /* The server is not responding so start up a new one. 
*/ + lxt_door_init(); + } + (void) mutex_unlock(&lxt_door_lock); + + /* Retry the original request */ + lxt_debug("lxt_door_request: calling server, retry"); + if ((rv = lxt_door_call(door_arg, 0)) == 0) + return (0); + return (rv); +} + +static struct hostent * +lxt_gethost(int op, const char *token, int token_len, int type, + struct hostent *result, char *buf, int buf_len, int *h_errnop) +{ + door_arg_t door_arg; + lxt_gethost_arg_t *data; + lxt_server_arg_t *request; + int request_size, errno_tmp, i; + + lxt_debug("lxt_gethost: request caught"); + + request_size = sizeof (*request) + sizeof (*data) + + token_len + buf_len - 1; + if ((request = calloc(1, request_size)) == NULL) { + lxt_debug("lxt_gethost: calloc() failed"); + *h_errnop = TRY_AGAIN; + return (NULL); + } + /*LINTED*/ + data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0]; + + /* Initialize the server request. */ + request->lxt_sa_op = op; + data->lxt_gh_type = type; + data->lxt_gh_token_len = token_len; + data->lxt_gh_buf_len = buf_len; + data->lxt_gh_storage_len = token_len + token_len; + bcopy(token, &data->lxt_gh_storage[0], token_len); + + /* Initialize door_call() arguments. */ + bzero(&door_arg, sizeof (door_arg)); + door_arg.data_ptr = (char *)request; + door_arg.data_size = request_size; + + if (lxt_door_request(&door_arg) != 0) { + lxt_debug("lxt_gethost: door_call() failed"); + /* Don't know what caused the error so clear errno. */ + errno = 0; + *h_errnop = ND_SYSTEM; + free(request); + return (NULL); + } + + free(request); + + if (door_arg.rbuf == NULL) { + lxt_debug("lxt_gethost: door_call() returned NULL"); + /* Don't know what caused the error so clear errno. 
*/ + errno = 0; + *h_errnop = ND_SYSTEM; + return (NULL); + } + + /*LINTED*/ + request = (lxt_server_arg_t *)door_arg.rbuf; + /*LINTED*/ + data = (lxt_gethost_arg_t *)&request->lxt_sa_data[0]; + + /* Check if the remote procedure call failed */ + if (!request->lxt_sa_success) { + lxt_debug("lxt_gethost: remote function call failed"); + errno_tmp = request->lxt_sa_errno; + *h_errnop = data->lxt_gh_h_errno; + (void) munmap(door_arg.rbuf, door_arg.rsize); + errno = errno_tmp; + return (NULL); + } + + /* Copy out the results and output buffer. */ + bcopy(&data->lxt_gh_result, result, sizeof (*result)); + bcopy(&data->lxt_gh_storage[token_len], buf, buf_len); + (void) munmap(door_arg.rbuf, door_arg.rsize); + + /* Now go through the results and convert all offsets to pointers */ + result->h_name = LXT_OFFSET_TO_PTR(result->h_name, buf); + result->h_aliases = LXT_OFFSET_TO_PTR(result->h_aliases, buf); + result->h_addr_list = LXT_OFFSET_TO_PTR(result->h_addr_list, buf); + for (i = 0; result->h_aliases[i] != NULL; i++) { + result->h_aliases[i] = + LXT_OFFSET_TO_PTR(result->h_aliases[i], buf); + } + for (i = 0; result->h_addr_list[i] != NULL; i++) { + result->h_addr_list[i] = + LXT_OFFSET_TO_PTR(result->h_addr_list[i], buf); + } + + return (result); +} + +static struct servent * +lxt_getserv(int op, const char *token, const int token_len, const char *proto, + struct servent *result, char *buf, int buf_len) +{ + door_arg_t door_arg; + lxt_getserv_arg_t *data; + lxt_server_arg_t *request; + int request_size, errno_tmp, i; + + lxt_debug("lxt_getserv: request caught"); + + request_size = sizeof (*request) + sizeof (*data) + + token_len + buf_len - 1; + if ((request = calloc(1, request_size)) == NULL) { + lxt_debug("lxt_getserv: calloc() failed"); + return (NULL); + } + /*LINTED*/ + data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0]; + + /* Initialize the server request. 
*/ + request->lxt_sa_op = op; + data->lxt_gs_token_len = token_len; + data->lxt_gs_buf_len = buf_len; + data->lxt_gs_storage_len = token_len + token_len; + bcopy(token, &data->lxt_gs_storage[0], token_len); + + bzero(data->lxt_gs_proto, sizeof (data->lxt_gs_proto)); + if (proto != NULL) + (void) strncpy(data->lxt_gs_proto, proto, + sizeof (data->lxt_gs_proto)); + + /* Initialize door_call() arguments. */ + bzero(&door_arg, sizeof (door_arg)); + door_arg.data_ptr = (char *)request; + door_arg.data_size = request_size; + + /* Call the doors server */ + if (lxt_door_request(&door_arg) != 0) { + lxt_debug("lxt_getserv: door_call() failed"); + /* Don't know what caused the error so clear errno */ + errno = 0; + free(request); + return (NULL); + } + free(request); + + if (door_arg.rbuf == NULL) { + lxt_debug("lxt_getserv: door_call() returned NULL"); + /* Don't know what caused the error so clear errno */ + errno = 0; + return (NULL); + } + /*LINTED*/ + request = (lxt_server_arg_t *)door_arg.rbuf; + /*LINTED*/ + data = (lxt_getserv_arg_t *)&request->lxt_sa_data[0]; + + /* Check if the remote procedure call failed */ + if (!request->lxt_sa_success) { + lxt_debug("lxt_getserv: remote function call failed"); + errno_tmp = request->lxt_sa_errno; + (void) munmap(door_arg.rbuf, door_arg.rsize); + errno = errno_tmp; + return (NULL); + } + + /* Copy out the results and output buffer. */ + bcopy(&data->lxt_gs_result, result, sizeof (*result)); + bcopy(&data->lxt_gs_storage[token_len], buf, buf_len); + (void) munmap(door_arg.rbuf, door_arg.rsize); + + /* + * Now go through the results and convert all offsets to pointers. + * See the comments in lxt_server_getserv() for why we need + * to subtract 1 from each offset. 
+ */ + result->s_name = LXT_OFFSET_TO_PTR(result->s_name, buf); + result->s_proto = LXT_OFFSET_TO_PTR(result->s_proto, buf); + result->s_aliases = LXT_OFFSET_TO_PTR(result->s_aliases, buf); + for (i = 0; result->s_aliases[i] != NULL; i++) { + result->s_aliases[i] = + LXT_OFFSET_TO_PTR(result->s_aliases[i], buf); + } + + return (result); +} + +static void +lxt_openlog(const char *ident, int logopt, int facility) +{ + door_arg_t door_arg; + lxt_openlog_arg_t *data; + lxt_server_arg_t *request; + int request_size; + + request_size = sizeof (*request) + sizeof (*data); + if ((request = calloc(1, request_size)) == NULL) { + lxt_debug("lxt_openlog: calloc() failed"); + return; + } + /*LINTED*/ + data = (lxt_openlog_arg_t *)&request->lxt_sa_data[0]; + + /* Initialize the server request. */ + request->lxt_sa_op = LXT_SERVER_OP_OPENLOG; + data->lxt_ol_facility = facility; + data->lxt_ol_logopt = logopt; + (void) strlcpy(data->lxt_ol_ident, ident, sizeof (data->lxt_ol_ident)); + + /* Initialize door_call() arguments. 
*/ + bzero(&door_arg, sizeof (door_arg)); + door_arg.data_ptr = (char *)request; + door_arg.data_size = request_size; + + /* Call the doors server */ + if (lxt_door_request(&door_arg) != 0) { + lxt_debug("lxt_openlog: door_call() failed"); + free(request); + return; + } + free(request); + + if (door_arg.rbuf == NULL) { + lxt_debug("lxt_openlog: door_call() returned NULL"); + return; + } + + /*LINTED*/ + request = (lxt_server_arg_t *)door_arg.rbuf; + + /* Check if the remote procedure call failed */ + if (!request->lxt_sa_success) { + lxt_debug("lxt_openlog: remote function call failed"); + } + (void) munmap(door_arg.rbuf, door_arg.rsize); +} + +static void +lxt_vsyslog(int priority, const char *message, va_list va) +{ + door_arg_t door_arg; + lxt_syslog_arg_t *data; + lxt_server_arg_t *request; + psinfo_t p; + char procfile[PRFNSZ], *buf, *estr; + int buf_len, buf_i, estr_len, request_size, procfd; + int i, key, err_count = 0, tok_count = 0; + int errno_backup = errno; + + /* + * Here we're going to use vsnprintf() to expand the message + * string passed in before we hand it off to a Linux process. + * Before we can call vsnprintf() we'll need to do modify the + * string to deal with certain special tokens. + * + * syslog() supports a special '%m' format token that expands to + * the error message string associated with the current value + * of errno. Unfortunatly if we pass this token to vsnprintf() + * it will choke so we need to expand that token manually here. + * + * We also need to expand any "%%" characters into "%%%%". + * The reason is that we'll be calling vsnprintf() which will + * translate "%%%%" back to "%%", which is safe to pass to the + * Linux version if syslog. If we didn't do this then vsnprintf() + * would translate "%%" to "%" and then the Linux syslog would + * attempt to intrepret "%" and whatever character follows it + * as a printf format style token. 
+ */ + for (key = i = 0; message[i] != '\0'; i++) { + if (!key && message[i] == '%') { + key = 1; + continue; + } + if (key && message[i] == '%') + tok_count++; + if (key && message[i] == 'm') + err_count++; + key = 0; + } + + /* We found some tokens that we need to expand. */ + if (err_count || tok_count) { + estr = strerror(errno_backup); + estr_len = strlen(estr); + assert(estr_len >= 2); + + /* Allocate a buffer to hold the expanded string. */ + buf_len = i + 1 + + (tok_count * 2) + (err_count * (estr_len - 2)); + if ((buf = calloc(1, buf_len)) == NULL) { + lxt_debug("lxt_vsyslog: calloc() failed"); + return; + } + + /* Finally, expand %% and %m. */ + for (key = buf_i = i = 0; message[i] != '\0'; i++) { + assert(buf_i < buf_len); + if (!key && message[i] == '%') { + buf[buf_i++] = '%'; + key = 1; + continue; + } + if (key && message[i] == 'm') { + (void) bcopy(estr, &buf[buf_i - 1], estr_len); + buf_i += estr_len - 1; + } else if (key && message[i] == '%') { + (void) bcopy("%%%%", &buf[buf_i - 1], 4); + buf_i += 4 - 1; + } else { + buf[buf_i++] = message[i]; + } + key = 0; + } + assert(buf[buf_i] == '\0'); + assert(buf_i == (buf_len - 1)); + + /* Use the expanded buffer as our format string. */ + message = buf; + } + + /* Allocate the request we're going to send to the server */ + request_size = sizeof (*request) + sizeof (*data); + if ((request = calloc(1, request_size)) == NULL) { + lxt_debug("lxt_vsyslog: calloc() failed"); + return; + } + + /*LINTED*/ + data = (lxt_syslog_arg_t *)&request->lxt_sa_data[0]; + + /* Initialize the server request. */ + request->lxt_sa_op = LXT_SERVER_OP_SYSLOG; + data->lxt_sl_priority = priority; + data->lxt_sl_pid = getpid(); + (void) vsnprintf(data->lxt_sl_message, sizeof (data->lxt_sl_message), + message, va); + + /* If we did token expansion then free the intermediate buffer. 
*/ + if (err_count || tok_count) + free(buf); + + /* + * Add the current program name into the request. The path is + * formatted with a bounded snprintf() so that a long pid can never + * write past the end of procfile, and the name is only copied when + * a complete psinfo_t was read. + */ + (void) snprintf(procfile, sizeof (procfile), + "/proc/%u/psinfo", (int)getpid()); + /* (void) sprintf(procfile, "/native/proc/%u/psinfo", (int)getpid()); */ + if ((procfd = open(procfile, O_RDONLY)) >= 0) { + if (read(procfd, &p, sizeof (psinfo_t)) == + (ssize_t)sizeof (psinfo_t)) { + (void) strncpy(data->lxt_sl_progname, p.pr_fname, + sizeof (data->lxt_sl_progname)); + } + (void) close(procfd); + } + + /* Initialize door_call() arguments. */ + bzero(&door_arg, sizeof (door_arg)); + door_arg.data_ptr = (char *)request; + door_arg.data_size = request_size; + + /* Call the doors server */ + if (lxt_door_request(&door_arg) != 0) { + lxt_debug("lxt_vsyslog: door_call() failed"); + free(request); + return; + } + free(request); + + if (door_arg.rbuf == NULL) { + lxt_debug("lxt_vsyslog: door_call() returned NULL"); + return; + } + + /*LINTED*/ + request = (lxt_server_arg_t *)door_arg.rbuf; + + /* Check if the remote procedure call failed */ + if (!request->lxt_sa_success) { + lxt_debug("lxt_vsyslog: remote function call failed"); + } + (void) munmap(door_arg.rbuf, door_arg.rsize); +} + +static void +lxt_closelog(void) +{ + door_arg_t door_arg; + lxt_server_arg_t *request; + int request_size; + + request_size = sizeof (*request); + if ((request = calloc(1, request_size)) == NULL) { + lxt_debug("lxt_closelog: calloc() failed"); + return; + } + + /* Initialize the server request. */ + request->lxt_sa_op = LXT_SERVER_OP_CLOSELOG; + + /* Initialize door_call() arguments. 
*/ + bzero(&door_arg, sizeof (door_arg)); + door_arg.data_ptr = (char *)request; + door_arg.data_size = request_size; + + /* Call the doors server */ + if (lxt_door_request(&door_arg) != 0) { + lxt_debug("lxt_closelog: door_call() failed"); + free(request); + return; + } + free(request); + + if (door_arg.rbuf == NULL) { + lxt_debug("lxt_closelog: door_call() returned NULL"); + return; + } + + /*LINTED*/ + request = (lxt_server_arg_t *)door_arg.rbuf; + + /* Check if the remote procedure call failed */ + if (!request->lxt_sa_success) { + lxt_debug("lxt_closelog: remote function call failed"); + } + (void) munmap(door_arg.rbuf, door_arg.rsize); +} + +static void +lxt_pset_keep(priv_op_t op, priv_ptype_t type, priv_set_t *pset, + const char *priv) +{ + if (priv_ismember(pset, priv) == B_TRUE) { + if (op == PRIV_OFF) { + (void) priv_delset(pset, priv); + lxt_debug("lxt_pset_keep: " + "preventing drop of \"%s\" from \"%s\" set", + priv, type); + } + } else { + if (op == PRIV_SET) { + (void) priv_addset(pset, priv); + lxt_debug("lxt_pset_keep: " + "preventing drop of \"%s\" from \"%s\" set", + priv, type); + } + } +} + +/* + * Public interfaces - used by lx_nametoaddr + */ +void +lxt_vdebug(const char *msg, va_list va) +{ + char buf[LXT_MSG_MAXLEN + 1]; + int rv, n; + + if (debug_fd == -1) + return; + + /* Prefix the message with pid/tid. */ + if ((n = snprintf(buf, sizeof (buf), "%u/%u: ", + getpid(), thr_self())) == -1) + return; + + /* Format the message. */ + if (vsnprintf(&buf[n], sizeof (buf) - n, msg, va) == -1) + return; + + /* Add a carrige return if there isn't one already. */ + if ((buf[strlen(buf) - 1] != '\n') && + (strlcat(buf, "\n", sizeof (buf)) >= sizeof (buf))) + return; + + /* We retry in case of EINTR */ + do { + rv = write(debug_fd, buf, strlen(buf)); + } while ((rv == -1) && (errno == EINTR)); +} + +void +lxt_debug(const char *msg, ...) 
+{ + va_list va; + int errno_backup; + + if (debug_fd == -1) + return; + + errno_backup = errno; + va_start(va, msg); + lxt_vdebug(msg, va); + va_end(va); + errno = errno_backup; +} + +struct hostent * +lxt_gethostbyaddr_r(const char *addr, int addr_len, int type, + struct hostent *result, char *buf, int buf_len, int *h_errnop) +{ + lxt_debug("lxt_gethostbyaddr_r: request recieved"); + return (lxt_gethost(LXT_SERVER_OP_ADDR2HOST, + addr, addr_len, type, result, buf, buf_len, h_errnop)); +} + +struct hostent * +lxt_gethostbyname_r(const char *name, + struct hostent *result, char *buf, int buf_len, int *h_errnop) +{ + lxt_debug("lxt_gethostbyname_r: request recieved"); + return (lxt_gethost(LXT_SERVER_OP_NAME2HOST, + name, strlen(name) + 1, 0, result, buf, buf_len, h_errnop)); +} + +struct servent * +lxt_getservbyport_r(int port, const char *proto, + struct servent *result, char *buf, int buf_len) +{ + lxt_debug("lxt_getservbyport_r: request recieved"); + return (lxt_getserv(LXT_SERVER_OP_PORT2SERV, + (const char *)&port, sizeof (int), proto, result, buf, buf_len)); +} + +struct servent * +lxt_getservbyname_r(const char *name, const char *proto, + struct servent *result, char *buf, int buf_len) +{ + lxt_debug("lxt_getservbyname_r: request recieved"); + return (lxt_getserv(LXT_SERVER_OP_NAME2SERV, + name, strlen(name) + 1, proto, result, buf, buf_len)); +} + +/* + * "Public" interfaces - used to override public existing interfaces + */ +int +_close(int fd) +{ + static fp1_t fp = NULL; + + /* + * Don't let the process close our file descriptor that points + * back to the root directory. 
+ */ + if (fd == root_fd) + return (0); + if (fd == debug_fd) + return (0); + + if (fp == NULL) + fp = (fp1_t)dlsym(RTLD_NEXT, "_close"); + return (fp((uintptr_t)fd)); +} + +int +_setppriv(priv_op_t op, priv_ptype_t type, const priv_set_t *pset) +{ + static fp3_t fp = NULL; + priv_set_t *pset_new; + int rv; + + lxt_debug("_setppriv: request caught"); + + if (fp == NULL) + fp = (fp3_t)dlsym(RTLD_NEXT, "_setppriv"); + + while ((pset_new = priv_allocset()) == NULL) + (void) sleep(1); + + priv_copyset(pset, pset_new); + lxt_pset_keep(op, type, pset_new, PRIV_PROC_EXEC); + lxt_pset_keep(op, type, pset_new, PRIV_PROC_FORK); + lxt_pset_keep(op, type, pset_new, PRIV_PROC_CHROOT); + lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_READ); + lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_WRITE); + lxt_pset_keep(op, type, pset_new, PRIV_FILE_DAC_SEARCH); + + rv = fp(op, (uintptr_t)type, (uintptr_t)pset_new); + priv_freeset(pset_new); + return (rv); +} + +void +openlog(const char *ident, int logopt, int facility) +{ + lxt_debug("openlog: request caught"); + lxt_openlog(ident, logopt, facility); +} + +void +syslog(int priority, const char *message, ...) +{ + va_list va; + + lxt_debug("syslog: request caught"); + va_start(va, message); + lxt_vsyslog(priority, message, va); + va_end(va); +} + +void +vsyslog(int priority, const char *message, va_list va) +{ + lxt_debug("vsyslog: request caught"); + lxt_vsyslog(priority, message, va); +} + +void +closelog(void) +{ + lxt_debug("closelog: request caught"); + lxt_closelog(); +} diff --git a/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers new file mode 100644 index 0000000000..8aaee4f164 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/common/mapfile-vers @@ -0,0 +1,46 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +SUNWprivate_1.1 { + global: + lxt_vdebug; + lxt_debug; + lxt_gethostbyaddr_r; + lxt_gethostbyname_r; + lxt_getservbyport_r; + lxt_getservbyname_r; + _close; + _setppriv; + openlog; + syslog; + vsyslog; + closelog; + + local: + *; +}; diff --git a/usr/src/lib/brand/lx/lx_thunk/i386/Makefile b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile new file mode 100644 index 0000000000..4ad13cf029 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/i386/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB) + +install: $(ROOTLIBDIR) all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h new file mode 100644 index 0000000000..b19c91873a --- /dev/null +++ b/usr/src/lib/brand/lx/lx_thunk/sys/lx_thunk.h @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_THUNK_H +#define _LX_THUNK_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +struct hostent *lxt_gethostbyaddr_r(const char *addr, int addr_len, int type, + struct hostent *result, char *buf, int buf_len, int *h_errnop); +struct hostent *lxt_gethostbyname_r(const char *name, + struct hostent *result, char *buf, int buf_len, int *h_errnop); +struct servent *lxt_getservbyport_r(int port, const char *proto, + struct servent *result, char *buf, int buf_len); +struct servent *lxt_getservbyname_r(const char *name, const char *proto, + struct servent *result, char *buf, int buf_len); + +void openlog(const char *ident, int logopt, int facility); +void syslog(int priority, const char *message, ...); +void closelog(void); + +void lxt_debug(const char *msg, ...); +void lxt_vdebug(const char *msg, va_list va); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_THUNK_H */ diff --git a/usr/src/lib/brand/lx/netfiles/Makefile b/usr/src/lib/brand/lx/netfiles/Makefile new file mode 100644 index 0000000000..ea661da94b --- /dev/null +++ b/usr/src/lib/brand/lx/netfiles/Makefile @@ -0,0 +1,48 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +TXTS = etc_netconfig etc_default_nfs +NFS_DFL = ../../../../cmd/fs.d/nfs/etc/nfs.dfl + +all: $(TXTS) + +include ../Makefile.lx + +lint: + +install: $(ROOTBRANDDIR) $(ROOTTXTS) + +clean: + -$(RM) etc_default_nfs + +clobber: clean # clean first so the generated etc_default_nfs copy is removed too + -$(RM) $(ROOTXMLDOCS) $(ROOTTXTS) + +etc_default_nfs: $(NFS_DFL) + $(RM) $@ + $(CP) $(NFS_DFL) $@ diff --git a/usr/src/lib/brand/lx/netfiles/etc_netconfig b/usr/src/lib/brand/lx/netfiles/etc_netconfig new file mode 100644 index 0000000000..56222abf56 --- /dev/null +++ b/usr/src/lib/brand/lx/netfiles/etc_netconfig @@ -0,0 +1,38 @@ +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# The "Network Configuration" File.
+# +# Each entry is of the form: +# +# <network_id> <semantics> <flags> <protofamily> <protoname> \ +# <device> <nametoaddr_libs> +# +# For running Solaris daemons in a Linux zone we use this non-default +# /etc/netconfig. The reason is that all name resolution has to be +# done through Linux name service interfaces. To do this we specify a custom +# nametoaddr library that libnsl will invoke to do name service lookups. +# +udp tpi_clts v inet udp /dev/udp lx_nametoaddr.so.1 +tcp tpi_cots_ord v inet tcp /dev/tcp lx_nametoaddr.so.1 diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile new file mode 100644 index 0000000000..26c95d2f9b --- /dev/null +++ b/usr/src/lib/brand/lx/zone/Makefile @@ -0,0 +1,68 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms.
+# +# ident "%Z%%M% %I% %E% SMI" +# + +PROGS = lx_install lx_distro_install lx_init_zone +SUBDIRS = distros +XMLDOCS = config.xml platform.xml +TEMPLATES = SUNWlx.xml + +all: $(PROGS) + +include $(SRC)/cmd/Makefile.cmd +include ../Makefile.lx + +all := TARGET= all +install := TARGET= install +clobber := TARGET= clobber + +POFILES= $(PROGS:%=%.po) +POFILE= lx_zone.po + +$(POFILE): $(POFILES) + $(RM) $@ + $(BUILDPO.pofiles) + +_msg: $(MSGDOMAINPOFILE) + +install: $(PROGS) $(ROOTBRANDDIR) $(ROOTTEMPLATEDIR) $(ROOTXMLDOCS) \ + $(ROOTTEMPLATES) $(ROOTPROGS) $(SUBDIRS) + +lint: + +clean: + -$(RM) $(PROGS) + +clobber: clean $(SUBDIRS) + -$(RM) $(ROOTXMLDOCS) $(ROOTPROGS) $(ROOTTEMPLATES) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include $(SRC)/Makefile.msg.targ diff --git a/usr/src/lib/brand/lx/zone/SUNWlx.xml b/usr/src/lib/brand/lx/zone/SUNWlx.xml new file mode 100644 index 0000000000..04c38873de --- /dev/null +++ b/usr/src/lib/brand/lx/zone/SUNWlx.xml @@ -0,0 +1,34 @@ +<?xml version="1.0"?> + +<!-- + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. 
Use zonecfg(1M) instead. +--> + +<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1"> + +<zone name="default" zonepath="" autoboot="false" brand="lx"> +</zone> diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml new file mode 100644 index 0000000000..12deb33022 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/config.xml @@ -0,0 +1,89 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. 
+--> + +<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN" + "file:///usr/share/lib/xml/dtd/brand.dtd.1"> + +<brand name="lx"> + <modname>lx_brand</modname> + + <initname>/sbin/init</initname> + <login_cmd>/bin/login -h zone:%Z -f %u</login_cmd> + + <install>/usr/lib/brand/lx/lx_install %z %R %*</install> + <installopts>d:hsvX</installopts> + <boot>/usr/lib/brand/lx/lx_support boot %R %z</boot> + <halt>/usr/lib/brand/lx/lx_support halt %R %z</halt> + <verify_cfg>/usr/lib/brand/lx/lx_support verify</verify_cfg> + <verify_adm></verify_adm> + <postclone></postclone> + + <privilege set="default" name="contract_event" /> + <privilege set="default" name="contract_observer" /> + <privilege set="default" name="file_chown" /> + <privilege set="default" name="file_chown_self" /> + <privilege set="default" name="file_dac_execute" /> + <privilege set="default" name="file_dac_read" /> + <privilege set="default" name="file_dac_search" /> + <privilege set="default" name="file_dac_write" /> + <privilege set="default" name="file_owner" /> + <privilege set="default" name="file_setid" /> + <privilege set="default" name="ipc_dac_read" /> + <privilege set="default" name="ipc_dac_write" /> + <privilege set="default" name="ipc_owner" /> + <privilege set="default" name="net_bindmlp" /> + <privilege set="default" name="net_icmpaccess" /> + <privilege set="default" name="net_mac_aware" /> + <privilege set="default" name="net_privaddr" /> + <privilege set="default" name="proc_chroot" /> + <privilege set="default" name="sys_audit" /> + <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_owner" /> + <privilege set="default" name="proc_setid" /> + <privilege set="default" name="proc_taskid" /> + <privilege set="default" name="sys_acct" /> + <privilege set="default" name="sys_admin" /> + <privilege set="default" name="sys_mount" /> + <privilege set="default" name="sys_nfs" /> + <privilege set="default" name="sys_resource" /> + + <privilege 
set="prohibited" name="dtrace_kernel" /> + <privilege set="prohibited" name="proc_zone" /> + <privilege set="prohibited" name="sys_config" /> + <privilege set="prohibited" name="sys_devices" /> + <privilege set="prohibited" name="sys_linkdir" /> + <privilege set="prohibited" name="sys_net_config" /> + <privilege set="prohibited" name="sys_res_config" /> + <privilege set="prohibited" name="sys_suser_compat" /> + + <privilege set="required" name="proc_exec" /> + <privilege set="required" name="proc_fork" /> + <privilege set="required" name="sys_mount" /> +</brand> diff --git a/usr/src/lib/brand/lx/zone/distros/Makefile b/usr/src/lib/brand/lx/zone/distros/Makefile new file mode 100644 index 0000000000..9c410a6176 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/Makefile @@ -0,0 +1,53 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../Makefile.lx + +DISTROS = centos35.distro centos36.distro centos37.distro \ + rhel35.distro rhel36.distro rhel37.distro rhel_centos_common + +ROOTDISTRODIR= $(ROOTBRANDDIR)/distros +ROOTDISTROS= $(DISTROS:%=$(ROOTDISTRODIR)/%) + +$(ROOTDISTROS) := FILEMODE = 444 +$(ROOTDISTROS) := OWNER = root +$(ROOTDISTROS) := GROUP = bin + +$(ROOTDISTRODIR): + $(INS.dir) + +$(ROOTDISTRODIR)/%: % + $(INS.file) + +install: $(ROOTDISTRODIR) $(ROOTDISTROS) + +lint clean all: + +clobber: + -$(RM) $(ROOTDISTROS) + diff --git a/usr/src/lib/brand/lx/zone/distros/centos35.distro b/usr/src/lib/brand/lx/zone/distros/centos35.distro new file mode 100644 index 0000000000..a4390a0f7c --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos35.distro @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Install information for the Centos 3.5 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1118161135.08 +distro_ndiscs=3 +set -A distro_discorder 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_base_rpms +unset distro_desktop_rpms +unset distro_app_rpms +unset distro_server_rpms +unset distro_developer_rpms +unset distro_system_rpms + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the CentOS 3.5 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms openoffice.org-style-gnome" +delta_developer_rpms=$delta_desktop_rpms +delta_all_rpms=$delta_developer_rpms + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/centos36.distro b/usr/src/lib/brand/lx/zone/distros/centos36.distro new file mode 100644 index 0000000000..c0f4a2d095 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos36.distro @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License.
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Elementary configuration information for the Centos 3.6 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1130453594.8 +distro_ndiscs=3 +set -A distro_discorder 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_base_rpms +unset distro_desktop_rpms +unset distro_app_rpms +unset distro_server_rpms +unset distro_developer_rpms +unset distro_system_rpms + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the CentOS 3.6 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms openoffice.org-style-gnome" +delta_developer_rpms="$delta_desktop_rpms gd-progs" +delta_all_rpms="$delta_developer_rpms emacs-nox" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/centos37.distro b/usr/src/lib/brand/lx/zone/distros/centos37.distro new file mode 100644 index 0000000000..a9a5cada7d --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/centos37.distro @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Elementary configuration information for the Centos 3.7 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1144177644.47 +distro_ndiscs=3 +set -A distro_discorder 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_base_rpms +unset distro_desktop_rpms +unset distro_app_rpms +unset distro_server_rpms +unset distro_developer_rpms +unset distro_system_rpms + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the CentOS 3.7 deltas from the common cluster lists +delta_miniroot_rpms=centos-release +delta_core_rpms="centos-yumconf centos-yumcache yum" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms" +delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk" +delta_all_rpms="$delta_developer_rpms emacs-nox" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel35.distro b/usr/src/lib/brand/lx/zone/distros/rhel35.distro new file mode 100644 index 0000000000..53208bd2a0 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel35.distro @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3.5 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1115874580.003298 +distro_ndiscs=4 +set -A distro_discorder 4 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_base_rpms +unset distro_desktop_rpms +unset distro_app_rpms +unset distro_server_rpms +unset distro_developer_rpms +unset distro_system_rpms + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the RHEL 3.5 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms openoffice.org-style-gnome" +delta_developer_rpms=$delta_desktop_rpms +delta_all_rpms="$delta_developer_rpms comps-3AS" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel36.distro b/usr/src/lib/brand/lx/zone/distros/rhel36.distro new file mode 100644 index 0000000000..ad69139641 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel36.distro @@ -0,0 +1,67 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3.6 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1127323691.616555 +distro_ndiscs=4 +set -A distro_discorder 4 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_base_rpms +unset distro_desktop_rpms +unset distro_app_rpms +unset distro_server_rpms +unset distro_developer_rpms +unset distro_system_rpms + +# Include the common_<cluster>_* definitions. +. ${distro_dir}/rhel_centos_common + +# Define the RHEL 3.6 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms=$delta_core_rpms +delta_desktop_rpms="$delta_server_rpms openoffice.org-style-gnome" +delta_developer_rpms="$delta_desktop_rpms gd-progs" +delta_all_rpms="$delta_developer_rpms emacs-nox comps-3AS" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel37.distro b/usr/src/lib/brand/lx/zone/distros/rhel37.distro new file mode 100644 index 0000000000..9997eda1a2 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel37.distro @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Installation information for the RHEL 3.7 distribution disc set: +# +# + Serial number (as found in the disc set's .discinfo file) +# + Number of discs required to hold the entire distribution +# + MB of disk space required to hold a full install of the distribution +# + Pathname of actual RPM files within a mounted disc +# +distro_serial=1141679045.364586 +distro_ndiscs=4 +set -A distro_discorder 4 1 2 3 + +distro_mb_required=500 +distro_rpmdir=RedHat/RPMS + +unset distro_miniroot_rpms +unset distro_core_rpms +unset distro_server_rpms +unset distro_desktop_rpms +unset distro_developer_rpms +unset distro_all_rpms + +# Include the common_<cluster>_* definitions. +. 
${distro_dir}/rhel_centos_common + +# Define the RHEL 3.7 deltas from the common cluster lists +delta_miniroot_rpms=redhat-release +delta_core_rpms="" +delta_server_rpms="$delta_core_rpms nss_db-compat sendmail-doc qt-config" +delta_desktop_rpms="$delta_server_rpms" +delta_developer_rpms="$delta_desktop_rpms gd-progs ruby-docs irb ruby-tcltk" +delta_all_rpms="$delta_developer_rpms emacs-nox comps-3AS" + +# Define the final cluster lists for the installer +distro_miniroot_rpms="$common_miniroot_rpms $delta_miniroot_rpms" +distro_core_rpms="$common_core_rpms $delta_core_rpms" +distro_server_rpms="$common_server_rpms $delta_server_rpms" +distro_desktop_rpms="$common_desktop_rpms $delta_desktop_rpms" +distro_developer_rpms="$common_developer_rpms $delta_developer_rpms" +distro_all_rpms="$common_all_rpms $delta_all_rpms" diff --git a/usr/src/lib/brand/lx/zone/distros/rhel_centos_common b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common new file mode 100644 index 0000000000..63f0e976f7 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/distros/rhel_centos_common @@ -0,0 +1,1023 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# This file contains the basic cluster contents shared by all of the +# Linux distros we support. Each distro has its own .distro file that +# expands on the basic cluster lists provided here. +# + +# +# Required packages for the install miniroot, these are the minimum packages a +# system must have installed in order to run rpm (which is then used from +# within the zone to perform the balance of the installation.) +# +common_miniroot_rpms="SysVinit \ + basesystem \ + bash \ + beecrypt \ + bzip2-libs \ + coreutils \ + elfutils \ + elfutils-libelf \ + filesystem \ + glibc \ + glibc-common \ + gpm \ + initscripts \ + iptables \ + iptables-ipv6 \ + kernel-utils \ + laus-libs \ + libacl \ + libattr \ + libgcc \ + libtermcap \ + ncurses \ + pam \ + popt \ + rpm \ + rpm-libs \ + setup \ + termcap \ + zlib" + +# +# This starts a listing of RPMs comprising a variety of install package options +# for a distribution. +# +# The supported package clusters are: +# +# + core +# + server +# + desktop +# + developer +# + all +# +# The RPMs needed to install each cluster are listed in the shell variable +# +# distro_<level>_rpms +# +# This file provides "common_<level>_rpms", which are lists of the packages +# in each cluster that are common to all distros. +# +# The package names are listed alphabetically for readability. rpm will +# reorder the list to ensure that each package's dependencies are installed +# before it is. +# +# Note: Since the distro_install script uses a regular expression to expand +# RPM package names to filenames, there may be some tweaking required to +# guarantee a unique match between a package name and a corresponding RPM +# file on the install media. +# +# One such example below is the package "XFree86-4."
The official name of +# the package is "XFree86," but the regular expression in the script +# matches that package name to the XFree86-100dpi-fonts and +# XFree86-75dpi-fonts package RPMs in addition to the proper XFree86 RPM. +# Therefore the "XFree86" package name was modified to be "XFree86-4", +# which does result in a unique package name to RPM file match. +# +common_core_rpms="GConf2 \ + Glide3 \ + ORBit \ + ORBit2 \ + XFree86-Mesa-libGL \ + XFree86-Mesa-libGLU \ + XFree86-libs \ + XFree86-libs-data \ + Xaw3d \ + ash \ + at \ + atk \ + audiofile \ + autofs \ + bc \ + binutils \ + bonobo-activation \ + bzip2 \ + chkconfig \ + compat-pwdb \ + cpio \ + cpp \ + cracklib \ + cracklib-dicts \ + crontabs \ + cups-libs \ + cyrus-sasl \ + cyrus-sasl-md5 \ + db4 \ + desktop-file-utils \ + dev \ + diffutils \ + diskdumputils \ + e2fsprogs \ + ed \ + ethtool \ + expat \ + file \ + findutils \ + finger \ + fontconfig \ + freetype \ + ftp \ + gail \ + gawk \ + gdbm \ + gdk-pixbuf \ + gettext \ + glib \ + glib2 \ + glibc-headers \ + glibc-kernheaders \ + gmp \ + grep \ + groff \ + gtk+ \ + gtk2 \ + gzip \ + hesiod \ + hwdata \ + indexhtml \ + info \ + iproute \ + iputils \ + kernel \ + kernel-BOOT \ + krb5-libs \ + krb5-workstation \ + kudzu \ + laus \ + less \ + libaio \ + libart_lgpl \ + libbonobo \ + libcap \ + libgcj \ + libgcj-ssa \ + libglade2 \ + libgnomecanvas \ + libjpeg \ + libmng \ + libogg \ + libpng \ + libpng10 \ + libstdc++ \ + libtiff \ + libtool-libs \ + libungif \ + libusb \ + libuser \ + libvorbis \ + libwnck \ + libxml \ + libxml2 \ + libxml2-python \ + libxslt \ + linc \ + lockdev \ + logrotate \ + losetup \ + lsof \ + lvm \ + lynx \ + m4 \ + mailcap \ + make \ + man \ + man-pages \ + mingetty \ + mkinitrd \ + mkisofs \ + mktemp \ + modutils \ + mount \ + mtools \ + nc \ + net-snmp \ + net-snmp-libs \ + net-tools \ + netdump \ + newt \ + nfs-utils \ + nscd \ + nss_db \ + nss_ldap \ + ntp \ + ntsysv \ + openldap \ + openssh \ + openssh-clients \ + 
openssh-server \ + openssl \ + pango \ + passwd \ + patch \ + pax \ + pcre \ + pdksh \ + perl \ + perl-CGI \ + perl-DateManip \ + perl-Filter \ + perl-HTML-Parser \ + perl-HTML-Tagset \ + perl-Parse-Yapp \ + perl-URI \ + perl-XML-Dumper \ + perl-XML-Encoding \ + perl-XML-Grove \ + perl-XML-Parser \ + perl-XML-Twig \ + perl-libwww-perl \ + perl-libxml-enno \ + perl-libxml-perl \ + portmap \ + procmail \ + procps \ + psacct \ + psmisc \ + pspell \ + pygtk2 \ + pygtk2-libglade \ + python \ + pyxf86config \ + readline \ + redhat-logos \ + redhat-menus \ + rhpl \ + rpm-python \ + rpmdb-redhat \ + rsh \ + rsync \ + rusers \ + rwho \ + sed \ + setarch \ + sgml-common \ + shadow-utils \ + slang \ + startup-notification \ + sudo \ + sysklogd \ + syslinux \ + tar \ + tcl \ + tcp_wrappers \ + tcsh \ + telnet \ + time \ + traceroute \ + ttmkfdir \ + tzdata \ + units \ + unix2dos \ + unzip \ + usermode \ + utempter \ + util-linux \ + vim-common \ + vim-minimal \ + vixie-cron \ + wget \ + which \ + words \ + xinetd \ + xml-common \ + yp-tools \ + ypbind \ + zip" + +common_server_rpms="$common_core_rpms \ + 4Suite \ + MyODBC \ + MySQL-python \ + Omni \ + Omni-foomatic \ + PyXML \ + VFlib2 \ + XFree86-4 \ + XFree86-base-fonts \ + XFree86-font-utils \ + XFree86-truetype-fonts \ + XFree86-xauth \ + XFree86-xdm \ + XFree86-xfs \ + acl \ + alchemist \ + amanda \ + amanda-server \ + arts \ + aspell \ + aspell-config \ + at-spi \ + authd \ + bcel \ + bind \ + bind-chroot \ + bind-libs \ + bind-utils \ + bitmap-fonts \ + caching-nameserver \ + chkfontpath \ + commons-beanutils \ + commons-collections \ + commons-digester \ + commons-logging \ + commons-modeler \ + compat-db \ + compat-libstdc++ \ + crypto-utils \ + cup-v10k \ + cups \ + curl \ + cyrus-sasl-gssapi \ + cyrus-sasl-plain \ + dhcp \ + distcache \ + distcache-devel \ + esound \ + fam \ + finger-server \ + foomatic \ + freeradius \ + gd \ + ghostscript \ + ghostscript-fonts \ + gimp-print \ + gnome-libs \ + gnome-mime-data \ + 
gnome-python2 \ + gnome-python2-bonobo \ + gnome-python2-canvas \ + gnome-python2-gtkhtml2 \ + gnome-vfs2 \ + gnuplot \ + gtkhtml2 \ + htmlview \ + httpd \ + hwcrypto \ + imap \ + imap-utils \ + imlib \ + inews \ + inn \ + jakarta-regexp \ + krb5-server \ + krbafs \ + libIDL \ + libbonoboui \ + libdbi \ + libdbi-dbd-mysql \ + libgnome \ + libgnomeprint22 \ + libgnomeprintui22 \ + libgnomeui \ + libgsf \ + libole2 \ + logwatch \ + mailman \ + mailx \ + mod_auth_mysql \ + mod_auth_pgsql \ + mod_authz_ldap \ + mod_perl \ + mod_python \ + mod_ssl \ + mpage \ + mtr \ + mx \ + mx4j \ + mysql \ + mysql-bench \ + mysql-devel \ + net-snmp-utils \ + netdump-server \ + newt-perl \ + openldap-servers \ + openssl-perl \ + pam_krb5 \ + perl-DBD-MySQL \ + perl-DBD-Pg \ + perl-DBI \ + perl-DB_File \ + perl-Digest-HMAC \ + perl-Digest-SHA1 \ + perl-Net-DNS \ + perl-Time-HiRes \ + php \ + php-imap \ + php-ldap \ + php-mysql \ + php-odbc \ + php-pgsql \ + pnm2ppa \ + postfix \ + postgresql-odbc \ + pxe \ + pyorbit \ + qt \ + qt-MySQL \ + qt-ODBC \ + quagga \ + radvd \ + rdist \ + redhat-config-bind \ + redhat-config-httpd \ + redhat-config-printer \ + redhat-config-printer-gui \ + redhat-config-samba \ + redhat-config-securitylevel \ + redhat-config-securitylevel-tui \ + redhat-config-services \ + redhat-java-rpm-scripts \ + redhat-switch-mail \ + redhat-switch-mail-gnome \ + rh-postgresql \ + rh-postgresql-contrib \ + rh-postgresql-docs \ + rh-postgresql-jdbc \ + rh-postgresql-libs \ + rh-postgresql-python \ + rh-postgresql-server \ + rh-postgresql-tcl \ + rh-postgresql-test \ + rhdb-utils \ + rsh-server \ + rusers-server \ + samba \ + samba-client \ + samba-common \ + samba-swat \ + sendmail \ + sendmail-cf \ + slocate \ + spamassassin \ + squid \ + squirrelmail \ + switchdesk \ + sysreport \ + telnet-server \ + tftp-server \ + tmpwatch \ + tux \ + unixODBC \ + unixODBC-kde \ + urw-fonts \ + usermode-gtk \ + vsftpd \ + webalizer \ + xalan-j \ + xerces-j \ + xinitrc \ + ypserv" + 
+common_desktop_rpms="$common_server_rpms \ + Canna-libs \ + FreeWnn-libs \ + Gtk-Perl \ + ImageMagick \ + ImageMagick-perl \ + SDL \ + XFree86-100dpi-fonts \ + XFree86-75dpi-fonts \ + XFree86-Xnest \ + XFree86-Xvfb \ + XFree86-doc \ + XFree86-tools \ + XFree86-twm \ + a2ps \ + am-utils \ + amanda-client \ + anacron \ + apel-xemacs \ + aumix \ + authconfig \ + authconfig-gtk \ + autorun \ + cdparanoia-alpha9.8 \ + cdparanoia-libs-alpha9.8 \ + cdrecord \ + cipe \ + ckermit \ + comps-extras \ + control-center \ + ctags \ + desktop-backgrounds-basic \ + desktop-printing \ + dialog \ + docbook-dtds \ + docbook-style-dsssl \ + docbook-style-xsl \ + docbook-utils \ + docbook-utils-pdf \ + dtach \ + dvd+rw-tools \ + dvdrecord \ + eel2 \ + elinks \ + enscript \ + eog \ + evolution \ + evolution-connector \ + fetchmail \ + file-roller \ + firstboot \ + fontilus \ + gaim \ + gconf-editor \ + gdm \ + gedit \ + gftp \ + ggv \ + gimp \ + gimp-data-extras \ + gimp-perl \ + gimp-print-cups \ + gimp-print-plugin \ + gimp-print-utils \ + gnome-applets \ + gnome-audio \ + gnome-desktop \ + gnome-games \ + gnome-icon-theme \ + gnome-media \ + gnome-panel \ + gnome-pilot \ + gnome-python2-applet \ + gnome-session \ + gnome-spell \ + gnome-system-monitor \ + gnome-terminal \ + gnome-themes \ + gnome-user-docs \ + gnome-utils \ + gnome-vfs2-extras \ + gnomemeeting \ + gnupg \ + gphoto2 \ + gsl \ + gstreamer \ + gstreamer-plugins \ + gstreamer-tools \ + gtk-engines \ + gtk2-engines \ + gtkam \ + gtkam-gimp \ + gtkglarea \ + gtkhtml3 \ + guile \ + hotplug \ + hpijs \ + hpoj \ + htdig \ + hwbrowser \ + intltool \ + itcl \ + jadetex \ + kdeaddons \ + kdeartwork \ + kdebase \ + kdegames \ + kdegraphics \ + kdelibs \ + kdemultimedia \ + kdenetwork \ + kdepim \ + kdeutils \ + lftp \ + libao \ + libf2c \ + libgail-gnome \ + libgal2 \ + libghttp \ + libglade \ + libgtop2 \ + libmrproject \ + libpcap \ + libraw1394 \ + librsvg2 \ + libsoup \ + linuxdoc-tools \ + lm_sensors \ + magicdev \ + 
metacity \ + mikmod \ + mozilla \ + mozilla-chat \ + mozilla-dom-inspector \ + mozilla-js-debugger \ + mozilla-mail \ + mozilla-nspr \ + mozilla-nss \ + mrproject \ + mrtg \ + mutt \ + nautilus \ + nautilus-cd-burner \ + nautilus-media \ + netpbm \ + netpbm-progs \ + open \ + openh323 \ + openjade \ + openldap-clients \ + openmotif \ + openmotif21 \ + openoffice.org \ + openoffice.org-i18n \ + openoffice.org-libs \ + openssh-askpass \ + openssh-askpass-gnome \ + parted \ + passivetex \ + perl-PDL \ + perl-SGMLSpm \ + perl-suidperl \ + pilot-link \ + printman \ + psutils \ + pwlib \ + pyOpenSSL \ + python-optik \ + redhat-artwork \ + redhat-config-date \ + redhat-config-keyboard \ + redhat-config-kickstart \ + redhat-config-language \ + redhat-config-mouse \ + redhat-config-network \ + redhat-config-network-tui \ + redhat-config-nfs \ + redhat-config-packages \ + redhat-config-proc \ + redhat-config-rootpassword \ + redhat-config-soundcard \ + redhat-config-users \ + redhat-config-xfree86 \ + redhat-logviewer \ + rhn-applet \ + rhnlib \ + sane-backends \ + sane-frontends \ + screen \ + scrollkeeper \ + shapecfg \ + sharutils \ + sox \ + star \ + switchdesk-gnome \ + switchdesk-kde \ + sysstat \ + talk \ + tclx \ + tetex \ + tetex-afm \ + tetex-dvips \ + tetex-fonts \ + tetex-latex \ + tetex-xdvi \ + tix \ + tk \ + tkinter \ + transfig \ + ttfprint \ + umb-scheme \ + up2date \ + up2date-gnome \ + usbutils \ + uucp \ + vim-enhanced \ + vlock \ + vnc \ + vnc-server \ + vorbis-tools \ + vte \ + w3c-libwww \ + xchat \ + xdelta \ + xemacs \ + xemacs-el \ + xemacs-info \ + xfig \ + xhtml1-dtds \ + xloadimage \ + xmltex \ + xmlto \ + xmms \ + xpdf \ + xsane \ + xsane-gimp \ + xscreensaver \ + xsri \ + xterm \ + yelp \ + zsh" + +common_developer_rpms="$common_desktop_rpms \ + ElectricFence \ + GConf2-devel \ + ORBit-devel \ + ORBit2-devel \ + SDL-devel \ + XFree86-devel \ + ant \ + ant-libs \ + arts-devel \ + at-spi-devel \ + atk-devel \ + audiofile-devel \ + autoconf \ + 
autoconf213 \ + automake \ + automake14 \ + automake15 \ + bison \ + blas \ + bonobo-activation-devel \ + bug-buddy \ + byacc \ + cdecl \ + cproto \ + crash \ + cscope \ + cups-devel \ + cvs \ + ddd \ + dejagnu \ + dev86 \ + diffstat \ + doxygen \ + eel2-devel \ + emacs \ + emacs-el \ + emacs-leim \ + esound-devel \ + expect \ + flex \ + fontconfig-devel \ + freetype-devel \ + gail-devel \ + gcc \ + gcc-c++ \ + gcc-c++-ssa \ + gcc-g77 \ + gcc-g77-ssa \ + gcc-gnat \ + gcc-java \ + gcc-java-ssa \ + gcc-objc \ + gcc-objc-ssa \ + gcc-ssa \ + gd-devel \ + gdb \ + gdk-pixbuf-devel \ + gdk-pixbuf-gnome \ + glade2 \ + glib-devel \ + glib2-devel \ + glibc-devel \ + glibc-profile \ + glibc-utils \ + gnome-desktop-devel \ + gnome-libs-devel \ + gnome-vfs2-devel \ + gperf \ + gtk+-devel \ + gtk-doc \ + gtk2-devel \ + gtkhtml2-devel \ + httpd-devel \ + im-sdk \ + imlib-devel \ + indent \ + jaf \ + javamail \ + joe \ + jpackage-utils \ + junit \ + kdebase-devel \ + kdegraphics-devel \ + kdelibs-devel \ + kdenetwork-devel \ + kdepim-devel \ + kdesdk \ + kdesdk-devel \ + kdeutils-devel \ + kdevelop \ + kdoc \ + kernel-doc \ + kernel-source \ + lam \ + lapack \ + lha \ + libIDL-devel \ + libacl-devel \ + libart_lgpl-devel \ + libattr-devel \ + libbonobo-devel \ + libbonoboui-devel \ + libgcc-ssa \ + libgcj-devel \ + libgcj-ssa-devel \ + libglade2-devel \ + libgnat \ + libgnome-devel \ + libgnomecanvas-devel \ + libgnomeprint22-devel \ + libgnomeprintui22-devel \ + libgnomeui-devel \ + libjpeg-devel \ + libmng-devel \ + libmudflap \ + libmudflap-devel \ + libobjc \ + libole2-devel \ + libpng-devel \ + librsvg2-devel \ + libstdc++-devel \ + libstdc++-ssa \ + libstdc++-ssa-devel \ + libtiff-devel \ + libtool \ + libungif-devel \ + libxml2-devel \ + libxslt-devel \ + linc-devel \ + ltrace \ + memprof \ + nasm \ + ncurses-devel \ + nedit \ + netpbm-devel \ + openmotif-devel \ + oprofile \ + pango-devel \ + patchutils \ + pcre-devel \ + perl-CPAN \ + perl-Crypt-SSLeay \ + 
pilot-link-devel \ + pkgconfig \ + pstack \ + pygtk2-devel \ + python-devel \ + python-tools \ + qt-designer \ + qt-devel \ + rcs \ + redhat-rpm-config \ + rpm-build \ + ruby \ + ruby-libs \ + ruby-mode \ + sane-backends-devel \ + sip \ + sip-devel \ + splint \ + startup-notification-devel \ + strace \ + swig \ + texinfo \ + tora \ + vim-X11 \ + vte-devel \ + zlib-devel" + +common_all_rpms="$common_developer_rpms \ + Canna + FreeWnn \ + ImageMagick-c++-5.5.6 \ + Wnn6-SDK \ + ami \ + amtu \ + anaconda \ + anaconda-help \ + anaconda-images \ + anaconda-product \ + anaconda-runtime \ + apmd \ + arptables_jf \ + attr \ + bg5ps \ + bitmap-fonts-cjk \ + bogl \ + bogl-bterm \ + bootparamd \ + booty \ + bridge-utils \ + busybox \ + busybox-anaconda \ + compat-gcc \ + compat-gcc-c++ \ + compat-glibc-7.x \ + compat-libstdc++-devel \ + compat-slang \ + db4-java \ + db4-utils \ + dbskkd-cdb \ + desktop-backgrounds-extra \ + devlabel \ + dhclient \ + dietlibc \ + dos2unix \ + dosfstools \ + dump \ + eject \ + emacspeak \ + ethereal \ + ethereal-gnome \ + fbset \ + festival \ + grub \ + h2ps \ + hdparm \ + ipsec-tools \ + irda-utils \ + iscsi-initiator-utils \ + isdn4k-utils \ + jfsutils \ + jisksp14 \ + jisksp16 \ + jwhois \ + kappa20 \ + kbd \ + kernel-pcmcia-cs \ + knm_new \ + kon2 \ + kon2-fonts \ + libtabe \ + libwvstreams \ + lilo \ + linuxwacom \ + lslk \ + mdadm \ + mgetty \ + minicom \ + mkbootdisk \ + mt-st \ + mtx \ + nano \ + ncompress \ + net-snmp-perl \ + netconfig \ + nhpf \ + nmap \ + octave \ + openssl096b \ + pam_passwdqc \ + pam_smb \ + pinfo \ + ppp \ + prelink \ + psgml \ + pvm \ + quota \ + rdate \ + rdesktop \ + redhat-config-netboot \ + rhgb \ + rmt \ + rootfiles \ + rp-pppoe \ + schedutils \ + setserial \ + setuptool \ + sg3_utils \ + skkdic \ + skkinput + specspo \ + stunnel \ + tcpdump \ + tftp \ + tn5250 \ + tsclient \ + vconfig \ + wireless-tools \ + wvdial \ + x3270 \ + x3270-text \ + x3270-x11 \ + xcin" diff --git 
a/usr/src/lib/brand/lx/zone/lx_distro_install.ksh b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh new file mode 100644 index 0000000000..c4dbbf8074 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/lx_distro_install.ksh @@ -0,0 +1,2010 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +# +# This script is called from /usr/lib/brand/lx/lx_install. +# +# options passed down from lx_install: +# -z $ZONENAME +# -r $LINUX_ROOT +# +# options passed down from zoneadm -z <zone-name> install +# -d <Linux-archives-dir> +# [core | server | desktop | development | all] +# +# The desktop cluster will be installed by default. +# + +# +# Restrict executables to /bin, /usr/bin and /usr/sbin +# +PATH=/bin:/usr/bin:/usr/sbin +export PATH + + +# +# Setup i18n output +# +TEXTDOMAIN="SUNW_OST_OSCMD" +export TEXTDOMAIN + +# +# Log passed arguments to file descriptor 2 +# +log() +{ + [[ -n $logfile ]] && echo "$@" >&2 +} + +# +# Send the provided printf()-style arguments to the screen and to the +# logfile. 
+# +screenlog() +{ + typeset fmt="$1" + shift + + printf "$fmt\n" "$@" + [[ -n $logfile ]] && printf "$fmt\n" "$@" >&2 +} + +# +# Print and log provided text if the shell variable "verbose_mode" is set +# +verbose() +{ + [[ -n $verbose_mode ]] && echo "$@" + [[ -n $logfile ]] && [[ -n $verbose_mode ]] && echo "$@" >&2 +} + +no_space=$(gettext "Not enough free space available in '%s'") +mb_req=$(gettext "(%s MB required, %s MB available)") +bad_rpmdir=$(gettext "'%s' is not a valid RPM directory!") + +inst_clust=$(gettext "Installing cluster '%s'") +unknown_clust=$(gettext "ERROR: Unknown cluster name: '%s'") + +wrong_disk=\ +$(gettext "Incorrect disk inserted (found %s, wanted %s), ejecting...") + +lofs_failed=$(gettext "Attempt to lofs mount '%s' on '%s' FAILED.") +symlink_failed=$(gettext "Attempt to symbolically link '%s' to '%s' FAILED.") + +mini_discfail=$(gettext "Install of zone '%s' miniroot from disc %s FAILED.") +mini_isofail=$(gettext "Install of zone '%s' miniroot from ISO '%s' FAILED.") + +mini_initfail=$(gettext "Attempt to initialize miniroot for zone '%s' FAILED.") +mini_instfail=$(gettext "Attempt to install miniroot for zone '%s' FAILED.") +mini_rpmfail=$(gettext "Miniroot install of RPM '%s' FAILED.") +mini_copyfail=$(gettext "Attempt to copy miniroot for zone '%s' FAILED.") +mini_bootfail=$(gettext "Attempt to boot miniroot for zone '%s' FAILED.") +mini_setfail=$(gettext "Attempt to setup miniroot for zone '%s' FAILED.") + +mini_mntfsfail=\ +$(gettext "Attempt to mount miniroot filesystems for zone '%s' FAILED.") + +rpm_initfail=\ +$(gettext "Attempt to initialize RPM database for zone '%s' FAILED.") + +zone_initrootfail=\ +$(gettext "Attempt to initialize root filesystem for zone '%s' FAILED.") + +zone_discfail=$(gettext "Install of zone '%s' from disc %s FAILED.") +zone_isofail=$(gettext "Install of zone '%s' from ISO '%s' FAILED.") +zone_instfail=$(gettext "Install of zone '%s' from '%s' FAILED '%s'.") +zone_haltfail=$(gettext "Unable to halt 
zone '%s'!") + +zone_rootfail=\ +$(gettext "ERROR: The specified zone root directory '%s' could not be created.") + +zone_rootsub=\ +$(gettext "ERROR: The specified zone root subdirectory '%s' does not exist.") + +mk_mntfail=$(gettext "Could not create the mount directory '%s'") +iso_mntfail=$(gettext "Unable to mount ISO image '%s' within zone '%s'") +iso_umntfail=$(gettext "Unable to unmount ISO image '%s' from within zone '%s'") +mountfail=$(gettext "Mount of '%s' on '%s' FAILED.") + +insert_discmsg=\ +$(gettext "Please insert disc %s in the removable media drive and press") + +install_discmsg=$(gettext "Installing zone '%s' from disc %s.") +install_isomsg=$(gettext "Installing zone '%s' from ISO image %s.") +install_ndiscs=$(gettext "You will need discs 1 - %s to fully install ") + +expand_nrpms=$(gettext "Attempting to expand %s RPM names...") + +install_nrpms_few=\ +$(gettext "Installing %s RPM packages; this may take a few minutes...") +install_nrpms_several=\ +$(gettext "Installing %s RPM packages; this may take several minutes...") + +install_zonefail=$(gettext "Attempt to install zone '%s' FAILED.") +install_dist=$(gettext "Installing distribution '%s'...") + +log_wrfail=$(gettext "Error: cannot write to log file '%s'.") + +no_distropath=$(gettext "ERROR: Distribution path '%s' doesn't exist.") + +initinstall_zonefail=$(gettext "Initial installation of zone '%s' FAILED.") + +install_abort=$(gettext "Installation aborted %s") +install_done=$(gettext "Initial installation of zone '%s' complete %s") + +# +# Get the device underlying a specified mounted file system +# +# Returns 0 on success, 1 on failure. 
#
get_mountdev()
{
	typeset mount_dir="$1"
	typeset device

	# "mount_dev" is the (global) result variable; clear any stale value.
	unset mount_dev

	# Keep only the df(1M) line that starts with '/'.
	device="`{ df -k "$mount_dir" | egrep "^/" ; } 2>/dev/null`" || return 1
	mount_dev=$(echo $device | awk -e '{print $1}' 2>/dev/null)

	# Only accept the result if it looks like an absolute path.
	[[ "`echo $mount_dev | cut -c 1`" = "/" ]] && return 0

	unset mount_dev
	return 1
}

#
# Get the directory name a specified device is mounted as
#
# On success the directory is left in the shell variable "mount_dir".
#
# Returns 0 on success, 1 on failure.
#
get_mountdir()
{
	typeset mount_dev="$1"
	typeset dir

	# "mount_dir" is the (global) result variable; clear any stale value.
	unset mount_dir

	# Keep only the df(1M) line that starts with '/'.
	dir="`{ df -k "$mount_dev" | egrep "^/" ; } 2>/dev/null`" || return 1
	mount_dir=$(echo $dir | awk -e '{print $6}' 2>/dev/null)

	# Only accept the result if it looks like an absolute path.
	[[ "`echo $mount_dir | cut -c 1`" = "/" ]] && return 0

	unset mount_dir
	return 1
}

#
# Check the free disk space of the passed filesystem against the passed
# argument.
#
# Arguments:
#
#	Argument 1:  Directory whose containing filesystem is checked
#	Argument 2:  Required free space, in MB
#
# Returns 0 if enough space is available, 1 if not (after reporting the
# shortfall via screenlog) or if the free space could not be determined.
#
check_mbfree()
{
	typeset dir="$1"
	typeset mb_required=$2
	typeset mbfree

	#
	# Return free space in partition containing passed argument in MB
	#
	# The assignment is kept separate from the typeset so that the
	# "|| return 1" tests the pipeline rather than typeset itself.
	#
	mbfree=`( df -k "$dir" 2>/dev/null | \
	    egrep -v Filesystem | awk -e '{print $4}' ) 2>/dev/null` || return 1

	# df -k reports kilobytes; convert to megabytes.
	((mbfree /= 1024))
	if ((mbfree < mb_required)); then
		screenlog "$no_space" "$dir"
		screenlog "$mb_req" "$mb_required" "$mbfree"
		return 1
	fi
	return 0
}

#
# Expand passed RPM names to their appropriate filenames in the passed RPM
# directory.
#
# Arguments:
#
#	Argument 1:  Mounted CD-ROM/ISO directory
#	Argument 2:  RPM directory
#	Arguments [3 - n]:  RPM names to process
#
# The expanded RPM names are returned in the shell array "rpms_found";
# names that could not be matched are returned in the array "rpms_left".
#
# For example:
#
#	expand_rpm_names /mnt/iso RedHat/RPMS dev kernel tetex redhat-menus
#
# would return something like:
#
#	rpms_found[0]:  dev-3.3.12.3-1.centos.0.i386.rpm
#	rpms_found[1]:  kernel-2.4.21-32.EL.i586.rpm
#	rpms_found[2]:  tetex-1.0.7-67.7.i386.rpm
#	rpms_found[3]:  redhat-menus-0.39-1.noarch.rpm
#
# The routine returns 0 on success, 1 on an error.
#
expand_rpm_names()
{
	typeset found=0
	typeset left=0

	typeset rpmdir="$1/$2"
	typeset curdir=${PWD:=$(pwd)}

	typeset arch
	typeset procinfo
	typeset rpm
	typeset rpmglob
	typeset rpmfile

	# "rpms_found" and "rpms_left" are the (global) result arrays.
	unset rpms_found
	unset rpms_left

	shift; shift

	#
	# All globbing below is done relative to the RPM directory, so bail
	# out if we can't get there rather than globbing in the wrong place.
	#
	if ! cd "$rpmdir"; then
		screenlog "$bad_rpmdir" "$rpmdir"
		return 1
	fi

	typeset rpmcheck="$(echo *.rpm)"

	# An unexpanded glob means the directory holds no RPM files at all.
	if [[ "$rpmcheck" = "*.rpm" ]]; then
		screenlog "$bad_rpmdir" "$rpmdir"
		cd "$curdir"
		return 1
	fi

	#
	# If the miniroot is booted, and the archs list isn't already set,
	# ask the zone's rpm command for the list of compatible architectures.
	#
	if [[ -n $miniroot_booted && -z $archs ]]; then
		procinfo=$(zlogin "$zonename" /bin/rpm --showrc | \
		    grep "^compatible archs")

		[[ $? -eq 0 ]] &&
		    archs=$(echo $procinfo | sed -e 's/^compatible archs : //')

		[[ -n $archs ]] &&
		    log "RPM-reported compatible architectures: $archs"
	fi

	#
	# Either the miniroot isn't booted or asking rpm for the information
	# failed for some reason, so make some reasonable assumptions.
	#
	if [[ -z $archs ]]; then
		procinfo=$(psrinfo -vp | grep family)

		if echo "$procinfo" | egrep -s "AuthenticAMD"; then
			#
			# Check for AMD athlon compatibility.  The decision to
			# have athlon files checked for before i686 files is
			# what Linux does.
			#
			if echo "$procinfo" | egrep -s "family 6" ||
			    echo "$procinfo" | egrep -s "family 15"; then
				archs="athlon i686"
			fi
		elif echo "$procinfo" | egrep -s "GenuineIntel"; then
			#
			# Check for i686 compatibility
			#
			if echo "$procinfo" | egrep -s "family 15"; then
				archs="i686"
			fi
		fi

		archs="$archs i586 i486 i386 noarch"

		log "Derived compatible architectures: $archs"
	fi

	verbose "RPM source directory: \"$rpmdir\""
	log "RPM source directory: \"$rpmdir\""

	if [[ $# -eq 1 ]]; then
		screenlog "$(gettext 'Attempting to expand 1 RPM name')"
	else
		screenlog "$expand_nrpms" "$#"
	fi

	for rpm in "$@"; do
		#
		# Search for the appropriate RPM package, using the compatible
		# architecture list contained in "archs" to look for the best
		# match.
		#
		# For example, if the processor is an i686, and the rpm is
		# "glibc", the script will look for the files (in order):
		#
		#	glibc[.-][0-9]*.i686.rpm
		#	glibc[.-][0-9]*.i586.rpm
		#	glibc[.-][0-9]*.i486.rpm
		#	glibc[.-][0-9]*.i386.rpm
		#	glibc[.-][0-9]*.noarch.rpm
		#	glibc[.-][0-9]*.fat.rpm
		#
		# and will stop when it finds the first match.
		#
		# TODO: Once the miniroot is booted, we should verify that
		#	the rpm name has been expanded to "$rpmfile" properly
		#	by comparing "$rpm" and the output of:
		#
		#	zlogin -z <zone> /bin/rpm --qf '%{NAME}' -qp $rpmfile
		#
		for arch in $archs; do
			rpmglob="$rpm[.-][0-9]*.$arch.rpm"
			rpmfile="$(echo $rpmglob)"

			# The glob expanded to something, so we have a match.
			[[ "$rpmfile" != "$rpmglob" ]] && break

			unset rpmfile
		done

		if [[ -z $rpmfile ]]; then
			rpms_left[$left]="$rpm"
			((left += 1))
		else
			rpms_found[$found]="$rpmfile"
			((found += 1))
		fi
	done

	cd "$curdir"
	log "\"$rpmdir\": matched $found of $# RPM names."
	log "\"$rpmdir\": $left RPMs remaining."
	return 0
}

#
# Build the rpm lists used to install a machine.
#
# The first argument is the number of discs in the distribution.  The
# second, optional, argument is the metacluster to install.
#
# The variable "distro_rpms" is built from the individual package RPM lists
# read in from an individual distribution definition file.
#
build_rpm_list()
{
	# Default to a desktop installation
	typeset cluster=desktop
	typeset cnt=0
	typeset clust

	#
	# Map each (possibly abbreviated) cluster name passed in to its
	# canonical name, counting how many were given.
	#
	for clust in "$@"; do
		((cnt += 1))
		case $clust in
			core)		cluster=core ;;
			desk*)		cluster=desktop ;;
			serv*)		cluster=server ;;
			dev*)		cluster=developer ;;
			all)		cluster=all
					break;;
			*)		screenlog "$unknown_clust" "$clust"
					exit $ZONE_SUBPROC_USAGE ;;
		esac
	done

	if [ $cnt -gt 1 ]; then
		screenlog "$(gettext 'Too many install clusters specified')"
		exit $ZONE_SUBPROC_USAGE
	fi

	screenlog "$inst_clust" $cluster

	case $cluster in
		core)		distro_rpms=$distro_core_rpms ;;
		desktop)	distro_rpms=$distro_desktop_rpms ;;
		server)		distro_rpms=$distro_server_rpms ;;
		developer)	distro_rpms=$distro_developer_rpms ;;
		all)		distro_rpms=$distro_all_rpms ;;
	esac

	# The RPMs in the miniroot must all be installed properly as well
	distro_rpms="$distro_miniroot_rpms $distro_rpms"
}

#
# Install the "miniroot" minimal Linux environment that is booted single-user
# to complete the install.
#
# This works by feeding the RPM list needed for the installation one
# by one to rpm2cpio(1) and unpacking the result under "$rootdir".
#
# Usage:
#     install_miniroot <mounted media dir> <RPM directory> <RPMS to install>
#
# Returns 0 on success, 1 as soon as any one RPM fails to unpack.
#
install_miniroot()
{
	typeset mediadir="$1"
	typeset rpmdir="$2"
	typeset rpm

	shift; shift

	for rpm in "$@"; do
		verbose "Installing RPM \"$rpm\" to miniroot at" \
		    "\n \"$zoneroot\"..."

		# cpio's chatter goes to stderr (the log), not the screen.
		rpm2cpio "$mediadir/$rpmdir/$rpm" | \
		    ( cd "$rootdir" && cpio -idu ) 1>&2

		if [[ $? -ne 0 ]]; then
			# Report the RPM that failed, not the zone.
			screenlog "$mini_rpmfail" "$rpm"
			return 1
		fi
	done

	return 0
}

#
# Install the zone from the mounted disc image by feeding a list of RPMs to
# install from this image to RPM running on the zone via zlogin(1).
#
# Usage:
#     install_zone <root dir> <RPM directory> <RPMS to install>
#
# Returns 0 on success, 1 if the rpm command run in the zone failed.
#
install_zone()
{
	#
	# convert media directory to zone-relative path
	#
	typeset zonerpmdir=${1##$rootdir}/$2
	typeset rpmopts="-i"

	typeset rpmerr

	shift; shift

	[[ -n $verbose_mode ]] && rpmopts="-ivh"

	#
	# There's a quirk in our version of ksh that sometimes resets the
	# trap handler for the shell.  Since the rpm command will be the
	# longest part of any given install, make sure that an interrupt while
	# the command is running will bring the miniroot down and clean up
	# the interrupted install.
	#
	trap trap_cleanup INT

	#
	# Print a message depending on how many RPMS we have to install.
	#
	# Ten RPMS seems like a reasonable boundary between when an install may
	# take a "few" or "several" minutes.
	#
	if [[ $# -eq 1 ]]; then
		screenlog "$(gettext 'Installing 1 RPM package.')"
	elif [[ $# -lt 10 ]]; then
		screenlog "$install_nrpms_few" "$#"
	else
		screenlog "$install_nrpms_several" "$#"
	fi

	log ""
	log "Installing: $@"
	log ""

	echo

	#
	# LX_INSTALL must be defined when running this command in order to
	# enable switches built into various emulated system calls to allow
	# the dev package (which may not actually write to /dev) to function.
	#
	# The cd is chained with "&&" so that rpm is never run from the wrong
	# directory; a failed cd is reported as an install failure below.
	#
	zlogin "$zonename" "( cd \"$zonerpmdir\" && LX_INSTALL=1 \
	    /bin/rpm $rpmopts --force --aid --nosignature --root /a $* )"

	rpmerr=$?

	if [[ $rpmerr -ne 0 ]]; then
		log ""
		log "Zone RPM install exited, code $rpmerr"
		log ""

		screenlog "$zone_instfail" "$zonename" "$zonerpmdir" "$rpmerr"
		return 1
	fi

	return 0
}

#
# Attempt to unmount all file systems passed on the command line
#
# The file systems that could not be unmounted are left in the shell
# variable "umount_failures".
#
# Returns 0 if all umounts succeeded, otherwise the number of umount failures
#
umount_list()
{
	typeset failures=0
	typeset mounted

	unset umount_failures

	for mounted in "$@"; do
		if ! umount "$mounted"; then
			umount_failures="$umount_failures $mounted"
			((failures += 1))
		fi
	done

	return $failures
}

#
# Set up lofs mounts required for chroot(1M) to work on a new root directory
# located in /a within a zone.
#
# The mount points established are recorded in the shell variable
# "newroot_mounted"; on failure everything mounted so far is unmounted again
# and the variable is unset.
#
# Returns 0 on success, 1 on failure.
#
newroot_lofimnt()
{
	typeset dev
	typeset target

	unset newroot_mounted

	#
	# /usr and /lib get lofs mounted in the zone on /native read-only
	#
	# $zoneroot/dev gets lofs mounted on /native/dev read/write to allow
	# the use of native devices.
	#
	mount -F lofs -o ro /lib "$rootdir/a/native/lib" || return 1
	newroot_mounted="$rootdir/a/native/lib"

	if ! mount -F lofs -o ro /usr "$rootdir/a/native/usr"; then
		umount "$rootdir/a/native/lib"
		unset newroot_mounted
		return 1
	fi

	newroot_mounted="$newroot_mounted $rootdir/a/native/usr"

	if ! mount -F lofs "$zoneroot/root/native/dev" \
	    "$rootdir/a/native/dev"; then
		umount_list $newroot_mounted
		unset newroot_mounted
		return 1
	fi

	newroot_mounted="$newroot_mounted $rootdir/a/native/dev"

	#
	# This is a bit ugly; to provide device access within the chrooted
	# environment RPM will use for its install, we will create the same
	# symlinks "$rootdir/dev" contains in the new dev directory, and will
	# lofs mount the balance of "$rootdir/dev" into the same locations in
	# /dev in the new filesystem we're installing to.
	#
	for dev in "$zoneroot"/root/dev/*
	do
		# An unexpanded glob means the directory is empty.
		if [[ "$dev" == "$zoneroot/root/dev/*" ]]; then
			log "ERROR: No files found in $zoneroot/root/dev"
			umount_list $newroot_mounted
			unset newroot_mounted
			return 1
		fi

		target="$rootdir/a/dev/$(basename "$dev")"

		#
		# If the device file is a symbolic link, create a new link
		# in the target directory with the same source.
		#
		# If the device file is any other file or directory, lofs
		# mount it from the device directory into the target directory.
		#
		if [[ -h $dev ]]; then
			#
			# Force the C locale for file(1) itself so that the
			# "link to" text stripped below is not localized.
			#
			typeset source=$(LC_ALL=C file -h "$dev")

			#
			# Remove extraneous text from the output of file(1) so
			# we're left only with the target path of the symbolic
			# link.
			#
			source="${source##*link to }"

			[[ -a "$target" ]] && /bin/rm -f "$target"

			if ! ln -s "$source" "$target"; then
				screenlog "$symlink_failed" "$source" "$target"
				umount_list $newroot_mounted
				unset newroot_mounted
				return 1
			fi
		else
			# lofs needs an existing mount point to mount onto.
			[[ ! -a "$target" ]] && touch "$target"

			if ! mount -F lofs "$dev" "$target"; then
				screenlog "$lofs_failed" "$dev" "$target"
				umount_list $newroot_mounted
				unset newroot_mounted
				return 1
			fi

			newroot_mounted="$newroot_mounted $target"
		fi

	done

	return 0
}

#
# Replace the root directory of a zone with the duplicate previously created
# in the zone's /a directory.
#
# Returns 0 on success, 1 if the zone could not be halted or one of the
# directory renames failed.
#
replace_miniroot()
{
	#
	# The zoneadm halt will automatically unmount any file systems
	# mounted via lofs in the zone, so that saves us from having to
	# methodically unmount each one.
	#
	if ! zoneadm -z "$zonename" halt; then
		screenlog "$zone_haltfail" "$zonename"
		return 1
	fi

	unset miniroot_booted
	unset newroot_mounted

	[[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a"
	[[ -d "$zoneroot/oldroot" ]] && rm -rf "$zoneroot/oldroot"

	#
	# Copy the logfile or we'll lose all details of the install into the
	# new root directory, so strip "$rootdir" off the pathname of the
	# current logfile and use it to generate the pathname of the log file
	# in the new root directory.
	#
	[[ -n $logfile && -f "$logfile" ]] &&
	    cp "$logfile" "$rootdir/a${logfile##$rootdir}"

	#
	# Swap the new root into place:  move it out from under the old root,
	# move the old root aside, then move the new root to its final name.
	#
	mv -f "$rootdir/a" "$zoneroot/a" || return 1
	mv -f "$rootdir" "$zoneroot/oldroot" || return 1
	mv -f "$zoneroot/a" "$rootdir" || return 1

	screenlog \
	    "$(gettext 'Completing install processing; this may take a few')"
	screenlog "$(gettext 'minutes...')"
	rm -rf "$zoneroot/oldroot"

	#
	# Remove the contents of the /dev directory created by the install.
	#
	# We don't technically need to do this, but the zone infrastructure
	# will mount $zoneroot/dev atop $rootdir/dev anyway, hiding its
	# contents so we may as well clean up after ourselves.
	#
	# The extra checks are some basic paranoia due to the potentially
	# dangerous nature of this command but are not intended to catch all
	# malicious cases
	#
	[[ "$rootdir" != "" && "$rootdir" != "/" ]] && rm -rf "$rootdir"/dev/*

	return 0
}

#
# Initialize, duplicate and boot the miniroot, then prepare the new root
# directory in the zone's /a for installation by RPM.
#
# Sets "miniroot_booted" once the zone has been booted.
#
# Returns 0 on success, 1 on failure.
#
setup_miniroot()
{
	unset miniroot_booted

	if ! "$cwd/lx_init_zone" "$rootdir" "$logfile" mini; then
		screenlog "$mini_initfail" "$zonename"
		return 1
	fi

	if ! copy_miniroot; then
		screenlog "$mini_copyfail" "$zonename"
		return 1
	fi

	#
	# zoneadm gets upset if the zone root directory is group or world
	# readable or executable, so make sure it isn't before proceeding.
	#
	chmod 0700 "$zoneroot"

	screenlog "$(gettext 'Booting zone miniroot...')"

	if ! zoneadm -z "$zonename" boot -f; then
		screenlog "$mini_bootfail" "$zonename"
		return 1
	fi

	miniroot_booted=1

	#
	# Now that the miniroot is booted, unset the compatible architecture
	# list that expand_rpm_names was using for the miniroot so that it will
	# get the list from rpm for the full install.
	#
	unset archs

	#
	# Mount all the filesystems needed to install the new root
	# directory.
	#
	if ! newroot_lofimnt; then
		screenlog "$mini_mntfsfail" "$zonename"

		if [[ -n $newroot_mounted ]]; then
			umount_list $newroot_mounted
			unset newroot_mounted
		fi
		return 1
	fi

	#
	# Attempt to initialize the RPM database for the new zone
	#
	if ! zlogin "$zonename" /bin/rpm --initdb --root /a; then
		screenlog "$rpm_initfail" "$zonename"
		return 1
	fi

	screenlog "$(gettext 'Miniroot zone setup complete.')"
	return 0
}

#
# Complete the installation:  run the post-install fixups inside the new
# root, swap the new root into place and initialize it.
#
# Returns 0 on success, 1 on failure.
#
finish_install()
{
	#
	# Perform some last cleanup tasks on the newly installed zone.
	#
	# Note that the zlogin commands aren't checked for errors, as the
	# newly installed zone will still boot even if the commands fail.
	#
	typeset file

	#
	# Run ldconfig in the new root
	#
	zlogin "$zonename" /usr/sbin/chroot /a \
	    /sbin/ldconfig -f /etc/ld.so.conf

	#
	# Create the /etc/shadow and /etc/gshadow files if they don't already
	# exist
	#
	[[ -a "$rootdir/a/etc/shadow" ]] ||
	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/pwconv

	[[ -a "$rootdir/a/etc/gshadow" ]] ||
	    zlogin "$zonename" /usr/sbin/chroot /a /usr/sbin/grpconv

	#
	# Make sure all init.d and rc[0-6].d links are set up properly.
	#
	for file in `ls "$rootdir/a/etc/init.d"`; do
		zlogin "$zonename" /usr/sbin/chroot /a \
		    /sbin/chkconfig --del $file > /dev/null 2>&1

		zlogin "$zonename" /usr/sbin/chroot /a \
		    /sbin/chkconfig --add $file > /dev/null 2>&1
	done

	#
	# Unlike the fixups above, a failure to swap the new root into place
	# is fatal:  "$rootdir" would still be the miniroot, so initializing
	# it as the finished zone would be wrong.
	#
	replace_miniroot || return 1

	if ! "$cwd/lx_init_zone" "$rootdir" "$logfile"; then
		screenlog "$zone_initrootfail" "$zonename"
		return 1
	fi

	return 0
}

#
# Duplicate the installed "miniroot" image in a subdirectory of the base
# directory of the zone.
#
# This is done so that a new root directory can be created that will be used
# as the root of a chrooted directory that RPM running on the zone will install
# into.
+# +copy_miniroot() +{ + # + # Create the directory $zoneroot/a if it doesn't already exist + # + [[ -d "$zoneroot/a" ]] || + { mkdir -p "$zoneroot/a" || return 1 ; } + + screenlog \ + "$(gettext 'Duplicating miniroot; this may take a few minutes...')" + + # + # Duplicate the miniroot to /a, but don't copy over any /etc/rc.d or + # lxsave_ files. + # + ( cd "$rootdir"; find . -print | egrep -v "/etc/rc\.d|lxsave_" | \ + cpio -pdm ../a ) + + [[ -d "$rootdir/a" ]] && rm -rf "$rootdir/a" 2>/dev/null + mv -f "$zoneroot/a" "$rootdir/a" || return 1 + + return 0 +} + +# +# Read the first four lines of the .discinfo file from the root of the passed +# disc directory (which should either be a mounted disc or ISO file.) +# +# The first four lines of the .discinfo file will be used to set appropriate +# shell variables on success: +# +# rd_line[0]: Disc Set Serial Number (sets rd_serial) +# rd_line[1]: Distribution Release Name (sets rd_release) +# rd_line[2]: Distribution Architecture (sets rd_arch) +# rd_line[3]: Disc Number in Distribution (sets rd_discnum) +# +# Returns 0 on success, 1 on failure. +# +read_discinfo() +{ + typeset rd_file="$1/.discinfo" + + verbose "read discinfo file \"$rd_file\"" + + # + # If the .discinfo file doesn't exist or isn't readable, return 1 + # + [[ ! -f "$rd_file" || ! -r "$rd_file" ]] && return 1 + + typeset rd_line + + unset rd_arch + unset rd_discnum + unset rd_release + unset rd_serial + + typeset linenum=0 + + while read -r rd_line[$linenum]; do + # + # If .discinfo architecture isn't "i386," fail here as + # we only support i386 distros at this time. + # + [[ $linenum = 2 ]] && [[ "${rd_line[2]}" != "i386" ]] && + return 1 + + # + # We've successfully read the first four lines of .discinfo + # into $rd_line, so do the appropriate shell variable munging. 
+ # + if ((linenum == 3)); then + rd_serial=${rd_line[0]} + rd_release=${rd_line[1]} + + # + # CentOS names their releases "final" + # + [[ "$rd_release" = "final" ]] && + rd_release="CentOS [Disc Set $rd_serial]" + + rd_arch=${rd_line[2]} + rd_discnum=${rd_line[3]} + return 0 + fi + + ((linenum += 1)) + done < "$rd_file" + + # + # The file didn't have at least four lines, so indicate the read + # failed. + # + return 1 +} + +# +# Mount a disc as represented by the passed device name +# +# The disc will be mounted at $zoneroot/root/disc, either via a loopback +# mount (if vold is active) or directly (if vold is not active.) +# +# On success the global "mntdir" holds the mount point and "removable" is +# set to 1; "mntdir" is unset again when the mkdir or the mount fails. +# +# Returns 0 on success, 1 on failure, 2 if no disc was available +# +mount_removable_disc() +{ + typeset device="$1" + typeset mount_err + mntdir="$rootdir/disc" + + removable=0 + + [[ -d $mntdir ]] || if ! mkdir -p $mntdir; then + screenlog "$mk_mntfail" "$mntdir" + unset mntdir + return 1 + fi + + if [[ "$vold_present" = "1" ]]; then + # + # allow vold to handle disc mounting + # + # Have volcheck check for the appropriate disc every two + # seconds for ten seconds. + # + typeset mount_timeout=10 + typeset mount_interval=2 + + volcheck -i $mount_interval -t $mount_timeout \ + "$device" > /dev/null 2>&1 + + [[ -d "$device" ]] || return 2 + + mount -F lofs -o ro "$device" "$mntdir" + mount_err=$? + else + # + # Attempt to mount the disc manually + # + mount -F hsfs -o ro "$device" "$mntdir" + mount_err=$? + + # + # NOTE(review): 33 is presumably the status mount returns + # when no media is present -- confirm. + # + ((mount_err == 33)) && return 2 + fi + + if ((mount_err != 0)); then + screenlog "$mntfail" "$device" "$mntdir" + unset mntdir + return 1 + fi + + verbose "Mount of \"$device\" on \"$mntdir\" succeeded!" + removable=1 + return 0 +} + +# +# Eject the disc mounted on the passed directory name +# +# Returns 0 on success, 1 on failure. 
+# +eject_removable_disc() +{ + [[ "$removable" != "1" ]] && return 1 + + typeset mount_dir="$1" + + get_mountdev "$mount_dir" || return 1 + + umount "$mount_dir" > /dev/null 2>&1 && unset mntdir + eject -p "$mount_dev" || return 1 + + return 0 +} + +# +# Get a particular disc of a multi-disc set. +# +# This basically works by doing the following: +# +# 1) Mount the disc +# 2) Read the disc's .discinfo file to see which disc it is +# 3) If it's not the desired disc, eject it and ask the user to insert the +# disc we wanted. +# +# Returns 0 on success, 1 on failure. +# +get_discnum() +{ + typeset mntdev="$1" + typeset discnum="$2" + typeset enter + typeset mount_err + + while :; do + while :; do + mount_removable_disc "$mntdev" + mount_err=$? + + if ((mount_err == 2)); then + screenlog "$insert_discmsg" $discnum + screenlog "$(gettext '<ENTER>')" + read enter && continue + return 1 + fi + + ((mount_err == 0)) && break; + + return 1 + done + + # + # Make sure that the mounted disc is disc $discnum. + # + # If it is, return to the caller, otherwise eject the + # disc and try again. + # + read_discinfo "$mntdir" + + verbose "\nRemovable Disc \"$1\": Serial \"$rd_serial\"" + verbose " Release \"$rd_release\" Disc #$rd_discnum\n" + + [[ "$rd_discnum" = "$discnum" ]] && return 0 + + screenlog "$wrong_disk" "$rd_discnum" "$discnum" + eject_removable_disc "$mntdir" || return 1 + + screenlog "$insert_discmsg" $discnum + screenlog "$(gettext '<ENTER>')" + read enter || return 1 + done +} + +# +# Find out which distro the mounted disc belongs to +# +# Do this by cycling through the distro directory and reading each distro +# file in turn looking for: +# +# 1) The number of discs in a distribution +# 2) The serial number of the distribution +# 3) The name of the distribution +# +# Based on this, we can determine based on the ISO files available which +# distributions, if any, we have a complete set of files to support. 
+# +# The function returns the supported isos in the array "iso_names." +# +get_disc_distro() +{ + typeset distro + typeset distro_files="$(echo $distro_dir/*.distro)" + + unset distro_ndiscs + unset distro_file + unset release + + [[ "$distro_files" = "$distro_dir/*.distro" ]] && return 1 + + for distro in $distro_files; do + [[ ! -f "$distro" ]] && continue + + verbose "Checking for disc distro \"$distro\"..." + + . "$distro" > /dev/null + + [[ "$rd_serial" != "$distro_serial" ]] && continue + + distro_file="$distro" + distro_ndiscs="$rd_ndiscs" + release="$rd_release" + return 0 + done + + return 1 +} + +# +# Install a zone from discs +# +# Depends on the following variables: +# +# $distro_ndiscs: Number of discs needed to fully install the distribution +# +# returns 0 on success, 1 on failure +# +install_from_discs() +{ + typeset status=0 + typeset discnum=1 + typeset mountdev="$1" + typeset discorder + typeset retval + + # + # Ask for the first disc. + # + # We don't know which distro this may be yet, so we can't ask for + # the first disc in the install order, so we'll just have to ask for + # disc 1. + # + if ! get_discnum "$mountdev" "$discnum"; then + screenlog "$mini_discfail" "$zonename" "1" + return 1 + fi + + if ! get_disc_distro "$mntdir"; then + screenlog \ + "$(gettext 'Unable to find a supported Linux release on')" + screenlog "$(gettext 'the media in the removable media drive.')" + echo + umount "$mntdir" > /dev/null 2>&1 + return 1 + fi + + . "$distro_file" > /dev/null + + check_mbfree $zoneroot $distro_mb_required || return 1 + + build_rpm_list $install_packages + + echo + screenlog "$install_ndiscs" "$distro_ndiscs" + echo "\"$rd_release\"\n" + + # + # Calculate the proper order for the install discs. + # + # distro_discorder is an array that indicates each disc's place + # in the overall installation process. An array of [4 1 2 3] + # means that "Disk 1" is the 4th disk to be installed, "Disk + # 2" is the 1st disk to be installed, and so on. 
+ # + # Here we are converting that array into one that lists the + # CDs in the order in which they should be installed, such that a + # distro_discorder array of [4 1 2 3] would be converted into + # a discorder array of [2 3 4 1]. + # + while ((discnum <= distro_ndiscs)); do + discorder[${distro_discorder[$discnum - 1]}]=$discnum + ((discnum += 1)) + done + + # + # If the disc that was read above isn't the first disc in the install + # order, eject it and ask for the appropriate disc. + # + if [[ "${discorder[1]}" != "$rd_discnum" ]]; then + eject_removable_disc "$mntdir" + if ! get_discnum "$mountdev" "${discorder[1]}"; then + screenlog "$mini_discfail" "$zonename" "${discorder[1]}" + return 1 + fi + fi + + zone_mounted="$mntdir" + + log "Installing zone miniroot." + screenlog "$(gettext 'Installing zone miniroot.')" + + discnum=1 + while ((discnum <= distro_ndiscs)); do + expand_rpm_names "$mntdir" "$distro_rpmdir" \ + $distro_miniroot_rpms + + retval=0 + + if [[ -n $rpms_found ]]; then + verbose "Installing miniroot from disc" \ + "${discorder[$discnum]}..." + + if ! install_miniroot "$mntdir" "$distro_rpmdir" \ + "${rpms_found[@]}"; then + screenlog "$mini_discfail" "$zonename" \ + "$rd_discnum" + return 1 + fi + fi + + # + # If this is the first disc in the install order and we're + # done installing the miniroot, just exit the loop without + # ejecting the disk as we'll need it again to start the actual + # install. + # + if [[ "$discnum" = "1" && -z $rpms_left ]]; then + umount "$mntdir" + unset zone_mounted + break + fi + + eject_removable_disc "$mntdir" + unset zone_mounted + + [[ -z $rpms_left ]] && break + + distro_miniroot_rpms="${rpms_left[@]}" + ((discnum += 1)) + + if ! 
get_discnum "$mountdev" "${discorder[$discnum]}"; then + screenlog "$mini_discfail" "$zonename" \ + "${discorder[$discnum]}" + return 1 + fi + + zone_mounted="$mntdir" + done + + if [[ -n $rpms_left ]]; then + log "" + log "Unable to locate some packages on install media:\n" \ + " ${rpms_left[@]}" + log "" + screenlog "$mini_instfail" "$zonename" + return 1 + fi + + if ! setup_miniroot; then + screenlog "$mini_setfail" "$zonename" + return 1 + fi + + discnum=1 + while ((discnum <= distro_ndiscs)); do + # + # If the disc needed in the install order isn't the one in + # the drive, eject it and ask for the correct one. + # + if [[ "${discorder[$discnum]}" != "$rd_discnum" ]]; then + eject_removable_disc "$mntdir" + if ! get_discnum "$mountdev" \ + "${discorder[$discnum]}"; then + screenlog "$mini_discfail" "$zonename" \ + "${discorder[$discnum]}" + return 1 + fi + fi + + zone_mounted="$mntdir" + + expand_rpm_names "$rootdir/disc" "$distro_rpmdir" $distro_rpms + + retval=0 + + if [[ -n $rpms_found ]]; then + log "" + echo + screenlog "$install_discmsg" "$zonename" \ + "$rd_discnum" + + if ! install_zone "$mntdir" "$distro_rpmdir" \ + ${rpms_found[@]}; then + screenlog "$zone_discfail" "$zonename" \ + "$rd_discnum" + retval=1 + fi + fi + + eject_removable_disc "$zone_mounted" + unset zone_mounted + + # + # Return non-zero now if the install_zone above failed. + # + [[ $retval -ne 0 ]] && return $retval + + # + # No more RPMs means we're done! + # + [[ -z $rpms_left ]] && break + + distro_rpms="${rpms_left[@]}" + ((discnum += 1)) + done + + if [[ -n $rpms_left ]]; then + log "" + log "Unable to locate some packages on install media:\n" \ + " ${rpms_left[@]}" + log "" + screenlog "$install_zonefail" "$zonename" + return 1 + fi + + finish_install + return $? 
+} + +# +# Find out which distros we have ISO files to support +# +# Do this by cycling through the distro directory and reading each distro +# file in turn looking for: +# +# 1) The number of discs in a distribution +# 2) The serial number of the distribution +# 3) The name of the distribution +# +# Based on this, we can determine based on the ISO files available which +# distributions, if any, we have a complete set of files to support. +# +# The arguments are the directories on which the candidate ISOs are mounted. +# +# The function records each fully supported distribution in the arrays +# "distro_file", "distro_ndiscs", "iso_set" and "release", and sets "ndistros" +# to the number of distributions found. +# +get_iso_distros() +{ + typeset index + typeset iso_names + typeset iso_release + typeset serial + + typeset distro_files="$(echo $distro_dir/*.distro)" + + ndistros=0 + + unset iso_set + unset distro_file + unset distro_ndiscs + unset release + + [[ "$distro_files" = "$distro_dir/*.distro" ]] && return + + set -A iso_files "$@" + + for distro in $distro_files; do + [[ ! -f $distro ]] && continue + + . "$distro" > /dev/null + + # + # NOTE(review): index is only advanced when an ISO is accepted + # below, so it may not match the position of the ISO being + # examined when iso_files entries are unset -- confirm. + # + index=0 + unset iso_names + + # + # Forget the serial number matched for any previous distro so + # that this distro's ISOs are checked against its own + # $distro_serial rather than an earlier distro's serial. + # + serial="" + + verbose "Checking for distro \"$distro\"..." + + for iso in "${iso_files[@]}"; do + [[ -z "$iso" ]] && continue + + verbose "Checking iso file mounted at \"$iso\"..." + + if [[ ! -d "$iso" || ! 
-r "$iso" ]]; then + unset iso_files[$index] + continue + fi + + read_discinfo "$iso" || continue + + verbose " ISO \"$iso\": Serial \"$rd_serial\"" + verbose " Release \"$rd_release\" Disc $rd_discnum" + + if [[ -z "$serial" ]]; then + [[ "$rd_serial" != "$distro_serial" ]] && + continue + + discnum=${distro_discorder[$rd_discnum - 1]} + verbose "Added ISO \"$iso\" as disc $discnum" + iso_names[$discnum]="$iso" + iso_release="$rd_release" + serial="$rd_serial" + unset iso_files[$index] + ((index += 1)) + else + [[ "$rd_serial" != "$serial" ]] && continue + + discnum=${distro_discorder[$rd_discnum - 1]} + verbose "Added ISO \"$iso\" as disc $discnum" + iso_names[$discnum]="$iso" + unset iso_files[$index] + ((index += 1)) + fi + done + + [[ ${#iso_names[@]} -ne $distro_ndiscs ]] && continue + + distro_file[$ndistros]="$distro" + distro_ndiscs[$ndistros]="$rd_ndiscs" + iso_set[$ndistros]="${iso_names[@]}" + release[$ndistros]="$iso_release" + + ((ndistros += 1)) + ((${#iso_files[@]} == 0)) && break + done +} + +# +# Do a lofi add for the passed filename and set lofi_dev to the lofi +# device name (e.g. "/dev/lofi/1".) +# +# If the passed filename already has a lofi device name, simply set lofi_dir +# to the existing device name. +# +# Returns 0 on success, 1 on failure. +# +lofi_add() +{ + typeset filename="$1" + + lofi_dev=$(lofiadm "$filename" 2>/dev/null) && return 0 + lofi_dev=$(lofiadm -a "$filename" 2>/dev/null) && return 0 + return 1 +} + +# +# Delete the lofi device name passed in. +# +# Returns 0 on success, 1 on failure. +# +lofi_del() +{ + typeset lofi_device="$1" + + lofiadm -d "$lofi_device" 2>/dev/null + return $? +} + +# +# Mount the lofi device name passed in. +# +# Set the variable mntdir to the directory on which the lofi device is +# mounted. +# +# Returns 0 on success, 1 on failure. 
+# +lofi_mount() +{ + typeset created=0 + typeset lofidev="$1" + typeset mntroot="$2" + + # + # Check to see if the lofi device is already mounted and return + # the existing mount point if it is. + # + get_mountdir "$lofidev" && { mntdir="$mount_dir" ; return 0 ; } + + mntdir="$mntroot/iso.`/usr/bin/basename $1`" + if [[ ! -d "$mntdir" ]]; then + mkdir -p "$mntdir" || return 1 + created=1 + fi + + verbose "Attempting mount of device \"$lofidev\"" + verbose " on directory \"$mntdir\"... \c" + + if ! mount -F hsfs -o ro "$lofidev" "$mntdir" 2>/dev/null; then + verbose "FAILED." + ((created == 1)) && rmdir -ps "$mntdir" + return 1 + fi + + verbose "succeeded." + return 0 +} + +# +# Unmount the lofi device name passed in, and remove the device mount point +# after unmounting the device. +# +# Returns 0 on success, 1 on failure. +# +lofi_umount() +{ + typeset mntdev="$1" + + # + # If the directory name passed wasn't mounted to begin with, + # just return success. + # + get_mountdir "$mntdev" || return 0 + + verbose "Unmounting device \"$mntdev\"... \c" + + if ! umount "$mntdev" >/dev/null 2>&1 ; then + verbose "FAILED." + return 1 + fi + + verbose "succeeded." + return 0 +} + +# +# Install a zone from mounted ISO files +# +# Argument: Array index of distribution to install +# +# Depends on the following variables: +# +# $iso_set[arg]: List of ISOs required to fully install the +# distribution +# +install_from_isos() +{ + typeset distro=$1 + typeset isonum=1 + + set ${iso_set[$distro]} # set passed args array + + log "Installing zone miniroot." + screenlog "$(gettext 'Installing zone miniroot.')" + + while ((isonum <= ${distro_ndiscs[$distro]})); do + verbose "Installing miniroot from ISO image $isonum (of" \ + "${distro_ndiscs[$distro]})" + + ldir="${lofi_mntdir[$isonum]}" + expand_rpm_names "$ldir" "$distro_rpmdir" $distro_miniroot_rpms + + if [[ -n $rpms_found ]]; then + if ! 
install_miniroot "$ldir" "$distro_rpmdir" \ + "${rpms_found[@]}"; then + screenlog "$mini_isofail" "$zonename" "$ldir" + return 1 + fi + fi + + [[ -z $rpms_left ]] && break + + distro_miniroot_rpms="${rpms_left[@]}" + ((isonum += 1)) + done + + if [[ -n $rpms_left ]]; then + log "" + log "Unable to locate some packages on ISO images:\n" \ + " ${rpms_left[@]}" + log "" + screenlog "$mini_instfail" "$zonename" + return 1 + fi + + if ! setup_miniroot; then + screenlog "$mini_setfail" "$zonename" + return 1 + fi + + [[ -d "$rootdir/iso" ]] || mkdir -m 0700 "$rootdir/iso" + + if [[ ! -d "$rootdir/iso" ]]; then + screenlog "$mk_mntfail" "$rootdir/iso" + screenlog "FAILED." + return 1 + fi + + # + # Install the zone proper: loopback mount each ISO of the set in turn + # on $rootdir/iso and install whatever requested RPMs it holds, until + # no RPMs are left to find. + # + isonum=1 + for iso in ${iso_set[$distro]}; do + echo + screenlog "$install_isomsg" "$zonename" "$isonum" + + # + # Look up the ISO's filename up front so that both the mount and + # the umount failure messages below can report it. + # + typeset name="${iso_filename[$isonum]}" + + if ! mount -F lofs -o ro "$iso" "$rootdir/iso"; then + screenlog "$iso_mntfail" "$name" "$zonename" + return 1 + fi + + zone_mounted="$rootdir/iso" + + expand_rpm_names "$rootdir/iso" "$distro_rpmdir" $distro_rpms + + if [[ -n $rpms_found ]]; then + log "" + log "Installing: ${rpms_found[@]}" + + if ! install_zone "$rootdir/iso" "$distro_rpmdir" \ + ${rpms_found[@]}; then + screenlog "$zone_isofail" "$zonename" "$iso" + umount "$rootdir/iso" + return 1 + fi + fi + + if ! umount "$rootdir/iso"; then + screenlog "$iso_umntfail" "$name" "$zonename" + return 1 + fi + + unset zone_mounted + + [[ -z $rpms_left ]] && break + + distro_rpms="${rpms_left[@]}" + ((isonum += 1)) + done + + if [[ -n $rpms_left ]]; then + log "" + log "Unable to locate some packages on ISO images:\n" \ + " ${rpms_left[@]}" + log "" + screenlog "$install_zonefail" "$zonename" + return 1 + fi + + finish_install + return $? +} + +# +# Mount the passed list of ISOs. 
+# +mount_isos() +{ + typeset count=1 + typeset iso + typeset mntroot=$1 + + unset iso_filename + unset lofi_devs + unset lofi_mntdir + + shift + for iso in "$@"; do + verbose "Checking possible ISO\n \"$iso\"..." + if lofi_add "$iso"; then + verbose " added as lofi device \"$lofi_dev\"" + if lofi_mount "$lofi_dev" "$mntroot"; then + iso_filename[$count]="$iso" + lofi_devs[$count]="$lofi_dev" + lofi_mntdir[$count]="$mntdir" + ((count += 1)) + else + lofi_del "$lofi_dev" + fi + else + verbose " not a valid ISO image." + fi + done +} + +umount_isos() +{ + typeset dev + + for dev in "$@"; do + lofi_umount "$dev" && lofi_del "$dev" + done +} + +# +# Select a distribution to install from the arguments passed and set +# "ndistro" to the value chosen - 1 (so it may be used as an array index.) +# +# The routine will automatically return with ndistro set to 0 if only one +# argument is passed. +# +# Returns 0 on success, 1 if several distributions are available but the +# install is running in silent mode (so the user cannot be asked.) +# +select_distro() +{ + typeset dist + unset ndistro + + if (($# > 1)); then + if [[ -n $silent_mode ]]; then + log "ERROR: multiple distrubutions present in ISO" \ + "directory but silent install" + log " mode specified. Distros available:" + for dist in "$@"; do + log " $dist" + done + return 1 + fi + + PS3="Select a distribution to install: " + select dist in "$@"; do + # + # An invalid response leaves $dist empty, so prompt + # again; a valid one ends the selection loop. + # + [[ -z $dist ]] && continue + screenlog "$install_dist" "$dist" + ndistro=$((REPLY - 1)) + break + done + fi + + # + # Covers both the cases of when only one distro name is passed + # to the routine as well as when an EOF is sent to the distribution + # selection prompt. + # + if [[ -z $dist ]]; then + screenlog "$install_dist" "$1" + ndistro=0 + fi + + return 0 +} + +# +# Install a zone using the list of ISO files passed as arguments to this +# function. +# +# Return 0 on success, 1 on failure. +# +do_iso_install() +{ + typeset status=0 + + mount_isos "/tmp/lxisos" "$@" + if [[ -z ${lofi_mntdir[@]} ]]; then + log "No valid ISO images available or mountable." 
+ screenlog \ + "$(gettext 'No valid ISO images available or mountable.')" + [[ -n ${lofi_devs[@]} ]] && umount_isos "${lofi_devs[@]}" + return 1 + fi + + get_iso_distros "${lofi_mntdir[@]}" + + if [[ -z ${release[@]} ]]; then + log "No valid Linux distributions found." + screenlog "$(gettext 'No valid Linux distributions found.')" + [[ -n ${lofi_devs[@]} ]] && umount_isos "${lofi_devs[@]}" + return 1 + fi + + # + # The ISOs are mounted at this point, so every failure path below + # must unmount them and release their lofi devices before returning. + # + if ! select_distro "${release[@]}"; then + umount_isos "${lofi_devs[@]}" + return 1 + fi + + . ${distro_file[$ndistro]} > /dev/null + + if ! check_mbfree $zoneroot $distro_mb_required; then + umount_isos "${lofi_devs[@]}" + return 1 + fi + + build_rpm_list $install_packages + + install_from_isos $ndistro + status=$? + + umount_isos "${lofi_devs[@]}" + + return $status +} + +# +# Clean up on interrupt +# +trap_cleanup() +{ + cd "$cwd" + + screenlog "$(gettext 'Interrupt received.')" + + [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt && + unset miniroot_booted && unset newroot_mounted + + if [[ -n $zone_mounted ]]; then + if [[ "$removable" = "1" ]]; then + eject_removable_disc "$zone_mounted" + else + umount "$zone_mounted" > /dev/null 2>&1 + fi + + unset zone_mounted + fi + + if [[ -n $newroot_mounted ]]; then + umount_list $newroot_mounted + unset newroot_mounted + fi + + [[ -n $mntdir ]] && umount "$mntdir" && unset mntdir + + [[ ${#lofi_devs[@]} -ne 0 ]] && umount_isos "${lofi_devs[@]}" + + screenlog "$(gettext 'Installation aborted.')" + exit $ZONE_SUBPROC_FATAL +} + +# +# Start of main script +# +cwd=$(dirname "$0") +distro_dir="$cwd/distros" + +unset distro_path +unset logfile +unset newroot_mounted +unset silent_mode +unset verbose_mode +unset zone_mounted +unset zoneroot +unset zonename + +# +# Exit values used by the script, as #defined in <sys/zone.h> +# +# ZONE_SUBPROC_OK +# =============== +# Installation was successful +# +# ZONE_SUBPROC_USAGE +# ================== +# Improper arguments were passed, so print a usage message before exiting +# +# ZONE_SUBPROC_NOTCOMPLETE +# ======================== +# Installation did not 
complete, but another installation attempt can be +# made without an uninstall +# +# ZONE_SUBPROC_FATAL +# ================== +# Installation failed and an uninstall will be required before another +# install can be attempted +# +ZONE_SUBPROC_OK=0 +ZONE_SUBPROC_USAGE=253 +ZONE_SUBPROC_NOTCOMPLETE=254 +ZONE_SUBPROC_FATAL=255 + +# +# Process and set up various global option variables: +# +# distro_path - Path containing files that make up the distribution +# (e.g. a directory containing ISO files or a disc device) +# logfile - Name (if any) of the install log file +# zoneroot - Root directory for the zone to install +# zonename - Name of the zone to install +# +while getopts 'svxd:l:r:z:' opt; do + case $opt in + s) silent_mode=1; unset verbose_mode;; + v) verbose_mode=1; unset silent_mode;; + x) set -x;; + d) distro_path="$OPTARG";; + l) logfile="$OPTARG";; + r) zoneroot="$OPTARG";; + z) zonename="$OPTARG";; + esac +done +shift OPTIND-1 + +distro_path=${distro_path:=/cdrom/cdrom0} + +install_packages="$@" + +[[ -n $silent_mode ]] && exec 1>/dev/null + +if [[ -z $zonename ]]; then + screenlog \ + "$(gettext 'ERROR: Cannot install - no zone name was specified')" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE +fi + +if [[ -z $zoneroot ]]; then + screenlog \ + "$(gettext 'ERROR: Cannot install - no zone root directory was')" + screenlog "$(gettext 'specified')" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE +fi + +# +# Make sure the specified zone root directory exists +# +[[ -d "$zoneroot" ]] || mkdir -m 0700 -p "$zoneroot" + +if [[ ! -d "$zoneroot" ]]; then + screenlog "$zone_rootfail" "$zoneroot" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE +fi + +rootdir="$zoneroot/root" + +# +# Make sure the specified zone root subdirectory exists +# +[[ -d "$rootdir" ]] || mkdir -p "$rootdir" + +if [[ ! 
-d "$rootdir" ]]; then + screenlog "$zone_rootsub" "$rootdir" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE +fi + +# +# Redirect stderr to the log file if it is specified and is writable +# +if [[ -n $logfile ]]; then + if ! echo "\nInstallation started `date`" >> "$logfile" \ + 2>/dev/null; then + screenlog "$log_wrfail" "$logfile" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + exec 2>>"$logfile" + log "Installing from path \"$distro_path\"" +else + [[ -n $silent_mode ]] && exec 2>/dev/null +fi + +distro_path=${distro_path:=$default_distro_path} + +# +# From this point on, call trap_cleanup() on interrupt (^C) +# +trap trap_cleanup INT + +verbose "Installing zone \"$zonename\" at root \"$zoneroot\"" + +# +# If the distribution path starts with "/cdrom/" or "/dev/dsk/" assume the +# install will be done from discs, otherwise assume the path is a directory +# containing ISO images. +# +if [[ "$distro_path" = /cdrom/* || "$distro_path" = /dev/dsk/* ]]; then + if [[ -n $silent_mode ]]; then + screenlog "$(gettext \ + 'ERROR: Cannot install from discs in silent mode.')" + echo + # + # Nothing has been installed yet, so report the same status as + # the other pre-install checks rather than a bare "return 1", + # which is not one of the ZONE_SUBPROC_* exit values. + # + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + vold_present=0 + + pgrep vold > /dev/null 2>&1 && vold_present=1 + + if [[ $vold_present -eq 1 && ! -d /cdrom ]]; then + screenlog "$(gettext 'ERROR: This system does not contain a')" + screenlog "$(gettext 'removable disc device and no ISO source')" + screenlog "$(gettext 'directory was specified.')" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + log "Installing zone \"$zonename\" at root \"$zoneroot\"" + verbose " Attempting disc-based install via path:" + verbose " \"$distro_path\"" + install_from_discs $distro_path +else + typeset dir_start + + dir_start=$(dirname "$distro_path" | cut -c 1) + + [[ "$dir_start" != "/" ]] && distro_path="`pwd`/$distro_path" + + if [[ ! 
-d "$distro_path" ]]; then + screenlog "$no_distropath" "$distro_path" + echo + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + log "Installing zone \"$zonename\" at root \"$zoneroot\"" + verbose " Attempting ISO-based install from directory:" + verbose " \"$distro_path\"" + + iso_files=$(find $distro_path -type f -print) + do_iso_install $iso_files +fi + +if [[ $? -ne 0 ]]; then + cd "$cwd" + + [[ -n $miniroot_booted ]] && zoneadm -z "$zonename" halt && + unset miniroot_booted && unset newroot_mounted + + if [[ -n $zone_mounted ]]; then + if [[ "$removable" = "1" ]]; then + eject_removable_disc "$zone_mounted" + else + umount "$zone_mounted" > /dev/null 2>&1 + fi + + unset zone_mounted + fi + + if [[ -n $newroot_mounted ]]; then + umount_list $newroot_mounted + unset newroot_mounted + fi + + log "Initial installation of zone \"$zonename\" at root \"$zoneroot\"" \ + "FAILED." + + screenlog "$(gettext 'Cleaning up after failed install.')" + + # + # The extra checks are some basic paranoia due to the potentially + # dangerous nature of these commands but are not intended to catch all + # malicious cases. + # + [[ -d "$zoneroot/a" ]] && rm -rf "$zoneroot/a" + + screenlog "$initinstall_zonefail" "$zonename" + screenlog "$install_abort" "`date`" + + exit $ZONE_SUBPROC_FATAL +fi + +log "" +screenlog "$install_done" "$zonename" "`date`" + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/lx/zone/lx_init_zone.ksh b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh new file mode 100644 index 0000000000..64d819c048 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/lx_init_zone.ksh @@ -0,0 +1,698 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# This script contains various routines used to post-process a zone for use +# with BrandZ after it has been installed from RPM media or a tar image. +# +# Briefly, there are three main jobs we need to do: +# +# 1) Create any needed directories and symlinks BrandZ needs but that the +# Linux install may not create +# +# 2) Modify rc scripts to shut off services that don't apply to a zone +# or that wish to access hardware directly +# +# 3) Modify various Linux system files for use within a zone environment +# + +# +# Restrict executables to /bin and /usr/bin +# +PATH=/bin:/usr/bin +export PATH + +# +# Sends output to log file if "$logfile" is set +# +log() +{ + [[ -n $logfile ]] && echo "$@" >&2 +} + +# +# Setup i18n output +# +TEXTDOMAIN="SUNW_OST_OSCMD" +export TEXTDOMAIN + +cmd_failed=$(gettext "%s failed! 
Aborting installation...") +cmd2_failed=$(gettext "%s of '%s' to '%s' failed!") +create_failed=$(gettext "Could not create new file '%s'!") +disable_failed=$(gettext "Attempt to disable entries in '%s' failed!") +install_aborted=$(gettext "Aborting installation...") +install_noroot=$(gettext "Installation root directory '%s' does not exist.") +ln_fail=$(gettext "Unable to symlink '%s' to '%s'!") +mkdir_fail=$(gettext "Unable to create the directory '%s'") +mod_failed=$(gettext -n "Attempt to modify entries in '%s' failed!") + +usage=$(gettext "usage: %s <install_root> <logfile> [<mini>]") + +# +# Output an internationalized string followed by a carriage return +# +i18n_echo() +{ + typeset fmt="$1" + shift + + printf "$fmt\n" "$@" +} + +# +# Routine to make a full path out of a supplied path +# +fullpath() +{ + typeset path="$1" + + echo $path | egrep -s "^/" || path="${PWD:=$(pwd)}/$path" + echo $path +} + +# +# Routine to create directories and handle errors +# +makedir() +{ + typeset dirname=$(fullpath "$1") + typeset mode="" + + [[ $# -eq 2 ]] && mode="-m $2" + + [[ -d "$dirname" ]] && return + + if ! mkdir $mode -p "$dirname"; then + log "Unable to create the directory \"$dirname\"!" + i18n_echo "$mkdir_fail" "$dirname" + echo $(gettext "Aborting installation...") + exit 1 + fi +} + +# +# Routine to create initial symlinks and handle errors +# +symlink() +{ + typeset src="$1" + typeset dst=$(fullpath "$2") + + [[ -e "$dst" || -h "$dst" ]] && rm -f "$dst" + + if ! ln -s "$src" "$dst"; then + log "Unable to symlink \"$src\" to \"$dst\"!" + i18n_echo "$ln_fail" "$src" "$dst" + echo $(gettext "Aborting installation...") + exit 1 + fi +} + +# +# Install a file using "ln -s" +# +# Returns 0 on success, 1 on failure. +# +install_ln() +{ + typeset source="$1" + typeset target=$(fullpath "$2") + + log " Installing \"$target\"" + + [[ -a "$target" ]] && mv -f "$target" "$target.$tag" + + if ! 
ln -s "$source" "$target"; then + log "" + log "Attempt to install $target FAILED." + return 1 + fi + + return 0 +} + + +# +# Enable NFS servers and the NFS lock daemon for a particular zone. +# +enable_nfs_services() +{ + log "Non-miniroot install; enabing NFS servers and NFS lock daemon" + + # + # Setup files required for NFS: + # + # /native/etc/netconfig + # /native/etc/default/nfs + # + # These two files are treated as read-only in lx branded zones. + # To enfore this restriction we will read-only lofs mount them + # into the zone from the global zone. For these lofs mounts to + # work we'll need to create empty directories now that will serve + # as mount points later. + # + # /sbin/rpc.statd + # /sbin/rpc.lockd + # + # These files are symlinks to scripts supplied by the lx brand + # that will start up the solaris nfs daemons. + # + if { ! makedir native/etc/netconfig || + ! makedir native/etc/default/nfs ; }; then + log "Aborting NFS setup..." + log "" + return + fi + + if { ! install_ln ../native/usr/lib/brand/lx/lx_lockd sbin/rpc.lockd || + ! install_ln ../native/usr/lib/brand/lx/lx_statd \ + sbin/rpc.statd ; }; then + log "Aborting NFS setup..." + log "" + return + fi + + # + # update /etc/services for NFS + # + log "" + log "Adding lockd entry to \"$install_root/etc/services\"..." + + cp -p $install_root/etc/services $install_root/etc/services.$tag + + # + # Brackets in the sed script below contain a space followed by a tab + # + cat $install_root/etc/services.$tag | + sed 's:\(111\/..p[ ][ ]*\):\1rpcbind :' | + cat > $install_root/etc/services + + cat >> $install_root/etc/services <<-EOF + lockd 4045/udp # NFS lock daemon/manager + lockd 4045/tcp # NFS lock daemon/manager + EOF + + # + # Modify /etc/init.d/nfslock to enable the USERLAND_LOCKD option and to + # find some commands in alternate locations. + # + log "" + log "Modifying \"$install_root/etc/init.d/nfslock\"..." 
+ cp -p etc/init.d/nfslock etc/init.d/nfslock.$tag + cat etc/init.d/nfslock.$tag | + sed ' + s/USERLAND_LOCKD=$/USERLAND_LOCKD="yes"/ + s/killproc rpc.statd/killproc statd/ + s/status rpc.statd/status statd/ + s/pidof rpc.statd/pidof statd/ + ' | + cat > etc/init.d/nfslock +} + +# +# The main script starts here. +# +# The syntax is: +# +# lx_init_zone <rootdir> <logfile> [mini] +# +# Where: +# <rootdir> is the root of the zone directory to be modified +# +# <logfile> is the name of the log file to which error messages should +# be appended +# +# [mini] is an optional third argument that signifies whether this is +# to be a miniroot install; if it is, NFS services are not enabled +# in the processed zone +# +unset is_miniroot +unset install_root +unset logfile + +install_root="$1" +logfile="$2" + +tag="lxsave_$(date +%m.%d.%Y@%T)" + +if (($# < 2 || $# > 3)); then + i18n_echo "$usage" "$0" + exit 1 +fi + +(($# == 3)) && is_miniroot=1 + +exec 2>>"$logfile" + +if [[ ! -d "$install_root" ]]; then + i18n_echo "$install_noroot" "$install_root" + echo $(gettext "** Installation aborted **") + exit 1 +fi + +cd "$install_root" + +log "" +log "Initial lx_brand environment modification started `date`" +log "Making needed directories in \"$install_root\"." +echo $(gettext "Setting up the initial lx brand environment.") + +# +# Make various directories in /native that are needed to boot an lx branded +# zone. 
+# +makedir native/dev +makedir native/etc/default +makedir native/etc/svc/volatile +makedir native/lib +makedir native/proc +makedir native/tmp 1777 +makedir native/usr +makedir native/var + +# +# Make various other directories needed for the lx brand +# +makedir mnt +makedir opt +makedir usr/local/bin +makedir usr/local/include +makedir usr/local/lib +makedir usr/local/sbin +makedir usr/local/share +makedir usr/local/src + +makedir dev 0755 +makedir tmp 1777 +makedir proc 0555 +makedir boot 0755 + +# +# zlogin requires that these utilities live in places other than their +# Linux defaults, so create appropriate links for them here. +# +# XX - The need for these links may go away in the future if zlogin is +# appropriately modified +# +symlink /bin/sh sbin/sh +symlink /bin/su usr/bin/su +symlink /native/usr/lib/ld.so.1 usr/lib/ld.so.1 + +libpam_so="$(echo lib/libpam.so.0.*)" +libpam_misc="$(echo lib/libpam_misc.so.0.*)" +libpamc_so="$(echo lib/libpamc.so.0.*)" + +symlink "/$libpam_so" lib/libpam.so.0 +symlink "/$libpam_misc" lib/libpam_misc.so.0 +symlink "/$libpamc_so" lib/libpamc.so.0 + +log "" +log "Modifying system configuration in \"$install_root\"" + +# +# Create a /var/ld/ld.config that will point to /native/lib for our Solaris +# libraries. +# +log "Creating \"$install_root/var/ld/ld.config\"..." + +makedir var/ld + +if ! crle -c var/ld/ld.config -l /native/lib:/native/usr/lib \ + -s /native/lib/secure:/native/usr/lib/secure; then + log "\tCreation of \"$install_root/var/ld/ld.config\" failed!" + i18n_echo "$cmd_failed" "crle" + exit 1 +fi + +log "" +log "Modifying \"$install_root/etc/fstab\"..." + +[[ -a etc/fstab ]] && mv -f etc/fstab etc/fstab.$tag + +cat > etc/fstab <<- EOF + none / ufs defaults 1 1 + none /proc proc defaults 0 0 +EOF + +if [[ $? -ne 0 ]]; then + log "Could not create new \"$install_root/etc/fstab\"!" 
+ i18n_echo "$create_failed" "$install_root/etc/fstab" + exit 1 +fi + +# +# The default /etc/inittab spawns mingetty on each of the virtual consoles +# as well as xdm on the X console. Since we don't have virtual consoles nor +# an X console, spawn a single mingetty on /dev/console instead. +# +# Don't bother changing the file if it looks like we already did. +# +if ! egrep -s "Disabled by lx brand" etc/inittab; then + log "Modifying: \"$install_root/etc/inittab\"..." + + tmpfile=/tmp/inittab.$$ + + sed -e 's/^[1-6x]:/# Disabled by lx brand: &/ + s/^id:5:initdefault:/id:3:initdefault: # Modified by lx brand: &/' \ + etc/inittab > $tmpfile + + # + # Don't bother with further alterations if the sed above failed... + # + if [[ $? -eq 0 ]]; then + egrep -s "console login for lx brand" etc/inittab + if [[ $? -ne 0 ]]; then + cat >> $tmpfile <<- EOF + + # + # console login for lx brand + # + 1:2345:respawn:/sbin/mingetty console + EOF + + # + # Only install the new inittab if the append + # above succeeded. + # + if [[ $? -eq 0 ]]; then + # + # Attempt to save off the original inittab + # before moving over the modified version. + # + mv -f etc/inittab etc/inittab.$tag + + mv -f $tmpfile etc/inittab + + if [[ $? -ne 0 ]]; then + log "mv of \"$tmpfile\" to" \ + "\"$install_root/etc/inittab\"" \ + "failed!" + i18n_echo "$cmd2_failed" "mv" \ + "$tmpfile" \ + "$install_root/etc/inittab" + i18n_echo "$install_aborted" + exit 1 + else + chmod 644 etc/inittab + fi + fi + fi + + else + log "Attempt to disable entries in" \ + "\"$install_root/etc/inittab\" failed!" + i18n_echo "$disable_failed" "$install_root/etc/inittab" + i18n_echo "$install_aborted" + exit 1 + fi +fi + +if [[ ! -e "$install_root/etc/hosts" ]]; then + log "" + log "Creating: \"$install_root/etc/hosts\"..." + + cat > "$install_root/etc/hosts" <<-_EOF_ + 127.0.0.1 localhost + _EOF_ +fi + +# +# User must configure various brand-specific items to enable networking, so +# boot the system non-networked. 
+# +log "" +log "Modifying: \"$install_root/etc/sysconfig/network\"..." + +[[ -a etc/sysconfig/network ]] && + mv -f etc/sysconfig/network etc/sysconfig/network.$tag + +cat > etc/sysconfig/network <<- EOF + NETWORKING="no" + # + # To enable networking, change the "no" above to "yes" and + # uncomment and fill in the following parameters. + # + # If you are specifying a hostname by name rather than by IP address, + # be sure the system can resolve the name properly via the use of a + # name service and/or the proper name files, as specified by + # nsswitch.conf. See nsswitch.conf(5) for further details. + # + # HOSTNAME=your_hostname_here + # +EOF + +if [[ $? -ne 0 ]]; then + log "Could not create new \"$install_root/etc/sysconfig/network\"!" + i18n_echo "$create_failed" "$install_root/etc/sysconfig/network" + i18n_echo "$install_aborted" + exit 1 +fi + +if [[ -a etc/sysconfig/syslog ]]; then + # + # By default, syslogd will attempt to create a socket in /dev/log, but + # /dev is not writable. Instead, modify /etc/sysconfig/syslog to + # tell it to use /var/run/syslog instead, and make /dev/log a symlink + # to /var/run/syslog. + # + log "" + log "Modifying: \"$install_root/etc/sysconfig/syslog\"..." + + tmpfile=/tmp/lx_sc.syslog.$$ + + sed -e 's@\(SYSLOGD_OPTIONS="-m 0\)"@\1 -p /var/run/syslog"@' \ + etc/sysconfig/syslog > $tmpfile + + # + # Only install the new sysconfig/syslog if the edit above succeeded. + # + if [[ $? -eq 0 ]]; then + # + # Attempt to save off the original syslog before moving over + # the modified version. + # + mv -f etc/sysconfig/syslog etc/sysconfig/syslog.$tag + + if ! mv -f $tmpfile etc/sysconfig/syslog; then + log "mv of \"$tmpfile\" to" \ + "\"$install_root/etc/sysconfig/syslog\" failed!" 
+ i18n_echo "$cmd2_failed" "mv" "$tmpfile" \ + "$install_root/etc/sysconfig/syslog" + i18n_echo "$install_aborted" + exit 1 + else + chmod 755 etc/sysconfig/syslog + fi + else + log "Attempt to modify entries in" \ + "\"$install_root/etc/sysconfig/syslog\" failed!" + i18n_echo "$mod_failed" "$install_root/etc/sysconfig/syslog" + i18n_echo "$install_aborted" + exit 1 + fi +fi + +if [[ $? -ne 0 ]]; then + log "Could not create new \"$install_root/etc/sysconfig/syslog\"!" + i18n_echo "$create_failed" "$install_root/etc/sysconfig/syslog" + i18n_echo "$install_aborted" + exit 1 +fi + +# +# /etc/rc.d/init.d/keytable tries to load a physical keyboard map, which won't +# work in a zone. If we remove etc/sysconfig/keyboard, it won't try this at all. +# +[[ -a etc/sysconfig/keyboard ]] && + mv -f etc/sysconfig/keyboard etc/sysconfig/keyboard.$tag + +# +# /etc/rc.d/init.d/gpm tries to configure the console mouse for cut-and-paste +# text operations, which we don't support. Removing this file disables the +# mouse configuration. +# +[[ -a etc/sysconfig/mouse ]] && + mv -f etc/sysconfig/mouse etc/sysconfig/mouse.$tag + +# +# The following scripts attempt to start services or otherwise configure +# the system in ways incompatible with zones, so don't execute them at boot +# time. 
+# +log "" +log "Modifying \"$install_root/etc/rc.d/init.d\" to disable any" +log " services not supported by BrandZ:" +unsupported_services=" + kudzu + microcode_ctl + network + random + pcmcia + isdn + iptables + ip6tables + iscsi + saslauthd + psacct + xfs + gpm + irda + smartd + rawdevices + netdump + snmpd + snmptrapd + hpoj + netfs + mdmonitor + mdmpd + irqbalance +" + +for file in $unsupported_services; do + if [[ -a "etc/rc.d/init.d/$file" ]]; then + + if mv -f "etc/rc.d/init.d/$file" "etc/rc.d/init.d/$file.$tag"; then + log " + Moved script \"etc/rc.d/init.d/$file\" to" + log " \"etc/rc.d/init.d/$file.$tag\"" + fi + fi + + rc_files="$(echo etc/rc.d/rc[0-6].d/[SK]+([0-9])$file)" + + if [[ "$rc_files" != "etc/rc.d/rc[0-6].d/[SK]+([0-9])$file" ]]; then + for file in $rc_files; do + if [[ -h "$file" ]]; then + rm -f "$file" && + log " + Removed symbolic link \"$file\"" + else + rm -f "$file" && + log " + Removed script \"$file\"" + fi + done + fi +done + +# +# There is a lot of stuff in the standard halt and reboot scripts that we +# have no business running in a zone. Fortunately, the stuff we want to +# skip is all in one contiguous chunk. +# +# Don't bother to modify the file if it looks like we already did. +# +if ! egrep -s "Disabled by lx brand" etc/rc.d/init.d/halt; then + log "" + log "Modifying \"$install_root/etc/rc.d/init.d/halt\" for operation" + log " within a zone..." + awk 'BEGIN {skip = ""} + /^# Save mixer/ {skip = "# Disabled by lx brand: "} + /halt.local/ {skip = ""} + /./ {print skip $0}' etc/rc.d/init.d/halt > /tmp/halt.$$ + + if [[ $? -eq 0 ]]; then + mv -f etc/rc.d/init.d/halt etc/rc.d/init.d/halt.$tag + mv -f /tmp/halt.$$ etc/rc.d/init.d/halt + chmod 755 etc/rc.d/init.d/halt + else + log "Attempt to modify \"$install_root/etc/rc.d/init.d/halt\"" \ + "FAILED" + log "Continuing with balance of zone setup..." + fi +fi + +# +# Fix up /etc/rc.d/rc.sysinit: +# +# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support. 
+# Since the hardware clock cannot be set from within a zone, we comment out +# the line. +# +# 2) Disable dmesg commands, since we don't implement klogctl +# +# 3) Disable initlog and the mount of /dev/pts +# +# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't +# work from within a zone. +# +# 5) Don't try to check the root filesystem (/) as there is no associated +# physical device, and any attempt to run fsck will fail. +# +# Don't modify the rc.sysinit file if it looks like we already did. +# +if ! egrep -s "Disabled by lx brand" etc/rc.d/rc.sysinit; then + log "" + log "Modifying: \"$install_root/etc/rc.d/rc.sysinit\"..." + log "" + + tmpfile=/tmp/lx_rc.sysinit.$$ + + sed -e 's@^/sbin/hwclock@# Disabled by lx brand: &@ + s@^HOSTTYPE=@HOSTTYPE=\"s390\" # Spoofed for lx brand: &@ + s@/bin/dmesg -n@: # Disabled by lx brand: &@ + s@^dmesg -s@# Disabled by lx brand: &@ + s@initlog -c \"fsck@: # Disabled by lx brand: &@ + s@^.*mount .* /dev/pts$@# Disabled by lx brand: &@' \ + etc/rc.d/rc.sysinit > $tmpfile + + # + # Only install the new rc.sysinit if the edit above succeeded. + # + if [[ $? -eq 0 ]]; then + # + # Attempt to save off the original rc.sysinit + # before moving over the modified version. + # + mv -f etc/rc.d/rc.sysinit etc/rc.d/rc.sysinit.$tag + + if ! mv -f $tmpfile etc/rc.d/rc.sysinit; then + log "mv of \"$tmpfile\" to" \ + "\"$install_root/etc/rc.d/rc.sysinit\" failed!" + i18n_echo "$cmd2_failed" "mv" "$tmpfile" \ + "$install_root/etc/rc.d/rc.sysinit" + i18n_echo "$install_aborted" + exit 1 + else + chmod 755 etc/rc.d/rc.sysinit + fi + else + log "Attempt to modify entries in" \ + "\"$install_root/etc/rc.d/rc.sysinit\" failed!" + i18n_echo "$mod_failed" "$install_root/etc/rc.d/rc.sysinit" + i18n_echo "$install_aborted" + exit 1 + fi +fi + +if [[ -z $is_miniroot ]]; then + enable_nfs_services || log "NFS services were not properly enabled." 
+fi + +log "" +log "System configuration modifications complete `date`" +log "" +i18n_echo "System configuration modifications complete!" +exit 0 diff --git a/usr/src/lib/brand/lx/zone/lx_install.ksh b/usr/src/lib/brand/lx/zone/lx_install.ksh new file mode 100644 index 0000000000..315f3db30d --- /dev/null +++ b/usr/src/lib/brand/lx/zone/lx_install.ksh @@ -0,0 +1,429 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Restrict executables to /bin, /usr/bin and /usr/sfw/bin +# +PATH=/bin:/usr/bin:/usr/sfw/bin +export PATH + +# +# Setup i18n output +# +TEXTDOMAIN="SUNW_OST_OSCMD" +export TEXTDOMAIN + +# +# Log passed arguments to file descriptor 2 +# +log() +{ + [[ -n $logfile ]] && echo "$@" >&2 +} + +# +# Send the provided printf()-style arguments to the screen and to the +# logfile. 
+# +screenlog() +{ + typeset fmt="$1" + shift + + printf "$fmt\n" "$@" + [[ -n $logfile ]] && printf "$fmt\n" "$@" >&2 +} + +# +# Print and log provided text if the shell variable "verbose_mode" is set +# +verbose() +{ + [[ -n $verbose_mode ]] && echo "$@" + [[ -n $logfile ]] && [[ -n $verbose_mode ]] && echo "$@" >&2 +} + +cmd_not_found=$(gettext "Required command '%s' cannot be found!") +cmd_not_exec=$(gettext "Required command '%s' not executable!") +zone_initfail=$(gettext "Attempt to initialize zone '%s' FAILED.") +path_abs=$(gettext "Pathname specified to -d '%s' must be absolute.") + +usage_iso=$(gettext "ISO images located in the directory %s") + +cmd_h=$(gettext "%s -z <zone name> %s -h") +cmd_full=\ +$(gettext "%s -z <zone name> %s [-v | -s] [-d <archive dir>] [<cluster> ... ]") + +both_modes=$(gettext "%s: error: cannot select both silent and verbose modes") + +not_found=$(gettext "'%s': file not found.") +unknown_type=$(gettext "'%s': unknown type of file.") +wrong_type=$(gettext "'%s': wrong type of file.") +not_readable=$(gettext "Cannot read file '%s'") + +no_install=$(gettext "Could not create install directory '%s'") +no_log=$(gettext "Could not create log directory '%s'") + +install_zone=$(gettext "Installing zone '%s' at root directory '%s'") +install_from=$(gettext "from archive '%s'") + +install_fail=$(gettext "Installation for zone '%s' FAILED.") +see_log=$(gettext "See the log file '%s' for details.") + +install_good=$(gettext "Installation of zone '%s' completed successfully.") + +# +# Check if commands passed in exist and are executable. +# +check_cmd() +{ + for cmd in "$@"; do + if [[ ! -f $cmd ]]; then + screenlog "$cmd_not_found" "$cmd" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + if [[ ! -x $cmd ]]; then + screenlog "$cmd_not_exec" "$cmd" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + done +} + +# +# Post process as tarball-installed zone for use by BrandZ. +# +init_tarzone() +{ + typeset rootdir="$1" + + if ! 
$branddir/lx_init_zone "$rootdir" "$logfile"; then + screenlog "$zone_initfail" "$zonename" + return 1 + fi +} + +# +# Output a usage message +# +usage() +{ + echo $(gettext "Usage:") + screenlog "$cmd_h" "zoneadm" "install" + screenlog "$cmd_full" "zoneadm" "install" + echo + echo $(gettext "Linux archives can be in one of several forms:") + echo + echo $(gettext " + A compressed tar archive") + echo $(gettext " + A set of CD-ROM or DVD discs") + echo $(gettext " + A group of ISO images") + echo + echo $(gettext "The install will attempt to use the default system") + echo $(gettext "removable disc device if <archive dir> is not") + echo $(gettext "specified.") + echo + echo $(gettext "<cluster> specifies which package cluster you wish") + echo $(gettext "to install. The desktop cluster will be installed") + echo $(gettext "by default.") + echo + echo $(gettext "The available clusters are:") + echo " core" + echo " server" + echo " desktop" + echo " development" + echo " all" + echo + echo $(gettext "Each cluster includes all of the clusters preceding") + echo $(gettext "it. 
So, 'server' includes 'core', 'desktop' includes") + echo $(gettext "'core' and 'server', and so on.") + echo + echo $(gettext "Examples") + echo "========" + echo $(gettext "Example 1: Install a base Linux system from CD-ROM") + echo $(gettext "discs using the system default removable disc device:") + echo + echo " # zoneadm -z myzone install" + echo + echo $(gettext "Example 2: Install the server packages from CD-ROM") + echo $(gettext "via an alternative removable disc device:") + echo + echo " # zoneadm -z myzone install -d /cdrom/cdrom0 server" + echo + echo $(gettext "Example 3: Install the entire Linux environment from") + screenlog "$usage_iso" "/export/images/centos_3.5/isos" + echo + echo " # zoneadm -z myzone install -d" \ + "/export/images/centos_3.5/isos all" + echo + echo $(gettext "Example 4: Install from a compressed tar archive of") + echo $(gettext "an existing Linux installation (a tar ball) with") + echo $(gettext "verbose output regarding the progress of the") + echo $(gettext "installation") + echo + echo " # zoneadm -z myzone install -d /tmp/linux_full.tar.gz -v" + echo + echo $(gettext "Example 5: Install from a compressed tar archive of") + echo $(gettext "an existing Linux installation (a tar ball) with") + echo $(gettext "NO output regarding the progress of the installation") + echo $(gettext "(silent mode.) Note that silent mode is only") + echo $(gettext "recommended for use by shell scripts and programs.") + echo + echo " # zoneadm -z myzone install -d /tmp/linux_full.tar.gz -s" + echo +} + +# +# The main body of the script starts here. +# +# This script should never be called directly by a user but rather should +# only be called by zoneadm to install a BrandZ Linux zone. 
+# + +# +# Exit values used by the script, as #defined in <sys/zone.h> +# +# ZONE_SUBPROC_OK +# =============== +# Installation was successful +# +# ZONE_SUBPROC_USAGE +# ================== +# Improper arguments were passed, so print a usage message before exiting +# +# ZONE_SUBPROC_NOTCOMPLETE +# ======================== +# Installation did not complete, but another installation attempt can be +# made without an uninstall +# +# ZONE_SUBPROC_FATAL +# ================== +# Installation failed and an uninstall will be required before another +# install can be attempted +# +ZONE_SUBPROC_OK=0 +ZONE_SUBPROC_USAGE=253 +ZONE_SUBPROC_NOTCOMPLETE=254 +ZONE_SUBPROC_FATAL=255 + +# +# If we weren't passed at least two arguments, exit now. +# +if [[ $# -lt 2 ]]; then + usage + + exit $ZONE_SUBPROC_USAGE +fi + +# +# This script is always started with a full path so we can extract the +# brand directory name here. +# +branddir=$(dirname "$0") +zonename="$1" +zoneroot="$2" + +install_root="$zoneroot/root" +logdir="$install_root/var/log" + +shift; shift # remove zonename and zoneroot from arguments array + +unset gtaropts +unset install_opts +unset install_src +unset silent_mode +unset verbose_mode + +while getopts "d:hsvX" opt +do + case "$opt" in + h) usage; exit $ZONE_SUBPROC_USAGE ;; + s) silent_mode=1;; + v) verbose_mode=1;; + d) install_src="$OPTARG" ;; + X) install_opts="$install_opts -x" ;; + *) usage ; exit $ZONE_SUBPROC_USAGE ;; + esac +done +shift OPTIND-1 + +# +# The install can't be both verbose AND silent... +# +if [[ -n $silent_mode && -n $verbose_mode ]]; then + screenlog "$both_modes" "zoneadm install" + exit $ZONE_SUBPROC_NOTCOMPLETE +fi + +if [[ -n $install_src ]]; then + # + # Validate $install_src. + # + # If install_src is a directory, assume it contains ISO images to + # install from, otherwise treat the argument as if it points to a + # tar ball file. 
+ # + if [[ "`echo "$install_src" | cut -c 1`" != "/" ]]; then + screenlog "$path_abs" "$install_src" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + if [[ ! -a "$install_src" ]]; then + screenlog "$not_found" "$install_src" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + if [[ ! -r "$install_src" ]]; then + screenlog "$not_readable" "$install_src" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + if [[ ! -d "$install_src" ]]; then + if [[ ! -f "$install_src" ]]; then + screenlog "$wrong_type" "$install_src" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + + filetype=`{ LC_ALL=C file "$install_src" | + awk '{print $2}' ; } 2>/dev/null` + + if [[ "$filetype" = "gzip" ]]; then + verbose "\"$install_src\": \"gzip\" archive" + gtaropts="-xz" + elif [[ "$filetype" = "bzip2" ]]; then + verbose "\"$install_src\": \"bzip2\" archive" + gtaropts="-xj" + elif [[ "$filetype" = "compressed" ]]; then + verbose "\"$install_src\": Lempel-Ziv" \ + "compressed (\".Z\") archive." + gtaropts="-xZ" + elif [[ "$filetype" = "USTAR" ]]; then + verbose "\"$install_src\":" \ + "uncompressed (\"tar\") archive." + gtaropts="-x" + else + screenlog "$unknown_type" "$install_src" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi + fi +fi + +# +# Start silent operation and prepare to pass the silent flag to +# the ISO installer, if needed. +# +if [[ -n $silent_mode ]] +then + exec 1>/dev/null + install_opts="$install_opts -s" +fi + +# +# If verbose mode was specified, pass the verbose flag to lx_distro_install +# for ISO or disc installations and to gtar for tarball-based installs. +# +if [[ -n $verbose_mode ]] +then + echo $(gettext "Verbose output mode enabled.") + install_opts="$install_opts -v" + [[ -n $gtaropts ]] && gtaropts="${gtaropts}v" +fi + +[[ -n $gtaropts ]] && gtaropts="${gtaropts}f" + +if [[ ! -d "$install_root" ]] +then + if ! mkdir -p "$install_root" 2>/dev/null; then + screenlog "$no_install" "$install_root" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi +fi + +if [[ ! -d "$logdir" ]] +then + if ! 
mkdir -p "$logdir" 2>/dev/null; then + screenlog "$no_log" "$logdir" + exit $ZONE_SUBPROC_NOTCOMPLETE + fi +fi + +logfile="${logdir}/$zonename.install.$$.log" + +exec 2>"$logfile" + +log "Installation started for zone \"$zonename\" `/usr/bin/date`" + +if [[ -n $gtaropts ]]; then + check_cmd /usr/sfw/bin/gtar $branddir/lx_init_zone + + screenlog "$install_zone" "$zonename" "$zoneroot" + screenlog "$install_from" "$install_src" + echo + echo $(gettext "This process may take several minutes.") + echo + + ( cd "$install_root" && + gtar "$gtaropts" "$install_src" && + init_tarzone "$install_root" ) + + res=$? +else + check_cmd $branddir/lx_distro_install + + $branddir/lx_distro_install -z "$zonename" -r "$zoneroot" \ + -d "$install_src" -l "$logfile" $install_opts "$@" + + res=$? + + if [ $res -eq $ZONE_SUBPROC_USAGE ]; then + usage + exit $ZONE_SUBPROC_USAGE + fi +fi + +if [[ $res -ne $ZONE_SUBPROC_OK ]]; then + log "Installation failed for zone \"$zonename\" `/usr/bin/date`" + + screenlog "$install_fail" "$zonename" + + # + # Only make a reference to the log file if one will exist after + # zoneadm exits. + # + [[ $res -ne $ZONE_SUBPROC_NOTCOMPLETE ]] && + screenlog "$see_log" "$logfile" + + exit $res +fi + +log "Installation complete for zone \"$zonename\" `date`" +screenlog "$install_good" "$zonename" +echo $(gettext "Details saved to log file:") +echo " \"$logfile\"" +echo + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml new file mode 100644 index 0000000000..85e763fa71 --- /dev/null +++ b/usr/src/lib/brand/lx/zone/platform.xml @@ -0,0 +1,85 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. 
+ + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. +--> + +<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" + "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> + +<platform name="lx"> + <!-- Global filesystems to mount when booting the zone --> + <global_mount special="/dev" directory="/native/dev" type="dev" + opt="attrdir=%R/dev" /> + <global_mount special="/lib" directory="/native/lib" + opt="ro" type="lofs" /> + <global_mount special="/usr/lib" directory="/native/usr/lib" + opt="ro" type="lofs" /> + <global_mount special="/usr/lib/brand/lx/etc_default_nfs" + directory="/native/etc/default/nfs" type="lofs" opt="ro" /> + <global_mount special="/usr/lib/brand/lx/etc_netconfig" + directory="/native/etc/netconfig" type="lofs" opt="ro" /> + + <!-- Local filesystems to mount when booting the zone --> + <mount special="/native/dev" directory="/dev" type="lofs" /> + <mount special="proc" directory="/native/proc" type="proc" /> + <mount special="swap" directory="/native/etc/svc/volatile" + type="tmpfs" /> + <mount special="swap" directory="/native/tmp" type="tmpfs" /> + + <!-- Devices to create under /dev --> + <device match="null" /> + <device match="pts/*" /> + <device match="random" /> + <device match="tcp" /> + <device match="tcp6" /> + <device match="tty" /> + 
<device match="udp" /> + <device match="udp6" /> + <device match="urandom" /> + <device match="zero" /> + + <!-- Renamed devices to create under /dev --> + <device match="brand/lx/ptmx" name="ptmx" /> + <device match="zcons/%z/zoneconsole" name="console" /> + + <!-- Audio devices to create under /dev --> + <device match="brand/lx/dsp" name="dsp" /> + <device match="brand/lx/mixer" name="mixer" /> + + <!-- Symlinks to create under /dev --> + <symlink source="fd" target="../proc/self/fd" /> + <symlink source="log" target="/var/run/syslog" /> + <symlink source="stderr" target="../proc/self/fd/2" /> + <symlink source="stdin" target="../proc/self/fd/0" /> + <symlink source="stdout" target="../proc/self/fd/1" /> + <symlink source="systty" target="console" /> + + <!-- Create a mount point for the /dev/initctl fifo --> + <device match="null" name="initctl" /> + +</platform> diff --git a/usr/src/lib/brand/native/Makefile b/usr/src/lib/brand/native/Makefile new file mode 100644 index 0000000000..edf14afe10 --- /dev/null +++ b/usr/src/lib/brand/native/Makefile @@ -0,0 +1,44 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +SUBDIRS= zone + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/native/zone/Makefile b/usr/src/lib/brand/native/zone/Makefile new file mode 100644 index 0000000000..00e620cc8d --- /dev/null +++ b/usr/src/lib/brand/native/zone/Makefile @@ -0,0 +1,59 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +BRAND= native +PROGS= postclone +XMLDOCS= config.xml platform.xml +TEMPLATES= SUNWdefault.xml SUNWblank.xml + +all: $(PROGS) + +include $(SRC)/cmd/Makefile.cmd +include ../../Makefile.brand + +POFILES= $(PROGS:%=%.po) +POFILE= native_zone.po + +$(POFILE): $(POFILES) + $(RM) $@ + $(BUILDPO.pofiles) + +_msg: $(MSGDOMAINPOFILE) + +lint: + +install: $(PROGS) $(ROOTBRANDDIR) $(ROOTPROGS) $(ROOTXMLDOCS) \ + $(ROOTTEMPLATEDIR) $(ROOTTEMPLATES) + +clean: + -$(RM) $(PROGS) + +clobber: + -$(RM) $(ROOTPROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) + +include $(SRC)/Makefile.msg.targ diff --git a/usr/src/lib/libzonecfg/zones/SUNWblank.xml b/usr/src/lib/brand/native/zone/SUNWblank.xml index a68f56ed98..a68f56ed98 100644 --- a/usr/src/lib/libzonecfg/zones/SUNWblank.xml +++ b/usr/src/lib/brand/native/zone/SUNWblank.xml diff --git a/usr/src/lib/libzonecfg/zones/SUNWdefault.xml b/usr/src/lib/brand/native/zone/SUNWdefault.xml index ab988ef177..ab988ef177 100644 --- a/usr/src/lib/libzonecfg/zones/SUNWdefault.xml +++ b/usr/src/lib/brand/native/zone/SUNWdefault.xml diff --git a/usr/src/lib/brand/native/zone/config.xml b/usr/src/lib/brand/native/zone/config.xml new file mode 100644 index 0000000000..d91bebf46e --- /dev/null +++ b/usr/src/lib/brand/native/zone/config.xml @@ -0,0 +1,89 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. +--> + +<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN" + "file:///usr/share/lib/xml/dtd/brand.dtd.1"> + +<brand name="native"> + <modname></modname> + + <initname>/sbin/init</initname> + <login_cmd>/usr/bin/login -z %Z -f %u</login_cmd> + + <install>/usr/lib/lu/lucreatezone -z %z</install> + <installopts></installopts> + <boot></boot> + <halt></halt> + <verify_cfg></verify_cfg> + <verify_adm></verify_adm> + <postclone>/usr/lib/brand/native/postclone %z %R</postclone> + + <privilege set="default" name="contract_event" /> + <privilege set="default" name="contract_observer" /> + <privilege set="default" name="file_chown" /> + <privilege set="default" name="file_chown_self" /> + <privilege set="default" name="file_dac_execute" /> + <privilege set="default" name="file_dac_read" /> + <privilege set="default" name="file_dac_search" /> + <privilege set="default" name="file_dac_write" /> + <privilege set="default" name="file_owner" /> + <privilege set="default" name="file_setid" /> + <privilege set="default" name="ipc_dac_read" /> + <privilege set="default" name="ipc_dac_write" /> + <privilege set="default" name="ipc_owner" /> + <privilege set="default" name="net_bindmlp" /> + <privilege set="default" name="net_icmpaccess" /> + <privilege set="default" name="net_mac_aware" /> + <privilege set="default" name="net_privaddr" /> + <privilege set="default" name="proc_chroot" /> + <privilege set="default" name="sys_audit" /> + <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_owner" /> + <privilege set="default" name="proc_setid" /> + <privilege 
set="default" name="proc_taskid" /> + <privilege set="default" name="sys_acct" /> + <privilege set="default" name="sys_admin" /> + <privilege set="default" name="sys_mount" /> + <privilege set="default" name="sys_nfs" /> + <privilege set="default" name="sys_resource" /> + + <privilege set="prohibited" name="dtrace_kernel" /> + <privilege set="prohibited" name="proc_zone" /> + <privilege set="prohibited" name="sys_config" /> + <privilege set="prohibited" name="sys_devices" /> + <privilege set="prohibited" name="sys_linkdir" /> + <privilege set="prohibited" name="sys_net_config" /> + <privilege set="prohibited" name="sys_res_config" /> + <privilege set="prohibited" name="sys_suser_compat" /> + + <privilege set="required" name="proc_exec" /> + <privilege set="required" name="proc_fork" /> + <privilege set="required" name="sys_mount" /> +</brand> diff --git a/usr/src/lib/brand/native/zone/platform.xml b/usr/src/lib/brand/native/zone/platform.xml new file mode 100644 index 0000000000..d2bbc839cf --- /dev/null +++ b/usr/src/lib/brand/native/zone/platform.xml @@ -0,0 +1,103 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. +--> + +<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" + "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> + +<platform name="native"> + + <!-- Global filesystems to mount when booting the zone --> + <global_mount special="/dev" directory="/dev" type="dev" + opt="attrdir=%R/dev"/> + + <!-- Local filesystems to mount when booting the zone --> + <mount special="/proc" directory="/proc" type="proc" /> + <mount special="ctfs" directory="/system/contract" type="ctfs" /> + <mount special="mnttab" directory="/etc/mnttab" type="mntfs" /> + <mount special="objfs" directory="/system/object" type="objfs" /> + <mount special="swap" directory="/etc/svc/volatile" type="tmpfs" /> + + <!-- Devices to create under /dev --> + <device match="arp" /> + <device match="conslog" /> + <device match="cpu/self/cpuid" /> + <device match="crypto" /> + <device match="cryptoadm" /> + <device match="dsk" /> + <device match="dtrace/*" /> + <device match="dtrace/provider/*" /> + <device match="fd" /> + <device match="kstat" /> + <device match="lo0" /> + <device match="lo1" /> + <device match="lo2" /> + <device match="lo3" /> + <device match="log" /> + <device match="logindmux" /> + <device match="null" /> + <device match="openprom" arch="sparc" /> + <device match="poll" /> + <device match="pool" /> + <device match="ptmx" /> + <device match="pts/*" /> + <device match="random" /> + <device match="rdsk" /> + <device match="rmt" /> + <device match="sad/user" /> + <device match="swap" /> + <device match="sysevent" /> + <device match="tcp" /> + <device match="tcp6" /> + <device match="term" /> + <device match="ticlts" /> + <device match="ticots" /> + <device match="ticotsord" /> + <device match="tty" /> + <device match="udp" /> + <device match="udp6" /> + <device match="urandom" /> + <device match="zero" /> + <device match="zfs" /> + + <!-- Renamed devices to create under /dev --> + 
<device match="zcons/%z/zoneconsole" name="zconsole" /> + + <!-- Symlinks to create under /dev --> + <symlink source="console" target="zconsole" /> + <symlink source="dtremote" target="/dev/null" /> + <symlink source="msglog" target="zconsole" /> + <symlink source="stderr" target="./fd/2" /> + <symlink source="stdin" target="./fd/0" /> + <symlink source="stdout" target="./fd/1" /> + <symlink source="syscon" target="zconsole" /> + <symlink source="sysmsg" target="zconsole" /> + <symlink source="systty" target="zconsole" /> + +</platform> diff --git a/usr/src/lib/brand/native/zone/postclone.sh b/usr/src/lib/brand/native/zone/postclone.sh new file mode 100644 index 0000000000..63fc5e7bda --- /dev/null +++ b/usr/src/lib/brand/native/zone/postclone.sh @@ -0,0 +1,61 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +PATH=/sbin:/usr/bin:/usr/sbin; export PATH + +ZONENAME=$1 +ZONEROOT=$2 + +# If the zone is already sys-unconfiged, then we're done. +if [ -f $ZONEROOT/etc/.UNCONFIGURED ]; then + exit 0 +fi + +# +# Mount the zone. 
The zone is still in the INCOMPLETE state, so we have to +# -f(orce) mount it. +# +zoneadm -z $ZONENAME mount -f +if [ $? -ne 0 ]; then + echo `gettext "Could not mount zone for sys-unconfig"` + exit 1 +fi + +# Log into the zone and sys-unconfig it. +zlogin -S $ZONENAME /usr/sbin/sys-unconfig -R /a +err=$? +if [ $err -ne 0 ]; then + echo `gettext "sys-unconfig failed"` +fi + +zoneadm -z $ZONENAME unmount +if [ $? -ne 0 ]; then + echo `gettext "Could not unmount zone"` + exit 1 +fi + +exit $err diff --git a/usr/src/lib/brand/sn1/Makefile b/usr/src/lib/brand/sn1/Makefile new file mode 100644 index 0000000000..27fccbb515 --- /dev/null +++ b/usr/src/lib/brand/sn1/Makefile @@ -0,0 +1,46 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../Makefile.lib + +SUBDIRS= $(MACH) $(VARIANT_SUBDIRS) zone +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET= all +clean := TARGET= clean +clobber := TARGET= clobber +install := TARGET= install +lint := TARGET= lint + +.KEEP_STATE: + +all install clean clobber lint: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: diff --git a/usr/src/lib/brand/sn1/Makefile.com b/usr/src/lib/brand/sn1/Makefile.com new file mode 100644 index 0000000000..d28805931e --- /dev/null +++ b/usr/src/lib/brand/sn1/Makefile.com @@ -0,0 +1,59 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +LIBRARY = sn1_brand.a +VERS = .1 +COBJS = sn1_brand.o +ASOBJS = sn1_handler.o +OBJECTS = $(COBJS) $(ASOBJS) + +include ../../../Makefile.lib + +LIBS = $(DYNLIB) +CSRCS = $(COBJS:%o=../common/%c) +ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s) +SRCS = $(CSRCS) $(ASSRCS) +SRCDIR = ../common + +LDLIBS += -Wl,-esn1_init -lc +CFLAGS += $(CCVERBOSE) +CPPFLAGS += -D_REENTRANT -I../common +DYNFLAGS += $(BLOCAL) $(ZNOVERSION) +ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +pics/%.o: $(ISASRCDIR)/%.s + $(COMPILE.s) -o $@ $< + $(POST_PROCESS_O) + +include ../../../Makefile.targ + diff --git a/usr/src/lib/brand/sn1/amd64/Makefile b/usr/src/lib/brand/sn1/amd64/Makefile new file mode 100644 index 0000000000..98897d9ba8 --- /dev/null +++ b/usr/src/lib/brand/sn1/amd64/Makefile @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/sn1/amd64/Makefile + +ISASRCDIR=. 
+ +include ../Makefile.com +include ../../../Makefile.lib.64 + +CPPFLAGS += -D_SYSCALL32 + +install: all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/sn1/amd64/sn1_handler.s b/usr/src/lib/brand/sn1/amd64/sn1_handler.s new file mode 100644 index 0000000000..00a1a83f76 --- /dev/null +++ b/usr/src/lib/brand/sn1/amd64/sn1_handler.s @@ -0,0 +1,371 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sn1_brand.h> + +#define RVAL2_FLAG 0x100 + +#if defined(lint) + +void +sn1_handler(void) +{ +} + +#else /* lint */ + /* + * %rax - syscall number + * stack contains return address + */ + ENTRY_NP(sn1_handler) + + pushq %rbp + movq %rsp, %rbp + + /* + * Look up the system call's entry in the sysent table below + */ + pushq %rdx + movq sn1_sysent_table@GOTPCREL(%rip), %rdx /* %rdx = sysent_table */ + shlq $4, %rax /* each entry is 16 bytes */ + addq %rdx, %rax /* %rax = sysent entry address */ + movq 8(%rax), %rdx /* get NARGS */ + testq $RVAL2_FLAG, %rdx /* does syscall have 2 return vals? 
*/ + popq %rdx + jz 1f + + /* + * Two return syscall. We rely on none of these having either rval + * be larger than 32 bits, and stick both into a single 64-bit + * register. If that changes, we'll have to come up with a + * different scheme. + * + * A negative result is -errno rather than a packed rval pair, so it + * must reach the error path at 2: untouched; splitting it first would + * zero-extend the low half and make the failure look like success. + */ + call *(%rax) + testq %rax, %rax /* negative => -errno, not an rval pair */ + js 2f + movq %rax, %rdx /* copy upper 32-bit rval into %rdx */ + shrq $32, %rdx /* move it to lower 32 bits */ + movl %eax, %eax + jmp 2f + +1: + call *(%rax) /* call per-syscall handler */ + +2: + /* + * If %rax >= 0, it means the call completed successfully and %rax + * is the proper return value. Otherwise, %rax contains -errno. + * In the event of an error, we need to set the carry flag (which + * is the kernel's indication of failure to libc) and set %rax to + * the positive errno. + * + * negq already leaves CF set for a non-zero operand, so following it + * with cmc would clear the flag again; stc sets it unconditionally. + */ + cmpq $0, %rax + jge done + negq %rax + stc + +done: + movq %rbp, %rsp + popq %rbp + ret /* return to instr after syscall */ + SET_SIZE(sn1_handler) + + .section .data ,"aw" + .globl sn1_sysent_table + .align 8 + +#define NOSYS \ + .quad sn1_unimpl; \ + .quad 0 + +#define EMULATE(name, args) \ + .globl name; \ + .quad name; \ + .quad args + +sn1_sysent_table: + .type sn1_sysent_table, @object + .size sn1_sysent_table, 4096 + .align 8 + + NOSYS /* 0 */ + NOSYS /* 1 */ + NOSYS /* 2 */ + NOSYS /* 3 */ + NOSYS /* 4 */ + NOSYS /* 5 */ + NOSYS /* 6 */ + NOSYS /* 7 */ + NOSYS /* 8 */ + NOSYS /* 9 */ + NOSYS /* 10 */ + NOSYS /* 11 */ + NOSYS /* 12 */ + NOSYS /* 13 */ + NOSYS /* 14 */ + NOSYS /* 15 */ + NOSYS /* 16 */ + NOSYS /* 17 */ + NOSYS /* 18 */ + NOSYS /* 19 */ + NOSYS /* 20 */ + NOSYS /* 21 */ + NOSYS /* 22 */ + NOSYS /* 23 */ + NOSYS /* 24 */ + NOSYS /* 25 */ + NOSYS /* 26 */ + NOSYS /* 27 */ + NOSYS /* 28 */ + NOSYS /* 29 */ + NOSYS /* 30 */ + NOSYS /* 31 */ + NOSYS /* 32 */ + NOSYS /* 33 */ + NOSYS /* 34 */ + NOSYS /* 35 */ + NOSYS /* 36 */ + NOSYS /* 37 */ + NOSYS /* 38 */ + NOSYS /* 39 */ + NOSYS /* 40 */ + NOSYS /* 41 */ + NOSYS /* 42 */ + NOSYS /* 43 */ + NOSYS /* 44 */ + NOSYS /* 45 */ + NOSYS /* 46 */ + NOSYS /* 47
*/ + NOSYS /* 48 */ + NOSYS /* 49 */ + NOSYS /* 50 */ + NOSYS /* 51 */ + NOSYS /* 52 */ + NOSYS /* 53 */ + NOSYS /* 54 */ + NOSYS /* 55 */ + NOSYS /* 56 */ + NOSYS /* 57 */ + NOSYS /* 58 */ + NOSYS /* 59 */ + NOSYS /* 60 */ + NOSYS /* 61 */ + NOSYS /* 62 */ + NOSYS /* 63 */ + NOSYS /* 64 */ + NOSYS /* 65 */ + NOSYS /* 66 */ + NOSYS /* 67 */ + NOSYS /* 68 */ + NOSYS /* 69 */ + NOSYS /* 70 */ + NOSYS /* 71 */ + NOSYS /* 72 */ + NOSYS /* 73 */ + NOSYS /* 74 */ + NOSYS /* 75 */ + NOSYS /* 76 */ + NOSYS /* 77 */ + NOSYS /* 78 */ + NOSYS /* 79 */ + NOSYS /* 80 */ + NOSYS /* 81 */ + NOSYS /* 82 */ + NOSYS /* 83 */ + NOSYS /* 84 */ + NOSYS /* 85 */ + NOSYS /* 86 */ + NOSYS /* 87 */ + NOSYS /* 88 */ + NOSYS /* 89 */ + NOSYS /* 90 */ + NOSYS /* 91 */ + NOSYS /* 92 */ + NOSYS /* 93 */ + NOSYS /* 94 */ + NOSYS /* 95 */ + NOSYS /* 96 */ + NOSYS /* 97 */ + NOSYS /* 98 */ + NOSYS /* 99 */ + NOSYS /* 100 */ + NOSYS /* 101 */ + NOSYS /* 102 */ + NOSYS /* 103 */ + NOSYS /* 104 */ + NOSYS /* 105 */ + NOSYS /* 106 */ + NOSYS /* 107 */ + NOSYS /* 108 */ + NOSYS /* 109 */ + NOSYS /* 110 */ + NOSYS /* 111 */ + NOSYS /* 112 */ + NOSYS /* 113 */ + NOSYS /* 114 */ + NOSYS /* 115 */ + NOSYS /* 116 */ + NOSYS /* 117 */ + NOSYS /* 118 */ + NOSYS /* 119 */ + NOSYS /* 120 */ + NOSYS /* 121 */ + NOSYS /* 122 */ + NOSYS /* 123 */ + NOSYS /* 124 */ + NOSYS /* 125 */ + NOSYS /* 126 */ + NOSYS /* 127 */ + NOSYS /* 128 */ + NOSYS /* 129 */ + NOSYS /* 130 */ + NOSYS /* 131 */ + NOSYS /* 132 */ + NOSYS /* 133 */ + NOSYS /* 134 */ + EMULATE(sn1_uname, 1) /* 135 */ + NOSYS /* 136 */ + NOSYS /* 137 */ + NOSYS /* 138 */ + NOSYS /* 139 */ + NOSYS /* 140 */ + NOSYS /* 141 */ + NOSYS /* 142 */ + EMULATE(sn1_fork1, RVAL2_FLAG | 0) /* 143 */ + NOSYS /* 144 */ + NOSYS /* 145 */ + NOSYS /* 146 */ + NOSYS /* 147 */ + NOSYS /* 148 */ + NOSYS /* 149 */ + NOSYS /* 150 */ + NOSYS /* 151 */ + NOSYS /* 152 */ + NOSYS /* 153 */ + NOSYS /* 154 */ + NOSYS /* 155 */ + NOSYS /* 156 */ + NOSYS /* 157 */ + NOSYS /* 158 */ + 
NOSYS /* 159 */ + NOSYS /* 160 */ + NOSYS /* 161 */ + NOSYS /* 162 */ + NOSYS /* 163 */ + NOSYS /* 164 */ + NOSYS /* 165 */ + NOSYS /* 166 */ + NOSYS /* 167 */ + NOSYS /* 168 */ + NOSYS /* 169 */ + NOSYS /* 170 */ + NOSYS /* 171 */ + NOSYS /* 172 */ + NOSYS /* 173 */ + NOSYS /* 174 */ + NOSYS /* 175 */ + NOSYS /* 176 */ + NOSYS /* 177 */ + NOSYS /* 178 */ + NOSYS /* 179 */ + NOSYS /* 180 */ + NOSYS /* 181 */ + NOSYS /* 182 */ + NOSYS /* 183 */ + NOSYS /* 184 */ + NOSYS /* 185 */ + NOSYS /* 186 */ + NOSYS /* 187 */ + NOSYS /* 188 */ + NOSYS /* 189 */ + NOSYS /* 190 */ + NOSYS /* 191 */ + NOSYS /* 192 */ + NOSYS /* 193 */ + NOSYS /* 194 */ + NOSYS /* 195 */ + NOSYS /* 196 */ + NOSYS /* 197 */ + NOSYS /* 198 */ + NOSYS /* 199 */ + NOSYS /* 200 */ + NOSYS /* 201 */ + NOSYS /* 202 */ + NOSYS /* 203 */ + NOSYS /* 204 */ + NOSYS /* 205 */ + NOSYS /* 206 */ + NOSYS /* 207 */ + NOSYS /* 208 */ + NOSYS /* 209 */ + NOSYS /* 210 */ + NOSYS /* 211 */ + NOSYS /* 212 */ + NOSYS /* 213 */ + NOSYS /* 214 */ + NOSYS /* 215 */ + NOSYS /* 216 */ + NOSYS /* 217 */ + NOSYS /* 218 */ + NOSYS /* 219 */ + NOSYS /* 220 */ + NOSYS /* 221 */ + NOSYS /* 222 */ + NOSYS /* 223 */ + NOSYS /* 224 */ + NOSYS /* 225 */ + NOSYS /* 226 */ + NOSYS /* 227 */ + NOSYS /* 228 */ + NOSYS /* 229 */ + NOSYS /* 230 */ + NOSYS /* 231 */ + NOSYS /* 232 */ + NOSYS /* 233 */ + NOSYS /* 234 */ + NOSYS /* 235 */ + NOSYS /* 236 */ + NOSYS /* 237 */ + NOSYS /* 238 */ + NOSYS /* 239 */ + NOSYS /* 240 */ + NOSYS /* 241 */ + NOSYS /* 242 */ + NOSYS /* 243 */ + NOSYS /* 244 */ + NOSYS /* 245 */ + NOSYS /* 246 */ + NOSYS /* 247 */ + NOSYS /* 248 */ + NOSYS /* 249 */ + NOSYS /* 250 */ + NOSYS /* 251 */ + NOSYS /* 252 */ + NOSYS /* 253 */ + NOSYS /* 254 */ + NOSYS /* 255 */ + +#endif /* lint */ diff --git a/usr/src/lib/brand/sn1/common/mapfile-vers b/usr/src/lib/brand/sn1/common/mapfile-vers new file mode 100644 index 0000000000..9ba563a2cb --- /dev/null +++ b/usr/src/lib/brand/sn1/common/mapfile-vers @@ -0,0 +1,34 @@ +# +# 
CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Scope everything local -- our .init section is our only public interface. +# +{ + local: + *; +}; diff --git a/usr/src/lib/brand/sn1/common/sn1_brand.c b/usr/src/lib/brand/sn1/common/sn1_brand.c new file mode 100644 index 0000000000..75130f8fb7 --- /dev/null +++ b/usr/src/lib/brand/sn1/common/sn1_brand.c @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <assert.h> +#include <sn1_brand.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#include <sys/inttypes.h> +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/brand.h> + +extern int errno; + +longlong_t +sn1_fork1() +{ + sysret_t rval; + longlong_t r; + int err; + + err = __systemcall(&rval, SYS_fork1 + 1024); + + if (err) { + r = ((-1ll) << 32) | (-err); + return (r); + } + r = ((longlong_t)rval.sys_rval2 << 32) | rval.sys_rval1; + return (r); +} + +int +sn1_uname(uintptr_t p1) +{ + struct utsname *un = (struct utsname *)p1; + int rev, err; + + err = syscall(SYS_uname + 1024, p1); + if (err >= 0) { + rev = atoi(&un->release[2]); + assert(rev >= 10); + (void) sprintf(un->release, "5.%d", rev - 1); + } else { + err = -errno; + } + return (err); +} + +int +sn1_unimpl(uintptr_t p1) +{ + (void) fprintf(stderr, + "unimplemented syscall (%d) in emulation library\n", (int)p1); + return (-EINVAL); +} + +#pragma init(sn1_init) + +int +sn1_init() +{ + if (syscall(SYS_brand, B_REGISTER, (void *)sn1_handler)) { + perror("failed to brand the process"); + return (1); + } + + return (0); +} diff --git a/usr/src/lib/brand/sn1/common/sn1_brand.h b/usr/src/lib/brand/sn1/common/sn1_brand.h new file mode 100644 index 0000000000..9d341cf195 --- /dev/null +++ b/usr/src/lib/brand/sn1/common/sn1_brand.h @@ -0,0 +1,47 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SN1_BRAND_H +#define _SN1_BRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define NSYSCALL 256 /* number of system calls */ + +#ifndef _ASM + +extern void sn1_handler(void); + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SN1_BRAND_H */ diff --git a/usr/src/lib/brand/sn1/i386/Makefile b/usr/src/lib/brand/sn1/i386/Makefile new file mode 100644 index 0000000000..9f1797b292 --- /dev/null +++ b/usr/src/lib/brand/sn1/i386/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/sn1/i386/Makefile + +ISASRCDIR=. + +ASFLAGS += $(AS_PICFLAGS) -P -D_ASM + +include ../Makefile.com + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/sn1/i386/sn1_handler.s b/usr/src/lib/brand/sn1/i386/sn1_handler.s new file mode 100644 index 0000000000..13100ac8e1 --- /dev/null +++ b/usr/src/lib/brand/sn1/i386/sn1_handler.s @@ -0,0 +1,390 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sn1_brand.h> + +#define PIC_SETUP(r) \ + call 9f; \ +9: \ + popl r; \ + addl $_GLOBAL_OFFSET_TABLE_ + [. 
- 9b], r + +#if defined(lint) + +void +sn1_handler(void) +{ +} + +#else /* lint */ + /* + * %eax - syscall number + * stack contains return address + */ + ENTRY_NP(sn1_handler) + pushl %ebp + movl %esp, %ebp + + /* Save scratch registers */ + pushl %edi + pushl %esi + pushl %ecx + pushl %ebx + + /* + * Look up the system call's entry in the sysent table below, then + * jump to the address of the emulation routine for that call. + */ + PIC_SETUP(%ebx) + movl sn1_sysent_table@GOT(%ebx), %edx /* %edx = sysent_table */ + shll $3, %eax /* each entry is 8 bytes */ + add %eax, %edx /* %edx = sysent entry address */ + + /* + * Copy the arguments to the system call into the stack frame + * for the emulating routine. + * Note: we always save room on the stack for 8 arguments. This is + * because when we come back from the system call routine, we have + * lost our pointer to the sysent structure and no longer know + * precisely know how much stack space to free up. + */ + movl %esp, %esi + addl $28, %esi /* start of args */ + movl 4(%edx), %ecx /* number of args */ + subl $32, %esp /* make room for copies */ + movl %esp, %edi /* start of arg copy area */ + rep; smovl /* copy args */ + + call *(%edx) /* call per-syscall handler */ + + addl $32, %esp /* free up the stack space used for args. */ + + /* + * If %eax >= 0, it means the call completed successfully and %eax + * is the proper return value. Otherwise, %eax contains -errno. + * In the event of an error, we need to set the carry flag (which + * is the kernel's indication of failure to libc) and set %eax to + * the positive errno. + * + * Unlike the amd64, v8, and v9 brands, we don't have to do + * anything special for two rval system calls. By returning a + * longlong_t, the emulation routine puts the proper values into + * %eax and %edx for us. 
+ */ + cmpl $0, %eax + jge done + negl %eax + stc /* negl already set CF for a non-zero operand; cmc would clear it */ + +done: + popl %ebx /* restore scratch registers */ + popl %ecx + popl %esi + popl %edi + + movl %ebp, %esp + popl %ebp + ret /* return to instr after sysenter */ + SET_SIZE(sn1_handler) + + .section .data ,"aw" + .globl sn1_sysent_table + .align 4 + +#define NOSYS \ + .long sn1_unimpl; \ + .long 0 + +#define EMULATE(name, args) \ + .globl name; \ + .long name; \ + .long args + +sn1_sysent_table: + .type sn1_sysent_table, @object + .size sn1_sysent_table, 2048 + .align 4 + + NOSYS /* 0 */ + NOSYS /* 1 */ + NOSYS /* 2 */ + NOSYS /* 3 */ + NOSYS /* 4 */ + NOSYS /* 5 */ + NOSYS /* 6 */ + NOSYS /* 7 */ + NOSYS /* 8 */ + NOSYS /* 9 */ + NOSYS /* 10 */ + NOSYS /* 11 */ + NOSYS /* 12 */ + NOSYS /* 13 */ + NOSYS /* 14 */ + NOSYS /* 15 */ + NOSYS /* 16 */ + NOSYS /* 17 */ + NOSYS /* 18 */ + NOSYS /* 19 */ + NOSYS /* 20 */ + NOSYS /* 21 */ + NOSYS /* 22 */ + NOSYS /* 23 */ + NOSYS /* 24 */ + NOSYS /* 25 */ + NOSYS /* 26 */ + NOSYS /* 27 */ + NOSYS /* 28 */ + NOSYS /* 29 */ + NOSYS /* 30 */ + NOSYS /* 31 */ + NOSYS /* 32 */ + NOSYS /* 33 */ + NOSYS /* 34 */ + NOSYS /* 35 */ + NOSYS /* 36 */ + NOSYS /* 37 */ + NOSYS /* 38 */ + NOSYS /* 39 */ + NOSYS /* 40 */ + NOSYS /* 41 */ + NOSYS /* 42 */ + NOSYS /* 43 */ + NOSYS /* 44 */ + NOSYS /* 45 */ + NOSYS /* 46 */ + NOSYS /* 47 */ + NOSYS /* 48 */ + NOSYS /* 49 */ + NOSYS /* 50 */ + NOSYS /* 51 */ + NOSYS /* 52 */ + NOSYS /* 53 */ + NOSYS /* 54 */ + NOSYS /* 55 */ + NOSYS /* 56 */ + NOSYS /* 57 */ + NOSYS /* 58 */ + NOSYS /* 59 */ + NOSYS /* 60 */ + NOSYS /* 61 */ + NOSYS /* 62 */ + NOSYS /* 63 */ + NOSYS /* 64 */ + NOSYS /* 65 */ + NOSYS /* 66 */ + NOSYS /* 67 */ + NOSYS /* 68 */ + NOSYS /* 69 */ + NOSYS /* 70 */ + NOSYS /* 71 */ + NOSYS /* 72 */ + NOSYS /* 73 */ + NOSYS /* 74 */ + NOSYS /* 75 */ + NOSYS /* 76 */ + NOSYS /* 77 */ + NOSYS /* 78 */ + NOSYS /* 79 */ + NOSYS /* 80 */ + NOSYS /* 81 */ + NOSYS /* 82 */ + NOSYS /* 83 */ + NOSYS /* 84 */ + NOSYS /* 85 */ + NOSYS /* 86 */ +
NOSYS /* 87 */ + NOSYS /* 88 */ + NOSYS /* 89 */ + NOSYS /* 90 */ + NOSYS /* 91 */ + NOSYS /* 92 */ + NOSYS /* 93 */ + NOSYS /* 94 */ + NOSYS /* 95 */ + NOSYS /* 96 */ + NOSYS /* 97 */ + NOSYS /* 98 */ + NOSYS /* 99 */ + NOSYS /* 100 */ + NOSYS /* 101 */ + NOSYS /* 102 */ + NOSYS /* 103 */ + NOSYS /* 104 */ + NOSYS /* 105 */ + NOSYS /* 106 */ + NOSYS /* 107 */ + NOSYS /* 108 */ + NOSYS /* 109 */ + NOSYS /* 110 */ + NOSYS /* 111 */ + NOSYS /* 112 */ + NOSYS /* 113 */ + NOSYS /* 114 */ + NOSYS /* 115 */ + NOSYS /* 116 */ + NOSYS /* 117 */ + NOSYS /* 118 */ + NOSYS /* 119 */ + NOSYS /* 120 */ + NOSYS /* 121 */ + NOSYS /* 122 */ + NOSYS /* 123 */ + NOSYS /* 124 */ + NOSYS /* 125 */ + NOSYS /* 126 */ + NOSYS /* 127 */ + NOSYS /* 128 */ + NOSYS /* 129 */ + NOSYS /* 130 */ + NOSYS /* 131 */ + NOSYS /* 132 */ + NOSYS /* 133 */ + NOSYS /* 134 */ + EMULATE(sn1_uname, 1) /* 135 */ + NOSYS /* 136 */ + NOSYS /* 137 */ + NOSYS /* 138 */ + NOSYS /* 139 */ + NOSYS /* 140 */ + NOSYS /* 141 */ + NOSYS /* 142 */ + EMULATE(sn1_fork1, 0) /* 143 */ + NOSYS /* 144 */ + NOSYS /* 145 */ + NOSYS /* 146 */ + NOSYS /* 147 */ + NOSYS /* 148 */ + NOSYS /* 149 */ + NOSYS /* 150 */ + NOSYS /* 151 */ + NOSYS /* 152 */ + NOSYS /* 153 */ + NOSYS /* 154 */ + NOSYS /* 155 */ + NOSYS /* 156 */ + NOSYS /* 157 */ + NOSYS /* 158 */ + NOSYS /* 159 */ + NOSYS /* 160 */ + NOSYS /* 161 */ + NOSYS /* 162 */ + NOSYS /* 163 */ + NOSYS /* 164 */ + NOSYS /* 165 */ + NOSYS /* 166 */ + NOSYS /* 167 */ + NOSYS /* 168 */ + NOSYS /* 169 */ + NOSYS /* 170 */ + NOSYS /* 171 */ + NOSYS /* 172 */ + NOSYS /* 173 */ + NOSYS /* 174 */ + NOSYS /* 175 */ + NOSYS /* 176 */ + NOSYS /* 177 */ + NOSYS /* 178 */ + NOSYS /* 179 */ + NOSYS /* 180 */ + NOSYS /* 181 */ + NOSYS /* 182 */ + NOSYS /* 183 */ + NOSYS /* 184 */ + NOSYS /* 185 */ + NOSYS /* 186 */ + NOSYS /* 187 */ + NOSYS /* 188 */ + NOSYS /* 189 */ + NOSYS /* 190 */ + NOSYS /* 191 */ + NOSYS /* 192 */ + NOSYS /* 193 */ + NOSYS /* 194 */ + NOSYS /* 195 */ + NOSYS /* 196 */ + 
NOSYS /* 197 */ + NOSYS /* 198 */ + NOSYS /* 199 */ + NOSYS /* 200 */ + NOSYS /* 201 */ + NOSYS /* 202 */ + NOSYS /* 203 */ + NOSYS /* 204 */ + NOSYS /* 205 */ + NOSYS /* 206 */ + NOSYS /* 207 */ + NOSYS /* 208 */ + NOSYS /* 209 */ + NOSYS /* 210 */ + NOSYS /* 211 */ + NOSYS /* 212 */ + NOSYS /* 213 */ + NOSYS /* 214 */ + NOSYS /* 215 */ + NOSYS /* 216 */ + NOSYS /* 217 */ + NOSYS /* 218 */ + NOSYS /* 219 */ + NOSYS /* 220 */ + NOSYS /* 221 */ + NOSYS /* 222 */ + NOSYS /* 223 */ + NOSYS /* 224 */ + NOSYS /* 225 */ + NOSYS /* 226 */ + NOSYS /* 227 */ + NOSYS /* 228 */ + NOSYS /* 229 */ + NOSYS /* 230 */ + NOSYS /* 231 */ + NOSYS /* 232 */ + NOSYS /* 233 */ + NOSYS /* 234 */ + NOSYS /* 235 */ + NOSYS /* 236 */ + NOSYS /* 237 */ + NOSYS /* 238 */ + NOSYS /* 239 */ + NOSYS /* 240 */ + NOSYS /* 241 */ + NOSYS /* 242 */ + NOSYS /* 243 */ + NOSYS /* 244 */ + NOSYS /* 245 */ + NOSYS /* 246 */ + NOSYS /* 247 */ + NOSYS /* 248 */ + NOSYS /* 249 */ + NOSYS /* 250 */ + NOSYS /* 251 */ + NOSYS /* 252 */ + NOSYS /* 253 */ + NOSYS /* 254 */ + NOSYS /* 255 */ + +#endif /* lint */ diff --git a/usr/src/lib/brand/sn1/sparc/Makefile b/usr/src/lib/brand/sn1/sparc/Makefile new file mode 100644 index 0000000000..0b13e01a82 --- /dev/null +++ b/usr/src/lib/brand/sn1/sparc/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/sn1/sparc/Makefile + +ISASRCDIR=. + +include ../Makefile.com + +ASFLAGS += -xarch=v8plus ${AS_PICFLAGS} + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/sn1/sparc/sn1_handler.s b/usr/src/lib/brand/sn1/sparc/sn1_handler.s new file mode 100644 index 0000000000..235b29abd8 --- /dev/null +++ b/usr/src/lib/brand/sn1/sparc/sn1_handler.s @@ -0,0 +1,421 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/asm_linkage.h> +#include <sn1_brand.h> + +#define RVAL2_FLAG 0x100 + +#define PIC_SETUP(r) \ + mov %o7, %g1; \ +9: call 8f; \ + sethi %hi(_GLOBAL_OFFSET_TABLE_ - (9b - .)), r; \ +8: or r, %lo(_GLOBAL_OFFSET_TABLE_ - (9b - .)), r; \ + add r, %o7, r; \ + mov %g1, %o7 + +/* + * Translate a global symbol into an address. The resulting address + * is returned in the first register parameter. The second register + * is just for scratch space. + */ +#ifdef __sparcv9 +#define GET_SYM_ADDR(r1, r2, name) \ + PIC_SETUP(r1) ;\ + sethi %hi(name), r2 ;\ + or r2, %lo(name), r2 ;\ + ldn [r2 + r1], r1 +#else +#define GET_SYM_ADDR(r1, r2, name) \ + PIC_SETUP(r1); \ + ld [r1 + name], r1 +#endif + +#if defined(lint) + +void +sn1_handler(void) +{ +} + +#else /* lint */ + + .section ".text" + + /* + * When we get here, %g1 should contain the system call and + * %g6 should contain the address immediately after the trap + * instruction. + */ + ENTRY_NP(sn1_handler) + save %sp, -SA(MINFRAME), %sp + + ! We grabbed a new window, so copy the args for the target routine. + mov %i0, %o0 + mov %i1, %o1 + mov %i2, %o2 + mov %i3, %o3 + mov %i4, %o4 + mov %i5, %o5 + + /* + * Find the base address of the jump table, index into it based + * on the system call number, and extract the address of the proper + * emulation routine. + */ + sll %g1, (1 + CLONGSHIFT), %l1 /* Each entry has 2 longs */ + GET_SYM_ADDR(%l2, %l0, sn1_sysent_table) + add %l1, %l2, %l3 /* index to proper entry */ + ldn [%l3 + CPTRSIZE], %l4 /* save NARGS */ + ldn [%l3], %l3 /* emulation address */ + call %l3 + nop + + /* + * Check for two-return syscall. + */ + andcc %l4, RVAL2_FLAG, %g0 + be 1f + nop +#ifdef __sparcv9 + /* + * In 64-bit code, the syscall emulation routine returns the values + * in a single 64-bit register. We split it into two 32-bit values. 
+ */ + srlx %o0, 32, %o1 + srl %o0, 0, %o0 + mov %o1, %i1 +#else + /* + * In 32-bit code, the syscall emulation routine returns the values + * in two registers - just the wrong ones. + */ + mov %o0, %i1 + mov %o1, %o0 +#endif + +1: + /* + * If %o0 >= 0, it means the call completed successfully and %o0 is + * the proper return value. Otherwise, %o0 contains -errno. In + * the event of an error, we need to set the carry flag (which is + * the kernel's indication of failure to libc) and set %o0 to the + * positive errno. + */ + brgez %o0, 2f /* %o0 >= 0, so we're done. */ + addcc %g0, %g0, %g0 ! psr &= ~C + neg %o0 + subcc %g0, 1, %g0 ! psr |= C +2: + mov %o0, %i0 + restore + jmp %g6 + nop + SET_SIZE(sn1_handler) + + .section ".data",#alloc,#write + .global sn1_sysent_table + .align CLONGSIZE + + .global sn1_unimpl +#ifdef __sparcv9 +#define WORD .xword +#else +#define WORD .word +#endif + +#define NOSYS \ + WORD sn1_unimpl ;\ + WORD 0 + +#define EMULATE(name, args) \ + .global name ;\ + WORD name ;\ + WORD args + +sn1_sysent_table: + .type sn1_sysent_table, #object + .size sn1_sysent_table, (2 * 256 * CLONGSIZE) + .align CLONGSIZE + NOSYS /* 0 */ + NOSYS /* 1 */ + NOSYS /* 2 */ + NOSYS /* 3 */ + NOSYS /* 4 */ + NOSYS /* 5 */ + NOSYS /* 6 */ + NOSYS /* 7 */ + NOSYS /* 8 */ + NOSYS /* 9 */ + NOSYS /* 10 */ + NOSYS /* 11 */ + NOSYS /* 12 */ + NOSYS /* 13 */ + NOSYS /* 14 */ + NOSYS /* 15 */ + NOSYS /* 16 */ + NOSYS /* 17 */ + NOSYS /* 18 */ + NOSYS /* 19 */ + NOSYS /* 20 */ + NOSYS /* 21 */ + NOSYS /* 22 */ + NOSYS /* 23 */ + NOSYS /* 24 */ + NOSYS /* 25 */ + NOSYS /* 26 */ + NOSYS /* 27 */ + NOSYS /* 28 */ + NOSYS /* 29 */ + NOSYS /* 30 */ + NOSYS /* 31 */ + NOSYS /* 32 */ + NOSYS /* 33 */ + NOSYS /* 34 */ + NOSYS /* 35 */ + NOSYS /* 36 */ + NOSYS /* 37 */ + NOSYS /* 38 */ + NOSYS /* 39 */ + NOSYS /* 40 */ + NOSYS /* 41 */ + NOSYS /* 42 */ + NOSYS /* 43 */ + NOSYS /* 44 */ + NOSYS /* 45 */ + NOSYS /* 46 */ + NOSYS /* 47 */ + NOSYS /* 48 */ + NOSYS /* 49 */ + NOSYS /* 
50 */ + NOSYS /* 51 */ + NOSYS /* 52 */ + NOSYS /* 53 */ + NOSYS /* 54 */ + NOSYS /* 55 */ + NOSYS /* 56 */ + NOSYS /* 57 */ + NOSYS /* 58 */ + NOSYS /* 59 */ + NOSYS /* 60 */ + NOSYS /* 61 */ + NOSYS /* 62 */ + NOSYS /* 63 */ + NOSYS /* 64 */ + NOSYS /* 65 */ + NOSYS /* 66 */ + NOSYS /* 67 */ + NOSYS /* 68 */ + NOSYS /* 69 */ + NOSYS /* 70 */ + NOSYS /* 71 */ + NOSYS /* 72 */ + NOSYS /* 73 */ + NOSYS /* 74 */ + NOSYS /* 75 */ + NOSYS /* 76 */ + NOSYS /* 77 */ + NOSYS /* 78 */ + NOSYS /* 79 */ + NOSYS /* 80 */ + NOSYS /* 81 */ + NOSYS /* 82 */ + NOSYS /* 83 */ + NOSYS /* 84 */ + NOSYS /* 85 */ + NOSYS /* 86 */ + NOSYS /* 87 */ + NOSYS /* 88 */ + NOSYS /* 89 */ + NOSYS /* 90 */ + NOSYS /* 91 */ + NOSYS /* 92 */ + NOSYS /* 93 */ + NOSYS /* 94 */ + NOSYS /* 95 */ + NOSYS /* 96 */ + NOSYS /* 97 */ + NOSYS /* 98 */ + NOSYS /* 99 */ + NOSYS /* 100 */ + NOSYS /* 101 */ + NOSYS /* 102 */ + NOSYS /* 103 */ + NOSYS /* 104 */ + NOSYS /* 105 */ + NOSYS /* 106 */ + NOSYS /* 107 */ + NOSYS /* 108 */ + NOSYS /* 109 */ + NOSYS /* 110 */ + NOSYS /* 111 */ + NOSYS /* 112 */ + NOSYS /* 113 */ + NOSYS /* 114 */ + NOSYS /* 115 */ + NOSYS /* 116 */ + NOSYS /* 117 */ + NOSYS /* 118 */ + NOSYS /* 119 */ + NOSYS /* 120 */ + NOSYS /* 121 */ + NOSYS /* 122 */ + NOSYS /* 123 */ + NOSYS /* 124 */ + NOSYS /* 125 */ + NOSYS /* 126 */ + NOSYS /* 127 */ + NOSYS /* 128 */ + NOSYS /* 129 */ + NOSYS /* 130 */ + NOSYS /* 131 */ + NOSYS /* 132 */ + NOSYS /* 133 */ + NOSYS /* 134 */ + EMULATE(sn1_uname, 1) /* 135 */ + NOSYS /* 136 */ + NOSYS /* 137 */ + NOSYS /* 138 */ + NOSYS /* 139 */ + NOSYS /* 140 */ + NOSYS /* 141 */ + NOSYS /* 142 */ + EMULATE(sn1_fork1, RVAL2_FLAG | 0) /* 143 */ + NOSYS /* 144 */ + NOSYS /* 145 */ + NOSYS /* 146 */ + NOSYS /* 147 */ + NOSYS /* 148 */ + NOSYS /* 149 */ + NOSYS /* 150 */ + NOSYS /* 151 */ + NOSYS /* 152 */ + NOSYS /* 153 */ + NOSYS /* 154 */ + NOSYS /* 155 */ + NOSYS /* 156 */ + NOSYS /* 157 */ + NOSYS /* 158 */ + NOSYS /* 159 */ + NOSYS /* 160 */ + NOSYS /* 161 */ 
+ NOSYS /* 162 */ + NOSYS /* 163 */ + NOSYS /* 164 */ + NOSYS /* 165 */ + NOSYS /* 166 */ + NOSYS /* 167 */ + NOSYS /* 168 */ + NOSYS /* 169 */ + NOSYS /* 170 */ + NOSYS /* 171 */ + NOSYS /* 172 */ + NOSYS /* 173 */ + NOSYS /* 174 */ + NOSYS /* 175 */ + NOSYS /* 176 */ + NOSYS /* 177 */ + NOSYS /* 178 */ + NOSYS /* 179 */ + NOSYS /* 180 */ + NOSYS /* 181 */ + NOSYS /* 182 */ + NOSYS /* 183 */ + NOSYS /* 184 */ + NOSYS /* 185 */ + NOSYS /* 186 */ + NOSYS /* 187 */ + NOSYS /* 188 */ + NOSYS /* 189 */ + NOSYS /* 190 */ + NOSYS /* 191 */ + NOSYS /* 192 */ + NOSYS /* 193 */ + NOSYS /* 194 */ + NOSYS /* 195 */ + NOSYS /* 196 */ + NOSYS /* 197 */ + NOSYS /* 198 */ + NOSYS /* 199 */ + NOSYS /* 200 */ + NOSYS /* 201 */ + NOSYS /* 202 */ + NOSYS /* 203 */ + NOSYS /* 204 */ + NOSYS /* 205 */ + NOSYS /* 206 */ + NOSYS /* 207 */ + NOSYS /* 208 */ + NOSYS /* 209 */ + NOSYS /* 210 */ + NOSYS /* 211 */ + NOSYS /* 212 */ + NOSYS /* 213 */ + NOSYS /* 214 */ + NOSYS /* 215 */ + NOSYS /* 216 */ + NOSYS /* 217 */ + NOSYS /* 218 */ + NOSYS /* 219 */ + NOSYS /* 220 */ + NOSYS /* 221 */ + NOSYS /* 222 */ + NOSYS /* 223 */ + NOSYS /* 224 */ + NOSYS /* 225 */ + NOSYS /* 226 */ + NOSYS /* 227 */ + NOSYS /* 228 */ + NOSYS /* 229 */ + NOSYS /* 230 */ + NOSYS /* 231 */ + NOSYS /* 232 */ + NOSYS /* 233 */ + NOSYS /* 234 */ + NOSYS /* 235 */ + NOSYS /* 236 */ + NOSYS /* 237 */ + NOSYS /* 238 */ + NOSYS /* 239 */ + NOSYS /* 240 */ + NOSYS /* 241 */ + NOSYS /* 242 */ + NOSYS /* 243 */ + NOSYS /* 244 */ + NOSYS /* 245 */ + NOSYS /* 246 */ + NOSYS /* 247 */ + NOSYS /* 248 */ + NOSYS /* 249 */ + NOSYS /* 250 */ + NOSYS /* 251 */ + NOSYS /* 252 */ + NOSYS /* 253 */ + NOSYS /* 254 */ + NOSYS /* 255 */ + +#endif /* lint */ diff --git a/usr/src/lib/brand/sn1/sparcv9/Makefile b/usr/src/lib/brand/sn1/sparcv9/Makefile new file mode 100644 index 0000000000..71d84e94ae --- /dev/null +++ b/usr/src/lib/brand/sn1/sparcv9/Makefile @@ -0,0 +1,36 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject 
to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# lib/brand/sn1/sparcv9/Makefile + +ISASRCDIR=../sparc + +ASFLAGS_$(CURTYPE) += ${AS_PICFLAGS} + +include ../Makefile.com +include ../../../Makefile.lib.64 + +install: all $(ROOTLIBS64) diff --git a/usr/src/lib/brand/sn1/zone/Makefile b/usr/src/lib/brand/sn1/zone/Makefile new file mode 100644 index 0000000000..08c0d906a5 --- /dev/null +++ b/usr/src/lib/brand/sn1/zone/Makefile @@ -0,0 +1,47 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +BRAND= sn1 +PROGS= sn1_boot +XMLDOCS= config.xml platform.xml +TEMPLATES= SUNWsn1.xml + +all: $(PROGS) + +include ../../Makefile.brand + +lint: + +clean: + -$(RM) $(PROGS) + +install: $(ROOTBRANDDIR) $(ROOTPROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATEDIR) \ + $(ROOTTEMPLATES) + +clobber: + -$(RM) $(ROOTPROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) diff --git a/usr/src/lib/brand/sn1/zone/SUNWsn1.xml b/usr/src/lib/brand/sn1/zone/SUNWsn1.xml new file mode 100644 index 0000000000..f60ef5cb9b --- /dev/null +++ b/usr/src/lib/brand/sn1/zone/SUNWsn1.xml @@ -0,0 +1,38 @@ +<?xml version="1.0"?> + +<!-- + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. Use zonecfg(1M) instead. 
+--> + +<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1"> + +<zone name="default" zonepath="" autoboot="false" brand="sn1"> + <inherited-pkg-dir directory="/lib"/> + <inherited-pkg-dir directory="/platform"/> + <inherited-pkg-dir directory="/sbin"/> + <inherited-pkg-dir directory="/usr"/> +</zone> diff --git a/usr/src/lib/brand/sn1/zone/config.xml b/usr/src/lib/brand/sn1/zone/config.xml new file mode 100644 index 0000000000..5873587953 --- /dev/null +++ b/usr/src/lib/brand/sn1/zone/config.xml @@ -0,0 +1,89 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. 
+--> + +<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN" + "file:///usr/share/lib/xml/dtd/brand.dtd.1"> + +<brand name="sn1"> + <modname>sn1_brand</modname> + + <initname>/sbin/init</initname> + <login_cmd>/usr/bin/login -z %Z -f %u</login_cmd> + + <install>/usr/lib/lu/lucreatezone -z %z</install> + <installopts></installopts> + <boot>/usr/lib/brand/sn1/sn1_boot %R</boot> + <halt></halt> + <verify_cfg></verify_cfg> + <verify_adm></verify_adm> + <postclone></postclone> + + <privilege set="default" name="contract_event" /> + <privilege set="default" name="contract_observer" /> + <privilege set="default" name="file_chown" /> + <privilege set="default" name="file_chown_self" /> + <privilege set="default" name="file_dac_execute" /> + <privilege set="default" name="file_dac_read" /> + <privilege set="default" name="file_dac_search" /> + <privilege set="default" name="file_dac_write" /> + <privilege set="default" name="file_owner" /> + <privilege set="default" name="file_setid" /> + <privilege set="default" name="ipc_dac_read" /> + <privilege set="default" name="ipc_dac_write" /> + <privilege set="default" name="ipc_owner" /> + <privilege set="default" name="net_bindmlp" /> + <privilege set="default" name="net_icmpaccess" /> + <privilege set="default" name="net_mac_aware" /> + <privilege set="default" name="net_privaddr" /> + <privilege set="default" name="proc_chroot" /> + <privilege set="default" name="sys_audit" /> + <privilege set="default" name="proc_audit" /> + <privilege set="default" name="proc_owner" /> + <privilege set="default" name="proc_setid" /> + <privilege set="default" name="proc_taskid" /> + <privilege set="default" name="sys_acct" /> + <privilege set="default" name="sys_admin" /> + <privilege set="default" name="sys_mount" /> + <privilege set="default" name="sys_nfs" /> + <privilege set="default" name="sys_resource" /> + + <privilege set="prohibited" name="dtrace_kernel" /> + <privilege set="prohibited" name="proc_zone" /> + <privilege 
set="prohibited" name="sys_config" /> + <privilege set="prohibited" name="sys_devices" /> + <privilege set="prohibited" name="sys_linkdir" /> + <privilege set="prohibited" name="sys_net_config" /> + <privilege set="prohibited" name="sys_res_config" /> + <privilege set="prohibited" name="sys_suser_compat" /> + + <privilege set="required" name="proc_exec" /> + <privilege set="required" name="proc_fork" /> + <privilege set="required" name="sys_mount" /> +</brand> diff --git a/usr/src/lib/brand/sn1/zone/platform.xml b/usr/src/lib/brand/sn1/zone/platform.xml new file mode 100644 index 0000000000..01b03485ea --- /dev/null +++ b/usr/src/lib/brand/sn1/zone/platform.xml @@ -0,0 +1,102 @@ +<?xml version="1.0"?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. 
+--> + +<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN" + "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1"> + +<platform name="native"> + + <!-- Global filesystems to mount when booting the zone --> + <global_mount special="/dev" directory="/dev" type="dev" + opt="attrdir=%R/dev"/> + + <!-- Local filesystems to mount when booting the zone --> + <mount special="/proc" directory="/proc" type="proc" /> + <mount special="ctfs" directory="/system/contract" type="ctfs" /> + <mount special="mnttab" directory="/etc/mnttab" type="mntfs" /> + <mount special="objfs" directory="/system/object" type="objfs" /> + <mount special="swap" directory="/etc/svc/volatile" type="tmpfs" /> + + <!-- Devices to create under /dev --> + <device match="arp" /> + <device match="conslog" /> + <device match="cpu/self/cpuid" /> + <device match="crypto" /> + <device match="cryptoadm" /> + <device match="dsk" /> + <device match="dtrace/*" /> + <device match="dtrace/provider/*" /> + <device match="fd" /> + <device match="kstat" /> + <device match="lo0" /> + <device match="lo1" /> + <device match="lo2" /> + <device match="lo3" /> + <device match="log" /> + <device match="logindmux" /> + <device match="null" /> + <device match="openprom" arch="sparc" /> + <device match="poll" /> + <device match="pool" /> + <device match="ptmx" /> + <device match="pts/*" /> + <device match="random" /> + <device match="rdsk" /> + <device match="rmt" /> + <device match="sad/user" /> + <device match="swap" /> + <device match="sysevent" /> + <device match="tcp" /> + <device match="tcp6" /> + <device match="term" /> + <device match="ticlts" /> + <device match="ticots" /> + <device match="ticotsord" /> + <device match="tty" /> + <device match="udp" /> + <device match="udp6" /> + <device match="urandom" /> + <device match="zero" /> + <device match="zfs" /> + + <!-- Renamed devices to create under /dev --> + <device match="zcons/%z/zoneconsole" name="zconsole" /> + + <!-- Symlinks to create under /dev 
--> + <symlink source="console" target="zconsole" /> + <symlink source="dtremote" target="/dev/null" /> + <symlink source="msglog" target="zconsole" /> + <symlink source="stderr" target="./fd/2" /> + <symlink source="stdin" target="./fd/0" /> + <symlink source="stdout" target="./fd/1" /> + <symlink source="syscon" target="zconsole" /> + <symlink source="sysmsg" target="zconsole" /> + <symlink source="systty" target="zconsole" /> +</platform> diff --git a/usr/src/lib/brand/sn1/zone/sn1_boot.sh b/usr/src/lib/brand/sn1/zone/sn1_boot.sh new file mode 100644 index 0000000000..d1f47532c4 --- /dev/null +++ b/usr/src/lib/brand/sn1/zone/sn1_boot.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# sn1 boot script. +# +# The argument to this script is the root of the zone. 
+# + +PATH=/sbin:/usr/bin:/usr/sbin; export PATH + +ZONEROOT=$1 + +if [ `uname -p` -eq "i386" ]; then + ARCH64=amd64 +elif [ `uname -p` -eq "sparc" ]; then + ARCH64=sparcv9 +else + echo "Unsupported architecture: " `uname -p` + exit 2 +fi + +crle -u -c ${ZONEROOT}/root/var/ld/ld.config \ + -e LD_PRELOAD=/usr/lib/sn1_brand.so.1 + +if [ `isainfo -b` -eq 64 ]; then + crle -64 -u -c ${ZONEROOT}/root/var/ld/64/ld.config \ + -e LD_PRELOAD=/usr/lib/$ARCH64/sn1_brand.so.1 +fi + +exit 0 diff --git a/usr/src/lib/libbrand/Makefile b/usr/src/lib/libbrand/Makefile new file mode 100644 index 0000000000..2f40dfb9f6 --- /dev/null +++ b/usr/src/lib/libbrand/Makefile @@ -0,0 +1,73 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +include ../Makefile.lib + +SUBDIRS= $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) + +all := TARGET = all +clean := TARGET = clean +clobber := TARGET = clobber +install := TARGET = install +lint := TARGET = lint + +HDRDIR= common +HDRS= libbrand.h + +FILEMODE = 644 +OWNER = root +GROUP = bin + +BRANDDIR= brand +ROOTBRANDDIR= $(ROOTLIBDIR)/$(BRANDDIR) + +DTDS = dtd/brand.dtd.1 dtd/zone_platform.dtd.1 +DTDDIR = $(ROOT)/usr/share/lib/xml +ROOTDTDS= $(DTDS:%=$(DTDDIR)/%) + +$(DTDDIR)/%: % + $(INS.file) + +.KEEP_STATE: + +all clean clobber lint install: $(SUBDIRS) + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +$(ROOTBRANDDIR): + $(INS.dir) + +install: $(ROOTBRANDDIR) $(ROOTDTDS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/lib/libbrand/Makefile.com b/usr/src/lib/libbrand/Makefile.com new file mode 100644 index 0000000000..cd7d83371a --- /dev/null +++ b/usr/src/lib/libbrand/Makefile.com @@ -0,0 +1,50 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +LIBRARY= libbrand.a +VERS= .1 + +OBJECTS= libbrand.o + +include ../../Makefile.lib + +LIBS= $(DYNLIB) $(LINTLIB) +LDLIBS += -lc +$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC) +CPPFLAGS += -I/usr/include/libxml2 -I$(SRCDIR) -D_REENTRANT +$(DYNLIB) := LDLIBS += -lxml2 + +SRCDIR= ../common + +.KEEP_STATE: + +all: $(LIBS) + +lint: lintcheck + +install: all + +include ../../Makefile.targ diff --git a/usr/src/lib/libbrand/amd64/Makefile b/usr/src/lib/libbrand/amd64/Makefile new file mode 100644 index 0000000000..1e902f7e49 --- /dev/null +++ b/usr/src/lib/libbrand/amd64/Makefile @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libbrand/common/libbrand.c b/usr/src/lib/libbrand/common/libbrand.c new file mode 100644 index 0000000000..ce3742adf9 --- /dev/null +++ b/usr/src/lib/libbrand/common/libbrand.c @@ -0,0 +1,790 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <fnmatch.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include <strings.h> +#include <synch.h> +#include <sys/brand.h> +#include <sys/fcntl.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/systeminfo.h> +#include <sys/types.h> +#include <thread.h> +#include <zone.h> + +#include <libbrand_impl.h> +#include <libbrand.h> + +#define DTD_ELEM_BOOT ((const xmlChar *) "boot") +#define DTD_ELEM_BRAND ((const xmlChar *) "brand") +#define DTD_ELEM_COMMENT ((const xmlChar *) "comment") +#define DTD_ELEM_DEVICE ((const xmlChar *) "device") +#define DTD_ELEM_GLOBAL_MOUNT ((const xmlChar *) "global_mount") +#define DTD_ELEM_HALT ((const xmlChar *) "halt") +#define DTD_ELEM_INITNAME ((const xmlChar *) "initname") +#define DTD_ELEM_INSTALL ((const xmlChar *) "install") +#define DTD_ELEM_INSTALLOPTS ((const xmlChar *) "installopts") +#define DTD_ELEM_LOGIN_CMD ((const xmlChar *) "login_cmd") +#define DTD_ELEM_MODNAME ((const xmlChar *) "modname") +#define DTD_ELEM_MOUNT ((const xmlChar *) "mount") +#define DTD_ELEM_POSTCLONE ((const xmlChar *) "postclone") +#define DTD_ELEM_PRIVILEGE ((const xmlChar *) "privilege") +#define DTD_ELEM_SYMLINK ((const xmlChar *) "symlink") +#define DTD_ELEM_VERIFY_CFG ((const xmlChar *) "verify_cfg") +#define DTD_ELEM_VERIFY_ADM ((const xmlChar *) "verify_adm") + +#define DTD_ATTR_ARCH ((const xmlChar *) "arch") +#define DTD_ATTR_DIRECTORY ((const xmlChar *) "directory") +#define DTD_ATTR_MATCH ((const xmlChar *) "match") +#define DTD_ATTR_MODE ((const xmlChar *) "mode") +#define DTD_ATTR_NAME ((const xmlChar *) "name") +#define DTD_ATTR_OPT ((const xmlChar *) "opt") +#define DTD_ATTR_PATH ((const xmlChar *) "path") +#define DTD_ATTR_SET ((const xmlChar *) "set") +#define DTD_ATTR_SOURCE ((const xmlChar *) "source") +#define DTD_ATTR_SPECIAL ((const xmlChar *) "special") +#define DTD_ATTR_TARGET 
((const xmlChar *) "target") +#define DTD_ATTR_TYPE ((const xmlChar *) "type") + +static volatile boolean_t libbrand_initialized = B_FALSE; +static char i_curr_arch[MAXNAMELEN]; +static char i_curr_zone[ZONENAME_MAX]; + +/*ARGSUSED*/ +static void +brand_error_func(void *ctx, const char *msg, ...) +{ + /* + * Ignore error messages from libxml + */ +} + +static boolean_t +libbrand_initialize() +{ + static mutex_t initialize_lock = DEFAULTMUTEX; + + (void) mutex_lock(&initialize_lock); + + if (libbrand_initialized) { + (void) mutex_unlock(&initialize_lock); + return (B_TRUE); + } + + if (sysinfo(SI_ARCHITECTURE, i_curr_arch, sizeof (i_curr_arch)) < 0) { + (void) mutex_unlock(&initialize_lock); + return (B_FALSE); + } + + if (getzonenamebyid(getzoneid(), i_curr_zone, + sizeof (i_curr_zone)) < 0) { + (void) mutex_unlock(&initialize_lock); + return (B_FALSE); + } + + /* + * Note that here we're initializing per-process libxml2 + * state. By doing so we're implicitly assuming that + * no other code in this process is also trying to + * use libxml2. But in most case we know this not to + * be true since we're almost always used in conjunction + * with libzonecfg, which also uses libxml2. Lucky for + * us, libzonecfg initializes libxml2 to essentially + * the same defaults as we're using below. + */ + xmlLineNumbersDefault(1); + xmlLoadExtDtdDefaultValue |= XML_DETECT_IDS; + xmlDoValidityCheckingDefaultValue = 1; + (void) xmlKeepBlanksDefault(0); + xmlGetWarningsDefaultValue = 0; + xmlSetGenericErrorFunc(NULL, brand_error_func); + + libbrand_initialized = B_TRUE; + (void) mutex_unlock(&initialize_lock); + return (B_TRUE); +} + +static const char * +get_curr_arch(void) +{ + if (!libbrand_initialize()) + return (NULL); + + return (i_curr_arch); +} + +static const char * +get_curr_zone(void) +{ + if (!libbrand_initialize()) + return (NULL); + + return (i_curr_zone); +} + +/* + * Internal function to open an XML file + * + * Returns the XML doc pointer, or NULL on failure. 
It will validate the + * document, as well as removing any comments from the document structure. + */ +static xmlDocPtr +open_xml_file(const char *file) +{ + xmlDocPtr doc; + xmlValidCtxtPtr cvp; + int valid; + + if (!libbrand_initialize()) + return (NULL); + + /* + * Parse the file + */ + if ((doc = xmlParseFile(file)) == NULL) + return (NULL); + + /* + * Validate the file + */ + if ((cvp = xmlNewValidCtxt()) == NULL) { + xmlFreeDoc(doc); + return (NULL); + } + cvp->error = brand_error_func; + cvp->warning = brand_error_func; + valid = xmlValidateDocument(cvp, doc); + xmlFreeValidCtxt(cvp); + if (valid == 0) { + xmlFreeDoc(doc); + return (NULL); + } + + return (doc); +} +/* + * Open a handle to the named brand. + * + * Returns a handle to the named brand, which is used for all subsequent brand + * interaction, or NULL if unable to open or initialize the brand. + */ +brand_handle_t * +brand_open(const char *name) +{ + brand_handle_t *bhp; + char path[MAXPATHLEN]; + xmlNodePtr node; + xmlChar *property; + struct stat statbuf; + + /* + * Make sure brand name isn't too long + */ + if (strlen(name) >= MAXNAMELEN) + return (NULL); + + /* + * Check that the brand exists + */ + (void) snprintf(path, sizeof (path), "%s/%s", BRAND_DIR, name); + + if (stat(path, &statbuf) != 0) + return (NULL); + + /* + * Allocate brand handle + */ + if ((bhp = malloc(sizeof (brand_handle_t))) == NULL) + return (NULL); + bzero(bhp, sizeof (brand_handle_t)); + + (void) strcpy(bhp->bh_name, name); + + /* + * Open the configuration file + */ + (void) snprintf(path, sizeof (path), "%s/%s/%s", BRAND_DIR, name, + BRAND_CONFIG); + if ((bhp->bh_config = open_xml_file(path)) == NULL) { + brand_close(bhp); + return (NULL); + } + + /* + * Verify that the name of the brand matches the directory in which it + * is installed. 
+ */ + if ((node = xmlDocGetRootElement(bhp->bh_config)) == NULL) { + brand_close(bhp); + return (NULL); + } + + if (xmlStrcmp(node->name, DTD_ELEM_BRAND) != 0) { + brand_close(bhp); + return (NULL); + } + + if ((property = xmlGetProp(node, DTD_ATTR_NAME)) == NULL) { + brand_close(bhp); + return (NULL); + } + + if (strcmp((char *)property, name) != 0) { + xmlFree(property); + brand_close(bhp); + return (NULL); + } + xmlFree(property); + + /* + * Open handle to platform configuration file. + */ + (void) snprintf(path, sizeof (path), "%s/%s/%s", BRAND_DIR, name, + BRAND_PLATFORM); + if ((bhp->bh_platform = open_xml_file(path)) == NULL) { + brand_close(bhp); + return (NULL); + } + + return (bhp); +} + +/* + * Closes the given brand handle + */ +void +brand_close(brand_handle_t *bhp) +{ + if (bhp->bh_platform != NULL) + xmlFreeDoc(bhp->bh_platform); + if (bhp->bh_config != NULL) + xmlFreeDoc(bhp->bh_config); + free(bhp); +} + +static int +i_substitute_tokens(const char *sbuf, char *dbuf, int dbuf_size, + const char *zonename, const char *zoneroot, const char *username, + const char *curr_zone, int argc, char **argv) +{ + int dst, src, i; + + assert(argc >= 0); + assert((argc == 0) || (argv != NULL)); + + /* + * Walk through the characters, substituting values as needed. 
+ */ + dbuf[0] = '\0'; + dst = 0; + for (src = 0; src < strlen((char *)sbuf) && dst < dbuf_size; src++) { + if (sbuf[src] != '%') { + dbuf[dst++] = sbuf[src]; + continue; + } + + switch (sbuf[++src]) { + case '%': + dst += strlcpy(dbuf + dst, "%", dbuf_size - dst); + break; + case 'R': + if (zoneroot == NULL) + break; + dst += strlcpy(dbuf + dst, zoneroot, dbuf_size - dst); + break; + case 'u': + if (username == NULL) + break; + dst += strlcpy(dbuf + dst, username, dbuf_size - dst); + break; + case 'Z': + if (curr_zone == NULL) + break; + /* name of the zone we're running in */ + dst += strlcpy(dbuf + dst, curr_zone, dbuf_size - dst); + break; + case 'z': + /* name of the zone we're operating on */ + if (zonename == NULL) + break; + dst += strlcpy(dbuf + dst, zonename, dbuf_size - dst); + break; + case '*': + if (argv == NULL) + break; + for (i = 0; i < argc; i++) + dst += snprintf(dbuf + dst, dbuf_size - dst, + " \"%s\"", argv[i]); + break; + } + } + + if (dst >= dbuf_size) + return (-1); + + dbuf[dst] = '\0'; + return (0); +} + +/* + * Retrieve the given tag from the brand. + * Perform the following substitutions as necessary: + * + * %% % + * %u Username + * %z Name of target zone + * %Z Name of current zone + * %R Root of zone + * %* Additional arguments (argc, argv) + * + * Returns 0 on success, -1 on failure. 
+ */ +static int +brand_get_value(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, const char *username, const char *curr_zone, + char *buf, size_t len, int argc, char **argv, const xmlChar *tagname, + boolean_t substitute, boolean_t optional) +{ + xmlNodePtr node; + xmlChar *content; + int err = 0; + + /* + * Retrieve the specified value from the XML doc + */ + if ((node = xmlDocGetRootElement(bhp->bh_config)) == NULL) + return (-1); + + if (xmlStrcmp(node->name, DTD_ELEM_BRAND) != 0) + return (-1); + + for (node = node->xmlChildrenNode; node != NULL; + node = node->next) { + if (xmlStrcmp(node->name, tagname) == 0) + break; + } + + if (node == NULL) + return (-1); + + if ((content = xmlNodeGetContent(node)) == NULL) + return (-1); + + if (strlen((char *)content) == 0) { + /* + * If the entry in the config file is empty, check to see + * whether this is an optional field. If so, we return the + * empty buffer. If not, we return an error. + */ + if (optional) { + buf[0] = '\0'; + } else { + err = -1; + } + } else { + /* Substitute token values as needed. 
*/ + if (substitute) { + if (i_substitute_tokens((char *)content, buf, len, + zonename, zoneroot, username, curr_zone, + argc, argv) != 0) + err = -1; + } else { + if (strlcpy(buf, (char *)content, len) >= len) + err = -1; + } + } + + xmlFree(content); + + return (err); +} + +int +brand_get_boot(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, char *buf, size_t len, int argc, char **argv) +{ + return (brand_get_value(bhp, zonename, zoneroot, NULL, NULL, + buf, len, argc, argv, DTD_ELEM_BOOT, B_TRUE, B_TRUE)); +} + +int +brand_get_brandname(brand_handle_t *bhp, char *buf, size_t len) +{ + if (len <= strlen(bhp->bh_name)) + return (-1); + + (void) strcpy(buf, bhp->bh_name); + + return (0); +} + +int +brand_get_halt(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, char *buf, size_t len, int argc, char **argv) +{ + return (brand_get_value(bhp, zonename, zoneroot, NULL, NULL, + buf, len, argc, argv, DTD_ELEM_HALT, B_TRUE, B_TRUE)); +} + +int +brand_get_initname(brand_handle_t *bhp, char *buf, size_t len) +{ + return (brand_get_value(bhp, NULL, NULL, NULL, NULL, + buf, len, 0, NULL, DTD_ELEM_INITNAME, B_FALSE, B_FALSE)); +} + +int +brand_get_login_cmd(brand_handle_t *bhp, const char *username, + char *buf, size_t len) +{ + const char *curr_zone = get_curr_zone(); + return (brand_get_value(bhp, NULL, NULL, username, curr_zone, + buf, len, 0, NULL, DTD_ELEM_LOGIN_CMD, B_TRUE, B_FALSE)); +} + +int +brand_get_install(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, char *buf, size_t len, int argc, char **argv) +{ + return (brand_get_value(bhp, zonename, zoneroot, NULL, NULL, + buf, len, argc, argv, DTD_ELEM_INSTALL, B_TRUE, B_FALSE)); +} + +int +brand_get_installopts(brand_handle_t *bhp, char *buf, size_t len) +{ + return (brand_get_value(bhp, NULL, NULL, NULL, NULL, + buf, len, 0, NULL, DTD_ELEM_INSTALLOPTS, B_FALSE, B_TRUE)); +} + +int +brand_get_modname(brand_handle_t *bhp, char *buf, size_t len) +{ + return 
(brand_get_value(bhp, NULL, NULL, NULL, NULL, + buf, len, 0, NULL, DTD_ELEM_MODNAME, B_FALSE, B_TRUE)); +} + +int +brand_get_postclone(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, char *buf, size_t len, int argc, char **argv) +{ + return (brand_get_value(bhp, zonename, zoneroot, NULL, NULL, + buf, len, argc, argv, DTD_ELEM_POSTCLONE, B_TRUE, B_TRUE)); +} + +int +brand_get_verify_cfg(brand_handle_t *bhp, char *buf, size_t len) +{ + return (brand_get_value(bhp, NULL, NULL, NULL, NULL, + buf, len, 0, NULL, DTD_ELEM_VERIFY_CFG, B_FALSE, B_TRUE)); +} + +int +brand_get_verify_adm(brand_handle_t *bhp, const char *zonename, + const char *zoneroot, char *buf, size_t len, int argc, char **argv) +{ + return (brand_get_value(bhp, zonename, zoneroot, NULL, NULL, + buf, len, argc, argv, DTD_ELEM_VERIFY_ADM, B_TRUE, B_TRUE)); +} + +int +brand_is_native(brand_handle_t *bhp) +{ + return ((strcmp(bhp->bh_name, NATIVE_BRAND_NAME) == 0) ? 1 : 0); +} + +/* + * Iterate over brand privileges + * + * Walks the brand config, searching for <privilege> elements, calling the + * specified callback for each. Returns 0 on success, or -1 on failure. 
+ */ +int +brand_config_iter_privilege(brand_handle_t *bhp, int (*func)(void *, + const char *, const char *), void *data) +{ + xmlNodePtr node; + xmlChar *name, *set; + int ret; + + if ((node = xmlDocGetRootElement(bhp->bh_config)) == NULL) + return (-1); + + for (node = node->xmlChildrenNode; node != NULL; node = node->next) { + + if (xmlStrcmp(node->name, DTD_ELEM_PRIVILEGE) != 0) + continue; + + name = xmlGetProp(node, DTD_ATTR_NAME); + set = xmlGetProp(node, DTD_ATTR_SET); + + if (name == NULL || set == NULL) { + if (name != NULL) + xmlFree(name); + if (set != NULL) + xmlFree(set); + return (-1); + } + + ret = func(data, (const char *)name, (const char *)set); + + xmlFree(name); + xmlFree(set); + + if (ret != 0) + return (-1); + } + + return (0); +} + +static int +i_brand_platform_iter_mounts(brand_handle_t *bhp, const char *zoneroot, + int (*func)(void *, const char *, const char *, const char *, + const char *), void *data, const xmlChar *mount_type) +{ + xmlNodePtr node; + xmlChar *special, *dir, *type, *opt; + char special_exp[MAXPATHLEN]; + char opt_exp[MAXPATHLEN]; + int ret; + + if ((node = xmlDocGetRootElement(bhp->bh_platform)) == NULL) + return (-1); + + for (node = node->xmlChildrenNode; node != NULL; node = node->next) { + + if (xmlStrcmp(node->name, mount_type) != 0) + continue; + + special = xmlGetProp(node, DTD_ATTR_SPECIAL); + dir = xmlGetProp(node, DTD_ATTR_DIRECTORY); + type = xmlGetProp(node, DTD_ATTR_TYPE); + opt = xmlGetProp(node, DTD_ATTR_OPT); + if ((special == NULL) || (dir == NULL) || (type == NULL) || + (opt == NULL)) { + ret = -1; + goto next; + } + + /* Substitute token values as needed. 
*/ + if ((ret = i_substitute_tokens((char *)special, + special_exp, sizeof (special_exp), + NULL, zoneroot, NULL, NULL, 0, NULL)) != 0) + goto next; + + /* opt might not be defined */ + if (strlen((const char *)opt) == 0) { + xmlFree(opt); + opt = NULL; + } else { + if ((ret = i_substitute_tokens((char *)opt, + opt_exp, sizeof (opt_exp), + NULL, zoneroot, NULL, NULL, 0, NULL)) != 0) + goto next; + } + + ret = func(data, (char *)special_exp, (char *)dir, + (char *)type, ((opt != NULL) ? opt_exp : NULL)); + +next: + if (special != NULL) + xmlFree(special); + if (dir != NULL) + xmlFree(dir); + if (type != NULL) + xmlFree(type); + if (opt != NULL) + xmlFree(opt); + if (ret != 0) + return (-1); + } + return (0); +} + + +/* + * Iterate over global platform filesystems + * + * Walks the platform, searching for <global_mount> elements, calling the + * specified callback for each. Returns 0 on success, or -1 on failure. + * + * Perform the following substitutions as necessary: + * + * %R Root of zone + */ +int +brand_platform_iter_gmounts(brand_handle_t *bhp, const char *zoneroot, + int (*func)(void *, const char *, const char *, const char *, + const char *), void *data) +{ + return (i_brand_platform_iter_mounts(bhp, zoneroot, func, data, + DTD_ELEM_GLOBAL_MOUNT)); +} + +/* + * Iterate over non-global zone platform filesystems + * + * Walks the platform, searching for <mount> elements, calling the + * specified callback for each. Returns 0 on success, or -1 on failure. + */ +int +brand_platform_iter_mounts(brand_handle_t *bhp, int (*func)(void *, + const char *, const char *, const char *, const char *), void *data) +{ + return (i_brand_platform_iter_mounts(bhp, NULL, func, data, + DTD_ELEM_MOUNT)); +} + +/* + * Iterate over platform symlinks + * + * Walks the platform, searching for <symlink> elements, calling the + * specified callback for each. Returns 0 on success, or -1 on failure. 
+ */ +int +brand_platform_iter_link(brand_handle_t *bhp, + int (*func)(void *, const char *, const char *), void *data) +{ + xmlNodePtr node; + xmlChar *source, *target; + int ret; + + if ((node = xmlDocGetRootElement(bhp->bh_platform)) == NULL) + return (-1); + + for (node = node->xmlChildrenNode; node != NULL; node = node->next) { + + if (xmlStrcmp(node->name, DTD_ELEM_SYMLINK) != 0) + continue; + + source = xmlGetProp(node, DTD_ATTR_SOURCE); + target = xmlGetProp(node, DTD_ATTR_TARGET); + + if (source == NULL || target == NULL) { + if (source != NULL) + xmlFree(source); + if (target != NULL) + xmlFree(target); + return (-1); + } + + ret = func(data, (char *)source, (char *)target); + + xmlFree(source); + xmlFree(target); + + if (ret != 0) + return (-1); + } + + return (0); +} + +/* + * Iterate over platform devices + * + * Walks the platform, searching for <device> elements, calling the + * specified callback for each. Returns 0 on success, or -1 on failure. + */ +int +brand_platform_iter_devices(brand_handle_t *bhp, const char *zonename, + int (*func)(void *, const char *, const char *), void *data) +{ + const char *curr_arch = get_curr_arch(); + xmlNodePtr node; + xmlChar *match, *name, *arch; + char match_exp[MAXPATHLEN]; + boolean_t err = B_FALSE; + int ret = 0; + + + assert(bhp != NULL); + assert(zonename != NULL); + assert(func != NULL); + + if ((node = xmlDocGetRootElement(bhp->bh_platform)) == NULL) + return (-1); + + for (node = node->xmlChildrenNode; node != NULL; node = node->next) { + + if (xmlStrcmp(node->name, DTD_ELEM_DEVICE) != 0) + continue; + + match = xmlGetProp(node, DTD_ATTR_MATCH); + name = xmlGetProp(node, DTD_ATTR_NAME); + arch = xmlGetProp(node, DTD_ATTR_ARCH); + if ((match == NULL) || (name == NULL) || (arch == NULL)) { + err = B_TRUE; + goto next; + } + + /* check if the arch matches */ + if ((strcmp((char *)arch, "all") != 0) && + (strcmp((char *)arch, curr_arch) != 0)) + goto next; + + /* Substitute token values as needed. 
*/ + if ((ret = i_substitute_tokens((char *)match, + match_exp, sizeof (match_exp), + zonename, NULL, NULL, NULL, 0, NULL)) != 0) { + err = B_TRUE; + goto next; + } + + /* name might not be defined */ + if (strlen((const char *)name) == 0) { + xmlFree(name); + name = NULL; + } + + /* invoke the callback */ + ret = func(data, (const char *)match_exp, (const char *)name); + +next: + if (match != NULL) + xmlFree(match); + if (name != NULL) + xmlFree(name); + if (arch != NULL) + xmlFree(arch); + if (err) + return (-1); + if (ret != 0) + return (-1); + } + + return (0); +} diff --git a/usr/src/lib/libbrand/common/libbrand.h b/usr/src/lib/libbrand/common/libbrand.h new file mode 100644 index 0000000000..93855746f0 --- /dev/null +++ b/usr/src/lib/libbrand/common/libbrand.h @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBBRAND_H +#define _LIBBRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +typedef struct brand_handle brand_handle_t; + +extern brand_handle_t *brand_open(const char *); +extern void brand_close(brand_handle_t *); + +extern int brand_is_native(brand_handle_t *); + +extern int brand_get_boot(brand_handle_t *, const char *, const char *, + char *, size_t, int, char **); +extern int brand_get_brandname(brand_handle_t *, char *, size_t); +extern int brand_get_halt(brand_handle_t *, const char *, const char *, + char *, size_t, int, char **); +extern int brand_get_initname(brand_handle_t *, char *, size_t); +extern int brand_get_install(brand_handle_t *, const char *, const char *, + char *, size_t, int, char **); +extern int brand_get_installopts(brand_handle_t *, char *, size_t); +extern int brand_get_login_cmd(brand_handle_t *, const char *, char *, size_t); +extern int brand_get_modname(brand_handle_t *, char *, size_t); +extern int brand_get_postclone(brand_handle_t *, const char *, const char *, + char *, size_t, int, char **); +extern int brand_get_verify_cfg(brand_handle_t *, char *, size_t); +extern int brand_get_verify_adm(brand_handle_t *, const char *, const char *, + char *, size_t, int, char **); + +extern int brand_config_iter_privilege(brand_handle_t *, + int (*func)(void *, const char *, const char *), void *); + +extern int brand_platform_iter_devices(brand_handle_t *, const char *, + int (*)(void *, const char *, const char *), void *); +extern int brand_platform_iter_gmounts(brand_handle_t *, const char *, + int (*)(void *, const char *, const char *, const char *, const char *), + void *); +extern int brand_platform_iter_link(brand_handle_t *, int (*)(void *, + const char *, const char *), void *); +extern int brand_platform_iter_mounts(brand_handle_t *, int (*)(void *, + const char *, const char *, const char *, const char *), void *); + +#ifdef __cplusplus +} 
+#endif + +#endif /* _LIBBRAND_H */ diff --git a/usr/src/lib/libbrand/common/libbrand_impl.h b/usr/src/lib/libbrand/common/libbrand_impl.h new file mode 100644 index 0000000000..388e35f9a2 --- /dev/null +++ b/usr/src/lib/libbrand/common/libbrand_impl.h @@ -0,0 +1,53 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LIBBRAND_IMPL_H +#define _LIBBRAND_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> +#include <libbrand.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct brand_handle { + char bh_name[MAXNAMELEN]; + xmlDocPtr bh_config; + xmlDocPtr bh_platform; +}; + +#define BRAND_DIR "/usr/lib/brand" +#define BRAND_CONFIG "config.xml" +#define BRAND_PLATFORM "platform.xml" + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBBRAND_IMPL_H */ diff --git a/usr/src/lib/libbrand/common/llib-lbrand b/usr/src/lib/libbrand/common/llib-lbrand new file mode 100644 index 0000000000..b5b1c81d1a --- /dev/null +++ b/usr/src/lib/libbrand/common/llib-lbrand @@ -0,0 +1,31 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + #pragma ident "%Z%%M% %I% %E% SMI" + +/* LINTLIBRARY */ +/* PROTOLIB1 */ + + #include <libbrand.h> diff --git a/usr/src/lib/libbrand/common/mapfile-vers b/usr/src/lib/libbrand/common/mapfile-vers new file mode 100644 index 0000000000..a9daa750ad --- /dev/null +++ b/usr/src/lib/libbrand/common/mapfile-vers @@ -0,0 +1,51 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +SUNWprivate { + global: + brand_close; + brand_config_iter_privilege; + brand_get_boot; + brand_get_brandname; + brand_get_halt; + brand_get_initname; + brand_get_install; + brand_get_installopts; + brand_get_login_cmd; + brand_get_modname; + brand_get_postclone; + brand_get_verify_adm; + brand_get_verify_cfg; + brand_is_native; + brand_open; + brand_platform_iter_devices; + brand_platform_iter_gmounts; + brand_platform_iter_link; + brand_platform_iter_mounts; + local: + *; +}; diff --git a/usr/src/lib/libbrand/dtd/brand.dtd.1 b/usr/src/lib/libbrand/dtd/brand.dtd.1 new file mode 100644 index 0000000000..b03a2f23e8 --- /dev/null +++ b/usr/src/lib/libbrand/dtd/brand.dtd.1 @@ -0,0 +1,241 @@ +<?xml version='1.0' encoding='UTF-8' ?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. +--> + +<!-- + verify_cfg + + Identifies the program to be invoked by zonecfg to verify that the + zone's configuration is legal, and that all the configured devices, + attributes, etc. are legal for this brand. 
+ + The program is called with a single argument: the path to a file + containing a temporary config.xml file for the zone. It should return 0 + on success and non-0 on failure. Any detailed error messages should be + displayed to stderr. + + It has no attributes. + +--> +<!ELEMENT verify_cfg (#PCDATA) > +<!ATTLIST verify_cfg> +<!-- + verify_adm + + Identifies the program invoked by zoneadm to perform brand-specific + checks as to the viability of a zone on this specific machine. + + The following replacements are performed: + + %z Name of zone + %R Root of zone + %* Additional arguments, if any + + The program should return 0 on success and non-0 on failure. Any + detailed error messages should be displayed to stderr. + + It has no attributes. + +--> +<!ELEMENT verify_adm (#PCDATA) > +<!ATTLIST verify_adm> + +<!-- + install + + Identifies the program to invoke when installing a zone. The following + replacements are performed: + + %z Name of zone + %R Root of zone + %* Additional arguments, if any + + It has no attributes. +--> +<!ELEMENT install (#PCDATA) > +<!ATTLIST install> + +<!-- + installopts + + Identifies the command-line options supported by the brand's + installation program, allowing zoneadm to parse the install line + properly. + + It has no attributes. +--> +<!ELEMENT installopts (#PCDATA) > +<!ATTLIST installopts> + +<!-- + boot + + This is a program which gets run by zoneadmd when a zone is booted. + The program will be invoked as the last step in the zone booting + process before the first process is spawned inside the zone. + + If this program succeeds it should not generate any output. + If this program returns an error, any output generated by the + program will be sent to the zoneadmd message log. + + The following replacements are performed: + + %z Name of zone + %R Root of zone + %* Additional arguments, if any + + It has no attributes.
+--> +<!ELEMENT boot (#PCDATA) > +<!ATTLIST boot> + +<!-- + halt + + This is a program which gets run by zoneadmd when a zone is being + halted. This callback is provided to allow a brand to clean up any + special configuration that was set up during boot. + + This program will also be invoked by zoneadmd if any part of the zone + booting process fails, even if the booting process failed before the + brand boot program was invoked. It is also possible that if the zone + fails to halt after invoking this program, future attempts to halt the + zone will invoke this program again. So this program should be + designed to clean up any resources allocated to a zone but it should + also be able to gracefully handle the case where resources that it + expects to release are not actually allocated (or have been already + released.) + + If this program succeeds it should not generate any output. If this + program returns an error, any output generated by the program will be + sent to the zoneadmd message log. + + The following replacements are performed: + + %z Name of zone + %R Root of zone + %* Additional arguments, if any + + It has no attributes. +--> +<!ELEMENT halt (#PCDATA) > +<!ATTLIST halt> + +<!-- + modname + + Path to the kernel module that implements the kernel-level + functionality of the brand. + + It has no attributes. +--> +<!ELEMENT modname (#PCDATA) > +<!ATTLIST modname> + +<!-- + initname + + Path to the initial executable that should be launched when booting a + branded zone. + + It has no attributes. +--> +<!ELEMENT initname (#PCDATA) > +<!ATTLIST initname> + +<!-- + login_cmd + + Path to the initial login binary that should be executed when + attempting to zlogin into a branded zone. + + The following replacements are performed: + + %Z Name of the current zone + %u User login name + + It has no attributes.
+--> +<!ELEMENT login_cmd (#PCDATA) > +<!ATTLIST login_cmd> + +<!-- + postclone + + Path to a script that will perform any necessary post-processing on + a zone after it has been freshly copied. + + The following replacements are performed: + + %z Name of zone + %R Root of zone + %* Additional arguments, if any + + It has no attributes. +--> +<!ELEMENT postclone (#PCDATA) > +<!ATTLIST postclone> + +<!-- + privilege + + Add a privilege to the default, prohibited, or required set for all + zones of this brand. If a privilege is added to the default set all + zones of this brand on the system will inherit this privilege unless + the privilege is removed via limitpriv in zonecfg(1m). If a + privilege is added to the prohibited set it can not be added to + any zones via limitpriv in zonecfg(1m). If a privilege is added + to the required set then all zones of this brand on the system + will inherit this privilege and it can't be removed via limitpriv in + zonecfg(1m). + + Its attributes are + set The name of the set the privilege should go into. + name The name of the privilege. +--> +<!ELEMENT privilege (#PCDATA) > +<!ATTLIST privilege set ( default | prohibited | required ) #REQUIRED + name CDATA #REQUIRED > + +<!-- + brand + + The toplevel container for a brand configuration. + + Its attributes are + + name The name of the brand. This must match the name of the + directory in which the configuration file is stored. 
+--> + +<!ELEMENT brand (modname, initname, login_cmd, install, installopts, + boot, halt, verify_cfg, verify_adm, postclone, + privilege+)> + +<!ATTLIST brand name CDATA #REQUIRED> diff --git a/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 b/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 new file mode 100644 index 0000000000..a9e8c07bdd --- /dev/null +++ b/usr/src/lib/libbrand/dtd/zone_platform.dtd.1 @@ -0,0 +1,144 @@ +<?xml version='1.0' encoding='UTF-8' ?> + +<!-- + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Copyright 2006 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. + + ident "%Z%%M% %I% %E% SMI" + + DO NOT EDIT THIS FILE. +--> + +<!-- + device + + Defines a device (or set of devices) to be exported into the zone. + + Its attributes are + + match Pattern to match under /dev. Follows fnmatch(3c) rules. + The following replacements are performed: + %z Name of zone + + name Name of device in non-global zone. This is optional; the + default is the same name as the global zone. + + arch Identifies devices only available for certain architectures. + Can be "sparc" or "i386". 
+ + For example, the following entry: + <device match="brand/windows/foo" name="bar" arch="sparc" /> + would result in mapping the following global zone device: + /dev/brand/windows/foo + into the zone as: + /dev/bar + but the mapping would only exist on sparc machines. + If arch is omitted, it defaults to "all". +--> +<!ELEMENT device EMPTY > + +<!ATTLIST device match CDATA #REQUIRED + name CDATA "" + arch ( all | sparc | i386 ) "all" > + +<!-- + symlink + + Defines a symlink to be created under /dev. + + Its attributes are + + source Link source + + target Link target +--> +<!ELEMENT symlink EMPTY > + +<!ATTLIST symlink source CDATA #REQUIRED + target CDATA #REQUIRED > + +<!-- + global_mount + + Describes a filesystem that must be mounted before the zone is booted. + This mount is performed by a thread executing in the context of + the global zone. + + Its attributes are + + special The special device as used by the mount command. + This path is relative to the global zone. + The following replacements are performed: + %R Root of zone + + directory The directory where it will be mounted. + This path is relative to the non-global zone. + + type The filesystem type +--> +<!ELEMENT global_mount EMPTY > + +<!ATTLIST global_mount special CDATA #REQUIRED + directory CDATA #REQUIRED + opt CDATA "" + type CDATA #REQUIRED> + +<!-- + mount + + Describes a filesystem that must be mounted before the zone is booted. + This mount is performed by a thread executing in the context of + the non-global zone. + + Its attributes are + + special The special device as used by the mount command + This path is relative to the non-global zone. + + directory The directory where it will be mounted. + This path is relative to the non-global zone. + + type The filesystem type +--> +<!ELEMENT mount EMPTY > + +<!ATTLIST mount special CDATA #REQUIRED + directory CDATA #REQUIRED + opt CDATA "" + type CDATA #REQUIRED> + +<!-- + platform + + The toplevel container for a virtual platform configuration.
The virtual + platform describes the basic elements to bring up the necessary services + (filesystems, devices, etc) to boot the zone. + + Its attributes are + + name The name of the brand. This must match the name of the + directory in which this file is stored, as well as the name + of the brand that refers to it. +--> +<!ELEMENT platform (device | global_mount | mount | symlink)* > + +<!ATTLIST platform name CDATA #REQUIRED> diff --git a/usr/src/lib/libbrand/i386/Makefile b/usr/src/lib/libbrand/i386/Makefile new file mode 100644 index 0000000000..f237b7e80a --- /dev/null +++ b/usr/src/lib/libbrand/i386/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libbrand/sparc/Makefile b/usr/src/lib/libbrand/sparc/Makefile new file mode 100644 index 0000000000..f237b7e80a --- /dev/null +++ b/usr/src/lib/libbrand/sparc/Makefile @@ -0,0 +1,29 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +include ../Makefile.com + +install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT) diff --git a/usr/src/lib/libbrand/sparcv9/Makefile b/usr/src/lib/libbrand/sparcv9/Makefile new file mode 100644 index 0000000000..1e902f7e49 --- /dev/null +++ b/usr/src/lib/libbrand/sparcv9/Makefile @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64) diff --git a/usr/src/lib/libbsm/audit_event.txt b/usr/src/lib/libbsm/audit_event.txt index 3694974722..9e383aa97f 100644 --- a/usr/src/lib/libbsm/audit_event.txt +++ b/usr/src/lib/libbsm/audit_event.txt @@ -337,6 +337,7 @@ 291:AUE_MODADDPRIV:modctl(2) - configure additional privilege:as 292:AUE_CRYPTOADM:kernel cryptographic framework:as 293:AUE_CONFIGSSL:configure kernel SSL:as +294:AUE_BRANDSYS:brandsys(2):ot # # user level audit events # 2048 - 6143 Reserved diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 4293c30a14..9b794e55f5 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -278,6 +278,7 @@ COMSYSOBJS= \ utime.o \ utimes.o \ utssys.o \ + uucopy.o \ vhangup.o \ waitid.o \ write.o \ diff --git a/usr/src/lib/libc/common/sys/uucopy.s b/usr/src/lib/libc/common/sys/uucopy.s new file mode 100644 index 0000000000..758ef92158 --- /dev/null +++ b/usr/src/lib/libc/common/sys/uucopy.s @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + + .file "uucopy.s" + +#include <sys/asm_linkage.h> + + ANSI_PRAGMA_WEAK(uucopy,function) + ANSI_PRAGMA_WEAK(uucopystr,function) + +#include "SYS.h" + + SYSCALL_RVAL1(uucopy) + RET + SET_SIZE(uucopy) + + SYSCALL_RVAL1(uucopystr) + RET + SET_SIZE(uucopystr) diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index d90e6d05f8..aa3e7df8c1 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -300,6 +300,7 @@ COMSYSOBJS= \ utime.o \ utimes.o \ utssys.o \ + uucopy.o \ vhangup.o \ waitid.o \ write.o \ diff --git a/usr/src/lib/libc/inc/synonyms.h b/usr/src/lib/libc/inc/synonyms.h index 3dfd31722f..d0b790016a 100644 --- a/usr/src/lib/libc/inc/synonyms.h +++ b/usr/src/lib/libc/inc/synonyms.h @@ -1114,6 +1114,8 @@ extern "C" { #define utmpname _utmpname #define utmpxname _utmpxname #define utssys _utssys +#define uucopy _uucopy +#define uucopystr _uucopystr #define vfork _vfork #define vfscanf _vfscanf #define vhangup _vhangup diff --git a/usr/src/lib/libc/port/gen/setpriority.c b/usr/src/lib/libc/port/gen/setpriority.c index 933c3f916c..bb5f2848ee 100644 --- a/usr/src/lib/libc/port/gen/setpriority.c +++ b/usr/src/lib/libc/port/gen/setpriority.c @@ -2,9 +2,8 
@@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,7 +46,9 @@ #include <sys/resource.h> #include <sys/procset.h> #include <sys/priocntl.h> +#include <limits.h> #include <errno.h> +#include <priv.h> static idtype_t prio_to_idtype(int which) @@ -149,6 +150,7 @@ setpriority(int which, id_t who, int prio) id_t id; idtype_t idtype; pcnice_t pcnice; + int ret; if ((idtype = prio_to_idtype(which)) == -1) { errno = EINVAL; @@ -178,5 +180,43 @@ setpriority(int which, id_t who, int prio) pcnice.pc_val = prio; pcnice.pc_op = PC_SETNICE; - return (priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice)); + ret = priocntl(idtype, id, PC_DONICE, (caddr_t)&pcnice); + + if (ret != 0 && errno == EPERM) { + int incr; + int tmp; + pcnice_t gpcnice = { 0, PC_GETNICE }; + priv_set_t *pset; + + /* + * The priocntl PC_DONICE subcommand returns EPERM if we lack + * sufficient privileges to carry out the operation, but + * setpriority(3C) needs to return EACCES. We can't just change + * EPERM to EACCES, because there are other conditions which + * legitimately cause EPERM (such as an euid/ruid mismatch + * between the current process and the target.). 
+ */ + if ((tmp = priocntl(idtype, id, PC_DONICE, + (caddr_t)&gpcnice)) != 0) + return (tmp); + + incr = prio - gpcnice.pc_val; + + if ((pset = priv_allocset()) == NULL || + getppriv(PRIV_EFFECTIVE, pset) != 0) + return (-1); + + /* + * setpriority(3C) must return EACCES if we lack the privilege + * checked for below and we are trying to increase the process + * priority (by lowering the numeric value of its priority). + */ + if ((incr < 0 || incr > 2 * NZERO) && + !priv_ismember(pset, "proc_priocntl")) + errno = EACCES; + + priv_freeset(pset); + } + + return (ret); } diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 35015ff874..42b24ea43d 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -106,6 +106,8 @@ SUNW_1.23 { # SunOS 5.11 (Solaris 11) timer_getoverrun; timer_gettime; timer_settime; + uucopy; + uucopystr; } SUNW_1.22.2; SUNW_1.22.2 { @@ -1956,6 +1958,8 @@ SUNWprivate_1.1 { _sem_trywait; _sem_unlink; _sem_wait; + set_setcontext_enforcement; + set_l10n_alternate_root; _setbufend; _setegid; _setenv; @@ -1966,6 +1970,7 @@ SUNWprivate_1.1 { _setpwent; _setregid; _setreuid; + setsigacthandler; _setspent; _settimeofday; _setutent; diff --git a/usr/src/lib/libc/port/threads/pthread.c b/usr/src/lib/libc/port/threads/pthread.c index 2215647391..d8c324a7d9 100644 --- a/usr/src/lib/libc/port/threads/pthread.c +++ b/usr/src/lib/libc/port/threads/pthread.c @@ -102,14 +102,18 @@ _pthread_create(pthread_t *thread, const pthread_attr_t *attr, error = _thrp_create(ap->stkaddr, ap->stksize, start_routine, arg, flag, &tid, priority, policy, ap->guardsize); if (error == 0) { + int prio_err; + if (mapped) { ulwp_t *ulwp = find_lwp(tid); ulwp->ul_pri_mapped = 1; ulwp->ul_mappedpri = mappedpri; ulwp_unlock(ulwp, udp); } - if (rt && _thrp_setlwpprio(tid, policy, priority)) - thr_panic("_thrp_setlwpprio() failed"); + + if (rt && (prio_err = _thrp_setlwpprio(tid, policy, priority))) + return (prio_err); 
+ if (thread) *thread = tid; (void) _thr_continue(tid); @@ -269,9 +273,14 @@ _thread_setschedparam_main(pthread_t tid, int policy, if (_validate_rt_prio(policy, prio)) error = EINVAL; else { + int prio_err; + if (_private_geteuid() == 0 && - _thrp_setlwpprio(tid, policy, prio)) - thr_panic("_thrp_setlwpprio failed"); + (prio_err = _thrp_setlwpprio(tid, policy, prio))) { + error = prio_err; + goto out; + } + ulwp->ul_policy = policy; if (inheritflag == PRIO_INHERIT) ulwp->ul_epri = prio; diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c index 101b730af3..5f65435999 100644 --- a/usr/src/lib/libc/port/threads/sigaction.c +++ b/usr/src/lib/libc/port/threads/sigaction.c @@ -306,9 +306,10 @@ sigacthandler(int sig, siginfo_t *sip, void *uvp) thr_panic("sigacthandler(): __setcontext() returned"); } -#pragma weak sigaction = _sigaction +#pragma weak sigaction = _libc_sigaction +#pragma weak _sigaction = _libc_sigaction int -_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) +_libc_sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) { ulwp_t *self = curthread; uberdata_t *udp = self->ul_uberdata; @@ -404,6 +405,19 @@ _sigaction(int sig, const struct sigaction *nact, struct sigaction *oact) return (rv); } +void +setsigacthandler(void (*nsigacthandler)(int, siginfo_t *, void *), + void (**osigacthandler)(int, siginfo_t *, void *)) +{ + ulwp_t *self = curthread; + uberdata_t *udp = self->ul_uberdata; + + if (osigacthandler != NULL) + *osigacthandler = udp->sigacthandler; + + udp->sigacthandler = nsigacthandler; +} + /* * Calling set_parking_flag(curthread, 1) informs the kernel that we are * calling __lwp_park or ___lwp_cond_wait(). If we take a signal in @@ -451,6 +465,25 @@ block_all_signals(ulwp_t *self) exit_critical(self); } +/* + * _private_setcontext has code that forcibly restores the curthread + * pointer in a context passed to the setcontext(2) syscall. 
+ * + * Certain processes may need to disable this feature, so these routines + * provide the mechanism to do so. + * + * (As an example, branded 32-bit x86 processes may use %gs for their own + * purposes, so they need to be able to specify a %gs value to be restored + * on return from a signal handler via the passed ucontext_t.) + */ +static int setcontext_enforcement = 1; + +void +set_setcontext_enforcement(int on) +{ + setcontext_enforcement = on; +} + #pragma weak setcontext = _private_setcontext #pragma weak _setcontext = _private_setcontext int @@ -490,16 +523,22 @@ _private_setcontext(const ucontext_t *ucp) /* * We don't know where this context structure has been. * Preserve the curthread pointer, at least. + * + * Allow this feature to be disabled if a particular process + * requests it. */ + if (setcontext_enforcement) { #if defined(__sparc) - uc.uc_mcontext.gregs[REG_G7] = (greg_t)self; + uc.uc_mcontext.gregs[REG_G7] = (greg_t)self; #elif defined(__amd64) - uc.uc_mcontext.gregs[REG_FS] = (greg_t)self->ul_gs; + uc.uc_mcontext.gregs[REG_FS] = (greg_t)self->ul_gs; #elif defined(__i386) - uc.uc_mcontext.gregs[GS] = (greg_t)self->ul_gs; + uc.uc_mcontext.gregs[GS] = (greg_t)self->ul_gs; #else #error "none of __sparc, __amd64, __i386 defined" #endif + } + /* * Make sure that if we return to a call to __lwp_park() * or ___lwp_cond_wait() that it returns right away diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile index 2d23d8087c..f8658e34cf 100644 --- a/usr/src/lib/libc/sparc/Makefile +++ b/usr/src/lib/libc/sparc/Makefile @@ -316,6 +316,7 @@ COMSYSOBJS= \ utime.o \ utimes.o \ utssys.o \ + uucopy.o \ vhangup.o \ waitid.o \ write.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile index df5eb2f5e3..2a03ea74cf 100644 --- a/usr/src/lib/libc/sparcv9/Makefile +++ b/usr/src/lib/libc/sparcv9/Makefile @@ -302,6 +302,7 @@ COMSYSOBJS= \ utime.o \ utimes.o \ utssys.o \ + uucopy.o \ vhangup.o \ waitid.o \ 
 write.o \ diff --git a/usr/src/lib/libnsl/common/mapfile-vers b/usr/src/lib/libnsl/common/mapfile-vers index 15160078c7..a0aa823152 100644 --- a/usr/src/lib/libnsl/common/mapfile-vers +++ b/usr/src/lib/libnsl/common/mapfile-vers @@ -315,6 +315,9 @@ SUNW_0.7 { # SunOS 5.3 (Solaris 2.3) SUNWprivate_1.5 { global: clnt_create_service_timed; + __pmap_set; + __pmap_unset; + __use_portmapper; } SUNWprivate_1.4; SUNWprivate_1.4 { diff --git a/usr/src/lib/libnsl/rpc/pmap_clnt.c b/usr/src/lib/libnsl/rpc/pmap_clnt.c index 781f0f20fd..98c6e431e6 100644 --- a/usr/src/lib/libnsl/rpc/pmap_clnt.c +++ b/usr/src/lib/libnsl/rpc/pmap_clnt.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -36,10 +35,10 @@ #pragma ident "%Z%%M% %I% %E% SMI" #ifdef PORTMAP + /* * interface to pmap rpc service. */ - #include "mt.h" #include "rpc_mt.h" #include <rpc/rpc.h> @@ -48,15 +47,188 @@ #include <rpc/pmap_prot.h> #include <rpc/pmap_clnt.h> #include <rpc/pmap_rmt.h> +#include <string.h> #include <syslog.h> #include <netinet/in.h> #include <sys/socket.h> +#include <unistd.h> +int use_portmapper = 0; static const struct timeval timeout = { 5, 0 }; static const struct timeval tottimeout = { 60, 0 }; static const struct timeval rmttimeout = { 3, 0 }; /* + * Solaris hasn't truly supported local portmappers since Solaris 2.4. + * + * In Solaris 2.0 the portmapper was replaced with rpcbind. Essentially + * rpcbind implements version 3 of the portmapper protocol.
(The last + * version of the portmapper protocol while it was still called + * portmap was version 2.) The rpcbind protocol provides a lot + * of improvements over the portmap protocol. (Like the ability + * to bind to non AF_INET transports like TLI and to unregister + * individual transport providers instead of entire services.) + * + * So in Solaris 2.0 the portmapper was replaced with rpcbind, but it + * wasn't until Solaris 2.5 that all the local portmapper code was + * modified to assume that the local processes managing rpc services + * always supported the rpcbind protocol. When this happened all the + * local portmap registration code was enhanced to translate any + * portmapper requests into rpcbind requests. This is a fine assumption + * for Solaris where we always have control over the local + * portmapper/rpcbind service and we can make sure that it always + * understands the rpcbind protocol. + * + * But this is a problem for BrandZ. In BrandZ we don't have control over + * what local portmapper is running. (Unless we want to replace it.) + * In the Linux case, current Linux distributions don't support the + * rpcbind protocol, instead they support the old portmapper protocol + * (version 2.) So to allow Solaris services to register with the + * Linux portmapper (which we need to support to allow us to run the + * native NFS daemons) there are two things that need to be done. + * + * - The classic interfaces for registering services with the version 2 + * portmapper are pmap_set() and pmap_unset(). In Solaris 2.5 these + * functions were changed to translate portmap requests into rpcbind + * requests. These interfaces need to be enhanced so that if we're + * trying to register with a portmapper instead of rpcbind, we don't + * translate the requests to rpcbind requests. + * + * - Libnsl provides lots of interfaces to simplify the creation of rpc + * services (see rpc_svc_*).
Internally, the interfaces all assume + * that the local process that manages rpc services supports the rpcbind + * protocol. To avoid having to update all rpc services that use these + * functions to be portmapper aware, we need to enhance these functions + * to support the portmapper protocol in addition to rpcbind. + * + * To address both these requirements we've introduced three key functions. + * + * __pmap_set() - Registers services using the portmapper version 2 + * protocol. (Behaves like the Pre-Solaris 2.5 pmap_set()) + * + * __pmap_unset() - Unregisters services using the portmapper version 2 + * protocol. (Behaves like the Pre-Solaris 2.5 pmap_unset()) + * + * __use_portmapper() - Tells libnsl if the local system expects + * the portmapper protocol versus the rpcbind protocol. + * + * If an rpc program uses this interface to tell libnsl + * that it wants to use portmap based services instead of + * rpcbind based services, then libnsl will internally + * replace attempts to register services via rpcbind + * with portmap. + */ + +static CLIENT * +pmap_common(const struct netconfig *nconf, int *socket) +{ + struct sockaddr_in sa_local; + CLIENT *client; + + /* we only support tcp and udp */ + if ((nconf != NULL) && + (strcmp(nconf->nc_netid, "udp") != 0) && + (strcmp(nconf->nc_netid, "tcp") != 0)) + return (NULL); + + /* try connecting to the portmapper via udp */ + get_myaddress(&sa_local); + client = clntudp_bufcreate(&sa_local, PMAPPROG, PMAPVERS, + timeout, socket, RPCSMALLMSGSIZE, RPCSMALLMSGSIZE); + if (client == NULL) { + /* try connecting to the portmapper via tcp */ + client = clnttcp_create(&sa_local, PMAPPROG, PMAPVERS, + socket, RPCSMALLMSGSIZE, RPCSMALLMSGSIZE); + if (client == NULL) + return (NULL); + } + + return (client); +} + +void +__use_portmapper(int p) +{ + use_portmapper = p; +} + +/* + * Set a mapping between program, version and address. + * Calls the portmapper service to do the mapping.
+ */ +bool_t +__pmap_set(const rpcprog_t program, const rpcvers_t version, + const struct netconfig *nconf, const struct netbuf *address) +{ + struct sockaddr_in *sa; + struct pmap parms; + CLIENT *client; + bool_t rslt; + int socket = RPC_ANYSOCK; + + /* address better be a sockaddr_in struct */ + if (address == NULL) + return (FALSE); + if (address->len != sizeof (struct sockaddr_in)) + return (FALSE); + + /* get a connection to the portmapper */ + if (nconf == NULL) + return (FALSE); + if ((client = pmap_common(nconf, &socket)) == NULL) + return (FALSE); + + /* LINTED pointer cast */ + sa = (struct sockaddr_in *)(address->buf); + + /* initialize the portmapper request */ + parms.pm_prog = program; + parms.pm_vers = version; + parms.pm_port = ntohs(sa->sin_port); + parms.pm_prot = + (strcmp(nconf->nc_netid, "udp") == 0) ? IPPROTO_UDP : IPPROTO_TCP; + + /* make the call */ + if (CLNT_CALL(client, PMAPPROC_SET, xdr_pmap, (caddr_t)&parms, + xdr_bool, (char *)&rslt, tottimeout) != RPC_SUCCESS) + rslt = FALSE; + + CLNT_DESTROY(client); + (void) close(socket); + return (rslt); +} + +/* + * Remove the mapping between program, version and port. + * Calls the portmapper service remotely to do the un-mapping. + */ +bool_t +__pmap_unset(const rpcprog_t program, const rpcvers_t version) +{ + struct pmap parms; + CLIENT *client; + bool_t rslt; + int socket = RPC_ANYSOCK; + + /* get a connection to the portmapper */ + if ((client = pmap_common(NULL, &socket)) == NULL) + return (FALSE); + + /* initialize the portmapper request */ + parms.pm_prog = program; + parms.pm_vers = version; + parms.pm_port = 0; + parms.pm_prot = 0; + + /* make the call */ + CLNT_CALL(client, PMAPPROC_UNSET, xdr_pmap, (caddr_t)&parms, + xdr_bool, (char *)&rslt, tottimeout); + CLNT_DESTROY(client); + (void) close(socket); + return (rslt); +} + +/* * Set a mapping between program, version and port. * Calls the pmap service remotely to do the mapping. 
*/ @@ -80,7 +252,10 @@ pmap_set(rpcprog_t program, rpcvers_t version, rpcprot_t protocol, freenetconfigent(nconf); return (FALSE); } - rslt = rpcb_set(program, version, nconf, na); + if (!use_portmapper) + rslt = rpcb_set(program, version, nconf, na); + else + rslt = __pmap_set(program, version, nconf, na); netdir_free((char *)na, ND_ADDR); freenetconfigent(nconf); return (rslt); @@ -97,6 +272,9 @@ pmap_unset(rpcprog_t program, rpcvers_t version) bool_t udp_rslt = FALSE; bool_t tcp_rslt = FALSE; + if (use_portmapper) + return (__pmap_unset(program, version)); + nconf = __rpc_getconfip("udp"); if (nconf) { udp_rslt = rpcb_unset(program, version, nconf); @@ -225,4 +403,5 @@ pmap_rmtcall(struct sockaddr_in *addr, rpcprog_t prog, rpcvers_t vers, *port_ptr = r.port; return (stat); } + #endif /* PORTMAP */ diff --git a/usr/src/lib/libnsl/rpc/svc.c b/usr/src/lib/libnsl/rpc/svc.c index 228bd29774..d188d5d713 100644 --- a/usr/src/lib/libnsl/rpc/svc.c +++ b/usr/src/lib/libnsl/rpc/svc.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* @@ -69,6 +68,11 @@ extern bool_t __svc_get_door_cred(); extern bool_t __rpc_get_local_cred(); +extern int use_portmapper; +extern bool_t __pmap_set(const rpcprog_t, const rpcvers_t, + const struct netconfig *, const struct netbuf *); +extern bool_t __pmap_unset(const rpcprog_t, const rpcvers_t); + SVCXPRT **svc_xports; static int nsvc_xports; /* total number of svc_xports allocated */ @@ -921,10 +925,15 @@ svc_reg(const SVCXPRT *xprt, const rpcprog_t prog, const rpcvers_t vers, rpcb_it: (void) rw_unlock(&svc_lock); + if (!nconf) + return (TRUE); + /* now register the information with the local binder service */ - if (nconf) + if (!use_portmapper) return (rpcb_set(prog, vers, nconf, &xprt->xp_ltaddr)); - return (TRUE); + else + return (__pmap_set(prog, vers, nconf, &xprt->xp_ltaddr)); + /*NOTREACHED*/ } /* @@ -937,7 +946,10 @@ svc_unreg(const rpcprog_t prog, const rpcvers_t vers) struct svc_callout *s; /* unregister the information anyway */ - (void) rpcb_unset(prog, vers, NULL); + if (!use_portmapper) + (void) rpcb_unset(prog, vers, NULL); + else + (void) __pmap_unset(prog, vers); (void) rw_wrlock(&svc_lock); while ((s = svc_find(prog, vers, &prev, NULL)) != NULL_SVC) { if (prev == NULL_SVC) { diff --git a/usr/src/lib/libnsl/rpc/svc_generic.c b/usr/src/lib/libnsl/rpc/svc_generic.c index 553a6390b6..23cd8910ae 100644 --- a/usr/src/lib/libnsl/rpc/svc_generic.c +++ b/usr/src/lib/libnsl/rpc/svc_generic.c @@ -71,6 +71,8 @@ extern void __svc_free_xlist(SVCXPRT_LIST **, mutex_t *); extern bool_t __rpc_try_doors(const char *, bool_t *); +extern int use_portmapper; + /* * The highest level interface for server creation. 
 * It tries for all the nettokens in that particular class of token @@ -139,8 +141,25 @@ svc_create(void (*dispatch)(), const rpcprog_t prognum, const rpcvers_t versnum, (void) mutex_lock(&xprtlist_lock); for (l = _svc_xprtlist; l; l = l->next) { if (strcmp(l->xprt->xp_netid, nconf->nc_netid) == 0) { - /* Found an old one, use it */ - (void) rpcb_unset(prognum, versnum, nconf); + /* + * Note that if we're using a portmapper + * instead of rpcbind then we can't do an + * unregister operation here. + * + * The reason is that the portmapper unset + * operation removes all the entries for a + * given program/version regardless of + * transport protocol. + * + * The caller of this routine needs to ensure + * that __pmap_unset() has been called for all + * program/version service pairs they plan + * to support before they start registering + * each program/version/protocol triplet. + */ + if (!use_portmapper) + (void) rpcb_unset(prognum, + versnum, nconf); if (svc_reg(l->xprt, prognum, versnum, dispatch, nconf) == FALSE) (void) syslog(LOG_ERR, @@ -199,7 +218,25 @@ svc_tp_create(void (*dispatch)(), const rpcprog_t prognum, xprt = svc_tli_create_common(RPC_ANYFD, nconf, NULL, 0, 0, anon_mlp); if (xprt == NULL) return (NULL); - (void) rpcb_unset(prognum, versnum, (struct netconfig *)nconf); + + /* + * Note that if we're using a portmapper + * instead of rpcbind then we can't do an + * unregister operation here. + * + * The reason is that the portmapper unset + * operation removes all the entries for a + * given program/version regardless of + * transport protocol. + * + * The caller of this routine needs to ensure + * that __pmap_unset() has been called for all + * program/version service pairs they plan + * to support before they start registering + * each program/version/protocol triplet.
 + */ + if (!use_portmapper) + (void) rpcb_unset(prognum, versnum, (struct netconfig *)nconf); if (svc_reg(xprt, prognum, versnum, dispatch, nconf) == FALSE) { (void) syslog(LOG_ERR, "svc_tp_create: Could not register prog %d vers %d on %s", diff --git a/usr/src/lib/libnsl/rpc/svc_simple.c b/usr/src/lib/libnsl/rpc/svc_simple.c index 8de4d83391..cf56a14a23 100644 --- a/usr/src/lib/libnsl/rpc/svc_simple.c +++ b/usr/src/lib/libnsl/rpc/svc_simple.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -57,6 +56,8 @@ #include <syslog.h> #include <rpc/nettype.h> +extern int use_portmapper; + static struct proglst { char *(*p_progname)(); rpcprog_t p_prognum; @@ -175,7 +176,24 @@ rpc_reg(const rpcprog_t prognum, const rpcvers_t versnum, (strcmp(pl->p_netid, netid) == 0)) break; if (pl == NULL) { /* Not yet */ - (void) rpcb_unset(prognum, versnum, nconf); + /* + * Note that if we're using a portmapper + * instead of rpcbind then we can't do an + * unregister operation here. + * + * The reason is that the portmapper unset + * operation removes all the entries for a + * given program/version regardless of + * transport protocol.
+ * + * The caller of this routine needs to ensure + * that __pmap_unset() has been called for all + * program/version service pairs they plan + * to support before they start registering + * each program/version/protocol triplet. + */ + if (!use_portmapper) + (void) rpcb_unset(prognum, versnum, nconf); } else { /* so that svc_reg does not call rpcb_set() */ nconf = NULL; diff --git a/usr/src/lib/libproc/Makefile.com b/usr/src/lib/libproc/Makefile.com index b6dcff9bad..d63fd3b217 100644 --- a/usr/src/lib/libproc/Makefile.com +++ b/usr/src/lib/libproc/Makefile.com @@ -30,6 +30,7 @@ VERS = .1 CMNOBJS = \ P32ton.o \ + Pbrand.o \ Pcontrol.o \ Pcore.o \ Pexecname.o \ @@ -81,7 +82,7 @@ include ../../Makefile.rootfs SRCS = $(CMNOBJS:%.o=../common/%.c) $(ISAOBJS:%.o=%.c) LIBS = $(DYNLIB) $(LINTLIB) -LDLIBS += -lrtld_db -lelf -lctf -lc +LDLIBS += -lrtld_db -lelf -lctf -lc -lzonecfg SRCDIR = ../common $(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC) diff --git a/usr/src/lib/libproc/common/Pbrand.c b/usr/src/lib/libproc/common/Pbrand.c new file mode 100644 index 0000000000..f7f3c337ac --- /dev/null +++ b/usr/src/lib/libproc/common/Pbrand.c @@ -0,0 +1,42 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libproc.h" + +char * +Pbrandname(struct ps_prochandle *P, char *buf, size_t buflen) +{ + long addr; + + if ((addr = Pgetauxval(P, AT_SUN_BRANDNAME)) == -1) + return (NULL); + + if (ps_pread(P, addr, buf, buflen) != PS_OK) + return (NULL); + + return (buf); +} diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c index 00e1c9f0a6..e30d5758cb 100644 --- a/usr/src/lib/libproc/common/Pcontrol.c +++ b/usr/src/lib/libproc/common/Pcontrol.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -33,6 +32,7 @@ #include <ctype.h> #include <fcntl.h> #include <string.h> +#include <strings.h> #include <memory.h> #include <errno.h> #include <dirent.h> @@ -56,6 +56,7 @@ int _libproc_debug; /* set non-zero to enable debugging printfs */ sigset_t blockable_sigs; /* signals to block when we need to be safe */ static int minfd; /* minimum file descriptor returned by dupfd(fd, 0) */ +char procfs_path[PATH_MAX] = "/proc"; /* * Function prototypes for static routines in this module. 
@@ -97,6 +98,12 @@ _libproc_init(void) (void) sigdelset(&blockable_sigs, SIGSTOP); } +void +Pset_procfs_path(const char *path) +{ + (void) snprintf(procfs_path, sizeof (procfs_path), "%s", path); +} + /* * Call set_minfd() once before calling dupfd() several times. * We assume that the application will not reduce its current file @@ -168,7 +175,7 @@ Pxcreate(const char *file, /* executable file name */ size_t len) /* size of the path buffer */ { char execpath[PATH_MAX]; - char procname[100]; + char procname[PATH_MAX]; struct ps_prochandle *P; pid_t pid; int fd; @@ -238,7 +245,8 @@ Pxcreate(const char *file, /* executable file name */ /* * Open the /proc/pid files. */ - (void) sprintf(procname, "/proc/%d/", (int)pid); + (void) snprintf(procname, sizeof (procname), "%s/%d/", + procfs_path, (int)pid); fname = procname + strlen(procname); (void) set_minfd(); @@ -505,7 +513,7 @@ Pgrab(pid_t pid, int flags, int *perr) { struct ps_prochandle *P; int fd, omode; - char procname[100]; + char procname[PATH_MAX]; char *fname; int rc = 0; @@ -545,7 +553,8 @@ again: /* Come back here if we lose it in the Window of Vulnerability */ /* * Open the /proc/pid files */ - (void) sprintf(procname, "/proc/%d/", (int)pid); + (void) snprintf(procname, sizeof (procname), "%s/%d/", + procfs_path, (int)pid); fname = procname + strlen(procname); (void) set_minfd(); @@ -1264,7 +1273,7 @@ int Preopen(struct ps_prochandle *P) { int fd; - char procname[100]; + char procname[PATH_MAX]; char *fname; if (P->state == PS_DEAD || P->state == PS_IDLE) @@ -1275,7 +1284,8 @@ Preopen(struct ps_prochandle *P) Pdestroy_agent(P); } - (void) sprintf(procname, "/proc/%d/", (int)P->pid); + (void) snprintf(procname, sizeof (procname), "%s/%d/", + procfs_path, (int)P->pid); fname = procname + strlen(procname); (void) strcpy(fname, "as"); @@ -2653,13 +2663,13 @@ static prheader_t * read_lfile(struct ps_prochandle *P, const char *lname) { prheader_t *Lhp; - char lpath[64]; + char lpath[PATH_MAX]; struct stat64 statb; 
int fd; size_t size; ssize_t rval; - (void) snprintf(lpath, sizeof (lpath), "/proc/%d/%s", + (void) snprintf(lpath, sizeof (lpath), "%s/%d/%s", procfs_path, (int)P->status.pr_pid, lname); if ((fd = open(lpath, O_RDONLY)) < 0 || fstat64(fd, &statb) != 0) { if (fd >= 0) @@ -2931,7 +2941,7 @@ Lgrab(struct ps_prochandle *P, lwpid_t lwpid, int *perr) struct ps_lwphandle **Lp; struct ps_lwphandle *L; int fd; - char procname[100]; + char procname[PATH_MAX]; char *fname; int rc = 0; @@ -2974,7 +2984,8 @@ Lgrab(struct ps_prochandle *P, lwpid_t lwpid, int *perr) /* * Open the /proc/<pid>/lwp/<lwpid> files */ - (void) sprintf(procname, "/proc/%d/lwp/%d/", (int)P->pid, (int)lwpid); + (void) snprintf(procname, sizeof (procname), "%s/%d/lwp/%d/", + procfs_path, (int)P->pid, (int)lwpid); fname = procname + strlen(procname); (void) set_minfd(); diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h index 12dfc1a078..ec105a7ad5 100644 --- a/usr/src/lib/libproc/common/Pcontrol.h +++ b/usr/src/lib/libproc/common/Pcontrol.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -41,6 +40,7 @@ #include <rtld_db.h> #include <libproc.h> #include <libctf.h> +#include <limits.h> #ifdef __cplusplus extern "C" { @@ -247,6 +247,7 @@ extern int Padd_mapping(struct ps_prochandle *, off64_t, file_info_t *, prmap_t *); extern void Psort_mappings(struct ps_prochandle *); +extern char procfs_path[PATH_MAX]; /* * Architecture-dependent definition of the breakpoint instruction. diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c index eb55e20894..80b7e16311 100644 --- a/usr/src/lib/libproc/common/Pcore.c +++ b/usr/src/lib/libproc/common/Pcore.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -813,8 +812,10 @@ fake_up_symtab(struct ps_prochandle *P, const elf_file_header_t *ehdr, if (symtab->sh_addr == 0 || (mp = Paddr2mptr(P, symtab->sh_addr)) == NULL || (fp = mp->map_file) == NULL || - fp->file_symtab.sym_data != NULL) + fp->file_symtab.sym_data != NULL) { + dprintf("fake_up_symtab: invalid section\n"); return; + } if (P->status.pr_dmodel == PR_MODEL_ILP32) { struct { @@ -851,6 +852,7 @@ fake_up_symtab(struct ps_prochandle *P, const elf_file_header_t *ehdr, if (pread64(P->asfd, &b->data[off], b->shdr[1].sh_size, symtab->sh_offset) != b->shdr[1].sh_size) { + dprintf("fake_up_symtab: pread of symtab[1] failed\n"); free(b); return; } @@ -866,6 +868,7 @@ fake_up_symtab(struct ps_prochandle *P, const elf_file_header_t *ehdr, if (pread64(P->asfd, &b->data[off], b->shdr[2].sh_size, strtab->sh_offset) != b->shdr[2].sh_size) { + dprintf("fake_up_symtab: pread of symtab[2] failed\n"); free(b); return; } @@ -949,8 +952,11 @@ fake_up_symtab(struct ps_prochandle *P, const elf_file_header_t *ehdr, if ((scn = elf_getscn(fp->file_symtab.sym_elf, 1)) == NULL || (fp->file_symtab.sym_data = elf_getdata(scn, NULL)) == NULL || (scn = elf_getscn(fp->file_symtab.sym_elf, 2)) == NULL || - (data = elf_getdata(scn, NULL)) == NULL) + (data = elf_getdata(scn, NULL)) == NULL) { + dprintf("fake_up_symtab: failed to get section data at %p\n", + (void *)scn); goto err; + } fp->file_symtab.sym_strs = data->d_buf; fp->file_symtab.sym_strsz = data->d_size; @@ -1447,12 +1453,16 @@ core_iter_mapping(const rd_loadobj_t *rlp, struct ps_prochandle *P) Pbuild_file_symtab(P, fp); - if (fp->file_elf == NULL) + if (fp->file_elf == NULL) { + dprintf("core_iter_mapping: no symtab - going to next\n"); return (1); /* No symbol table; advance to next mapping */ + } /* - * Locate the start of a data segment associated with this file, - * name it after the file, and establish the mp->map_file link: + * Locate the start of a data segment associated with this file. 
+ * Starting with that data segment, name all mappings that + * fall within this file's address range after the file and + * establish their mp->map_file links. */ if ((mp = core_find_data(P, fp->file_elf, fp->file_lo)) != NULL) { dprintf("found data for %s at %p (pr_offset 0x%llx)\n", @@ -1463,14 +1473,25 @@ core_iter_mapping(const rd_loadobj_t *rlp, struct ps_prochandle *P) if (mp->map_pmap.pr_vaddr > fp->file_lo->rl_bend) break; if (mp->map_file == NULL) { + dprintf("%s: associating segment at %p\n", + fp->file_pname, + (void *)mp->map_pmap.pr_vaddr); mp->map_file = fp; fp->file_ref++; + } else { + dprintf("%s: segment at %p already associated " + "with %s\n", fp->file_pname, + (void *)mp->map_pmap.pr_vaddr, + mp->map_file->file_pname); } if (!(mp->map_pmap.pr_mflags & MA_BREAK)) (void) strcpy(mp->map_pmap.pr_mapname, fp->file_pname); } + } else { + dprintf("core_iter_mapping: no data found for %s\n", + fp->file_pname); } return (1); /* Advance to next mapping */ @@ -1514,14 +1535,17 @@ core_load_shdrs(struct ps_prochandle *P, elf_file_t *efp) * Read the section header table from the core file and then iterate * over the section headers, converting each to a GElf_Shdr. 
*/ - shdrs = malloc(efp->e_hdr.e_shnum * sizeof (GElf_Shdr)); - nbytes = efp->e_hdr.e_shnum * efp->e_hdr.e_shentsize; - buf = malloc(nbytes); - - if (shdrs == NULL || buf == NULL) { + if ((shdrs = malloc(efp->e_hdr.e_shnum * sizeof (GElf_Shdr))) == NULL) { dprintf("failed to malloc %u section headers: %s\n", (uint_t)efp->e_hdr.e_shnum, strerror(errno)); - free(buf); + return; + } + + nbytes = efp->e_hdr.e_shnum * efp->e_hdr.e_shentsize; + if ((buf = malloc(nbytes)) == NULL) { + dprintf("failed to malloc %d bytes: %s\n", (int)nbytes, + strerror(errno)); + free(shdrs); goto out; } diff --git a/usr/src/lib/libproc/common/Pexecname.c b/usr/src/lib/libproc/common/Pexecname.c index 3abe134e41..a2c2ccf574 100644 --- a/usr/src/lib/libproc/common/Pexecname.c +++ b/usr/src/lib/libproc/common/Pexecname.c @@ -34,6 +34,7 @@ #include <stdio.h> #include <errno.h> #include <unistd.h> +#include <libzonecfg.h> #include "Pcontrol.h" @@ -84,7 +85,7 @@ Pfindexec(struct ps_prochandle *P, const char *aout, char buf[PATH_MAX]; struct stat st; uintptr_t addr; - char *p, *q; + char *p = path, *q; if (P->execname) return (P->execname); /* Already found */ @@ -118,10 +119,14 @@ Pfindexec(struct ps_prochandle *P, const char *aout, * Second try: read the string pointed to by the AT_SUN_EXECNAME * auxv element, saved when the program was exec'd. If the full * pathname try_exec() forms fails, try again using just the - * basename appended to our cwd. + * basename appended to our cwd. If that also fails, and the process + * is in a zone, try again with the zone path instead of our cwd. 
*/ if ((addr = Pgetauxval(P, AT_SUN_EXECNAME)) != (uintptr_t)-1L && Pread_string(P, path, sizeof (path), (off_t)addr) > 0) { + char zname[ZONENAME_MAX]; + char zpath[PATH_MAX]; + const psinfo_t *pi = Ppsinfo(P); if (try_exec(cwd, path, buf, isexec, isdata)) goto found; @@ -129,6 +134,14 @@ Pfindexec(struct ps_prochandle *P, const char *aout, if (strchr(path, '/') != NULL && (p = basename(path)) != NULL && try_exec(cwd, p, buf, isexec, isdata)) goto found; + + if (getzonenamebyid(pi->pr_zoneid, zname, + sizeof (zname)) != -1 && strcmp(zname, "global") != 0 && + zone_get_zonepath(zname, zpath, sizeof (zpath)) == Z_OK) { + (void) strcat(zpath, "/root"); + if (try_exec(zpath, p, buf, isexec, isdata)) + goto found; + } } /* @@ -245,7 +258,7 @@ Pexecname(struct ps_prochandle *P, char *buf, size_t buflen) * Try to get the path information first. */ (void) snprintf(exec_name, sizeof (exec_name), - "/proc/%d/path/a.out", (int)P->pid); + "%s/%d/path/a.out", procfs_path, (int)P->pid); if ((ret = readlink(exec_name, buf, buflen - 1)) > 0) { buf[ret] = '\0'; return (buf); @@ -256,7 +269,7 @@ Pexecname(struct ps_prochandle *P, char *buf, size_t buflen) * suggestions to the actual device and inode number. */ (void) snprintf(exec_name, sizeof (exec_name), - "/proc/%d/object/a.out", (int)P->pid); + "%s/%d/object/a.out", procfs_path, (int)P->pid); if (stat64(exec_name, &st) != 0 || !S_ISREG(st.st_mode)) return (NULL); @@ -267,7 +280,7 @@ Pexecname(struct ps_prochandle *P, char *buf, size_t buflen) * not changed its current directory since it was exec'd. 
*/ (void) snprintf(proc_cwd, sizeof (proc_cwd), - "/proc/%d/path/cwd", (int)P->pid); + "%s/%d/path/cwd", procfs_path, (int)P->pid); if ((ret = readlink(proc_cwd, cwd, PATH_MAX - 1)) > 0) cwd[ret] = '\0'; diff --git a/usr/src/lib/libproc/common/Pisprocdir.c b/usr/src/lib/libproc/common/Pisprocdir.c index b617b2edda..70e341bcbe 100644 --- a/usr/src/lib/libproc/common/Pisprocdir.c +++ b/usr/src/lib/libproc/common/Pisprocdir.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -43,11 +42,11 @@ Pisprocdir(struct ps_prochandle *Pr, const char *dir) struct statvfs statvfsb; if (*dir == '/') - (void) snprintf(path, sizeof (path), "/proc/%d/root%s", - (int)Pr->pid, dir); + (void) snprintf(path, sizeof (path), "%s/%d/root%s", + procfs_path, (int)Pr->pid, dir); else - (void) snprintf(path, sizeof (path), "/proc/%d/cwd/%s", - (int)Pr->pid, dir); + (void) snprintf(path, sizeof (path), "%s/%d/cwd/%s", + procfs_path, (int)Pr->pid, dir); /* * We can't compare the statb.st_fstype string to "proc" because diff --git a/usr/src/lib/libproc/common/Plwpregs.c b/usr/src/lib/libproc/common/Plwpregs.c index bb16b50370..76f658a458 100644 --- a/usr/src/lib/libproc/common/Plwpregs.c +++ b/usr/src/lib/libproc/common/Plwpregs.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -30,6 +29,7 @@ #include <sys/uio.h> #include <string.h> #include <errno.h> +#include <limits.h> #include "Pcontrol.h" #include "P32ton.h" @@ -68,11 +68,11 @@ static int getlwpfile(struct ps_prochandle *P, lwpid_t lwpid, const char *fbase, void *rp, size_t n) { - char fname[64]; + char fname[PATH_MAX]; int fd; - (void) snprintf(fname, sizeof (fname), "/proc/%d/lwp/%d/%s", - (int)P->status.pr_pid, (int)lwpid, fbase); + (void) snprintf(fname, sizeof (fname), "%s/%d/lwp/%d/%s", + procfs_path, (int)P->status.pr_pid, (int)lwpid, fbase); if ((fd = open(fname, O_RDONLY)) >= 0) { if (read(fd, rp, n) > 0) { @@ -133,7 +133,7 @@ setlwpregs(struct ps_prochandle *P, lwpid_t lwpid, long cmd, const void *rp, size_t n) { iovec_t iov[2]; - char fname[64]; + char fname[PATH_MAX]; int fd; if (P->state != PS_STOP) { @@ -170,8 +170,8 @@ setlwpregs(struct ps_prochandle *P, lwpid_t lwpid, long cmd, * If the lwp we want is not the representative lwp, we need to * open the ctl file for that specific lwp. */ - (void) snprintf(fname, sizeof (fname), "/proc/%d/lwp/%d/lwpctl", - (int)P->status.pr_pid, (int)lwpid); + (void) snprintf(fname, sizeof (fname), "%s/%d/lwp/%d/lwpctl", + procfs_path, (int)P->status.pr_pid, (int)lwpid); if ((fd = open(fname, O_WRONLY)) >= 0) { if (writev(fd, iov, 2) > 0) { diff --git a/usr/src/lib/libproc/common/Pscantext.c b/usr/src/lib/libproc/common/Pscantext.c index 72d85c06f6..08470bcd22 100644 --- a/usr/src/lib/libproc/common/Pscantext.c +++ b/usr/src/lib/libproc/common/Pscantext.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -33,6 +32,7 @@ #include <unistd.h> #include <string.h> #include <errno.h> +#include <limits.h> #include "libproc.h" #include "Pcontrol.h" @@ -47,7 +47,7 @@ int Pscantext(struct ps_prochandle *P) { - char mapfile[100]; + char mapfile[PATH_MAX]; int mapfd; off_t offset; /* offset in text section */ off_t endoff; /* ending offset in text section */ @@ -80,7 +80,8 @@ Pscantext(struct ps_prochandle *P) } /* open the /proc/<pid>/map file */ - (void) sprintf(mapfile, "/proc/%d/map", (int)P->pid); + (void) snprintf(mapfile, sizeof (mapfile), "%s/%d/map", + procfs_path, (int)P->pid); if ((mapfd = open(mapfile, O_RDONLY)) < 0) { dprintf("failed to open %s: %s\n", mapfile, strerror(errno)); return (-1); diff --git a/usr/src/lib/libproc/common/Pservice.c b/usr/src/lib/libproc/common/Pservice.c index 6850696af4..cd43947171 100644 --- a/usr/src/lib/libproc/common/Pservice.c +++ b/usr/src/lib/libproc/common/Pservice.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -172,7 +171,7 @@ ps_lsetfpregs(struct ps_prochandle *P, lwpid_t lwpid, const prfpregset_t *regs) ps_err_e ps_lgetxregsize(struct ps_prochandle *P, lwpid_t lwpid, int *xrsize) { - char fname[64]; + char fname[PATH_MAX]; struct stat statb; if (P->state == PS_DEAD) { @@ -192,8 +191,8 @@ ps_lgetxregsize(struct ps_prochandle *P, lwpid_t lwpid, int *xrsize) return (PS_BADLID); } - (void) snprintf(fname, sizeof (fname), "/proc/%d/lwp/%d/xregs", - (int)P->status.pr_pid, (int)lwpid); + (void) snprintf(fname, sizeof (fname), "%s/%d/lwp/%d/xregs", + procfs_path, (int)P->status.pr_pid, (int)lwpid); if (stat(fname, &statb) != 0) return (PS_BADLID); @@ -320,6 +319,12 @@ ps_pauxv(struct ps_prochandle *P, const auxv_t **aux) return (PS_OK); } +ps_err_e +ps_pbrandname(struct ps_prochandle *P, char *buf, size_t len) +{ + return (Pbrandname(P, buf, len) ? PS_OK : PS_ERR); +} + /* * Search for a symbol by name and return the corresponding address. */ diff --git a/usr/src/lib/libproc/common/Psymtab.c b/usr/src/lib/libproc/common/Psymtab.c index c8c00fb761..d3fe42f180 100644 --- a/usr/src/lib/libproc/common/Psymtab.c +++ b/usr/src/lib/libproc/common/Psymtab.c @@ -26,6 +26,7 @@ #pragma ident "%Z%%M% %I% %E% SMI" +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stddef.h> @@ -287,15 +288,20 @@ map_iter(const rd_loadobj_t *lop, void *cd) dprintf("encountered rd object at %p\n", (void *)lop->rl_base); - if ((mptr = Paddr2mptr(P, lop->rl_base)) == NULL) + if ((mptr = Paddr2mptr(P, lop->rl_base)) == NULL) { + dprintf("map_iter: base address doesn't match any mapping\n"); return (1); /* Base address does not match any mapping */ + } if ((fptr = mptr->map_file) == NULL && - (fptr = file_info_new(P, mptr)) == NULL) + (fptr = file_info_new(P, mptr)) == NULL) { + dprintf("map_iter: failed to allocate a new file_info_t\n"); return (1); /* Failed to allocate a new file_info_t */ + } if ((fptr->file_lo == NULL) && (fptr->file_lo = malloc(sizeof (rd_loadobj_t))) == NULL) { + 
dprintf("map_iter: failed to allocate rd_loadobj_t\n"); file_info_free(P, fptr); return (1); /* Failed to allocate rd_loadobj_t */ } @@ -314,6 +320,9 @@ map_iter(const rd_loadobj_t *lop, void *cd) if (Pread_string(P, buf, sizeof (buf), lop->rl_nameaddr) > 0) { if ((fptr->file_lname = strdup(buf)) != NULL) fptr->file_lbase = basename(fptr->file_lname); + } else { + dprintf("map_iter: failed to read string at %p\n", + (void *)lop->rl_nameaddr); } dprintf("loaded rd object %s lmid %lx\n", @@ -341,17 +350,13 @@ map_set(struct ps_prochandle *P, map_info_t *mptr, const char *lname) (void) memset(fptr->file_lo, 0, sizeof (rd_loadobj_t)); fptr->file_lo->rl_base = mptr->map_pmap.pr_vaddr; fptr->file_lo->rl_bend = - mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size; + mptr->map_pmap.pr_vaddr + mptr->map_pmap.pr_size; fptr->file_lo->rl_plt_base = fptr->file_plt_base; fptr->file_lo->rl_plt_size = fptr->file_plt_size; - if (fptr->file_lname) { - free(fptr->file_lname); - fptr->file_lname = NULL; - } - - if ((fptr->file_lname = strdup(lname)) != NULL) + if (fptr->file_lname == NULL && + (fptr->file_lname = strdup(lname)) != NULL) fptr->file_lbase = basename(fptr->file_lname); } @@ -385,7 +390,7 @@ load_static_maps(struct ps_prochandle *P) void Pupdate_maps(struct ps_prochandle *P) { - char mapfile[64]; + char mapfile[PATH_MAX]; int mapfd; struct stat statb; prmap_t *Pmap = NULL; @@ -401,7 +406,8 @@ Pupdate_maps(struct ps_prochandle *P) Preadauxvec(P); - (void) sprintf(mapfile, "/proc/%d/map", (int)P->pid); + (void) snprintf(mapfile, sizeof (mapfile), "%s/%d/map", + procfs_path, (int)P->pid); if ((mapfd = open(mapfile, O_RDONLY)) < 0 || fstat(mapfd, &statb) != 0 || statb.st_size < sizeof (prmap_t) || @@ -803,7 +809,8 @@ Preadauxvec(struct ps_prochandle *P) P->nauxv = 0; } - (void) sprintf(auxfile, "/proc/%d/auxv", (int)P->pid); + (void) snprintf(auxfile, sizeof (auxfile), "%s/%d/auxv", + procfs_path, (int)P->pid); if ((fd = open(auxfile, O_RDONLY)) < 0) return; @@ -928,7 +935,7 @@ 
build_map_symtab(struct ps_prochandle *P, map_info_t *mptr) * fptr->file_map to be set in Pbuild_file_symtab. librtld_db may be * unaware of what's going on in the rare case that a legitimate ELF * file has been mmap(2)ed into the process address space *without* - * the use of dlopen(3x). Why would this happen? See pwdx ... :) + * the use of dlopen(3x). */ if (fptr->file_map == NULL) fptr->file_map = mptr; @@ -1128,6 +1135,11 @@ found_shdr: dyn.d_tag == DT_CHECKSUM) goto found_cksum; } + + /* + * The in-memory ELF has no DT_CHECKSUM section, but we will report it + * as matching the file anyhow. + */ return (0); found_cksum: @@ -1321,7 +1333,7 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) uint32_t off; size_t pltsz = 0, pltentsz; - if (read_ehdr32(P, &ehdr, &phnum, addr) != 0 || + if ((read_ehdr32(P, &ehdr, &phnum, addr) != 0) || read_dynamic_phdr32(P, &ehdr, phnum, &phdr, addr) != 0) return (NULL); @@ -1337,6 +1349,14 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) return (NULL); } + /* + * Allow librtld_db the opportunity to "fix" the program + * headers, if it needs to, before we process them. 
+ */ + if (P->rap != NULL && ehdr.e_type == ET_DYN) { + rd_fix_phdrs(P->rap, dp, phdr.p_filesz, addr); + } + for (i = 0; i < phdr.p_filesz / sizeof (Elf32_Dyn); i++) { switch (dp[i].d_tag) { /* @@ -1422,8 +1442,11 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) /* .dynsym section */ size += sizeof (Elf32_Shdr); if (Pread(P, &nchain, sizeof (nchain), - d[DI_HASH]->d_un.d_ptr + 4) != sizeof (nchain)) + d[DI_HASH]->d_un.d_ptr + 4) != sizeof (nchain)) { + dprintf("Pread of .dynsym at %lx failed\n", + (long)(d[DI_HASH]->d_un.d_val + 4)); goto bad32; + } size += sizeof (Elf32_Sym) * nchain; /* .dynstr section */ @@ -1446,8 +1469,10 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) Elf32_Rela r[2]; if (Pread(P, r, sizeof (r), jmprel + - sizeof (r[0]) * ndx) != sizeof (r)) + sizeof (r[0]) * ndx) != sizeof (r)) { + dprintf("Pread of DT_RELA failed\n"); goto bad32; + } penult = r[0].r_offset; ult = r[1].r_offset; @@ -1457,12 +1482,15 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) Elf32_Rel r[2]; if (Pread(P, r, sizeof (r), jmprel + - sizeof (r[0]) * ndx) != sizeof (r)) + sizeof (r[0]) * ndx) != sizeof (r)) { + dprintf("Pread of DT_REL failed\n"); goto bad32; + } penult = r[0].r_offset; ult = r[1].r_offset; } else { + dprintf(".plt: unknown jmprel value\n"); goto bad32; } @@ -1505,6 +1533,7 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) if (Pread(P, &elfdata[ep->e_phoff], phnum * ep->e_phentsize, addr + ehdr.e_phoff) != phnum * ep->e_phentsize) { free(elfdata); + dprintf("failed to read program headers\n"); goto bad32; } @@ -1550,6 +1579,8 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) if (Pread(P, &elfdata[off], sp->sh_size, d[DI_SYMTAB]->d_un.d_ptr) != sp->sh_size) { free(elfdata); + dprintf("failed to read .dynsym at %lx\n", + (long)d[DI_SYMTAB]->d_un.d_ptr); goto bad32; } @@ -1575,6 +1606,7 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) if (Pread(P, &elfdata[off], sp->sh_size, d[DI_STRTAB]->d_un.d_ptr) != sp->sh_size) { 
free(elfdata); + dprintf("failed to read .dynstr\n"); goto bad32; } off += roundup(sp->sh_size, 4); @@ -1620,6 +1652,7 @@ fake_elf(struct ps_prochandle *P, file_info_t *fptr) if (Pread(P, &elfdata[off], sp->sh_size, d[DI_PLTGOT]->d_un.d_ptr) != sp->sh_size) { free(elfdata); + dprintf("failed to read .plt\n"); goto bad32; } off += roundup(sp->sh_size, 4); @@ -1768,8 +1801,13 @@ bad32: Elf64_Rela r[2]; if (Pread(P, r, sizeof (r), jmprel + - sizeof (r[0]) * ndx) != sizeof (r)) + sizeof (r[0]) * ndx) != sizeof (r)) { + dprintf("Pread jmprel DT_RELA at %p " + "failed\n", + (void *)(jmprel + + sizeof (r[0]) * ndx)); goto bad64; + } penult = r[0].r_offset; ult = r[1].r_offset; @@ -1779,12 +1817,19 @@ bad32: Elf64_Rel r[2]; if (Pread(P, r, sizeof (r), jmprel + - sizeof (r[0]) * ndx) != sizeof (r)) + sizeof (r[0]) * ndx) != sizeof (r)) { + dprintf("Pread jmprel DT_REL at %p " + "failed\n", + (void *)(jmprel + + sizeof (r[0]) * ndx)); goto bad64; + } penult = r[0].r_offset; ult = r[1].r_offset; } else { + dprintf("DT_PLTREL value %p unknown\n", + (void *)d[DI_PLTREL]->d_un.d_ptr); goto bad64; } @@ -2172,7 +2217,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) fptr->file_lname ? fptr->file_lname : fptr->file_pname); } else { (void) snprintf(objectfile, sizeof (objectfile), - "/proc/%d/object/%s", (int)P->pid, fptr->file_pname); + "%s/%d/object/%s", + procfs_path, (int)P->pid, fptr->file_pname); } /* @@ -2203,8 +2249,10 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) elf_getshstrndx(elf, &shstrndx) == 0 || (scn = elf_getscn(elf, shstrndx)) == NULL || (shdata = elf_getdata(scn, NULL)) == NULL) { + int err = elf_errno(); + dprintf("failed to process ELF file %s: %s\n", - objectfile, elf_errmsg(elf_errno())); + objectfile, (err == 0) ? "<null>" : elf_errmsg(err)); if ((elf = fake_elf(P, fptr)) == NULL || elf_kind(elf) != ELF_K_ELF || @@ -2264,14 +2312,22 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) * pointer, and name. 
We use this for handling sh_link values below. */ for (cp = cache + 1, scn = NULL; scn = elf_nextscn(elf, scn); cp++) { - if (gelf_getshdr(scn, &cp->c_shdr) == NULL) + if (gelf_getshdr(scn, &cp->c_shdr) == NULL) { + dprintf("Pbuild_file_symtab: Failed to get section " + "header\n"); goto bad; /* Failed to get section header */ + } - if ((cp->c_data = elf_getdata(scn, NULL)) == NULL) + if ((cp->c_data = elf_getdata(scn, NULL)) == NULL) { + dprintf("Pbuild_file_symtab: Failed to get section " + "data\n"); goto bad; /* Failed to get section data */ + } - if (cp->c_shdr.sh_name >= shdata->d_size) + if (cp->c_shdr.sh_name >= shdata->d_size) { + dprintf("Pbuild_file_symtab: corrupt section name"); goto bad; /* Corrupt section name */ + } cp->c_name = (const char *)shdata->d_buf + cp->c_shdr.sh_name; } @@ -2286,7 +2342,6 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) if (shp->sh_type == SHT_SYMTAB || shp->sh_type == SHT_DYNSYM) { sym_tbl_t *symp = shp->sh_type == SHT_SYMTAB ? &fptr->file_symtab : &fptr->file_dynsym; - /* * It's possible that the we already got the symbol * table from the core file itself. Either the file @@ -2298,6 +2353,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) * check isn't essential, but it's a good idea. 
*/ if (symp->sym_data == NULL) { + dprintf("Symbol table found for %s\n", + objectfile); symp->sym_data = cp->c_data; symp->sym_symn = shp->sh_size / shp->sh_entsize; symp->sym_strs = @@ -2306,14 +2363,15 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) cache[shp->sh_link].c_data->d_size; symp->sym_hdr = cp->c_shdr; symp->sym_strhdr = cache[shp->sh_link].c_shdr; + } else { + dprintf("Symbol table already there for %s\n", + objectfile); } } else if (shp->sh_type == SHT_DYNAMIC) { dyn = cp; - } else if (strcmp(cp->c_name, ".plt") == 0) { plt = cp; - } else if (strcmp(cp->c_name, ".SUNW_ctf") == 0) { /* * Skip over bogus CTF sections so they don't come back @@ -2347,8 +2405,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) if (fptr->file_etype == ET_DYN) { fptr->file_dyn_base = fptr->file_map->map_pmap.pr_vaddr - fptr->file_map->map_pmap.pr_offset; - dprintf("setting file_dyn_base for %s to %p\n", - objectfile, (void *)fptr->file_dyn_base); + dprintf("setting file_dyn_base for %s to %lx\n", + objectfile, (long)fptr->file_dyn_base); } /* @@ -2371,8 +2429,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) */ if (fptr->file_etype == ET_DYN && fptr->file_lo->rl_base != fptr->file_dyn_base) { - dprintf("resetting file_dyn_base for %s to %p\n", - objectfile, (void *)fptr->file_lo->rl_base); + dprintf("resetting file_dyn_base for %s to %lx\n", + objectfile, (long)fptr->file_lo->rl_base); fptr->file_dyn_base = fptr->file_lo->rl_base; } @@ -2408,6 +2466,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr) for (i = 0; i < ndyn; i++) { if (gelf_getdyn(dyn->c_data, i, &d) != NULL && d.d_tag == DT_JMPREL) { + dprintf("DT_JMPREL is %p\n", + (void *)(uintptr_t)d.d_un.d_ptr); fptr->file_jmp_rel = d.d_un.d_ptr + fptr->file_dyn_base; break; @@ -2533,6 +2593,13 @@ object_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *objname) uint_t i; /* + * If we have no rtld_db, then always treat a request as one for all + * 
link maps. + */ + if (P->rap == NULL) + lmid = PR_LMID_EVERY; + + /* * First pass: look for exact matches of the entire pathname or * basename (cases 1 and 2 above): */ @@ -2610,10 +2677,8 @@ object_name_to_map(struct ps_prochandle *P, Lmid_t lmid, const char *name) mptr = P->map_exec; else if (name == PR_OBJ_LDSO) mptr = P->map_ldso; - else if (Prd_agent(P) != NULL || P->state == PS_IDLE) - mptr = object_to_map(P, lmid, name); else - mptr = NULL; + mptr = object_to_map(P, lmid, name); return (mptr); } diff --git a/usr/src/lib/libproc/common/Psyscall.c b/usr/src/lib/libproc/common/Psyscall.c index c34a17a04c..1261eb49b4 100644 --- a/usr/src/lib/libproc/common/Psyscall.c +++ b/usr/src/lib/libproc/common/Psyscall.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -80,7 +79,7 @@ int Pcreate_agent(struct ps_prochandle *P) { int fd; - char pathname[100]; + char pathname[PATH_MAX]; char *fname; struct { long cmd; @@ -123,7 +122,8 @@ Pcreate_agent(struct ps_prochandle *P) (void) Pstopstatus(P, PCNULL, 0); /* open the agent LWP files */ - (void) sprintf(pathname, "/proc/%d/lwp/agent/", (int)P->pid); + (void) snprintf(pathname, sizeof (pathname), "%s/%d/lwp/agent/", + procfs_path, (int)P->pid); fname = pathname + strlen(pathname); (void) set_minfd(); diff --git a/usr/src/lib/libproc/common/libproc.h b/usr/src/lib/libproc/common/libproc.h index b2f37f45d7..07e58fa05c 100644 --- a/usr/src/lib/libproc/common/libproc.h +++ b/usr/src/lib/libproc/common/libproc.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -201,6 +200,7 @@ extern void Prelease(struct ps_prochandle *, int); extern void Pfree(struct ps_prochandle *); extern int Pasfd(struct ps_prochandle *); +extern char *Pbrandname(struct ps_prochandle *, char *, size_t); extern int Pctlfd(struct ps_prochandle *); extern int Pcreate_agent(struct ps_prochandle *); extern void Pdestroy_agent(struct ps_prochandle *); @@ -495,6 +495,8 @@ extern char *Pgetenv(struct ps_prochandle *, const char *, char *, size_t); extern long Pgetauxval(struct ps_prochandle *, int); extern const auxv_t *Pgetauxvec(struct ps_prochandle *); +extern void Pset_procfs_path(const char *); + /* * Symbol table iteration interface. The special lmid constants LM_ID_BASE, * LM_ID_LDSO, and PR_LMID_EVERY may be used with Psymbol_iter_by_lmid. diff --git a/usr/src/lib/libproc/common/mapfile-vers b/usr/src/lib/libproc/common/mapfile-vers index f3c517dcf5..ef256570ce 100644 --- a/usr/src/lib/libproc/common/mapfile-vers +++ b/usr/src/lib/libproc/common/mapfile-vers @@ -219,6 +219,7 @@ SUNWprivate_1.1 { pr_unlink; pr_waitid; pr_zmap; + Pset_procfs_path; Psetbkpt; Psetcred; Psetfault; @@ -238,6 +239,7 @@ SUNWprivate_1.1 { ps_lsetregs; ps_lstop; ps_pauxv; + ps_pbrandname; ps_pcontinue; ps_pdmodel; ps_pdread; diff --git a/usr/src/lib/libproc/common/proc_arg.c b/usr/src/lib/libproc/common/proc_arg.c index 2a79b016c2..c25a89b944 100644 --- a/usr/src/lib/libproc/common/proc_arg.c +++ b/usr/src/lib/libproc/common/proc_arg.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,21 +38,22 @@ #include <errno.h> #include <dirent.h> -#include "libproc.h" +#include "Pcontrol.h" static int open_psinfo(const char *arg, int *perr) { /* - * Allocate enough space for "/proc/" + arg + "/psinfo" + * Allocate enough space for procfs_path + arg + "/psinfo" */ - char *path = alloca(strlen(arg) + 14); + char *path = alloca(strlen(arg) + strlen(procfs_path) + 9); struct stat64 st; int fd; if (strchr(arg, '/') == NULL) { - (void) strcpy(path, "/proc/"); + (void) strcpy(path, procfs_path); + (void) strcat(path, "/"); (void) strcat(path, arg); } else (void) strcpy(path, arg); @@ -430,7 +430,7 @@ proc_walk(proc_walk_f *func, void *arg, int flag) DIR *procdir; struct dirent *dirent; char *errptr; - char pidstr[80]; + char pidstr[PATH_MAX]; psinfo_t psinfo; lwpsinfo_t *lwpsinfo; prheader_t prheader; @@ -445,7 +445,7 @@ proc_walk(proc_walk_f *func, void *arg, int flag) errno = EINVAL; return (-1); } - if ((procdir = opendir("/proc")) == NULL) + if ((procdir = opendir(procfs_path)) == NULL) return (-1); while (dirent = readdir(procdir)) { if (dirent->d_name[0] == '.') /* skip . and .. 
*/ @@ -455,7 +455,7 @@ proc_walk(proc_walk_f *func, void *arg, int flag) continue; /* PR_WALK_PROC case */ (void) snprintf(pidstr, sizeof (pidstr), - "/proc/%ld/psinfo", pid); + "%s/%ld/psinfo", procfs_path, pid); fd = open(pidstr, O_RDONLY); if (fd < 0) continue; @@ -471,7 +471,7 @@ proc_walk(proc_walk_f *func, void *arg, int flag) } /* PR_WALK_LWP case */ (void) snprintf(pidstr, sizeof (pidstr), - "/proc/%ld/lpsinfo", pid); + "%s/%ld/lpsinfo", procfs_path, pid); fd = open(pidstr, O_RDONLY); if (fd < 0) continue; diff --git a/usr/src/lib/libproc/common/proc_get_info.c b/usr/src/lib/libproc/common/proc_get_info.c index d4426f7cc0..7fb88c4212 100644 --- a/usr/src/lib/libproc/common/proc_get_info.c +++ b/usr/src/lib/libproc/common/proc_get_info.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -31,7 +30,9 @@ #include <unistd.h> #include <fcntl.h> #include <string.h> -#include "libproc.h" +#include <limits.h> + +#include "Pcontrol.h" /* * These several routines simply get the indicated /proc structures @@ -50,13 +51,14 @@ int proc_get_cred(pid_t pid, prcred_t *credp, int ngroups) { - char fname[64]; + char fname[PATH_MAX]; int fd; int rv = -1; ssize_t minsize = sizeof (*credp) - sizeof (gid_t); size_t size = minsize + ngroups * sizeof (gid_t); - (void) snprintf(fname, sizeof (fname), "/proc/%d/cred", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/cred", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) >= 0) { if (read(fd, credp, size) >= minsize) rv = 0; @@ -71,12 +73,13 @@ proc_get_cred(pid_t pid, prcred_t *credp, int ngroups) prpriv_t * proc_get_priv(pid_t pid) { - char fname[64]; + char fname[PATH_MAX]; int fd; struct stat statb; prpriv_t *rv = NULL; - (void) snprintf(fname, sizeof (fname), "/proc/%d/priv", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/priv", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) >= 0) { if (fstat(fd, &statb) != 0 || (rv = malloc(statb.st_size)) == NULL || @@ -99,13 +102,14 @@ proc_get_priv(pid_t pid) int proc_get_ldt(pid_t pid, struct ssd *pldt, int nldt) { - char fname[64]; + char fname[PATH_MAX]; int fd; struct stat statb; size_t size; ssize_t ssize; - (void) snprintf(fname, sizeof (fname), "/proc/%d/ldt", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/ldt", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) < 0) return (-1); @@ -131,11 +135,12 @@ proc_get_ldt(pid_t pid, struct ssd *pldt, int nldt) int proc_get_psinfo(pid_t pid, psinfo_t *psp) { - char fname[64]; + char fname[PATH_MAX]; int fd; int rv = -1; - (void) snprintf(fname, sizeof (fname), "/proc/%d/psinfo", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/psinfo", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) >= 0) { if (read(fd, psp, sizeof (*psp)) == sizeof (*psp)) rv 
= 0; @@ -147,11 +152,12 @@ proc_get_psinfo(pid_t pid, psinfo_t *psp) int proc_get_status(pid_t pid, pstatus_t *psp) { - char fname[64]; + char fname[PATH_MAX]; int fd; int rv = -1; - (void) snprintf(fname, sizeof (fname), "/proc/%d/status", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/status", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) >= 0) { if (read(fd, psp, sizeof (*psp)) == sizeof (*psp)) rv = 0; @@ -169,11 +175,12 @@ proc_get_status(pid_t pid, pstatus_t *psp) int proc_get_auxv(pid_t pid, auxv_t *pauxv, int naux) { - char fname[64]; + char fname[PATH_MAX]; int fd; int rv = -1; - (void) snprintf(fname, sizeof (fname), "/proc/%d/auxv", (int)pid); + (void) snprintf(fname, sizeof (fname), "%s/%d/auxv", + procfs_path, (int)pid); if ((fd = open(fname, O_RDONLY)) >= 0) { if ((rv = read(fd, pauxv, naux * sizeof (auxv_t))) >= 0) rv /= sizeof (auxv_t); diff --git a/usr/src/lib/libproc/common/proc_names.c b/usr/src/lib/libproc/common/proc_names.c index 249b9745a3..5c9cd365aa 100644 --- a/usr/src/lib/libproc/common/proc_names.c +++ b/usr/src/lib/libproc/common/proc_names.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -156,7 +155,7 @@ static const char *const systable[] = { "kill", /* 37 */ "fstatfs", /* 38 */ "pgrpsys", /* 39 */ - NULL, /* 40 was xenix */ + "uucopystr", /* 40 */ "dup", /* 41 */ "pipe", /* 42 */ "times", /* 43 */ @@ -296,7 +295,7 @@ static const char *const systable[] = { "pwrite", /* 174 */ "llseek", /* 175 */ "inst_sync", /* 176 */ - NULL, /* 177 */ + "brand", /* 177 */ "kaio", /* 178 */ "cpc", /* 179 */ "lgrpsys", /* 180 */ @@ -373,7 +372,7 @@ static const char *const systable[] = { "lwp_mutex_trylock", /* 251 */ "lwp_mutex_init", /* 252 */ "cladm", /* 253 */ - NULL, /* 254 */ + "uucopy", /* 254 */ "umount2" /* 255 */ }; diff --git a/usr/src/lib/libzonecfg/Makefile b/usr/src/lib/libzonecfg/Makefile index 513695f532..e672d0caa0 100644 --- a/usr/src/lib/libzonecfg/Makefile +++ b/usr/src/lib/libzonecfg/Makefile @@ -56,24 +56,13 @@ $(ROOTDTDS) := GROUP = bin $(DTDDIR)/%: % $(INS.file) -XMLFILES= zones/SUNWblank.xml zones/SUNWdefault.xml -XMLDIR = $(ROOT)/etc -ROOTXMLFILES= $(XMLFILES:%=$(XMLDIR)/%) - -$(ROOTXMLFILES) := FILEMODE = 444 -$(ROOTXMLFILES) := OWNER = root -$(ROOTXMLFILES) := GROUP = bin - -$(XMLDIR)/%: % - $(INS.file) - .KEEP_STATE: all clean clobber install lint: $(SUBDIRS) $(POFILE): pofile_MSGFILES -install: $(ROOTDTDS) $(ROOTXMLFILES) +install: $(ROOTDTDS) _msg: $(MSGDOMAINPOFILE) diff --git a/usr/src/lib/libzonecfg/Makefile.com b/usr/src/lib/libzonecfg/Makefile.com index 54caa0dd26..b89a44fce3 100644 --- a/usr/src/lib/libzonecfg/Makefile.com +++ b/usr/src/lib/libzonecfg/Makefile.com @@ -32,7 +32,7 @@ OBJECTS= libzonecfg.o getzoneent.o scratchops.o include ../../Makefile.lib LIBS = $(DYNLIB) $(LINTLIB) -LDLIBS += -lc -lsocket -lnsl -luuid -lnvpair -lsysevent -lsec +LDLIBS += -lc -lsocket -lnsl -luuid -lnvpair -lsysevent -lsec -lbrand # DYNLIB libraries do not have lint libs and are not linted $(DYNLIB) := LDLIBS += -lxml2 diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c index 
569d0f6ba9..af54927068 100644 --- a/usr/src/lib/libzonecfg/common/libzonecfg.c +++ b/usr/src/lib/libzonecfg/common/libzonecfg.c @@ -33,17 +33,18 @@ #include <fnmatch.h> #include <strings.h> #include <unistd.h> -#include <sys/stat.h> #include <assert.h> #include <libgen.h> #include <libintl.h> #include <alloca.h> #include <ctype.h> +#include <sys/acl.h> +#include <sys/stat.h> +#include <sys/brand.h> #include <sys/mntio.h> #include <sys/mnttab.h> -#include <sys/types.h> #include <sys/nvpair.h> -#include <sys/acl.h> +#include <sys/types.h> #include <ftw.h> #include <arpa/inet.h> @@ -54,8 +55,8 @@ #include <libdevinfo.h> #include <uuid/uuid.h> - #include <dirent.h> +#include <libbrand.h> #include <libzonecfg.h> #include "zonecfg_impl.h" @@ -107,6 +108,7 @@ #define DTD_ATTR_GID (const xmlChar *) "gid" #define DTD_ATTR_MODE (const xmlChar *) "mode" #define DTD_ATTR_ACL (const xmlChar *) "acl" +#define DTD_ATTR_BRAND (const xmlChar *) "brand" #define DTD_ENTITY_BOOLEAN "boolean" #define DTD_ENTITY_DEVPATH "devpath" @@ -513,6 +515,7 @@ zonecfg_get_handle_impl(const char *zonename, const char *filename, if (zonename == NULL) return (Z_NO_ZONE); + if ((handle->zone_dh_doc = xmlParseFile(filename)) == NULL) { /* distinguish file not found vs. found but not parsed */ if (stat(filename, &statbuf) == 0) @@ -604,6 +607,21 @@ zonecfg_get_template_handle(const char *template, const char *zonename, return (setrootattr(handle, DTD_ATTR_NAME, zonename)); } +int +zonecfg_get_xml_handle(const char *path, zone_dochandle_t handle) +{ + struct stat buf; + int err; + + if (stat(path, &buf) == -1) + return (Z_MISC_FS); + + if ((err = zonecfg_get_handle_impl("xml", path, handle)) != Z_OK) + return (err); + handle->zone_dh_newzone = B_TRUE; + return (Z_OK); +} + /* * Initialize two handles from the manifest read on fd. The rem_handle * is initialized from the input file, including the sw inventory. 
The @@ -843,6 +861,31 @@ zonecfg_set_zonepath(zone_dochandle_t handle, char *zonepath) } int +zonecfg_get_brand(zone_dochandle_t handle, char *brand, size_t brandsize) +{ + int ret, sz; + + ret = getrootattr(handle, DTD_ATTR_BRAND, brand, brandsize); + + /* If the zone has no brand, it is native. */ + if (ret == Z_OK && brand[0] == '\0') { + sz = strlcpy(brand, NATIVE_BRAND_NAME, brandsize); + if (sz >= brandsize) + ret = Z_TOO_BIG; + else + ret = Z_OK; + } + + return (ret); +} + +int +zonecfg_set_brand(zone_dochandle_t handle, char *brand) +{ + return (setrootattr(handle, DTD_ATTR_BRAND, brand)); +} + +int zonecfg_get_autoboot(zone_dochandle_t handle, boolean_t *autoboot) { char autobootstr[DTD_ENTITY_BOOL_LEN]; @@ -1007,7 +1050,7 @@ zonecfg_save_impl(zone_dochandle_t handle, char *filename) { char tmpfile[MAXPATHLEN]; char bakdir[MAXPATHLEN], bakbase[MAXPATHLEN], bakfile[MAXPATHLEN]; - int tmpfd, err; + int tmpfd, err, valid; xmlValidCtxt cvp = { NULL }; boolean_t backup; @@ -1026,10 +1069,12 @@ zonecfg_save_impl(zone_dochandle_t handle, char *filename) cvp.warning = zonecfg_error_func; /* - * We do a final validation of the document-- but the library has - * malfunctioned if it fails to validate, so it's an assert. + * We do a final validation of the document. Since the library has + * malfunctioned if it fails to validate, we follow-up with an + * assert() that the doc is valid. */ - assert(xmlValidateDocument(&cvp, handle->zone_dh_doc) != 0); + valid = xmlValidateDocument(&cvp, handle->zone_dh_doc); + assert(valid != 0); if (xmlSaveFormatFile(tmpfile, handle->zone_dh_doc, 1) <= 0) goto err; @@ -1152,6 +1197,33 @@ zonecfg_save(zone_dochandle_t handle) } int +zonecfg_verify_save(zone_dochandle_t handle, char *filename) +{ + int valid; + + xmlValidCtxt cvp = { NULL }; + + if (zonecfg_check_handle(handle) != Z_OK) + return (Z_BAD_HANDLE); + + cvp.error = zonecfg_error_func; + cvp.warning = zonecfg_error_func; + + /* + * We do a final validation of the document. 
Since the library has + * malfunctioned if it fails to validate, we follow-up with an + * assert() that the doc is valid. + */ + valid = xmlValidateDocument(&cvp, handle->zone_dh_doc); + assert(valid != 0); + + if (xmlSaveFormatFile(filename, handle->zone_dh_doc, 1) <= 0) + return (Z_SAVING_FILE); + + return (Z_OK); +} + +int zonecfg_detach_save(zone_dochandle_t handle, uint_t flags) { char zname[ZONENAME_MAX]; @@ -1159,6 +1231,7 @@ zonecfg_detach_save(zone_dochandle_t handle, uint_t flags) char migpath[MAXPATHLEN]; xmlValidCtxt cvp = { NULL }; int err = Z_SAVING_FILE; + int valid; if (zonecfg_check_handle(handle) != Z_OK) return (Z_BAD_HANDLE); @@ -1195,10 +1268,12 @@ zonecfg_detach_save(zone_dochandle_t handle, uint_t flags) cvp.warning = zonecfg_error_func; /* - * We do a final validation of the document-- but the library has - * malfunctioned if it fails to validate, so it's an assert. + * We do a final validation of the document. Since the library has + * malfunctioned if it fails to validate, we follow-up with an + * assert() that the doc is valid. */ - assert(xmlValidateDocument(&cvp, handle->zone_dh_doc) != 0); + valid = xmlValidateDocument(&cvp, handle->zone_dh_doc); + assert(valid != 0); if (xmlSaveFormatFile(migpath, handle->zone_dh_doc, 1) <= 0) return (Z_SAVING_FILE); @@ -2344,7 +2419,6 @@ zonecfg_devperms_apply(zone_dochandle_t hdl, const char *inpath, uid_t owner, * If the callback function returns non-zero zonecfg_find_mounts * aborts with an error. 
*/ - int zonecfg_find_mounts(char *rootpath, int (*callback)(const char *, void *), void *priv) { @@ -2970,6 +3044,9 @@ zonecfg_strerror(int errnum) case Z_PRIV_UNKNOWN: return (dgettext(TEXT_DOMAIN, "Specified privilege is unknown")); + case Z_BRAND_ERROR: + return (dgettext(TEXT_DOMAIN, + "Brand-specific error")); default: return (dgettext(TEXT_DOMAIN, "Unknown error")); } @@ -3331,11 +3408,27 @@ zonecfg_endattrent(zone_dochandle_t handle) /* * The privileges available on the system and described in privileges(5) - * fall into four categories with respect to non-global zones; those that - * are required in order for a non-global zone to boot, those which are in - * the default set of privileges available to non-global zones, those - * privileges which should not be allowed to be given to non-global zones - * and all other privileges, which are optional and potentially useful for + * fall into four categories with respect to non-global zones: + * + * Default set of privileges considered safe for all non-global + * zones. These privileges are "safe" in the sense that a + * privileged process in the zone cannot affect processes in any + * other zone on the system. + * + * Set of privileges not currently permitted within a non-global + * zone. These privileges are considered by default, "unsafe," + * and include ones which affect global resources (such as the + * system clock or physical memory) or are overly broad and cover + * more than one mechanism in the system. In other cases, there + * has not been sufficient virtualization in the parts of the + * system the privilege covers to allow its use within a + * non-global zone. + * + * Set of privileges required in order to get a zone booted and + * init(1M) started. These cannot be removed from the zone's + * privilege set. + * + * All other privileges are optional and are potentially useful for * processes executing inside a non-global zone. 
* * When privileges are added to the system, a determination needs to be @@ -3346,76 +3439,6 @@ zonecfg_endattrent(zone_dochandle_t handle) */ /* - * Set of privileges required in order to get a zone booted and init(1M) - * started. These cannot be removed from the zone's privilege set. - */ -static const char *required_priv_list[] = { - PRIV_PROC_EXEC, - PRIV_PROC_FORK, - PRIV_SYS_MOUNT, - NULL -}; - -/* - * Default set of privileges considered safe for all non-global zones. - * These privileges are "safe" in the sense that a privileged process in - * the zone cannot affect processes in other non-global zones on the - * system or in the global zone. Privileges which are considered by - * default, "unsafe", include ones which affect a global resource, such as - * the system clock or physical memory. - */ -static const char *default_priv_list[] = { - PRIV_CONTRACT_EVENT, - PRIV_CONTRACT_OBSERVER, - PRIV_FILE_CHOWN, - PRIV_FILE_CHOWN_SELF, - PRIV_FILE_DAC_EXECUTE, - PRIV_FILE_DAC_READ, - PRIV_FILE_DAC_SEARCH, - PRIV_FILE_DAC_WRITE, - PRIV_FILE_OWNER, - PRIV_FILE_SETID, - PRIV_IPC_DAC_READ, - PRIV_IPC_DAC_WRITE, - PRIV_IPC_OWNER, - PRIV_NET_BINDMLP, - PRIV_NET_ICMPACCESS, - PRIV_NET_MAC_AWARE, - PRIV_NET_PRIVADDR, - PRIV_PROC_CHROOT, - PRIV_SYS_AUDIT, - PRIV_PROC_AUDIT, - PRIV_PROC_OWNER, - PRIV_PROC_SETID, - PRIV_PROC_TASKID, - PRIV_SYS_ACCT, - PRIV_SYS_ADMIN, - PRIV_SYS_MOUNT, - PRIV_SYS_NFS, - PRIV_SYS_RESOURCE, - NULL -}; - -/* - * Set of privileges not currently permitted within a non-global zone. - * Some of these privileges are overly broad and cover more than one - * mechanism in the system. In other cases, there has not been sufficient - * virtualization in the parts of the system the privilege covers to allow - * its use within a non-global zone. 
- */ -static const char *prohibited_priv_list[] = { - PRIV_DTRACE_KERNEL, - PRIV_PROC_ZONE, - PRIV_SYS_CONFIG, - PRIV_SYS_DEVICES, - PRIV_SYS_LINKDIR, - PRIV_SYS_NET_CONFIG, - PRIV_SYS_RES_CONFIG, - PRIV_SYS_SUSER_COMPAT, - NULL -}; - -/* * Define some of the tokens that priv_str_to_set(3C) recognizes. Since * the privilege string separator can be any character, although it is * usually a comma character, define these here as well in the event that @@ -3427,10 +3450,118 @@ static const char *prohibited_priv_list[] = { #define TOKEN_PRIV_CHAR ',' #define TOKEN_PRIV_STR "," -int -zonecfg_default_privset(priv_set_t *privs) +typedef struct priv_node { + struct priv_node *pn_next; /* Next privilege */ + char *pn_priv; /* Privileges name */ +} priv_node_t; + +/* Privileges lists can differ across brands */ +typedef struct priv_lists { + /* Privileges considered safe for all non-global zones of a brand */ + struct priv_node *pl_default; + + /* Privileges not permitted for all non-global zones of a brand */ + struct priv_node *pl_prohibited; + + /* Privileges required for all non-global zones of a brand */ + struct priv_node *pl_required; +} priv_lists_t; + +static int +priv_lists_cb(void *data, const char *name, const char *set) { - const char **strp; + priv_lists_t *plp = (priv_lists_t *)data; + priv_node_t *pnp; + + /* Allocate a new priv list node. 
*/ + if ((pnp = malloc(sizeof (*pnp))) == NULL) + return (-1); + if ((pnp->pn_priv = strdup(name)) == NULL) { + free(pnp); + return (-1); + } + + /* Insert the new priv list node into the right list */ + if (strcmp(set, "default") == 0) { + pnp->pn_next = plp->pl_default; + plp->pl_default = pnp; + } else if (strcmp(set, "prohibited") == 0) { + pnp->pn_next = plp->pl_prohibited; + plp->pl_prohibited = pnp; + } else if (strcmp(set, "required") == 0) { + pnp->pn_next = plp->pl_required; + plp->pl_required = pnp; + } else { + free(pnp->pn_priv); + free(pnp); + return (-1); + } + return (0); +} + +static void +priv_lists_destroy(priv_lists_t *plp) +{ + priv_node_t *pnp; + + assert(plp != NULL); + + while ((pnp = plp->pl_default) != NULL) { + plp->pl_default = pnp->pn_next; + free(pnp->pn_priv); + free(pnp); + } + while ((pnp = plp->pl_prohibited) != NULL) { + plp->pl_prohibited = pnp->pn_next; + free(pnp->pn_priv); + free(pnp); + } + while ((pnp = plp->pl_required) != NULL) { + plp->pl_required = pnp->pn_next; + free(pnp->pn_priv); + free(pnp); + } + free(plp); +} + +static int +priv_lists_create(zone_dochandle_t handle, priv_lists_t **plpp) +{ + priv_lists_t *plp; + brand_handle_t *bhp; + char brand[MAXNAMELEN]; + + if (handle != NULL) { + if (zonecfg_get_brand(handle, brand, sizeof (brand)) != 0) + return (Z_BRAND_ERROR); + } else { + (void) strlcpy(brand, NATIVE_BRAND_NAME, MAXNAMELEN); + } + + if ((bhp = brand_open(brand)) == NULL) + return (Z_BRAND_ERROR); + + if ((plp = calloc(1, sizeof (priv_lists_t))) == NULL) { + brand_close(bhp); + return (Z_NOMEM); + } + + /* construct the privilege lists */ + if (brand_config_iter_privilege(bhp, priv_lists_cb, plp) != 0) { + priv_lists_destroy(plp); + brand_close(bhp); + return (Z_BRAND_ERROR); + } + + brand_close(bhp); + *plpp = plp; + return (Z_OK); +} + +static int +get_default_privset(priv_set_t *privs, priv_lists_t *plp) +{ + priv_node_t *pnp; priv_set_t *basic; basic = priv_str_to_set(BASIC_TOKEN, TOKEN_PRIV_STR, 
NULL); @@ -3440,14 +3571,27 @@ zonecfg_default_privset(priv_set_t *privs) priv_union(basic, privs); priv_freeset(basic); - for (strp = default_priv_list; *strp != NULL; strp++) { - if (priv_addset(privs, *strp) != 0) { + for (pnp = plp->pl_default; pnp != NULL; pnp = pnp->pn_next) { + if (priv_addset(privs, pnp->pn_priv) != 0) return (Z_INVAL); - } } + return (Z_OK); } +int +zonecfg_default_privset(priv_set_t *privs) +{ + priv_lists_t *plp; + int ret; + + if ((ret = priv_lists_create(NULL, &plp)) != Z_OK) + return (ret); + ret = get_default_privset(privs, plp); + priv_lists_destroy(plp); + return (ret); +} + void append_priv_token(char *priv, char *str, size_t strlen) { @@ -3465,14 +3609,12 @@ append_priv_token(char *priv, char *str, size_t strlen) */ static int verify_privset(char *privbuf, priv_set_t *privs, char **privname, - boolean_t add_default) + boolean_t add_default, priv_lists_t *plp) { - char *cp; - char *lasts; + priv_node_t *pnp; + char *tmp, *cp, *lasts; size_t len; priv_set_t *mergeset; - const char **strp; - char *tmp; const char *token; /* @@ -3505,14 +3647,15 @@ verify_privset(char *privbuf, priv_set_t *privs, char **privname, * set along with those of the "limitpriv" property. 
*/ len = strlen(privbuf) + sizeof (BASIC_TOKEN) + 2; - for (strp = default_priv_list; *strp != NULL; strp++) - len += strlen(*strp) + 1; + + for (pnp = plp->pl_default; pnp != NULL; pnp = pnp->pn_next) + len += strlen(pnp->pn_priv) + 1; tmp = alloca(len); *tmp = '\0'; append_priv_token(BASIC_TOKEN, tmp, len); - for (strp = default_priv_list; *strp != NULL; strp++) - append_priv_token((char *)*strp, tmp, len); + for (pnp = plp->pl_default; pnp != NULL; pnp = pnp->pn_next) + append_priv_token(pnp->pn_priv, tmp, len); (void) strlcat(tmp, TOKEN_PRIV_STR, len); (void) strlcat(tmp, privbuf, len); } else { @@ -3545,10 +3688,10 @@ verify_privset(char *privbuf, priv_set_t *privs, char **privname, * Next, verify that none of the prohibited zone privileges are * present in the merged privilege set. */ - for (strp = prohibited_priv_list; *strp != NULL; strp++) { - if (priv_ismember(mergeset, *strp)) { + for (pnp = plp->pl_prohibited; pnp != NULL; pnp = pnp->pn_next) { + if (priv_ismember(mergeset, pnp->pn_priv)) { priv_freeset(mergeset); - if ((*privname = strdup(*strp)) == NULL) + if ((*privname = strdup(pnp->pn_priv)) == NULL) return (Z_NOMEM); else return (Z_PRIV_PROHIBITED); @@ -3559,10 +3702,10 @@ verify_privset(char *privbuf, priv_set_t *privs, char **privname, * Finally, verify that all of the required zone privileges are * present in the merged privilege set. 
*/ - for (strp = required_priv_list; *strp != NULL; strp++) { - if (!priv_ismember(mergeset, *strp)) { + for (pnp = plp->pl_required; pnp != NULL; pnp = pnp->pn_next) { + if (!priv_ismember(mergeset, pnp->pn_priv)) { priv_freeset(mergeset); - if ((*privname = strdup(*strp)) == NULL) + if ((*privname = strdup(pnp->pn_priv)) == NULL) return (Z_NOMEM); else return (Z_PRIV_REQUIRED); @@ -3588,23 +3731,27 @@ int zonecfg_get_privset(zone_dochandle_t handle, priv_set_t *privs, char **privname) { - char *cp; - int err; - int limitlen; - char *limitpriv = NULL; + priv_lists_t *plp; + char *cp, *limitpriv = NULL; + int err, limitlen; /* * Attempt to lookup the "limitpriv" property. If it does not * exist or matches the string DEFAULT_TOKEN exactly, then the * default, safe privilege set is returned. */ - err = zonecfg_get_limitpriv(handle, &limitpriv); - if (err != Z_OK) + if ((err = zonecfg_get_limitpriv(handle, &limitpriv)) != Z_OK) + return (err); + + if ((err = priv_lists_create(handle, &plp)) != Z_OK) return (err); + limitlen = strlen(limitpriv); if (limitlen == 0 || strcmp(limitpriv, DEFAULT_TOKEN) == 0) { free(limitpriv); - return (zonecfg_default_privset(privs)); + err = get_default_privset(privs, plp); + priv_lists_destroy(plp); + return (err); } /* @@ -3614,11 +3761,12 @@ zonecfg_get_privset(zone_dochandle_t handle, priv_set_t *privs, cp = strchr(limitpriv, TOKEN_PRIV_CHAR); if (cp != NULL && strncmp(limitpriv, DEFAULT_TOKEN, cp - limitpriv) == 0) - err = verify_privset(cp + 1, privs, privname, B_TRUE); + err = verify_privset(cp + 1, privs, privname, B_TRUE, plp); else - err = verify_privset(limitpriv, privs, privname, B_FALSE); + err = verify_privset(limitpriv, privs, privname, B_FALSE, plp); free(limitpriv); + priv_lists_destroy(plp); return (err); } @@ -3699,6 +3847,46 @@ zone_get_rootpath(char *zone_name, char *rootpath, size_t rp_sz) return (Z_OK); } +int +zone_get_brand(char *zone_name, char *brandname, size_t rp_sz) +{ + int err; + zone_dochandle_t handle; + 
char myzone[MAXNAMELEN]; + int myzoneid = getzoneid(); + + /* + * If we are not in the global zone, then we don't have the zone + * .xml files with the brand name available. Thus, we are going to + * have to ask the kernel for the information. + */ + if (myzoneid != GLOBAL_ZONEID) { + if (zone_getattr(myzoneid, ZONE_ATTR_NAME, myzone, + sizeof (myzone)) < 0) + return (Z_NO_ZONE); + if (strncmp(zone_name, myzone, MAXNAMELEN) != NULL) + return (Z_NO_ZONE); + err = zone_getattr(myzoneid, ZONE_ATTR_BRAND, brandname, rp_sz); + if (err < 0) + return ((errno == EFAULT) ? Z_TOO_BIG : Z_INVAL); + return (Z_OK); + } + + if (strcmp(zone_name, "global") == NULL) { + (void) strlcpy(brandname, NATIVE_BRAND_NAME, rp_sz); + return (0); + } + if ((handle = zonecfg_init_handle()) == NULL) + return (Z_NOMEM); + + err = zonecfg_get_handle((char *)zone_name, handle); + if (err == Z_OK) + err = zonecfg_get_brand(handle, brandname, rp_sz); + + zonecfg_fini_handle(handle); + return (err); +} + /* * Return the appropriate root for the active /dev. 
* For normal zone, the path is $ZONEPATH/root; diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers index 79d3de5d15..a9d59548d3 100644 --- a/usr/src/lib/libzonecfg/common/mapfile-vers +++ b/usr/src/lib/libzonecfg/common/mapfile-vers @@ -86,6 +86,7 @@ SUNWprivate_1.1 { zonecfg_get_attr_uint; zonecfg_get_autoboot; zonecfg_get_bootargs; + zonecfg_get_brand; zonecfg_get_detach_info; zonecfg_getdevent; zonecfg_getdevperment; @@ -107,6 +108,7 @@ SUNWprivate_1.1 { zonecfg_get_snapshot_handle; zonecfg_get_template_handle; zonecfg_get_uuid; + zonecfg_get_xml_handle; zonecfg_get_zonepath; zonecfg_in_alt_root; zonecfg_init_handle; @@ -142,6 +144,7 @@ SUNWprivate_1.1 { zonecfg_setattrent; zonecfg_set_autoboot; zonecfg_set_bootargs; + zonecfg_set_brand; zonecfg_setdevent; zonecfg_setdevperment; zonecfg_setdsent; @@ -163,6 +166,8 @@ SUNWprivate_1.1 { zonecfg_valid_rctl; zonecfg_valid_rctlblk; zonecfg_valid_rctlname; + zonecfg_verify_save; + zone_get_brand; zone_get_devroot; zone_get_id; zone_get_rootpath; diff --git a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 index 46e10f761b..3208af7a79 100644 --- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 +++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 @@ -119,4 +119,5 @@ pool CDATA "" limitpriv CDATA "" bootargs CDATA "" + brand CDATA "" version NMTOKEN #FIXED '1'> diff --git a/usr/src/pkgdefs/Makefile b/usr/src/pkgdefs/Makefile index f26c774f97..95ee614c61 100644 --- a/usr/src/pkgdefs/Makefile +++ b/usr/src/pkgdefs/Makefile @@ -114,6 +114,8 @@ i386_SUBDIRS= \ SUNWgrub \ SUNWgrubS \ SUNWkvm.i \ + SUNWlxr \ + SUNWlxu \ SUNWonmtst.i \ SUNWos86r \ SUNWpsdcr \ @@ -312,6 +314,8 @@ COMMON_SUBDIRS= \ SUNWslpr \ SUNWslpu \ SUNWsmapi \ + SUNWsn1rint \ + SUNWsn1uint \ SUNWsndmr \ SUNWsndmu \ SUNWspnego \ diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index 225c9eba5f..ae04a36b45 100644 --- 
a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -524,6 +524,7 @@ f none usr/include/sys/bofi_impl.h 644 root bin f none usr/include/sys/bootconf.h 644 root bin f none usr/include/sys/bpp_io.h 644 root bin f none usr/include/sys/bootstat.h 644 root bin +f none usr/include/sys/brand.h 644 root bin f none usr/include/sys/buf.h 644 root bin f none usr/include/sys/bufmod.h 644 root bin f none usr/include/sys/bustypes.h 644 root bin diff --git a/usr/src/pkgdefs/SUNWlxr/Makefile b/usr/src/pkgdefs/SUNWlxr/Makefile new file mode 100644 index 0000000000..7883e974b3 --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxr/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#pragma ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +all: $(FILES) depend +install: all pkg + +include ../Makefile.targ + + diff --git a/usr/src/pkgdefs/SUNWlxr/depend b/usr/src/pkgdefs/SUNWlxr/depend new file mode 100644 index 0000000000..386be78755 --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxr/depend @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries +P SUNWzoneu Solaris Zones (Usr) +P SUNWzoner Solaris Zones (Root) diff --git a/usr/src/pkgdefs/SUNWlxr/pkginfo.tmpl b/usr/src/pkgdefs/SUNWlxr/pkginfo.tmpl new file mode 100644 index 0000000000..596febb254 --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxr/pkginfo.tmpl @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. 
+# +PKG="SUNWlxr" +NAME="lx Brand (Root)" +BASEDIR=/ +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +CATEGORY="system" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="root" +DESC="Support for the 'lx' Brand" +VENDOR="Sun Microsystems, Inc." +EMAIL= +SUNW_PKGVERS=1.0 +CLASSES="none" +MAXINST="1000" +HOTLINE="Please contact your local service provider" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="true" +SUNW_PKG_THISZONE="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWlxr/prototype_i386 b/usr/src/pkgdefs/SUNWlxr/prototype_i386 new file mode 100644 index 0000000000..cc667e75b5 --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxr/prototype_i386 @@ -0,0 +1,86 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# lx Brand +# +# This required package information file contains a list of package contents. 
+# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. +# + +# packaging files +i pkginfo +i copyright +i depend + +# lx brand template +d none etc 755 root sys +d none etc/zones 755 root sys +f none etc/zones/SUNWlx.xml 444 root bin + +# The kernel brand module +d none kernel 0755 root sys +d none kernel/brand 0755 root sys +f none kernel/brand/lx_brand 0755 root sys +d none kernel/brand/amd64 0755 root sys +f none kernel/brand/amd64/lx_brand 0755 root sys + +# generic driver dirs +d none kernel/drv 755 root sys +d none kernel/drv/amd64 755 root sys +d none kernel/dtrace/amd64 755 root sys + +# Brand syscall tracing +f none kernel/drv/lx_systrace 755 root sys +f none kernel/drv/lx_systrace.conf 644 root sys +f none kernel/drv/amd64/lx_systrace 755 root sys +d none kernel/dtrace 755 root sys +l none kernel/dtrace/lx_systrace=../../kernel/drv/lx_systrace +l none kernel/dtrace/amd64/lx_systrace=../../../kernel/drv/amd64/lx_systrace + +# Brand drivers +f none kernel/drv/lx_ptm 755 root sys +f none kernel/drv/lx_ptm.conf 644 root sys +f none kernel/drv/amd64/lx_ptm 755 root sys +f none kernel/drv/lx_audio 755 root sys +f none kernel/drv/lx_audio.conf 644 root sys +f none kernel/drv/amd64/lx_audio 755 root sys + +# supporting modules: ldterm and lxprocfs +d none kernel/strmod 0755 root sys +f none kernel/strmod/ldlinux 0755 root sys +d none kernel/strmod/amd64 0755 root sys +f none kernel/strmod/amd64/ldlinux 0755 root sys +d none kernel/fs 0755 root sys +f none kernel/fs/lx_afs 0755 root sys +f none kernel/fs/lx_proc 0755 root sys +d none kernel/fs/amd64 0755 root sys +f none kernel/fs/amd64/lx_afs 0755 root sys +f none kernel/fs/amd64/lx_proc 0755 root sys diff --git a/usr/src/pkgdefs/SUNWlxu/Makefile b/usr/src/pkgdefs/SUNWlxu/Makefile new file mode 100644 index 0000000000..7883e974b3 --- 
/dev/null +++ b/usr/src/pkgdefs/SUNWlxu/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +all: $(FILES) depend +install: all pkg + +include ../Makefile.targ + + diff --git a/usr/src/pkgdefs/SUNWlxu/depend b/usr/src/pkgdefs/SUNWlxu/depend new file mode 100644 index 0000000000..edce7939aa --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxu/depend @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries +P SUNWzoneu Solaris Zones (Usr) +P SUNWzoner Solaris Zones (Root) +P SUNWlxr lx Brand (Root) diff --git a/usr/src/pkgdefs/SUNWlxu/pkginfo.tmpl b/usr/src/pkgdefs/SUNWlxu/pkginfo.tmpl new file mode 100644 index 0000000000..99170d7dfd --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxu/pkginfo.tmpl @@ -0,0 +1,56 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. +# +PKG="SUNWlxu" +NAME="lx Brand (Usr)" +BASEDIR=/ +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +CATEGORY="system" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="usr" +DESC="Support for the 'lx' Brand" +VENDOR="Sun Microsystems, Inc." +EMAIL= +SUNW_PKGVERS=1.0 +CLASSES="none" +MAXINST="1000" +HOTLINE="Please contact your local service provider" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="true" +SUNW_PKG_THISZONE="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWlxu/prototype_i386 b/usr/src/pkgdefs/SUNWlxu/prototype_i386 new file mode 100644 index 0000000000..dc359f9515 --- /dev/null +++ b/usr/src/pkgdefs/SUNWlxu/prototype_i386 @@ -0,0 +1,92 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "@(#)prototype_i386 1.23 06/06/22 SMI" +# + +# +# lx Brand +# +# This required package information file contains a list of package contents. +# The 'pkgmk' command uses this file to identify the contents of a package +# and their location on the development machine when building the package. +# Can be created via a text editor or through use of the 'pkgproto' command. +# + +# packaging files +i pkginfo +i copyright +i depend + +# The user library +d none usr 0755 root sys +d none usr/lib 0755 root bin +f none usr/lib/lx_brand.so.1 0755 root bin + +# brand zone and debugging support +d none usr/lib/brand 755 root bin +d none usr/lib/brand/lx 755 root bin +d none usr/lib/brand/lx/amd64 755 root bin +s none usr/lib/brand/lx/64=amd64 755 root bin +f none usr/lib/brand/lx/amd64/lx_librtld_db.so.1 755 root bin +f none usr/lib/brand/lx/config.xml 444 root bin +d none usr/lib/brand/lx/distros 755 root bin +f none usr/lib/brand/lx/distros/centos35.distro 444 root bin +f none usr/lib/brand/lx/distros/centos36.distro 444 root bin +f none usr/lib/brand/lx/distros/centos37.distro 444 root bin +f none usr/lib/brand/lx/distros/rhel35.distro 444 root bin +f none usr/lib/brand/lx/distros/rhel36.distro 444 root bin +f none usr/lib/brand/lx/distros/rhel37.distro 444 root bin +f none usr/lib/brand/lx/distros/rhel_centos_common 444 root bin +f none usr/lib/brand/lx/lx_distro_install 755 root bin +f none usr/lib/brand/lx/lx_init_zone 755 root bin +f none usr/lib/brand/lx/lx_install 755 root bin +f none usr/lib/brand/lx/lx_librtld_db.so.1 755 root bin +f none usr/lib/brand/lx/lx_support 755 root bin +f none usr/lib/brand/lx/platform.xml 444 root bin + +# NFS support commands and libraries +f none usr/lib/brand/lx/etc_netconfig 444 root sys +f none usr/lib/brand/lx/etc_default_nfs 444 
root sys +f none usr/lib/brand/lx/lx_lockd 755 root bin +f none usr/lib/brand/lx/lx_native 755 root bin +f none usr/lib/brand/lx/lx_statd 755 root bin +f none usr/lib/brand/lx/lx_thunk 755 root bin +f none usr/lib/brand/lx/lx_thunk.so.1 755 root bin +f none usr/lib/brand/lx/amd64/lx_thunk.so.1 755 root bin +f none usr/lib/brand/lx/lx_nametoaddr.so.1 755 root bin +f none usr/lib/brand/lx/amd64/lx_nametoaddr.so.1 755 root bin + +# devfsadm link module +d none usr/lib/devfsadm 755 root sys +d none usr/lib/devfsadm/linkmod 755 root sys +f none usr/lib/devfsadm/linkmod/SUNW_lx_link_i386.so 755 root sys diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_com b/usr/src/pkgdefs/SUNWmdb/prototype_com index 2779f8c1cd..a3a4ec37de 100644 --- a/usr/src/pkgdefs/SUNWmdb/prototype_com +++ b/usr/src/pkgdefs/SUNWmdb/prototype_com @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# #pragma ident "%Z%%M% %I% %E% SMI" @@ -47,6 +46,7 @@ f none usr/lib/mdb/proc/ld.so 555 root sys f none usr/lib/mdb/proc/libavl.so 555 root sys f none usr/lib/mdb/proc/libc.so 555 root sys f none usr/lib/mdb/proc/libnvpair.so 555 root sys +f none usr/lib/mdb/proc/libproc.so 555 root sys f none usr/lib/mdb/proc/libsysevent.so 555 root sys f none usr/lib/mdb/proc/libumem.so 555 root sys f none usr/lib/mdb/proc/libuutil.so 555 root sys diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_i386 b/usr/src/pkgdefs/SUNWmdb/prototype_i386 index cdda764aff..ff996ca924 100644 --- a/usr/src/pkgdefs/SUNWmdb/prototype_i386 +++ b/usr/src/pkgdefs/SUNWmdb/prototype_i386 @@ -101,6 +101,7 @@ d none usr/lib/mdb/proc/amd64 755 root bin f none usr/lib/mdb/proc/amd64/libavl.so 555 root sys f none usr/lib/mdb/proc/amd64/libc.so 555 root sys f none usr/lib/mdb/proc/amd64/libnvpair.so 555 root sys +f none usr/lib/mdb/proc/amd64/libproc.so 555 root sys f none usr/lib/mdb/proc/amd64/libsysevent.so 555 root sys f none usr/lib/mdb/proc/amd64/libumem.so 555 root sys f none usr/lib/mdb/proc/amd64/libuutil.so 555 root sys diff --git a/usr/src/pkgdefs/SUNWmdb/prototype_sparc b/usr/src/pkgdefs/SUNWmdb/prototype_sparc index b18d3d40c8..f597cae61d 100644 --- a/usr/src/pkgdefs/SUNWmdb/prototype_sparc +++ b/usr/src/pkgdefs/SUNWmdb/prototype_sparc @@ -73,6 +73,7 @@ d none usr/lib/mdb/proc/sparcv9 755 root sys f none usr/lib/mdb/proc/sparcv9/libavl.so 555 root sys f none usr/lib/mdb/proc/sparcv9/libc.so 555 root sys f none usr/lib/mdb/proc/sparcv9/libnvpair.so 555 root sys +f none usr/lib/mdb/proc/sparcv9/libproc.so 555 root sys f none usr/lib/mdb/proc/sparcv9/libsysevent.so 555 root sys f none usr/lib/mdb/proc/sparcv9/libumem.so 555 root sys f none usr/lib/mdb/proc/sparcv9/libuutil.so 555 root sys diff --git a/usr/src/pkgdefs/SUNWsn1rint/Makefile b/usr/src/pkgdefs/SUNWsn1rint/Makefile new file mode 100644 index 0000000000..7883e974b3 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/Makefile @@ -0,0 +1,35 @@ +# 
+# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +all: $(FILES) depend +install: all pkg + +include ../Makefile.targ + + diff --git a/usr/src/pkgdefs/SUNWsn1rint/depend b/usr/src/pkgdefs/SUNWsn1rint/depend new file mode 100644 index 0000000000..386be78755 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/depend @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries +P SUNWzoneu Solaris Zones (Usr) +P SUNWzoner Solaris Zones (Root) diff --git a/usr/src/pkgdefs/SUNWsn1rint/pkginfo.tmpl b/usr/src/pkgdefs/SUNWsn1rint/pkginfo.tmpl new file mode 100644 index 0000000000..76a5c4a381 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/pkginfo.tmpl @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. +# +PKG="SUNWsn1rint" +NAME="Fake Solaris N-1 Brand (Root)" +BASEDIR=/ +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +CATEGORY="system" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="root" +DESC="Support for the Fake Solaris N-1 Brand" +VENDOR="Sun Microsystems, Inc." +EMAIL= +SUNW_PKGVERS=1.0 +CLASSES="none" +MAXINST="1000" +HOTLINE="Please contact your local service provider" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWsn1rint/prototype_com b/usr/src/pkgdefs/SUNWsn1rint/prototype_com new file mode 100644 index 0000000000..3794e49830 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/prototype_com @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# Fake Solaris N-1 Brand +# + +i pkginfo +i copyright +i depend + +# Sn-1 brand template +d none etc 755 root sys +d none etc/zones 755 root sys +f none etc/zones/SUNWsn1.xml 444 root bin diff --git a/usr/src/pkgdefs/SUNWsn1rint/prototype_i386 b/usr/src/pkgdefs/SUNWsn1rint/prototype_i386 new file mode 100644 index 0000000000..1202a15661 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/prototype_i386 @@ -0,0 +1,40 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# Fake Solaris N-1 Brand +# + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com + +# the kernel brand module +d none kernel 0755 root sys +d none kernel/brand 0755 root sys +f none kernel/brand/sn1_brand 0755 root sys +d none kernel/brand/amd64 0755 root sys +f none kernel/brand/amd64/sn1_brand 0755 root sys diff --git a/usr/src/pkgdefs/SUNWsn1rint/prototype_sparc b/usr/src/pkgdefs/SUNWsn1rint/prototype_sparc new file mode 100644 index 0000000000..3c8fd04ce9 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1rint/prototype_sparc @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# Fake Solaris N-1 Brand +# + +# +# Include ISA independent files (prototype_com) +# +!include prototype_com + +# the kernel brand module +d none kernel 0755 root sys +d none kernel/brand 0755 root sys +d none kernel/brand/sparcv9 0755 root sys +f none kernel/brand/sparcv9/sn1_brand 0755 root sys diff --git a/usr/src/pkgdefs/SUNWsn1uint/Makefile b/usr/src/pkgdefs/SUNWsn1uint/Makefile new file mode 100644 index 0000000000..7883e974b3 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/Makefile @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# + +include ../Makefile.com + +all: $(FILES) depend +install: all pkg + +include ../Makefile.targ + + diff --git a/usr/src/pkgdefs/SUNWsn1uint/depend b/usr/src/pkgdefs/SUNWsn1uint/depend new file mode 100644 index 0000000000..eb09c5aa3d --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/depend @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +P SUNWcar Core Architecture, (Root) +P SUNWcakr Core Solaris Kernel Architecture (Root) +P SUNWkvm Core Architecture, (Kvm) +P SUNWcsr Core Solaris, (Root) +P SUNWckr Core Solaris Kernel (Root) +P SUNWcnetr Core Solaris Network Infrastructure (Root) +P SUNWcsu Core Solaris, (Usr) +P SUNWcsd Core Solaris Devices +P SUNWcsl Core Solaris Libraries +P SUNWzoneu Solaris Zones (Usr) +P SUNWzoner Solaris Zones (Root) +P SUNWsn1rint Fake Solaris N-1 Brand (Root) diff --git a/usr/src/pkgdefs/SUNWsn1uint/pkginfo.tmpl b/usr/src/pkgdefs/SUNWsn1uint/pkginfo.tmpl new file mode 100644 index 0000000000..b8613db561 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/pkginfo.tmpl @@ -0,0 +1,55 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This required package information file describes characteristics of the +# package, such as package abbreviation, full package name, package version, +# and package architecture. +# +PKG="SUNWsn1uint" +NAME="Fake Solaris N-1 Brand (Usr)" +BASEDIR=/ +ARCH="ISA" +VERSION="ONVERS,REV=0.0.0" +CATEGORY="system" +SUNW_PRODNAME="SunOS" +SUNW_PRODVERS="RELEASE/VERSION" +SUNW_PKGTYPE="usr" +DESC="Support for the Fake Solaris N-1 Brand" +VENDOR="Sun Microsystems, Inc." +EMAIL= +SUNW_PKGVERS=1.0 +CLASSES="none" +MAXINST="1000" +HOTLINE="Please contact your local service provider" +SUNW_PKG_ALLZONES="true" +SUNW_PKG_HOLLOW="false" +#VSTOCK="<reserved by Release Engineering for package part #>" +#ISTATES="<developer defined>" +#RSTATES='<developer defined>' +#ULIMIT="<developer defined>" +#ORDER="<developer defined>" +#PSTAMP="<developer defined>" +#INTONLY="<developer defined>" diff --git a/usr/src/pkgdefs/SUNWsn1uint/prototype_com b/usr/src/pkgdefs/SUNWsn1uint/prototype_com new file mode 100644 index 0000000000..f5515eff44 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/prototype_com @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" + +i pkginfo +i copyright +i depend + +d none usr 755 root sys +d none usr/lib 755 root bin +d none usr/lib/brand 755 root bin +d none usr/lib/brand/sn1 755 root sys +f none usr/lib/brand/sn1/config.xml 444 root bin +f none usr/lib/brand/sn1/platform.xml 444 root bin +f none usr/lib/brand/sn1/sn1_boot 755 root bin diff --git a/usr/src/pkgdefs/SUNWsn1uint/prototype_i386 b/usr/src/pkgdefs/SUNWsn1uint/prototype_i386 new file mode 100644 index 0000000000..6e5a79ddea --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/prototype_i386 @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# Fake Solaris N-1 Brand +# + +!include prototype_com + +# The user library +f none usr/lib/sn1_brand.so.1 0755 root bin +d none usr/lib/amd64 0755 root bin +f none usr/lib/amd64/sn1_brand.so.1 0755 root bin diff --git a/usr/src/pkgdefs/SUNWsn1uint/prototype_sparc b/usr/src/pkgdefs/SUNWsn1uint/prototype_sparc new file mode 100644 index 0000000000..64dc2ed860 --- /dev/null +++ b/usr/src/pkgdefs/SUNWsn1uint/prototype_sparc @@ -0,0 +1,35 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# +# Fake Solaris N-1 Brand +# + +!include prototype_com + +# The user library +f none usr/lib/sn1_brand.so.1 0755 root bin +d none usr/lib/sparcv9 0755 root bin +f none usr/lib/sparcv9/sn1_brand.so.1 0755 root bin diff --git a/usr/src/pkgdefs/SUNWzoneint/prototype_com b/usr/src/pkgdefs/SUNWzoneint/prototype_com index 432d948fef..0af97217e1 100644 --- a/usr/src/pkgdefs/SUNWzoneint/prototype_com +++ b/usr/src/pkgdefs/SUNWzoneint/prototype_com @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -47,8 +46,12 @@ i depend # d none usr 755 root sys d none usr/include 755 root bin +f none usr/include/libbrand.h 644 root bin f none usr/include/libzonecfg.h 644 root bin d none usr/lib 755 root bin +s none usr/lib/libbrand.so=./libbrand.so.1 s none usr/lib/libzonecfg.so=./libzonecfg.so.1 +f none usr/lib/llib-lbrand 644 root bin +f none usr/lib/llib-lbrand.ln 644 root bin f none usr/lib/llib-lzonecfg 644 root bin f none usr/lib/llib-lzonecfg.ln 644 root bin diff --git a/usr/src/pkgdefs/SUNWzoneint/prototype_i386 b/usr/src/pkgdefs/SUNWzoneint/prototype_i386 index 6aeeb57a3f..caf8ca6236 100644 --- a/usr/src/pkgdefs/SUNWzoneint/prototype_i386 +++ b/usr/src/pkgdefs/SUNWzoneint/prototype_i386 @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -48,5 +47,7 @@ # SUNWzoneint # d none usr/lib/amd64 755 root bin +s none usr/lib/amd64/libbrand.so=./libbrand.so.1 +f none usr/lib/amd64/llib-lbrand.ln 644 root bin s none usr/lib/amd64/libzonecfg.so=./libzonecfg.so.1 f none usr/lib/amd64/llib-lzonecfg.ln 644 root bin diff --git a/usr/src/pkgdefs/SUNWzoneint/prototype_sparc b/usr/src/pkgdefs/SUNWzoneint/prototype_sparc index e3da6d7ec9..5145bde05a 100644 --- a/usr/src/pkgdefs/SUNWzoneint/prototype_sparc +++ b/usr/src/pkgdefs/SUNWzoneint/prototype_sparc @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -48,5 +47,7 @@ # SUNWzoneint # d none usr/lib/sparcv9 755 root bin +s none usr/lib/sparcv9/libbrand.so=./libbrand.so.1 +f none usr/lib/sparcv9/llib-lbrand.ln 644 root bin s none usr/lib/sparcv9/libzonecfg.so=./libzonecfg.so.1 f none usr/lib/sparcv9/llib-lzonecfg.ln 644 root bin diff --git a/usr/src/pkgdefs/SUNWzoner/prototype_com b/usr/src/pkgdefs/SUNWzoner/prototype_com index e2c70b4240..009de7fb9f 100644 --- a/usr/src/pkgdefs/SUNWzoner/prototype_com +++ b/usr/src/pkgdefs/SUNWzoner/prototype_com @@ -50,9 +50,9 @@ i r.manifest # d none etc 755 root sys d none etc/zones 755 root sys -f none etc/zones/SUNWblank.xml 444 root bin -f none etc/zones/SUNWdefault.xml 444 root bin e preserve etc/zones/index 644 root sys +f none etc/zones/SUNWdefault.xml 444 root bin +f none etc/zones/SUNWblank.xml 444 root bin d none lib 755 root bin d none lib/svc 0755 root bin d none lib/svc/method 0755 root bin diff --git a/usr/src/pkgdefs/SUNWzoneu/prototype_com b/usr/src/pkgdefs/SUNWzoneu/prototype_com index e01941441c..358607fd25 100644 --- a/usr/src/pkgdefs/SUNWzoneu/prototype_com +++ b/usr/src/pkgdefs/SUNWzoneu/prototype_com @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -51,6 +50,12 @@ s none usr/bin/zonename=../../sbin/zonename d none usr/kernel 755 root sys d none usr/kernel/drv 755 root sys d none usr/lib 755 root bin +d none usr/lib/brand 755 root bin +d none usr/lib/brand/native 755 root sys +f none usr/lib/brand/native/config.xml 444 root bin +f none usr/lib/brand/native/platform.xml 444 root bin +f none usr/lib/brand/native/postclone 755 root bin +f none usr/lib/libbrand.so.1 755 root bin f none usr/lib/libzonecfg.so.1 755 root bin d none usr/lib/zones 755 root bin f none usr/lib/zones/zoneadmd 555 root bin @@ -62,4 +67,6 @@ d none usr/share 755 root sys d none usr/share/lib 755 root sys d none usr/share/lib/xml 755 root sys d none usr/share/lib/xml/dtd 755 root sys +f none usr/share/lib/xml/dtd/brand.dtd.1 644 root bin +f none usr/share/lib/xml/dtd/zone_platform.dtd.1 644 root bin f none usr/share/lib/xml/dtd/zonecfg.dtd.1 444 root bin diff --git a/usr/src/pkgdefs/SUNWzoneu/prototype_i386 b/usr/src/pkgdefs/SUNWzoneu/prototype_i386 index 31cc3632cc..e08985fb8d 100644 --- a/usr/src/pkgdefs/SUNWzoneu/prototype_i386 +++ b/usr/src/pkgdefs/SUNWzoneu/prototype_i386 @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -51,4 +50,5 @@ f none usr/kernel/drv/zcons 755 root sys d none usr/kernel/drv/amd64 755 root sys f none usr/kernel/drv/amd64/zcons 755 root sys d none usr/lib/amd64 755 root bin +f none usr/lib/amd64/libbrand.so.1 755 root bin f none usr/lib/amd64/libzonecfg.so.1 755 root bin diff --git a/usr/src/pkgdefs/SUNWzoneu/prototype_sparc b/usr/src/pkgdefs/SUNWzoneu/prototype_sparc index 7352586b25..6c7a390e08 100644 --- a/usr/src/pkgdefs/SUNWzoneu/prototype_sparc +++ b/usr/src/pkgdefs/SUNWzoneu/prototype_sparc @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -50,4 +49,5 @@ d none usr/kernel/drv/sparcv9 755 root sys f none usr/kernel/drv/sparcv9/zcons 755 root sys d none usr/lib/sparcv9 755 root bin +f none usr/lib/sparcv9/libbrand.so.1 755 root bin f none usr/lib/sparcv9/libzonecfg.so.1 755 root bin diff --git a/usr/src/pkgdefs/common_files/i.minorperm_i386 b/usr/src/pkgdefs/common_files/i.minorperm_i386 index 71d802e5ff..34df10b4ca 100644 --- a/usr/src/pkgdefs/common_files/i.minorperm_i386 +++ b/usr/src/pkgdefs/common_files/i.minorperm_i386 @@ -62,7 +62,7 @@ # NOTE: this list should also contain entries for nodes that previously # were not in /etc/minor_perm (which means the default mode of 600, # owner/group == root/sys), but now have an entry -# +# make_chattr_list() { @@ -113,15 +113,15 @@ EOF # The fields are: # # <device>:<minor> <optional list of logical devices to be deleted> -# +# make_delete_list() { cat > /tmp/delete.$$ << EOF rip:rawip consfb:consfb -clone:el -clone:elx +clone:el +clone:elx clone:sle clone:sie clone:sp @@ -233,6 +233,8 @@ fbt:fbt profile:profile sdt:sdt systrace:systrace +lx_ptm:lx_ptmajor +lx_systrace:* EOF } diff --git a/usr/src/tools/scripts/acr.sh b/usr/src/tools/scripts/acr.sh index 41f3ab087e..22d5a20e92 100644 --- a/usr/src/tools/scripts/acr.sh +++ b/usr/src/tools/scripts/acr.sh @@ -327,7 +327,7 @@ acr_a_root() { # if [ $bfu_alt_reality = "false" ]; then zoneadm list -pi | nawk -F: '{ - if ($3 == "installed") { + if ($3 == "installed" && $6 != "lx") { printf "%s %s\n", $2, $4 } }' > $local_zone_info_file diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh index 603231e1cf..8e30a7c410 100644 --- a/usr/src/tools/scripts/bfu.sh +++ b/usr/src/tools/scripts/bfu.sh @@ -6552,8 +6552,13 @@ done if [ -s $local_zone_info_file ]; then cat $local_zone_info_file | while read zone zonepath; do - print "\nNow for zone $zone..." 
- mondo_loop $zonepath/root $zone + # + # Ignore linux zones + # + if [ -z "$(grep "brand=\"lx\"" /etc/zones/$zone.xml)" ]; then + print "\nNow for zone $zone..." + mondo_loop $zonepath/root $zone + fi done # diff --git a/usr/src/uts/Makefile.targ b/usr/src/uts/Makefile.targ index ac467cb4ed..e6bb07bb73 100644 --- a/usr/src/uts/Makefile.targ +++ b/usr/src/uts/Makefile.targ @@ -179,6 +179,9 @@ $(ROOT_MISC_DIR)/%: $(OBJS_DIR)/% $(ROOT_MISC_DIR) FRC $(ROOT_DACF_DIR)/%: $(OBJS_DIR)/% $(ROOT_DACF_DIR) FRC $(INS.file) +$(ROOT_BRAND_DIR)/%: $(OBJS_DIR)/% $(ROOT_BRAND_DIR) FRC + $(INS.file) + $(ROOT_CRYPTO_DIR)/%: $(OBJS_DIR)/% $(ROOT_CRYPTO_DIR) FRC $(INS.file) diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts index 609c329580..5cb5d61fe0 100644 --- a/usr/src/uts/Makefile.uts +++ b/usr/src/uts/Makefile.uts @@ -438,6 +438,7 @@ SUBDIR64 = $(SUBDIR64_$(MACH)) ROOT_MOD_DIR = $(ROOT)/kernel ROOT_KERN_DIR_32 = $(ROOT_MOD_DIR) +ROOT_BRAND_DIR_32 = $(ROOT_MOD_DIR)/brand ROOT_DRV_DIR_32 = $(ROOT_MOD_DIR)/drv ROOT_DTRACE_DIR_32 = $(ROOT_MOD_DIR)/dtrace ROOT_EXEC_DIR_32 = $(ROOT_MOD_DIR)/exec @@ -459,6 +460,7 @@ ROOT_MAC_DIR_32 = $(ROOT_MOD_DIR)/mac ROOT_DEVNAME_DIR_32 = $(ROOT_MOD_DIR)/devname ROOT_KERN_DIR_64 = $(ROOT_MOD_DIR)/$(SUBDIR64) +ROOT_BRAND_DIR_64 = $(ROOT_MOD_DIR)/brand/$(SUBDIR64) ROOT_DRV_DIR_64 = $(ROOT_MOD_DIR)/drv/$(SUBDIR64) ROOT_DTRACE_DIR_64 = $(ROOT_MOD_DIR)/dtrace/$(SUBDIR64) ROOT_EXEC_DIR_64 = $(ROOT_MOD_DIR)/exec/$(SUBDIR64) @@ -480,6 +482,7 @@ ROOT_MAC_DIR_64 = $(ROOT_MOD_DIR)/mac/$(SUBDIR64) ROOT_DEVNAME_DIR_64 = $(ROOT_MOD_DIR)/devname/$(SUBDIR64) ROOT_KERN_DIR = $(ROOT_KERN_DIR_$(CLASS)) +ROOT_BRAND_DIR = $(ROOT_BRAND_DIR_$(CLASS)) ROOT_DRV_DIR = $(ROOT_DRV_DIR_$(CLASS)) ROOT_DTRACE_DIR = $(ROOT_DTRACE_DIR_$(CLASS)) ROOT_EXEC_DIR = $(ROOT_EXEC_DIR_$(CLASS)) @@ -500,8 +503,8 @@ ROOT_CRYPTO_DIR = $(ROOT_CRYPTO_DIR_$(CLASS)) ROOT_MAC_DIR = $(ROOT_MAC_DIR_$(CLASS)) ROOT_DEVNAME_DIR = $(ROOT_DEVNAME_DIR_$(CLASS)) -ROOT_MOD_DIRS_32 = 
$(ROOT_DRV_DIR_32) $(ROOT_EXEC_DIR_32) -ROOT_MOD_DIRS_32 += $(ROOT_DTRACE_DIR_32) +ROOT_MOD_DIRS_32 = $(ROOT_BRAND_DIR_32) $(ROOT_DRV_DIR_32) +ROOT_MOD_DIRS_32 += $(ROOT_EXEC_DIR_32) $(ROOT_DTRACE_DIR_32) ROOT_MOD_DIRS_32 += $(ROOT_FS_DIR_32) $(ROOT_SCHED_DIR_32) ROOT_MOD_DIRS_32 += $(ROOT_STRMOD_DIR_32) $(ROOT_SYS_DIR_32) ROOT_MOD_DIRS_32 += $(ROOT_IPP_DIR_32) diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index b022fcd0c9..f0203dfeb9 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -41,6 +41,7 @@ sparc_CORE_OBJS += COMMON_CORE_OBJS += \ atomic.o \ bp_map.o \ + brand.o \ chip.o \ cpu.o \ cpu_intr.o \ @@ -78,6 +79,7 @@ GENUNIX_OBJS += \ bio.o \ bitmap.o \ blabel.o \ + brandsys.o \ callb.o \ callout.o \ chdir.o \ @@ -318,6 +320,7 @@ GENUNIX_OBJS += \ urw.o \ utime.o \ utssys.o \ + uucopy.o \ vfs.o \ vfs_conf.o \ vmem.o \ @@ -360,6 +363,8 @@ PROFILE_OBJS += profile.o SYSTRACE_OBJS += systrace.o +LX_SYSTRACE_OBJS += lx_systrace.o + LOCKSTAT_OBJS += lockstat.o FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o @@ -397,6 +402,10 @@ PTSL_OBJS += tty_pts.o PTM_OBJS += ptm.o +LX_PTM_OBJS += lx_ptm.o + +LX_AUDIO_OBJS += lx_audio.o + PTS_OBJS += pts.o PTY_OBJS += ptms_conf.o @@ -937,6 +946,8 @@ DEDUMP_OBJS += dedump.o DRCOMPAT_OBJS += drcompat.o +LDLINUX_OBJS += ldlinux.o + LDTERM_OBJS += ldterm.o uwidth.o PCKT_OBJS += pckt.o diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index 69e32b7ee5..27b347c937 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -70,6 +70,10 @@ $(OBJS_DIR)/%.o: $(COMMONBASE)/avl/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/sn1/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/common/c2/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -911,6 +915,9 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/acl/%.c 
$(LINTS_DIR)/%.ln: $(COMMONBASE)/avl/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/sn1/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/common/c2/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c new file mode 100644 index 0000000000..ecd4e8e44d --- /dev/null +++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c @@ -0,0 +1,1558 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <fs/fs_subr.h> +#include <sys/atomic.h> +#include <sys/cmn_err.h> +#include <sys/dirent.h> +#include <sys/fs/fifonode.h> +#include <sys/modctl.h> +#include <sys/mount.h> +#include <sys/policy.h> +#include <sys/sunddi.h> + +#include <sys/sysmacros.h> +#include <sys/vfs.h> + +#include <sys/lx_autofs_impl.h> + +/* + * External functions + */ +extern uintptr_t space_fetch(char *key); +extern int space_store(char *key, uintptr_t ptr); + +/* + * Globals + */ +static vfsops_t *lx_autofs_vfsops; +static vnodeops_t *lx_autofs_vn_ops = NULL; +static int lx_autofs_fstype; +static major_t lx_autofs_major; +static minor_t lx_autofs_minor = 0; + +/* + * Support functions + */ +static void +i_strfree(char *str) +{ + kmem_free(str, strlen(str) + 1); +} + +static char * +i_strdup(char *str) +{ + int n = strlen(str); + char *ptr = kmem_alloc(n + 1, KM_SLEEP); + bcopy(str, ptr, n + 1); + return (ptr); +} + +static int +i_str_to_int(char *str, int *val) +{ + long res; + + if (str == NULL) + return (-1); + + if ((ddi_strtol(str, NULL, 10, &res) != 0) || + (res < INT_MIN) || (res > INT_MAX)) + return (-1); + + *val = res; + return (0); +} + +static void +i_stack_init(list_t *lp) +{ + list_create(lp, + sizeof (stack_elem_t), offsetof(stack_elem_t, se_list)); +} + +static void +i_stack_fini(list_t *lp) +{ + ASSERT(list_head(lp) == NULL); + list_destroy(lp); +} + +static void +i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3) +{ + stack_elem_t *se; + + se = kmem_alloc(sizeof (*se), KM_SLEEP); + se->se_ptr1 = ptr1; + se->se_ptr2 = ptr2; + se->se_ptr3 = ptr3; + list_insert_head(lp, se); +} + +static int +i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3) +{ + stack_elem_t *se; + + if ((se = list_head(lp)) == NULL) + return (-1); + list_remove(lp, se); + if (ptr1 != NULL) + *ptr1 = se->se_ptr1; + if (ptr2 != NULL) + *ptr2 = se->se_ptr2; + if (ptr3 != NULL) + *ptr3 = se->se_ptr3; + kmem_free(se, sizeof 
(*se)); + return (0); +} + +static vnode_t * +fifo_peer_vp(vnode_t *vp) +{ + fifonode_t *fnp = VTOF(vp); + fifonode_t *fn_dest = fnp->fn_dest; + return (FTOV(fn_dest)); +} + +static vnode_t * +i_vn_alloc(vfs_t *vfsp, vnode_t *uvp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *vp, *vp_old; + + /* Allocate a new vnode structure in case we need it. */ + vp = vn_alloc(KM_SLEEP); + vn_setops(vp, lx_autofs_vn_ops); + VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev); + vp->v_data = uvp; + ASSERT(vp->v_count == 1); + + /* + * Take a hold on the vfs structure. This is how unmount will + * determine if there are any active vnodes in the file system. + */ + VFS_HOLD(vfsp); + + /* + * Check if we already have a vnode allocated for this underlying + * vnode_t. + */ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_vn_hash, + (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) { + + /* + * Didn't find an existing node. + * Add this node to the hash and return. + */ + VERIFY(mod_hash_insert(data->lav_vn_hash, + (mod_hash_key_t)uvp, + (mod_hash_val_t)vp) == 0); + mutex_exit(&data->lav_lock); + return (vp); + } + + /* Get a hold on the existing vnode and free up the one we allocated. */ + VN_HOLD(vp_old); + mutex_exit(&data->lav_lock); + + /* Free up the new vnode we allocated. */ + VN_RELE(uvp); + VFS_RELE(vfsp); + vn_invalid(vp); + vn_free(vp); + + return (vp_old); +} + +static void +i_vn_free(vnode_t *vp) +{ + vfs_t *vfsp = vp->v_vfsp; + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *uvp = vp->v_data; + vnode_t *vp_tmp; + + ASSERT(MUTEX_HELD((&data->lav_lock))); + ASSERT(MUTEX_HELD((&vp->v_lock))); + + ASSERT(vp->v_count == 0); + + /* We're about to free this vnode so take it out of the hash. */ + (void) mod_hash_remove(data->lav_vn_hash, + (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp); + + /* + * No one else can lookup this vnode any more so there's no need + * to hold locks. 
+ */ + mutex_exit(&data->lav_lock); + mutex_exit(&vp->v_lock); + + /* Release the underlying vnode. */ + VN_RELE(uvp); + VFS_RELE(vfsp); + vn_invalid(vp); + vn_free(vp); +} + +static lx_autofs_lookup_req_t * +i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm) +{ + lx_autofs_lookup_req_t *lalr, *lalr_dup; + + /* Pre-allocate a new automounter request before grabbing locks. */ + lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP); + mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL); + lalr->lalr_ref = 1; + lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION; + + /* Assign a unique id for this request. */ + lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids); + + /* + * The token expected by the linux automount is the name of + * the directory entry to look up. (And not the entire + * path that is being accessed.) + */ + lalr->lalr_pkt.lap_name_len = strlen(nm); + if (lalr->lalr_pkt.lap_name_len > + (sizeof (lalr->lalr_pkt.lap_name) - 1)) { + zcmn_err(getzoneid(), CE_NOTE, + "invalid autofs lookup: \"%s\"", nm); + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); + return (NULL); + } + (void) strlcpy(lalr->lalr_pkt.lap_name, nm, + sizeof (lalr->lalr_pkt.lap_name)); + + /* Check for an outstanding request for this path. */ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_path_hash, + (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) { + /* + * There's already an outstanding request for this + * path so we don't need a new one. + */ + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); + lalr = lalr_dup; + + /* Bump the ref count on the old request. */ + atomic_add_int(&lalr->lalr_ref, 1); + + *dup_request = 1; + } else { + /* Add it to the hashes. 
*/ + VERIFY(mod_hash_insert(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)lalr) == 0); + VERIFY(mod_hash_insert(data->lav_path_hash, + (mod_hash_key_t)i_strdup(nm), + (mod_hash_val_t)lalr) == 0); + + *dup_request = 0; + } + mutex_exit(&data->lav_lock); + + return (lalr); +} + +static lx_autofs_lookup_req_t * +i_lalr_find(lx_autofs_vfs_t *data, int id) +{ + lx_autofs_lookup_req_t *lalr; + + /* Check for an outstanding request for this id. */ + mutex_enter(&data->lav_lock); + if (mod_hash_find(data->lav_id_hash, (mod_hash_key_t)(uintptr_t)id, + (mod_hash_val_t *)&lalr) != 0) { + mutex_exit(&data->lav_lock); + return (NULL); + } + atomic_add_int(&lalr->lalr_ref, 1); + mutex_exit(&data->lav_lock); + return (lalr); +} + +static void +i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + lx_autofs_lookup_req_t *lalr_tmp; + + /* Remove this request from the hashes so no one can look it up. */ + mutex_enter(&data->lav_lock); + (void) mod_hash_remove(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)&lalr_tmp); + (void) mod_hash_remove(data->lav_path_hash, + (mod_hash_key_t)lalr->lalr_pkt.lap_name, + (mod_hash_val_t)&lalr_tmp); + mutex_exit(&data->lav_lock); + + /* Mark this request as complete and wake up anyone waiting on it. */ + mutex_enter(&lalr->lalr_lock); + lalr->lalr_complete = 1; + cv_broadcast(&lalr->lalr_cv); + mutex_exit(&lalr->lalr_lock); +} + +static void +i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + ASSERT(!MUTEX_HELD(&lalr->lalr_lock)); + if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) + return; + ASSERT(lalr->lalr_ref == 0); + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); +} + +static void +i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr) +{ + lx_autofs_lookup_req_t *lalr_tmp; + + /* + * This is a little tricky. We're aborting the wait for this + * request. 
So if anyone else is waiting for this request we + * can't free it, but if no one else is waiting for the request + * we should free it. + */ + mutex_enter(&data->lav_lock); + if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) { + mutex_exit(&data->lav_lock); + return; + } + ASSERT(lalr->lalr_ref == 0); + + /* Remove this request from the hashes so no one can look it up. */ + (void) mod_hash_remove(data->lav_id_hash, + (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id, + (mod_hash_val_t)&lalr_tmp); + (void) mod_hash_remove(data->lav_path_hash, + (mod_hash_key_t)lalr->lalr_pkt.lap_name, + (mod_hash_val_t)&lalr_tmp); + mutex_exit(&data->lav_lock); + + /* It's ok to free this now because the ref count was zero. */ + id_free(data->lav_ids, lalr->lalr_pkt.lap_id); + kmem_free(lalr, sizeof (*lalr)); +} + +static int +i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd) +{ + proc_t *prp; + uf_info_t *fip; + uf_entry_t *ufp_wr, *ufp_rd; + file_t *fp_wr, *fp_rd; + vnode_t *vp_wr, *vp_rd; + int i; + + /* + * sprlock() is zone aware, so assuming this mount call was + * initiated by a process in a zone, if it tries to specify + * a pgrp outside of its zone this call will fail. + * + * Also, we want to grab hold of the main automounter process + * and it's going to be the group leader for pgrp, so its + * pid will be equal to pgrp. + */ + prp = sprlock(pgrp); + if (prp == NULL) + return (-1); + mutex_exit(&prp->p_lock); + + /* Now we want to access the process's open file descriptors. */ + fip = P_FINFO(prp); + mutex_enter(&fip->fi_lock); + + /* Sanity check fifo write fd; it must index the fd table. */ + if (fd < 0 || fd >= fip->fi_nfiles) { + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* Get a pointer to the write fifo. */ + UF_ENTER(ufp_wr, fip, fd); + if (((fp_wr = ufp_wr->uf_file) == NULL) || + ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) { + /* Invalid fifo fd. 
*/ + UF_EXIT(ufp_wr); + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * Now we need to find the read end of the fifo (for reasons + * explained below.) We assume that the read end of the fifo + * is in the same process as the write end. + */ + vp_rd = fifo_peer_vp(fp_wr->f_vnode); + for (i = 0; i < fip->fi_nfiles; i++) { + UF_ENTER(ufp_rd, fip, i); + if (((fp_rd = ufp_rd->uf_file) != NULL) && + (fp_rd->f_vnode == vp_rd)) + break; + UF_EXIT(ufp_rd); + } + if (i == fip->fi_nfiles) { + /* Didn't find it. */ + UF_EXIT(ufp_wr); + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * We need to drop fi_lock before we can try to acquire f_tlock; + * the good news is that the file pointers are protected because + * we're still holding uf_lock. + */ + mutex_exit(&fip->fi_lock); + + /* + * Here we bump the open counts on the fifos. The reason + * that we do this is because when we go to write to the + * fifo we want to ensure that they are actually open (and + * not in the process of being closed) without having to + * stop the automounter. (If the write end of the fifo + * were closed and we tried to write to it we would panic. + * If the read end of the fifo was closed and we tried to + * write to the other end, the process that invoked the + * lookup operation would get an unexpected SIGPIPE.) + */ + mutex_enter(&fp_wr->f_tlock); + fp_wr->f_count++; + ASSERT(fp_wr->f_count >= 2); + mutex_exit(&fp_wr->f_tlock); + + mutex_enter(&fp_rd->f_tlock); + fp_rd->f_count++; + ASSERT(fp_rd->f_count >= 2); + mutex_exit(&fp_rd->f_tlock); + + /* Release all our locks. */ + UF_EXIT(ufp_wr); + UF_EXIT(ufp_rd); + mutex_enter(&prp->p_lock); + sprunlock(prp); + + /* Return the file pointers. 
*/ + *fpp_rd = fp_rd; + *fpp_wr = fp_wr; + return (0); +} + +static uint_t +/*ARGSUSED*/ +i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + int *id = (int *)arg; + /* Return the key and terminate the walk. */ + *id = (uintptr_t)key; + return (MH_WALK_TERMINATE); +} + +static void +i_fifo_close(lx_autofs_vfs_t *data) +{ + /* + * Close the fifo to prevent any future requests from + * getting sent to the automounter. + */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr != NULL) { + (void) closef(data->lav_fifo_wr); + data->lav_fifo_wr = NULL; + } + if (data->lav_fifo_rd != NULL) { + (void) closef(data->lav_fifo_rd); + data->lav_fifo_rd = NULL; + } + mutex_exit(&data->lav_lock); + + /* + * Wake up any threads currently waiting for the automounter; + * note that it's possible for multiple threads to have entered + * this function and to be doing the work below simultaneously. + */ + for (;;) { + lx_autofs_lookup_req_t *lalr; + int id; + + /* Lookup the first entry in the hash. */ + id = -1; + mod_hash_walk(data->lav_id_hash, + i_fifo_close_cb, &id); + if (id == -1) { + /* No more id's in the hash. */ + break; + } + if ((lalr = i_lalr_find(data, id)) == NULL) { + /* Someone else beat us to it. */ + continue; + } + + /* Mark the request as complete and release it. */ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + } +} + +static int +i_fifo_verify_rd(lx_autofs_vfs_t *data) +{ + proc_t *prp; + uf_info_t *fip; + uf_entry_t *ufp_rd; + file_t *fp_rd; + vnode_t *vp_rd; + int i; + + ASSERT(MUTEX_HELD((&data->lav_lock))); + + /* Check if we've already been shut down. */ + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + return (-1); + } + vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode); + + /* + * sprlock() is zone aware, so assuming this mount call was + * initiated by a process in a zone, if it tries to specify + * a pgrp outside of its zone this call will fail. 
+ * + * Also, we want to grab hold of the main automounter process + * and it's going to be the group leader for pgrp, so its + * pid will be equal to pgrp. + */ + prp = sprlock(data->lav_pgrp); + if (prp == NULL) + return (-1); + mutex_exit(&prp->p_lock); + + /* Now we want to access the process's open file descriptors. */ + fip = P_FINFO(prp); + mutex_enter(&fip->fi_lock); + + /* + * Now we need to find the read end of the fifo (for reasons + * explained below.) We assume that the read end of the fifo + * is in the same process as the write end. + */ + for (i = 0; i < fip->fi_nfiles; i++) { + UF_ENTER(ufp_rd, fip, i); + if (((fp_rd = ufp_rd->uf_file) != NULL) && + (fp_rd->f_vnode == vp_rd)) + break; + UF_EXIT(ufp_rd); + } + if (i == fip->fi_nfiles) { + /* Didn't find it. */ + mutex_exit(&fip->fi_lock); + mutex_enter(&prp->p_lock); + sprunlock(prp); + return (-1); + } + + /* + * Seems the automounter still has the read end of the fifo + * open, we're done here. Release all our locks and exit. + */ + mutex_exit(&fip->fi_lock); + UF_EXIT(ufp_rd); + mutex_enter(&prp->p_lock); + sprunlock(prp); + + return (0); +} + +static int +i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap) +{ + struct uio uio; + struct iovec iov; + file_t *fp_wr, *fp_rd; + int error; + + /* + * The catch here is we need to make sure _we_ don't close + * the fifo while writing to it. (Another thread could come + * along and realize the automounter process is gone and close + * the fifo.) To do this we bump the open count before we + * write to the fifo. + */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + mutex_exit(&data->lav_lock); + return (ENOENT); + } + fp_wr = data->lav_fifo_wr; + fp_rd = data->lav_fifo_rd; + + /* Bump the open count on the write fifo. */ + mutex_enter(&fp_wr->f_tlock); + fp_wr->f_count++; + mutex_exit(&fp_wr->f_tlock); + + /* Bump the open count on the read fifo. 
*/ + mutex_enter(&fp_rd->f_tlock); + fp_rd->f_count++; + mutex_exit(&fp_rd->f_tlock); + + mutex_exit(&data->lav_lock); + + iov.iov_base = (caddr_t)lap; + iov.iov_len = sizeof (*lap); + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_loffset = 0; + uio.uio_segflg = (short)UIO_SYSSPACE; + uio.uio_resid = sizeof (*lap); + uio.uio_llimit = 0; + uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK; + + error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL); + (void) closef(fp_wr); + (void) closef(fp_rd); + + /* + * After every write we verify that the automounter still has + * these files open. + */ + mutex_enter(&data->lav_lock); + if (i_fifo_verify_rd(data) != 0) { + /* + * Something happened to the automounter. + * Close down the communication pipe we setup. + */ + mutex_exit(&data->lav_lock); + i_fifo_close(data); + if (error != 0) + return (error); + return (ENOENT); + } + mutex_exit(&data->lav_lock); + + return (error); +} + +static int +i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack) +{ + struct iovec iov; + struct uio uio; + dirent64_t *dp, *dbuf; + vnode_t *vp; + size_t dlen, dbuflen; + int eof, error, ndirents = 64; + char *nm; + + dlen = ndirents * (sizeof (*dbuf)); + dbuf = kmem_alloc(dlen, KM_SLEEP); + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = UIO_COPY_CACHED; + uio.uio_loffset = 0; + uio.uio_llimit = MAXOFFSET_T; + + eof = 0; + error = 0; + while (!error && !eof) { + uio.uio_resid = dlen; + iov.iov_base = (char *)dbuf; + iov.iov_len = dlen; + + (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); + if (VOP_READDIR(dvp, &uio, kcred, &eof) != 0) { + VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); + kmem_free(dbuf, dlen); + return (-1); + } + VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); + + if ((dbuflen = dlen - uio.uio_resid) == 0) { + /* We're done. 
*/ + break; + } + + for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); + dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { + + nm = dp->d_name; + + if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) + continue; + + if (VOP_LOOKUP(dvp, + nm, &vp, NULL, 0, NULL, kcred) != 0) { + kmem_free(dbuf, dlen); + return (-1); + } + if (vp->v_type == VDIR) { + if (dir_stack != NULL) { + i_stack_push(dir_stack, (caddr_t)dvp, + (caddr_t)vp, i_strdup(nm)); + } else { + VN_RELE(vp); + } + } else { + if (file_stack != NULL) { + i_stack_push(file_stack, (caddr_t)dvp, + (caddr_t)vp, i_strdup(nm)); + } else { + VN_RELE(vp); + } + } + } + } + kmem_free(dbuf, dlen); + return (0); +} + +/* + * Remove the entry named path from the directory dvp. A plain file + * is simply removed. A directory has its whole subtree removed + * first: files with VOP_REMOVE() and directories with VOP_RMDIR(), + * deepest directories first. Nothing is returned to the caller; if + * any step fails the walk stops and whatever has not been removed + * yet is left in place. + */ +static void +i_bs_destroy(vnode_t *dvp, char *path) +{ + list_t search_stack; + list_t dir_stack; + list_t file_stack; + vnode_t *pdvp, *vp; + char *dpath, *fpath; + int ret; + + if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred) != 0) { + /* A directory entry with this name doesn't actually exist. */ + return; + } + + /* + * Compare v_type directly, the same way i_bs_readdir() does. + * Masking it with VDIR would also match other vnode types. + */ + if (vp->v_type != VDIR) { + /* Easy, the directory entry is a file so delete it. */ + VN_RELE(vp); + (void) VOP_REMOVE(dvp, path, kcred); + return; + } + + /* + * The directory entry is a subdirectory, now we have a bit more + * work to do. (We'll have to recurse into the sub directory.) + * It would have been much easier to do this recursively but kernel + * stacks are notoriously small. + */ + i_stack_init(&search_stack); + i_stack_init(&dir_stack); + i_stack_init(&file_stack); + + /* Save our newfound subdirectory into a list. */ + i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path)); + + /* Do a recursive depth first search into the subdirectories. */ + while (i_stack_pop(&search_stack, + (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) { + + /* Get a list of the subdirectories in this directory. */ + if (i_bs_readdir(dvp, &search_stack, NULL) != 0) { + /* + * This entry has been popped and is not on any + * stack, so the cleanup at exit won't see it. + * Drop the vnode hold and the name here. + */ + VN_RELE(dvp); + i_strfree(dpath); + goto exit; + } + + /* Save the current directory on a separate stack. 
*/ + i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp, dpath); + } + + /* + * Now dir_stack contains a list of directories, the deepest paths + * are at the top of the list. So let's go through and process them. + */ + while (i_stack_pop(&dir_stack, + (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) { + + /* Get a list of the files in this directory. */ + if (i_bs_readdir(dvp, NULL, &file_stack) != 0) { + VN_RELE(dvp); + i_strfree(dpath); + goto exit; + } + + /* Delete all the files in this directory. */ + while (i_stack_pop(&file_stack, + NULL, (caddr_t *)&vp, &fpath) == 0) { + VN_RELE(vp); + ret = VOP_REMOVE(dvp, fpath, kcred); + i_strfree(fpath); + if (ret != 0) { + /* + * The directory was popped above, so + * release our hold on it as well. + */ + VN_RELE(dvp); + i_strfree(dpath); + goto exit; + } + } + + /* Delete this directory. */ + VN_RELE(dvp); + ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred); + i_strfree(dpath); + if (ret != 0) + goto exit; + } + +exit: + /* Release whatever is still queued on any of the three stacks. */ + while ( + (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &path) == 0) || + (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &path) == 0) || + (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &path) == 0)) { + VN_RELE(vp); + i_strfree(path); + } + i_stack_fini(&search_stack); + i_stack_fini(&dir_stack); + i_stack_fini(&file_stack); +} + +static vnode_t * +i_bs_create(vnode_t *dvp, char *bs_name) +{ + vnode_t *vp; + vattr_t vattr; + + /* + * After looking at the mkdir syscall path it seems we don't need + * to initialize all of the vattr_t structure. + */ + bzero(&vattr, sizeof (vattr)); + vattr.va_type = VDIR; + vattr.va_mode = 0755; /* u+rwx,og=rx */ + vattr.va_mask = AT_TYPE|AT_MODE; + + if (VOP_MKDIR(dvp, bs_name, &vattr, &vp, kcred) != 0) + return (NULL); + return (vp); +} + +static int +i_automounter_call(vnode_t *dvp, char *nm) +{ + lx_autofs_lookup_req_t *lalr; + lx_autofs_vfs_t *data; + int error, dup_request; + + /* Get a pointer to the vfs mount data. */ + data = dvp->v_vfsp->vfs_data; + + /* The automounter only supports queries in the root directory. 
*/ + if (dvp != data->lav_root) + return (ENOENT); + + /* + * Check if the current process is in the automounter's process + * group. (If it is, the current process is either the automounter + * itself or one of its forked child processes.) If so, don't + * redirect this lookup back into the automounter because we'll + * hang. + */ + mutex_enter(&pidlock); + if (data->lav_pgrp == curproc->p_pgrp) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + /* Verify that the automount process pipe still exists. */ + mutex_enter(&data->lav_lock); + if (data->lav_fifo_wr == NULL) { + ASSERT(data->lav_fifo_rd == NULL); + mutex_exit(&data->lav_lock); + return (ENOENT); + } + mutex_exit(&data->lav_lock); + + /* Allocate an automounter request structure. */ + if ((lalr = i_lalr_alloc(data, &dup_request, nm)) == NULL) + return (ENOENT); + + /* + * If we were the first one to allocate this request then we + * need to send it to the automounter. + */ + if ((!dup_request) && + ((error = i_fifo_write(data, &lalr->lalr_pkt)) != 0)) { + /* + * Unable to send the request to the automounter. + * Unblock any other threads waiting on the request + * and release the request. + */ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + return (error); + } + + /* Wait for someone to signal us that this request has completed. */ + mutex_enter(&lalr->lalr_lock); + while (!lalr->lalr_complete) { + if (cv_wait_sig(&lalr->lalr_cv, &lalr->lalr_lock) == 0) { + /* We got a signal, abort this lookup. */ + mutex_exit(&lalr->lalr_lock); + i_lalr_abort(data, lalr); + return (EINTR); + } + } + mutex_exit(&lalr->lalr_lock); + i_lalr_release(data, lalr); + + return (0); +} + +static int +i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg) +{ + lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data; + + /* + * Be strict. + * We only accept ioctls from the automounter process group. 
+ */ + mutex_enter(&pidlock); + if (data->lav_pgrp != curproc->p_pgrp) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + if ((cmd == LX_AUTOFS_IOC_READY) || (cmd == LX_AUTOFS_IOC_FAIL)) { + lx_autofs_lookup_req_t *lalr; + int id = arg; + + /* + * We don't actually care if the request failed or succeeded. + * We do the same thing either way. + */ + if ((lalr = i_lalr_find(data, id)) == NULL) + return (ENXIO); + + /* Mark the request as compleate and release it. */ + i_lalr_complete(data, lalr); + i_lalr_release(data, lalr); + return (0); + } + if (cmd == LX_AUTOFS_IOC_CATATONIC) { + /* The automounter is shutting down. */ + i_fifo_close(data); + return (0); + } + return (ENOTSUP); +} + +static int +i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data) +{ + char *fd_str, *pgrp_str, *minproto_str, *maxproto_str; + int fd, pgrp, minproto, maxproto; + file_t *fp_wr, *fp_rd; + + /* Require all options to be present. */ + if ((vfs_optionisset(vfsp, LX_MNTOPT_FD, &fd_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &pgrp_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &minproto_str) != 1) || + (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &maxproto_str) != 1)) + return (EINVAL); + + /* Get the values for each parameter. */ + if ((i_str_to_int(fd_str, &fd) != 0) || + (i_str_to_int(pgrp_str, &pgrp) != 0) || + (i_str_to_int(minproto_str, &minproto) != 0) || + (i_str_to_int(maxproto_str, &maxproto) != 0)) + return (EINVAL); + + /* + * We support v2 of the linux kernel automounter protocol. + * Make sure the mount request we got indicates support + * for this version of the protocol. + */ + if ((minproto > 2) || (maxproto < 2)) + return (EINVAL); + + /* + * Now we need to lookup the fifos we'll be using + * to talk to the userland automounter process. + */ + if (i_fifo_lookup(pgrp, fd, &fp_wr, &fp_rd) != 0) + return (EINVAL); + + /* Save the mount options and fifo pointers. 
*/ + data->lav_fd = fd; + data->lav_pgrp = pgrp; + data->lav_fifo_rd = fp_rd; + data->lav_fifo_wr = fp_wr; + return (0); +} + +/* + * VFS entry points + */ +static int +lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) +{ + lx_autofs_vfs_t *data; + dev_t dev; + char name[40]; + int error; + + if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) + return (EPERM); + + if (mvp->v_type != VDIR) + return (ENOTDIR); + + if ((uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count > 1 || (mvp->v_flag & VROOT))) + return (EBUSY); + + /* We don't support mountes in the global zone. */ + if (getzoneid() == GLOBAL_ZONEID) + return (EPERM); + + /* We don't support mounting on top of ourselves. */ + if (vn_matchops(mvp, lx_autofs_vn_ops)) + return (EPERM); + + /* Allocate a vfs struct. */ + data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP); + + /* Parse mount options. */ + if ((error = i_parse_mntopt(vfsp, data)) != 0) { + kmem_free(data, sizeof (lx_autofs_vfs_t)); + return (error); + } + + /* Initialize the backing store. */ + i_bs_destroy(mvp, LX_AUTOFS_BS_DIR); + if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) { + kmem_free(data, sizeof (lx_autofs_vfs_t)); + return (EBUSY); + } + data->lav_bs_name = LX_AUTOFS_BS_DIR; + + /* We have to hold the underlying vnode we're mounted on. */ + data->lav_mvp = mvp; + VN_HOLD(mvp); + + /* Initialize vfs fields */ + vfsp->vfs_bsize = DEV_BSIZE; + vfsp->vfs_fstype = lx_autofs_fstype; + vfsp->vfs_data = data; + + /* Invent a dev_t (sigh) */ + do { + dev = makedevice(lx_autofs_major, + atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32); + } while (vfs_devismounted(dev)); + vfsp->vfs_dev = dev; + vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype); + + /* Create an id space arena for automounter requests. */ + (void) snprintf(name, sizeof (name), "lx_autofs_id_%d", + getminor(vfsp->vfs_dev)); + data->lav_ids = id_space_create(name, 1, INT_MAX); + + /* Create hashes to keep track of automounter requests. 
*/ + mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL); + (void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_path_hash = mod_hash_create_strhash(name, + LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor); + (void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_id_hash = mod_hash_create_idhash(name, + LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor); + + /* Create a hash to keep track of vnodes. */ + (void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d", + getminor(vfsp->vfs_dev)); + data->lav_vn_hash = mod_hash_create_ptrhash(name, + LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor, + sizeof (vnode_t)); + + /* Create root vnode */ + data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp); + data->lav_root->v_flag |= + VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT; + + return (0); +} + +static int +lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr) +{ + lx_autofs_vfs_t *data; + + if (secpolicy_fs_unmount(cr, vfsp) != 0) + return (EPERM); + + /* We do not currently support forced unmounts. */ + if (flag & MS_FORCE) + return (ENOTSUP); + + /* + * We should never have a reference count of less than 2: one for the + * caller, one for the root vnode. + */ + ASSERT(vfsp->vfs_count >= 2); + + /* If there are any outstanding vnodes, we can't unmount. */ + if (vfsp->vfs_count > 2) + return (EBUSY); + + /* Check for any remaining holds on the root vnode. */ + data = vfsp->vfs_data; + ASSERT(data->lav_root->v_vfsp == vfsp); + if (data->lav_root->v_count > 1) + return (EBUSY); + + /* Close the fifo to the automount process. */ + if (data->lav_fifo_wr != NULL) + (void) closef(data->lav_fifo_wr); + if (data->lav_fifo_rd != NULL) + (void) closef(data->lav_fifo_rd); + + /* + * We have to release our hold on our root vnode before we can + * delete the backing store. (Since the root vnode is linked + * to the backing store.) 
+ */ + VN_RELE(data->lav_root); + + /* Cleanup the backing store. */ + i_bs_destroy(data->lav_mvp, data->lav_bs_name); + VN_RELE(data->lav_mvp); + + /* Cleanup out remaining data structures. */ + mod_hash_destroy_strhash(data->lav_path_hash); + mod_hash_destroy_idhash(data->lav_id_hash); + mod_hash_destroy_ptrhash(data->lav_vn_hash); + id_space_destroy(data->lav_ids); + kmem_free(data, sizeof (lx_autofs_vfs_t)); + + return (0); +} + +static int +lx_autofs_root(vfs_t *vfsp, vnode_t **vpp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + + *vpp = data->lav_root; + VN_HOLD(*vpp); + + return (0); +} + +static int +lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp) +{ + lx_autofs_vfs_t *data = vfsp->vfs_data; + vnode_t *urvp = data->lav_root->v_data; + dev32_t d32; + int error; + + if ((error = VFS_STATVFS(urvp->v_vfsp, sp)) != 0) + return (error); + + /* Update some of values before returning. */ + (void) cmpldev(&d32, vfsp->vfs_dev); + sp->f_fsid = d32; + (void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name, + sizeof (sp->f_basetype)); + sp->f_flag = vf_to_stf(vfsp->vfs_flag); + bzero(sp->f_fstr, sizeof (sp->f_fstr)); + return (0); +} + +static const fs_operation_def_t lx_autofs_vfstops[] = { + { VFSNAME_MOUNT, lx_autofs_mount }, + { VFSNAME_UNMOUNT, lx_autofs_unmount }, + { VFSNAME_ROOT, lx_autofs_root }, + { VFSNAME_STATVFS, lx_autofs_statvfs }, + { NULL, NULL } +}; + +/* + * VOP entry points - simple passthrough + * + * For most VOP entry points we can simply pass the request on to + * the underlying filesystem we're mounted on. 
+ */ +static int +lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) +{ + vnode_t *uvp = vp->v_data; + return (VOP_CLOSE(uvp, flag, count, offset, cr)); +} + +static int +lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp) +{ + vnode_t *uvp = vp->v_data; + return (VOP_READDIR(uvp, uiop, cr, eofp)); +} + +static int +lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr) +{ + vnode_t *uvp = vp->v_data; + return (VOP_ACCESS(uvp, mode, flags, cr)); +} + +static int +lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + return (VOP_RWLOCK(uvp, write_lock, ctp)); +} + +static void +lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) +{ + vnode_t *uvp = vp->v_data; + VOP_RWUNLOCK(uvp, write_lock, ctp); +} + +/*ARGSUSED*/ +static int +lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr) +{ + vnode_t *udvp = dvp->v_data; + + /* + * cdir is the calling process's current directory. + * If cdir is an lx_autofs vnode then get its real underlying + * vnode ptr. (It seems like the only thing cdir is + * ever used for is to make sure the user doesn't delete + * their current directory.) + */ + if (vn_matchops(cdir, lx_autofs_vn_ops)) { + vnode_t *ucdir = cdir->v_data; + return (VOP_RMDIR(udvp, nm, ucdir, cr)); + } + + return (VOP_RMDIR(udvp, nm, cdir, cr)); +} + +/* + * VOP entry points - special passthrough + * + * For some VOP entry points we will first pass the request on to + * the underlying filesystem we're mounted on. If there's an error + * then we immediately return the error, but if the request succeeds + * we have to do some extra work before returning. + */ +static int +lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr) +{ + vnode_t *ovp = *vpp; + vnode_t *uvp = ovp->v_data; + int error; + + if ((error = VOP_OPEN(&uvp, flag, cr)) != 0) + return (error); + + /* Check for clone opens. 
*/ + if (uvp == ovp->v_data) + return (0); + + /* Deal with clone opens by returning a new vnode. */ + *vpp = i_vn_alloc(ovp->v_vfsp, uvp); + VN_RELE(ovp); + return (0); +} + +static int +lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) +{ + vnode_t *uvp = vp->v_data; + int error; + + if ((error = VOP_GETATTR(uvp, vap, flags, cr)) != 0) + return (error); + + /* Update the attributes with our filesystem id. */ + vap->va_fsid = vp->v_vfsp->vfs_dev; + return (0); +} + +static int +lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp, + cred_t *cr) +{ + vnode_t *udvp = dvp->v_data; + vnode_t *uvp = NULL; + int error; + + if ((error = VOP_MKDIR(udvp, nm, vap, &uvp, cr)) != 0) + return (error); + + /* Update the attributes with our filesystem id. */ + vap->va_fsid = dvp->v_vfsp->vfs_dev; + + /* Allocate a new vnode. */ + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); +} + +/* + * VOP entry points - custom + */ +/*ARGSUSED*/ +static void +lx_autofs_inactive(struct vnode *vp, struct cred *cr) +{ + lx_autofs_vfs_t *data = vp->v_vfsp->vfs_data; + + /* + * We need to hold the vfs lock because if we're going to free + * this vnode we have to prevent anyone from looking it up + * in the vnode hash. + */ + mutex_enter(&data->lav_lock); + mutex_enter(&vp->v_lock); + + if (vp->v_count < 1) { + panic("lx_autofs_inactive: bad v_count"); + /*NOTREACHED*/ + } + + /* Drop the temporary hold by vn_rele now. */ + if (--vp->v_count > 0) { + mutex_exit(&vp->v_lock); + mutex_exit(&data->lav_lock); + return; + } + + /* + * No one should have been blocked on this lock because we're + * about to free this vnode. + */ + i_vn_free(vp); +} + +static int +lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, + int flags, vnode_t *rdir, cred_t *cr) +{ + vnode_t *udvp = dvp->v_data; + vnode_t *uvp = NULL; + int error; + + /* First try to lookup if this path component already exitst. 
*/ + if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr)) == 0) { + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); + } + + /* Only query the automounter if the path does not exist. */ + if (error != ENOENT) + return (error); + + /* Refer the lookup to the automounter. */ + if ((error = i_automounter_call(dvp, nm)) != 0) + return (error); + + /* Retry the lookup operation. */ + if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr)) == 0) { + *vpp = i_vn_alloc(dvp->v_vfsp, uvp); + return (0); + } + return (error); +} + +/*ARGSUSED*/ +static int +lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr, + int *rvalp) +{ + vnode_t *uvp = vp->v_data; + + /* Intercept certain ioctls. */ + switch ((uint_t)cmd) { + case LX_AUTOFS_IOC_READY: + case LX_AUTOFS_IOC_FAIL: + case LX_AUTOFS_IOC_CATATONIC: + case LX_AUTOFS_IOC_EXPIRE: + case LX_AUTOFS_IOC_PROTOVER: + case LX_AUTOFS_IOC_SETTIMEOUT: + return (i_automounter_ioctl(vp, cmd, arg)); + } + + /* Pass any remaining ioctl on. */ + return (VOP_IOCTL(uvp, cmd, arg, mode, cr, rvalp)); +} + +/* + * VOP entry points definitions + */ +static const fs_operation_def_t lx_autofs_tops_root[] = { + { VOPNAME_OPEN, lx_autofs_open }, + { VOPNAME_CLOSE, lx_autofs_close }, + { VOPNAME_IOCTL, lx_autofs_ioctl }, + { VOPNAME_RWLOCK, lx_autofs_rwlock }, + { VOPNAME_RWUNLOCK, (fs_generic_func_p)lx_autofs_rwunlock }, + { VOPNAME_GETATTR, lx_autofs_getattr }, + { VOPNAME_ACCESS, lx_autofs_access }, + { VOPNAME_READDIR, lx_autofs_readdir }, + { VOPNAME_LOOKUP, lx_autofs_lookup }, + { VOPNAME_INACTIVE, (fs_generic_func_p)lx_autofs_inactive }, + { VOPNAME_MKDIR, lx_autofs_mkdir }, + { VOPNAME_RMDIR, lx_autofs_rmdir }, + { NULL } +}; + +/* + * lx_autofs_init() gets invoked via the mod_install() call in + * this modules _init() routine. Therefor, the code that cleans + * up the structures we allocate below is actually found in + * our _fini() routine. 
+ */ +/* ARGSUSED */ +static int +lx_autofs_init(int fstype, char *name) +{ + int error; + + if ((lx_autofs_major = + (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV)) == 0) { + + if ((lx_autofs_major = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "lx_autofs_init: " + "can't get unique device number"); + return (EAGAIN); + } + + if (space_store(LX_AUTOFS_SPACE_KEY_UDEV, + (uintptr_t)lx_autofs_major) != 0) { + cmn_err(CE_WARN, "lx_autofs_init: " + "can't save unique device number"); + return (EAGAIN); + } + } + + lx_autofs_fstype = fstype; + if ((error = vfs_setfsops( + fstype, lx_autofs_vfstops, &lx_autofs_vfsops)) != 0) { + cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template"); + return (error); + } + + if ((error = vn_make_ops("lx_autofs vnode ops", + lx_autofs_tops_root, &lx_autofs_vn_ops)) != 0) { + VERIFY(vfs_freevfsops_by_type(fstype) == 0); + lx_autofs_vn_ops = NULL; + return (error); + } + + return (0); +} + + +/* + * Module linkage + */ +static mntopt_t lx_autofs_mntopt[] = { + { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE }, + { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE } +}; + +static mntopts_t lx_autofs_mntopts = { + sizeof (lx_autofs_mntopt) / sizeof (mntopt_t), + lx_autofs_mntopt +}; + +static vfsdef_t vfw = { + VFSDEF_VERSION, + LX_AUTOFS_NAME, + lx_autofs_init, + VSW_HASPROTO | VSW_VOLATILEDEV, + &lx_autofs_mntopts +}; + +extern struct mod_ops mod_fsops; + +static struct modlfs modlfs = { + &mod_fsops, "linux autofs filesystem", &vfw +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlfs, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int error; + + if ((error = mod_remove(&modlinkage)) != 0) + return (error); + + if (lx_autofs_vn_ops != NULL) { + vn_freevnodeops(lx_autofs_vn_ops); + lx_autofs_vn_ops = NULL; 
+ } + + /* + * In our init routine, if we get an error after calling + * vfs_setfsops() we cleanup by calling vfs_freevfsops_by_type(). + * But we don't need to call vfs_freevfsops_by_type() here + * because the fs framework did this for us as part of the + * mod_remove() call above. + */ + return (0); +} diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c new file mode 100644 index 0000000000..ae049e2792 --- /dev/null +++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c @@ -0,0 +1,395 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/modctl.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/stat.h> +#include <sys/conf.h> +#include <sys/frame.h> +#include <sys/dtrace.h> +#include <sys/dtrace_impl.h> + +#include <sys/lx_impl.h> + +#define LX_SYSTRACE_SHIFT 16 +#define LX_SYSTRACE_ISENTRY(x) ((int)(x) >> LX_SYSTRACE_SHIFT) +#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << LX_SYSTRACE_SHIFT) - 1)) +#define LX_SYSTRACE_ENTRY(id) ((1 << LX_SYSTRACE_SHIFT) | (id)) +#define LX_SYSTRACE_RETURN(id) (id) + +#define LX_SYSTRACE_ENTRY_AFRAMES 2 +#define LX_SYSTRACE_RETURN_AFRAMES 4 + +typedef struct lx_systrace_sysent { + const char *lss_name; + dtrace_id_t lss_entry; + dtrace_id_t lss_return; +} lx_systrace_sysent_t; + +static dev_info_t *lx_systrace_devi; +static dtrace_provider_id_t lx_systrace_id; +static kmutex_t lx_systrace_lock; +static uint_t lx_systrace_nenabled; + +static int lx_systrace_nsysent; +static lx_systrace_sysent_t *lx_systrace_sysent; + +/*ARGSUSED*/ +static void +lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2, + ulong_t arg3, ulong_t arg4, ulong_t arg5) +{ + dtrace_id_t id; + + if (sysnum >= lx_systrace_nsysent) + return; + + if ((id = lx_systrace_sysent[sysnum].lss_entry) == DTRACE_IDNONE) + return; + + dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); +} + +/*ARGSUSED*/ +static void +lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2, + ulong_t arg3, ulong_t arg4, ulong_t arg5) +{ + dtrace_id_t id; + + if (sysnum >= lx_systrace_nsysent) + return; + + if ((id = lx_systrace_sysent[sysnum].lss_return) == DTRACE_IDNONE) + return; + + dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); +} + +/*ARGSUSED*/ +static void +lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc) +{ + int i; + + if (desc != NULL) + return; + + for (i = 0; i < lx_systrace_nsysent; i++) { + if (dtrace_probe_lookup(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "entry") != 0) + 
continue; + + (void) dtrace_probe_create(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "entry", + LX_SYSTRACE_ENTRY_AFRAMES, + (void *)((uintptr_t)LX_SYSTRACE_ENTRY(i))); + + (void) dtrace_probe_create(lx_systrace_id, NULL, + lx_systrace_sysent[i].lss_name, "return", + LX_SYSTRACE_RETURN_AFRAMES, + (void *)((uintptr_t)LX_SYSTRACE_RETURN(i))); + + lx_systrace_sysent[i].lss_entry = DTRACE_IDNONE; + lx_systrace_sysent[i].lss_return = DTRACE_IDNONE; + } +} + +/*ARGSUSED*/ +static void +lx_systrace_enable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg); + + ASSERT(sysnum < lx_systrace_nsysent); + + mutex_enter(&lx_systrace_lock); + if (lx_systrace_nenabled++ == 0) + lx_brand_systrace_enable(); + mutex_exit(&lx_systrace_lock); + + if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) { + lx_systrace_sysent[sysnum].lss_entry = id; + } else { + lx_systrace_sysent[sysnum].lss_return = id; + } +} + +/*ARGSUSED*/ +static void +lx_systrace_disable(void *arg, dtrace_id_t id, void *parg) +{ + int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg); + + ASSERT(sysnum < lx_systrace_nsysent); + + if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) { + lx_systrace_sysent[sysnum].lss_entry = DTRACE_IDNONE; + } else { + lx_systrace_sysent[sysnum].lss_return = DTRACE_IDNONE; + } + + mutex_enter(&lx_systrace_lock); + if (--lx_systrace_nenabled == 0) + lx_brand_systrace_disable(); + mutex_exit(&lx_systrace_lock); +} + +/*ARGSUSED*/ +static void +lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg) +{ +} + +/*ARGSUSED*/ +static uint64_t +lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + struct frame *fp = (struct frame *)dtrace_getfp(); + uintptr_t *stack; + uint64_t val = 0; + int i; + + if (argno >= 6) + return (0); + + /* + * Walk the four frames down the stack to the entry or return callback. 
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable() + * which invokes this function to get the extended arguments. We get + * the frame pointer in via call to dtrace_getfp() above which makes for + * four frames. + */ + for (i = 0; i < 4; i++) { + fp = (struct frame *)fp->fr_savfp; + } + + stack = (uintptr_t *)&fp[1]; + + /* + * Skip the first argument to the callback -- the system call number. + */ + argno++; + +#ifdef __amd64 + /* + * On amd64, the first 6 arguments are passed in registers while + * subsequent arguments are on the stack. + */ + argno -= 6; +#endif + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + val = stack[argno]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (val); +} + + +static const dtrace_pattr_t lx_systrace_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, +}; + +static dtrace_pops_t lx_systrace_pops = { + lx_systrace_provide, + NULL, + lx_systrace_enable, + lx_systrace_disable, + NULL, + NULL, + NULL, + lx_systrace_getarg, + NULL, + lx_systrace_destroy +}; + +static int +lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + int i; + + switch (cmd) { + case DDI_ATTACH: + break; + case DDI_RESUME: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR, + 0, DDI_PSEUDO, NULL) == DDI_FAILURE || + dtrace_register("lx-syscall", &lx_systrace_attr, + DTRACE_PRIV_KERNEL, 0, &lx_systrace_pops, NULL, + &lx_systrace_id) != 0) { + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + lx_systrace_devi = devi; + + /* + * Count up the lx_brand system calls. 
+ */ + for (i = 0; lx_sysent[i].sy_callc != NULL; i++) + continue; + + /* + * Initialize our corresponding table. + */ + lx_systrace_sysent = kmem_zalloc(i * sizeof (lx_systrace_sysent_t), + KM_SLEEP); + lx_systrace_nsysent = i; + + for (i = 0; i < lx_systrace_nsysent; i++) { + lx_systrace_sysent[i].lss_name = lx_sysent[i].sy_name; + lx_systrace_sysent[i].lss_entry = DTRACE_IDNONE; + lx_systrace_sysent[i].lss_return = DTRACE_IDNONE; + } + + /* + * Install probe triggers. + */ + lx_systrace_entry_ptr = lx_systrace_entry; + lx_systrace_return_ptr = lx_systrace_return; + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + case DDI_SUSPEND: + return (DDI_SUCCESS); + default: + return (DDI_FAILURE); + } + + if (dtrace_unregister(lx_systrace_id) != 0) + return (DDI_FAILURE); + + /* + * Free table. + */ + kmem_free(lx_systrace_sysent, lx_systrace_nsysent * + sizeof (lx_systrace_sysent_t)); + lx_systrace_sysent = NULL; + lx_systrace_nsysent = 0; + + /* + * Reset probe triggers. 
+ */ + lx_systrace_entry_ptr = NULL; + lx_systrace_return_ptr = NULL; + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +static struct cb_ops lx_systrace_cb_ops = { + lx_systrace_open, /* open */ + nodev, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + nodev, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops lx_systrace_ops = { + DEVO_REV, /* devo_rev */ + 0, /* refcnt */ + ddi_getinfo_1to1, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + lx_systrace_attach, /* attach */ + lx_systrace_detach, /* detach */ + nodev, /* reset */ + &lx_systrace_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev /* dev power */ +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "Linux Brand System Call Tracing", /* name of module */ + &lx_systrace_ops /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf new file mode 100644 index 0000000000..e4499c8a5b --- /dev/null +++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +name="lx_systrace" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/brand/lx/io/ldlinux.c b/usr/src/uts/common/brand/lx/io/ldlinux.c new file mode 100644 index 0000000000..76c5e1d255 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/ldlinux.c @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ddi.h> +#include <sys/cmn_err.h> +#include <sys/modctl.h> +#include <sys/ptms.h> +#include <sys/stropts.h> +#include <sys/strsun.h> +#include <sys/sunddi.h> + +#include <sys/ldlinux.h> + + +/* + * ldlinuxopen - open routine gets called when the module gets pushed onto the + * stream. + */ +/* ARGSUSED */ +static int +ldlinuxopen( + queue_t *q, /* pointer to the read side queue */ + dev_t *devp, /* pointer to stream tail's dev */ + int oflag, /* the user open(2) supplied flags */ + int sflag, /* open state flag */ + cred_t *credp) /* credentials */ +{ + struct ldlinux *tp; /* ldlinux entry for this module */ + mblk_t *mop; + struct stroptions *sop; + struct termios *termiosp; + int len; + + if (sflag != MODOPEN) + return (EINVAL); + + if (q->q_ptr != NULL) { + /* It's already attached. */ + return (0); + } + + mop = allocb(sizeof (struct stroptions), BPRI_MED); + if (mop == NULL) + return (ENOSR); + mop->b_datap->db_type = M_SETOPTS; + mop->b_wptr += sizeof (struct stroptions); + sop = (struct stroptions *)mop->b_rptr; + sop->so_flags = SO_ISTTY; + + /* + * Allocate state structure. + */ + tp = kmem_alloc(sizeof (*tp), KM_SLEEP); + + /* Stash a pointer to our private data in q_ptr. */ + q->q_ptr = WR(q)->q_ptr = tp; + + /* + * Get termios defaults. These are stored as + * a property in the "options" node. + */ + if (ddi_getlongprop(DDI_DEV_T_ANY, ddi_root_node(), 0, "ttymodes", + (caddr_t)&termiosp, &len) == DDI_PROP_SUCCESS && + len == sizeof (struct termios)) { + if (termiosp->c_lflag & ICANON) { + tp->veof = termiosp->c_cc[VEOF]; + tp->veol = termiosp->c_cc[VEOL]; + tp->vmin = 1; + tp->vtime = 0; + } else { + tp->veof = 0; + tp->veol = 0; + tp->vmin = termiosp->c_cc[VMIN]; + tp->vtime = termiosp->c_cc[VTIME]; + } + kmem_free(termiosp, len); + } else { + /* + * winge winge winge... 
+ */ + cmn_err(CE_WARN, + "ldlinuxopen: Couldn't get ttymodes property!"); + bzero(tp, sizeof (*tp)); + } + + tp->state = 0; + + /* + * Commit to the open and send the M_SETOPTS off to the stream head. + */ + qprocson(q); + putnext(q, mop); + + return (0); +} + + +/* + * ldlinuxclose - This routine gets called when the module gets + * popped off of the stream. + */ +/* ARGSUSED */ +static int +ldlinuxclose(queue_t *q, int flag, cred_t *credp) +{ + struct ldlinux *tp; + + qprocsoff(q); + tp = q->q_ptr; + kmem_free(tp, sizeof (*tp)); + q->q_ptr = WR(q)->q_ptr = NULL; + return (0); +} + + +static void +do_ioctl(queue_t *q, mblk_t *mp) +{ + struct ldlinux *tp = q->q_ptr; + struct iocblk *iocp = (struct iocblk *)mp->b_rptr; + struct lx_cc *cb; + mblk_t *tmp; + int error; + + switch (iocp->ioc_cmd) { + case TIOCSETLD: + /* prepare caller supplied data for access */ + error = miocpullup(mp, sizeof (struct lx_cc)); + if (error != 0) { + miocnak(q, mp, 0, error); + return; + } + + /* get a pointer to the caller supplied data */ + cb = (struct lx_cc *)mp->b_cont->b_rptr; + + /* save caller supplied data in our per-stream cache */ + tp->veof = cb->veof; + tp->veol = cb->veol; + tp->vmin = cb->vmin; + tp->vtime = cb->vtime; + + /* initialize and send a reply indicating that we're done */ + miocack(q, mp, 0, 0); + return; + + case TIOCGETLD: + /* allocate a reply message */ + if ((tmp = allocb(sizeof (struct lx_cc), BPRI_MED)) == NULL) { + miocnak(q, mp, 0, ENOSR); + return; + } + + /* initialize the reply message */ + mioc2ack(mp, tmp, sizeof (struct lx_cc), 0); + + /* get a pointer to the reply data */ + cb = (struct lx_cc *)mp->b_cont->b_rptr; + + /* copy data from our per-stream cache into the reply data */ + cb->veof = tp->veof; + cb->veol = tp->veol; + cb->vmin = tp->vmin; + cb->vtime = tp->vtime; + + /* send the reply indicating that we're done */ + qreply(q, mp); + return; + + case PTSSTTY: + tp->state |= ISPTSTTY; + break; + + default: + break; + } + + putnext(q, mp); 
+} + + +/* + * ldlinuxput - Module read and write queue put procedure. + */ +static void +ldlinuxput(queue_t *q, mblk_t *mp) +{ + struct ldlinux *tp = q->q_ptr; + + switch (DB_TYPE(mp)) { + default: + break; + case M_IOCTL: + if ((q->q_flag & QREADR) == 0) { + do_ioctl(q, mp); + return; + } + break; + + case M_FLUSH: + /* + * Handle read and write flushes. + */ + if ((((q->q_flag & QREADR) != 0) && (*mp->b_rptr & FLUSHR)) || + (((q->q_flag & QREADR) == 0) && (*mp->b_rptr & FLUSHW))) { + if ((tp->state & ISPTSTTY) && (*mp->b_rptr & FLUSHBAND)) + flushband(q, *(mp->b_rptr + 1), FLUSHDATA); + else + flushq(q, FLUSHDATA); + } + break; + } + putnext(q, mp); +} + + +static struct module_info ldlinux_info = { + LDLINUX_MODID, + LDLINUX_MOD, + 0, + INFPSZ, + 0, + 0 +}; + +static struct qinit ldlinuxinit = { + (int (*)()) ldlinuxput, + NULL, + ldlinuxopen, + ldlinuxclose, + NULL, + &ldlinux_info +}; + +static struct streamtab ldlinuxinfo = { + &ldlinuxinit, + &ldlinuxinit +}; + +/* + * Module linkage information for the kernel. + */ +static struct fmodsw fsw = { + LDLINUX_MOD, + &ldlinuxinfo, + D_MTQPAIR | D_MP +}; + +static struct modlstrmod modlstrmod = { + &mod_strmodops, "termios extensions for lx brand", &fsw +}; + +static struct modlinkage modlinkage = { + MODREV_1, &modlstrmod, NULL +}; + +int +_init() +{ + return (mod_install(&modlinkage)); +} + +int +_fini() +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c new file mode 100644 index 0000000000..07c3bd0949 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_audio.c @@ -0,0 +1,2026 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/audio.h> +#include <sys/conf.h> +#include <sys/debug.h> +#include <sys/disp.h> +#include <sys/ddi.h> +#include <sys/file.h> +#include <sys/id_space.h> +#include <sys/kmem.h> +#include <sys/lx_audio.h> +#include <sys/mixer.h> +#include <sys/modhash.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/sysmacros.h> +#include <sys/stropts.h> +#include <sys/types.h> +#include <sys/zone.h> + +/* Properties used by the lx_audio driver */ +#define LXA_PROP_INPUTDEV "inputdev" +#define LXA_PROP_OUTPUTDEV "outputdev" + +/* default device paths used by this driver */ +#define LXA_DEV_DEFAULT "/dev/audio" +#define LXA_DEV_CUSTOM_DIR "/dev/sound/" + +/* maximum possible number of concurrent opens of this driver */ +#define LX_AUDIO_MAX_OPENS 1024 + +/* + * these are default fragment size and fragment count values. + * these values were chosen to make quake work well on my + * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400. 
+ * + * for reference: + * - 1 sec of stereo output at 44Khz is about 171 Kb of data + * - 1 sec of mono output at 8Khz is about 8Kb of data + */ +#define LXA_OSS_FRAG_SIZE (1024) /* 1/8 sec at 8Khz mono */ +#define LXA_OSS_FRAG_CNT (1024 * 2) + +/* maximum ammount of fragment memory we'll allow a process to mmap */ +#define LXA_OSS_FRAG_MEM (1024 * 1024 * 2) /* 2Mb */ + +/* forward declarations */ +typedef struct lxa_state lxa_state_t; +typedef struct lxa_zstate lxa_zstate_t; + +/* + * Structure and enum declarations + */ +typedef enum { + LXA_TYPE_INVALID = 0, + LXA_TYPE_AUDIO = 1, /* audio device */ + LXA_TYPE_AUDIOCTL = 2 /* audio control/mixer device */ +} lxa_dev_type_t; + +struct lxa_zstate { + char *lxa_zs_zonename; + + /* + * we could store the input/output audio device setting here, + * but instead we're keeing them as device node properties + * so that a user can easily see the audio configuration for + * a zone via prtconf. + */ + + /* + * OSS doesn't support multiple opens of the audio device. + * (multiple opens of the mixer device are supported.) + * so here we'll keep a pointer to any open input/output + * streams. (OSS does support two opens if one is for input + * and the other is for output.) + */ + lxa_state_t *lxa_zs_istate; + lxa_state_t *lxa_zs_ostate; + + /* + * we need to cache channel gain and balance. channel gain and + * balance map to PCM volume in OSS, which are supposedly a property + * of the underlying hardware. but in solaris, channels are + * implemented in software and only exist when an audio device + * is actually open. (each open returns a unique channel.) OSS + * apps will expect consistent PCM volume set/get operations to + * work even if no audio device is open. hence, if no underlying + * device is open we need to cache the gain and balance setting. 
+ */ + lxa_mixer_levels_t lxa_zs_pcm_levels; +}; + +struct lxa_state { + lxa_zstate_t *lxas_zs; /* zone state pointer */ + + dev_t lxas_dev_old; /* dev_t used to open the device */ + dev_t lxas_dev_new; /* new dev_t assigned to an open */ + int lxas_flags; /* original flags passed to open */ + lxa_dev_type_t lxas_type; /* type of device that was opened */ + + int lxas_devs_same; /* input and output device the same? */ + + /* input device variables */ + ldi_handle_t lxas_idev_lh; /* ldi handle for access */ + int lxas_idev_flags; /* flags used for open */ + + /* output device variables */ + ldi_handle_t lxas_odev_lh; /* ldi handle for access */ + int lxas_odev_flags; /* flags used for open */ + + /* + * since we support multiplexing of devices we need to remember + * certain parameters about the devices + */ + uint_t lxas_hw_features; + uint_t lxas_sw_features; + + uint_t lxas_frag_size; + uint_t lxas_frag_cnt; + + /* + * members needed to support mmap device access. note that to + * simplifly things we only support one mmap access per open. 
+ */ + ddi_umem_cookie_t lxas_umem_cookie; + char *lxas_umem_ptr; + size_t lxas_umem_len; + kthread_t *lxas_mmap_thread; + int lxas_mmap_thread_running; + int lxas_mmap_thread_exit; + int lxas_mmap_thread_frag; +}; + +/* + * Global variables + */ +dev_info_t *lxa_dip = NULL; +kmutex_t lxa_lock; +id_space_t *lxa_minor_id = NULL; +mod_hash_t *lxa_state_hash = NULL; +mod_hash_t *lxa_zstate_hash = NULL; +size_t lxa_state_hash_size = 15; +size_t lxa_zstate_hash_size = 15; +size_t lxa_registered_zones = 0; + +/* + * function declarations + */ +static void lxa_mmap_output_disable(lxa_state_t *); + +/* + * functions + */ +static void +lxa_state_close(lxa_state_t *lxa_state) +{ + lxa_zstate_t *lxa_zs = lxa_state->lxas_zs; + minor_t minor = getminor(lxa_state->lxas_dev_new); + + /* disable any mmap output that might still be going on */ + lxa_mmap_output_disable(lxa_state); + + /* + * if this was the active input/output device, unlink it from + * the global zone state so that other opens of the audio device + * can now succeed. 
+ */ + mutex_enter(&lxa_lock); + if (lxa_zs->lxa_zs_istate == lxa_state) + lxa_zs->lxa_zs_istate = NULL; + if (lxa_zs->lxa_zs_ostate == lxa_state) { + lxa_zs->lxa_zs_ostate = NULL; + } + mutex_exit(&lxa_lock); + + /* remove this state structure from the hash (if it's there) */ + (void) mod_hash_remove(lxa_state_hash, + (mod_hash_key_t)(uintptr_t)minor, (mod_hash_val_t *)&lxa_state); + + /* close any audio device that we have open */ + if (lxa_state->lxas_idev_lh != NULL) + (void) ldi_close(lxa_state->lxas_idev_lh, + lxa_state->lxas_idev_flags, kcred); + if (lxa_state->lxas_odev_lh != NULL) + (void) ldi_close(lxa_state->lxas_odev_lh, + lxa_state->lxas_odev_flags, kcred); + + /* free up any memory allocated by mmaps */ + if (lxa_state->lxas_umem_cookie != NULL) + ddi_umem_free(lxa_state->lxas_umem_cookie); + + /* release the id associated with this state structure */ + id_free(lxa_minor_id, minor); + + kmem_free(lxa_state, sizeof (*lxa_state)); +} + +static char * +getzonename(void) +{ + return (curproc->p_zone->zone_name); +} + +static void +strfree(char *str) +{ + kmem_free(str, strlen(str) + 1); +} + +static char * +strdup(char *str) +{ + int n = strlen(str); + char *ptr = kmem_alloc(n + 1, KM_SLEEP); + bcopy(str, ptr, n + 1); + return (ptr); +} + +static char * +lxa_devprop_name(char *zname, char *pname) +{ + char *zpname; + int n; + + ASSERT((pname != NULL) && (zname != NULL)); + + /* prepend the zone name to the property name */ + n = snprintf(NULL, 0, "%s_%s", zname, pname) + 1; + zpname = kmem_alloc(n, KM_SLEEP); + (void) snprintf(zpname, n, "%s_%s", zname, pname); + + return (zpname); +} + +/* + * lxa_devprop_verify - check that a device property value is well formed. + * returns 0 if pval is one of the keywords "default" or "none", or if it + * consists solely of decimal digits. returns -1 otherwise. + */ +static int +lxa_devprop_verify(char *pval) +{ + int n; + + ASSERT(pval != NULL); + + /* the keywords understood by lxa_devprop_lookup() are always valid */ + if ((strcmp(pval, "default") == 0) || (strcmp(pval, "none") == 0)) + return (0); + + /* + * anything else must be an integer. a character is rejected when + * it lies outside '0'..'9', so the two range tests have to be + * or'ed together (and'ing them can never be true.) + */ + for (n = 0; pval[n] != '\0'; n++) { + if ((pval[n] < '0') || (pval[n] > '9')) { + return (-1); + } + } + + return (0); +} + +static char * +lxa_devprop_lookup(char *zname, char *pname, 
lxa_dev_type_t lxa_type) +{ + char *zprop_name, *pval; + char *dev_path; + int n, rv; + + ASSERT((pname != NULL) && (zname != NULL)); + ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL)); + + zprop_name = lxa_devprop_name(zname, pname); + + /* attempt to lookup the property */ + rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval); + strfree(zprop_name); + + if (rv != DDI_PROP_SUCCESS) + return (NULL); + + if (lxa_devprop_verify(pval) != 0) { + ddi_prop_free(pval); + return (NULL); + } + + /* + * from here on pval is owned by us and must be released with + * ddi_prop_free() on every path out of this function. + */ + if (strcmp(pval, "none") == 0) { + /* there is no audio device specified */ + ddi_prop_free(pval); + return (NULL); + } else if (strcmp(pval, "default") == 0) { + /* use the default audio device on the system */ + dev_path = strdup(LXA_DEV_DEFAULT); + } else { + /* a custom audio device was specified, generate a path */ + n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1; + dev_path = kmem_alloc(n, KM_SLEEP); + (void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval); + } + ddi_prop_free(pval); + + /* + * if this is an audio control device we need to append + * "ctl" to the path. the returned string is sized exactly + * so that the caller can release it with strfree(). + */ + if (lxa_type == LXA_TYPE_AUDIOCTL) { + char *tmp; + n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1; + tmp = kmem_alloc(n, KM_SLEEP); + (void) snprintf(tmp, n, "%s%s", dev_path, "ctl"); + strfree(dev_path); + dev_path = tmp; + } + + return (dev_path); +} + +static int +lxa_dev_getfeatures(lxa_state_t *lxa_state) +{ + audio_info_t ai_idev, ai_odev; + int n, rv; + + /* set a default fragment size */ + lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE; + lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT; + + /* get info for the currently open audio devices */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, + AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0)) + return (rv); + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_GETINFO, 
(intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0)) + return (rv); + + /* if we're only open for reading or writing then it's easy */ + if (lxa_state->lxas_idev_lh == NULL) { + lxa_state->lxas_sw_features = ai_odev.sw_features; + lxa_state->lxas_hw_features = ai_odev.hw_features; + return (0); + } else if (lxa_state->lxas_odev_lh == NULL) { + lxa_state->lxas_sw_features = ai_idev.sw_features; + lxa_state->lxas_hw_features = ai_idev.hw_features; + return (0); + } + + /* + * well if we're open for reading and writing but the underlying + * device is the same then it's also pretty easy + */ + if (lxa_state->lxas_devs_same) { + if ((ai_odev.sw_features != ai_idev.sw_features) || + (ai_odev.hw_features != ai_idev.hw_features)) { + zcmn_err(getzoneid(), CE_WARN, "lx_audio error: " + "audio device reported inconsistent features"); + return (EIO); + } + lxa_state->lxas_sw_features = ai_odev.sw_features; + lxa_state->lxas_hw_features = ai_odev.hw_features; + return (0); + } + + /* + * figure out which software features we're going to support. + * we will report a feature as supported if both the input + * and output device support it. + */ + lxa_state->lxas_sw_features = 0; + n = ai_idev.sw_features & ai_odev.sw_features; + if (n & AUDIO_SWFEATURE_MIXER) + lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER; + + /* + * figure out which hardware features we're going to support. + * for a first pass we will report a feature as supported if + * both the input and output device support it. + */ + lxa_state->lxas_hw_features = 0; + n = ai_idev.hw_features & ai_odev.hw_features; + if (n & AUDIO_HWFEATURE_MSCODEC) + lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC; + + /* + * if we made it here then we have different audio input and output + * devices. this will allow us to report support for additional + * hardware features that may not supported by just the input or + * output device alone. 
+ */ + + /* + * the additional features are or'ed in. a plain assignment here + * would throw away the common features computed above, and the + * second assignment would throw away the first. + */ + + /* always report that we support both playback and recording */ + lxa_state->lxas_hw_features |= + AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD; + + /* always report full duplex support */ + lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_DUPLEX; + + /* never report that we have input to output loopback support */ + ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0); + return (0); +} + +static int +lxa_dev_open(lxa_state_t *lxa_state) +{ + char *idev, *odev; + int flags, rv; + ldi_handle_t lh; + ldi_ident_t li = NULL; + + ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) || + (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL)); + + /* + * check if we have configuration properties for this zone. + * if we don't then audio isn't supported in this zone. + */ + idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV, + lxa_state->lxas_type); + odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV, + lxa_state->lxas_type); + + /* make sure there is at least one device to read from or write to */ + if ((idev == NULL) && (odev == NULL)) + return (ENODEV); + + /* see if the input and output devices are actually the same device */ + if (((idev != NULL) && (odev != NULL)) && + (strcmp(idev, odev) == 0)) + lxa_state->lxas_devs_same = 1; + + /* we don't respect FEXCL */ + flags = lxa_state->lxas_flags & ~FEXCL; + if (lxa_state->lxas_type == LXA_TYPE_AUDIO) { + /* + * if we're opening audio devices then we need to muck + * with the FREAD/FWRITE flags. + * + * certain audio device may only support input or output + * (but not both.) so if we're multiplexing input/output + * to different devices we need to make sure we don't try + * and open the output device for reading and the input + * device for writing. + * + * if we're using the same device for input/output we still + * need to do this because some audio devices won't let + * themselves be opened multiple times for read access. 
+ */ + lxa_state->lxas_idev_flags = flags & ~FWRITE; + lxa_state->lxas_odev_flags = flags & ~FREAD; + + /* make sure we have devices to read from and write to */ + if (((flags & FREAD) && (idev == NULL)) || + ((flags & FWRITE) && (odev == NULL))) { + rv = ENODEV; + goto out; + } + } else { + lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags; + } + + /* get an ident to open the devices */ + if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) { + rv = ENODEV; + goto out; + } + + /* open the input device */ + lxa_state->lxas_idev_lh = NULL; + if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) || + (lxa_state->lxas_idev_flags & FREAD)) && + (idev != NULL)) { + rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags, + kcred, &lh, li); + if (rv != 0) { + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "unable to open audio device: %s", idev); + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "possible zone audio configuration error"); + goto out; + } + lxa_state->lxas_idev_lh = lh; + } + + /* open the output device */ + lxa_state->lxas_odev_lh = NULL; + if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) || + (lxa_state->lxas_odev_flags & FWRITE)) && + (odev != NULL)) { + rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags, + kcred, &lh, li); + if (rv != 0) { + /* if we opened an input device, close it now */ + if (lxa_state->lxas_idev_lh != NULL) { + (void) ldi_close(lxa_state->lxas_idev_lh, + lxa_state->lxas_idev_flags, kcred); + } + + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "unable to open audio device: %s", odev); + zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: " + "possible zone audio configuration error"); + goto out; + } + lxa_state->lxas_odev_lh = lh; + } + + /* free up stuff */ +out: + if (li != NULL) + ldi_ident_release(li); + if (idev != NULL) + strfree(idev); + if (odev != NULL) + strfree(odev); + + return (rv); +} + +void +lxa_mmap_thread_exit(lxa_state_t *lxa_state) +{ + mutex_enter(&lxa_lock); + lxa_state->lxas_mmap_thread 
= NULL; + lxa_state->lxas_mmap_thread_frag = 0; + lxa_state->lxas_mmap_thread_running = 0; + lxa_state->lxas_mmap_thread_exit = 0; + mutex_exit(&lxa_lock); + thread_exit(); + /*NOTREACHED*/ +} + +void +lxa_mmap_thread(lxa_state_t *lxa_state) +{ + struct uio uio, uio_null; + iovec_t iovec, iovec_null; + uint_t bytes_per_sec, usec_per_frag, ticks_per_frag; + int rv, junk, eof, retry; + audio_info_t ai; + + /* we better be setup for writing to the output device */ + ASSERT((lxa_state->lxas_flags & FWRITE) != 0); + ASSERT(lxa_state->lxas_odev_lh != NULL); + + /* setup a uio to output one fragment */ + uio.uio_iov = &iovec; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = 0; + uio.uio_llimit = MAXOFFSET_T; + + /* setup a uio to output a eof (a fragment with a length of 0) */ + uio_null.uio_iov = &iovec_null; + uio_null.uio_iov->iov_len = 0; + uio_null.uio_iov->iov_base = NULL; + uio_null.uio_iovcnt = 1; + uio_null.uio_offset = 0; + uio_null.uio_segflg = UIO_SYSSPACE; + uio_null.uio_fmode = 0; + uio_null.uio_extflg = 0; + uio_null.uio_llimit = MAXOFFSET_T; + uio_null.uio_resid = 0; + +lxa_mmap_thread_top: + ASSERT(!MUTEX_HELD(&lxa_lock)); + + /* first drain any pending audio output */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_DRAIN failed, aborting audio output"); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* + * we depend on the ai.play.eof value to keep track of + * audio output progress so reset it here. 
+ */ + AUDIO_INITINFO(&ai); + ai.play.eof = 0; + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_SETINFO failed, aborting audio output"); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* + * we're going to need to know the sampling rate and number + * of output channels to estimate how long we can sleep between + * requests. + */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO, + (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_GETINFO failed, aborting audio output"); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ + } + + /* estimate how many ticks it takes to output a fragment of data */ + bytes_per_sec = (ai.play.sample_rate * ai.play.channels * + ai.play.precision) / 8; + usec_per_frag = MICROSEC * lxa_state->lxas_frag_size / bytes_per_sec; + ticks_per_frag = drv_usectohz(usec_per_frag); + + /* queue up three fragments of of data into the output stream */ + eof = 3; + + /* sanity check the eof value */ + ASSERT(ai.play.eof == 0); + ai.play.eof = 0; + + /* we always start audio output at fragment 0 */ + mutex_enter(&lxa_lock); + lxa_state->lxas_mmap_thread_frag = 0; + + /* + * we shouldn't have allowed the mapping if it isn't a multiple + * of the fragment size + */ + ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0); + + while (!lxa_state->lxas_mmap_thread_exit) { + size_t start, end; + + /* + * calculate the start and ending offsets of the next + * fragment to output + */ + start = lxa_state->lxas_mmap_thread_frag * + lxa_state->lxas_frag_size; + end = start + lxa_state->lxas_frag_size; + + ASSERT(start < lxa_state->lxas_umem_len); + ASSERT(end <= lxa_state->lxas_umem_len); + + /* setup the uio to output one fragment of audio */ + uio.uio_resid = end - start; + uio.uio_iov->iov_len = end - start; + uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start]; + + 
/* increment the current fragment index */ + lxa_state->lxas_mmap_thread_frag = + (lxa_state->lxas_mmap_thread_frag + 1) % + (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size); + + /* drop the audio lock before actually outputting data */ + mutex_exit(&lxa_lock); + + /* + * write the fragment of audio data to the device stream + * then write a eof to the stream to tell the device to + * increment ai.play.eof when it's done processing the + * fragment we just wrote + */ + if ((rv = ldi_write(lxa_state->lxas_odev_lh, + &uio, kcred)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "ldi_write() failed (%d), " + "resetting audio output", rv); + goto lxa_mmap_thread_top; + } + if ((rv = ldi_write(lxa_state->lxas_odev_lh, + &uio_null, kcred)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "ldi_write(eof) failed (%d), " + "resetting audio output", rv); + goto lxa_mmap_thread_top; + } + + /* + * we want to avoid buffer underrun so ensure that + * there is always at least one fragment of data in the + * output stream. + */ + mutex_enter(&lxa_lock); + if (--eof > 0) { + continue; + } + + /* + * now we wait until the audio device has finished outputting + * at least one fragment of data. 
+ */ + retry = 0; + while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) { + uint_t ai_eof_old = ai.play.eof; + + mutex_exit(&lxa_lock); + + /* + * delay for the number of ticks it takes + * to output one fragment of data + */ + if (ticks_per_frag > 0) + delay(ticks_per_frag); + + /* check if we've managed to output any fragments */ + if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, + AUDIO_GETINFO, (intptr_t)&ai, + FKIOCTL, kcred, &junk)) != 0) { + cmn_err(CE_WARN, "lxa_mmap_thread: " + "AUDIO_GETINFO failed (%d), " + "resetting audio output", rv); + /* re-start mmap audio output */ + goto lxa_mmap_thread_top; + } + + if (ai_eof_old == ai.play.eof) { + /* institute a random retry limit */ + if (retry++ < 100) { + mutex_enter(&lxa_lock); + continue; + } + cmn_err(CE_WARN, "lxa_mmap_thread: " + "output stalled, " + "resetting audio output"); + /* re-start mmap audio output */ + goto lxa_mmap_thread_top; + } + + if (ai.play.eof > ai_eof_old) { + eof = ai.play.eof - ai_eof_old; + } else { + /* + * eof counter wrapped around. both values are + * unsigned, so the subtraction is done modulo + * the counter width and still yields the number + * of fragments completed across the wrap. + */ + ASSERT(ai.play.eof < ai_eof_old); + eof = (int)(ai.play.eof - ai_eof_old); + } + /* we're done with this loop so re-acquire the lock */ + ASSERT(eof != 0); + mutex_enter(&lxa_lock); + } + } + mutex_exit(&lxa_lock); + lxa_mmap_thread_exit(lxa_state); + /*NOTREACHED*/ +} + +static void +lxa_mmap_output_disable(lxa_state_t *lxa_state) +{ + kt_did_t tid; + + mutex_enter(&lxa_lock); + + /* if the output thread isn't running there's nothing to do */ + if (lxa_state->lxas_mmap_thread_running == 0) { + mutex_exit(&lxa_lock); + return; + } + + /* tell the pcm mmap output thread to exit */ + lxa_state->lxas_mmap_thread_exit = 1; + + /* wait for the mmap output thread to exit */ + tid = lxa_state->lxas_mmap_thread->t_did; + mutex_exit(&lxa_lock); + thread_join(tid); +} + +static void +lxa_mmap_output_enable(lxa_state_t *lxa_state) +{ + mutex_enter(&lxa_lock); + + /* if the output thread is already running there's nothing to do */ + if 
(lxa_state->lxas_mmap_thread_running != 0) { + mutex_exit(&lxa_lock); + return; + } + + /* setup output state */ + lxa_state->lxas_mmap_thread_running = 1; + lxa_state->lxas_mmap_thread_exit = 0; + lxa_state->lxas_mmap_thread_frag = 0; + + /* kick off a thread to do the mmap pcm output */ + lxa_state->lxas_mmap_thread = thread_create(NULL, 0, + (void (*)())lxa_mmap_thread, lxa_state, + 0, &p0, TS_RUN, minclsyspri); + ASSERT(lxa_state->lxas_mmap_thread != NULL); + + mutex_exit(&lxa_lock); +} + +static int +lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + uint_t trigger; + + /* we only support output via mmap */ + if ((lxa_state->lxas_flags & FWRITE) == 0) + return (EINVAL); + + /* if the user hasn't mmap the device then there's nothing to do */ + if (lxa_state->lxas_umem_cookie == NULL) + return (EINVAL); + + /* copy in the request */ + if (ddi_copyin((void *)arg, &trigger, sizeof (trigger), mode) != 0) + return (EFAULT); + + /* a zero value disables output */ + if (trigger == 0) { + lxa_mmap_output_disable(lxa_state); + return (0); + } + + /* a non-zero value enables output */ + lxa_mmap_output_enable(lxa_state); + return (0); +} + +static int +lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + int ptr; + + /* we only support output via mmap */ + if ((lxa_state->lxas_flags & FWRITE) == 0) + return (EINVAL); + + /* if the user hasn't mmap the device then there's nothing to do */ + if (lxa_state->lxas_umem_cookie == NULL) + return (EINVAL); + + /* if the output thread isn't running then there's nothing to do */ + if (lxa_state->lxas_mmap_thread_running == 0) + return (EINVAL); + + mutex_enter(&lxa_lock); + ptr = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size; + mutex_exit(&lxa_lock); + + if (ddi_copyout(&ptr, (void *)arg, sizeof (ptr), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_frag_info_t fi; + + fi.lxa_fi_size 
= lxa_state->lxas_frag_size; + fi.lxa_fi_cnt = lxa_state->lxas_frag_cnt; + + if (ddi_copyout(&fi, (void *)arg, sizeof (fi), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_frag_info_t fi; + + /* if the device is mmaped we can't change the fragment settings */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EINVAL); + + /* copy in the request */ + if (ddi_copyin((void *)arg, &fi, sizeof (fi), mode) != 0) + return (EFAULT); + + /* do basic bounds checking: reject a zero count or a size below 16 */ + if ((fi.lxa_fi_cnt == 0) || (fi.lxa_fi_size < 16)) + return (EINVAL); + + /* the request passed the checks above, commit the new settings */ + + lxa_state->lxas_frag_size = fi.lxa_fi_size; + lxa_state->lxas_frag_cnt = fi.lxa_fi_cnt; + + return (0); +} + +static int +lxa_audio_drain(lxa_state_t *lxa_state) +{ + int junk; + + /* only applies to output buffers */ + if (lxa_state->lxas_odev_lh == NULL) + return (EINVAL); + + /* can't fail so ignore the return value */ + (void) ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_DRAIN, NULL, + FKIOCTL, kcred, &junk); + return (0); +} + +/* + * lxa_audio_info_merge() usage notes: + * + * - it's important to make sure NOT to get the ai_idev and ai_odev + * parameters mixed up when calling lxa_audio_info_merge(). + * + * - it's important for the caller to make sure that AUDIO_GETINFO + * was called for the input device BEFORE the output device. (see + * the comments for merging the monitor_gain setting to see why.) 
+ */ +static void +lxa_audio_info_merge(lxa_state_t *lxa_state, + audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged) +{ + /* if we're not setup for output return the intput device info */ + if (lxa_state->lxas_odev_lh == NULL) { + *ai_merged = *ai_idev; + return; + } + + /* if we're not setup for input return the output device info */ + if (lxa_state->lxas_idev_lh == NULL) { + *ai_merged = *ai_odev; + return; + } + + /* get record values from the input device */ + ai_merged->record = ai_idev->record; + + /* get play values from the output device */ + ai_merged->play = ai_odev->play; + + /* muting status only matters for the output device */ + ai_merged->output_muted = ai_odev->output_muted; + + /* we don't support device reference counts, always return 1 */ + ai_merged->ref_cnt = 1; + + /* + * for supported hw/sw features report the combined feature + * set we calcuated out earlier. + */ + ai_merged->hw_features = lxa_state->lxas_hw_features; + ai_merged->sw_features = lxa_state->lxas_sw_features; + + if (!lxa_state->lxas_devs_same) { + /* + * if the input and output devices are different + * physical devices then we don't support input to + * output loopback so we always report the input + * to output loopback gain to be zero. + */ + ai_merged->monitor_gain = 0; + } else { + /* + * the intput and output devices are actually the + * same physical device. hence it probably supports + * intput to output loopback. regardless we should + * pass back the intput to output gain reported by + * the device. when we pick a value to passback we + * use the output device value since that was + * the most recently queried. (we base this + * decision on the assumption that io gain is + * actually hardware setting in the device and + * hence if it is changed on one open instance of + * the device the change will be visable to all + * other instances of the device.) 
+ */ + ai_merged->monitor_gain = ai_odev->monitor_gain; + } + + /* + * for currently enabled software features always return the + * intersection (bitwise AND) of the two. (of course the enabled + * software features for the input and output devices should + * always be the same, so if they aren't, complain.) + */ + if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled) + zcmn_err(getzoneid(), CE_WARN, "lx_audio: " + "unexpected sofware feature state"); + ai_merged->sw_features_enabled = + ai_idev->sw_features_enabled & ai_odev->sw_features_enabled; +} + +static int +lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg, + int mode) +{ + audio_info_t ai, ai_null, ai_idev, ai_odev; + int rv, junk; + + /* copy in the request */ + if (ddi_copyin((void *)arg, &ai, sizeof (ai), mode) != 0) + return (EFAULT); + + /* + * if the caller is attempting to enable a software feature that + * we didn't report as supported then return an error. (a value + * of -1 is skipped by this check.) + */ + if ((ai.sw_features_enabled != -1) && + (ai.sw_features_enabled & ~lxa_state->lxas_sw_features)) + return (EINVAL); + + /* + * if a process has mmaped this device then we don't allow + * changes to the play.eof field (since mmap output depends + * on this field. 
+ */ + if ((lxa_state->lxas_umem_cookie != NULL) && + (ai.play.eof != -1)) + return (EIO); + + /* initialize the new requests; ai_null holds the AUDIO_INITINFO() values */ + AUDIO_INITINFO(&ai_null); + ai_idev = ai_odev = ai; + + /* remove audio input settings from the output device request */ + ai_odev.record = ai_null.record; + + /* remove audio output settings from the input device request */ + ai_idev.play = ai_null.play; + ai_idev.output_muted = ai_null.output_muted; + + /* apply settings to the input device (skipped if there is none) */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd, + (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* apply settings to the output device (skipped if there is none) */ + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd, + (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* + * an AUDIO_SETINFO call performs an implicit AUDIO_GETINFO to + * return values (see the comments in audioio.h.) so we need + * to combine the values returned from the input and output + * device back into the user's buffer. + */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai); + + /* copyout the results */ + if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) { + return (EFAULT); + } + + return (0); +} + +static int +lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + audio_info_t ai, ai_idev, ai_odev; + int rv, junk; + + /* get the settings from the input device (queried before the output device) */ + if ((lxa_state->lxas_idev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO, + (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* get the settings from the output device */ + if ((lxa_state->lxas_odev_lh != NULL) && + ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO, + (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0)) + return (rv); + + /* + * we need to combine the values returned from the input + * and output device back into a single user buffer. 
+ */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai); + + /* copyout the results */ + if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) + return (EFAULT); + + return (0); +} + +static int +lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai) +{ + am_control_t *actl; + int rv, ch_count, junk; + + ASSERT((lh != NULL) && (ai != NULL)); + + /* get the number of channels for the underlying device */ + if ((rv = ldi_ioctl(lh, AUDIO_GET_NUM_CHS, + (intptr_t)&ch_count, FKIOCTL, kcred, &junk)) != 0) + return (rv); + + /* allocate an am_control_t structure sized for ch_count channels */ + actl = kmem_alloc(AUDIO_MIXER_CTL_STRUCT_SIZE(ch_count), KM_SLEEP); + + /* get the device state and channel state */ + if ((rv = ldi_ioctl(lh, AUDIO_MIXERCTL_GETINFO, + (intptr_t)actl, FKIOCTL, kcred, &junk)) != 0) { + kmem_free(actl, AUDIO_MIXER_CTL_STRUCT_SIZE(ch_count)); + return (rv); + } + + /* copy the audio_info structure to the caller, then free the am_control_t */ + *ai = actl->dev_info; + kmem_free(actl, AUDIO_MIXER_CTL_STRUCT_SIZE(ch_count)); + return (0); +} + +static int +lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai) +{ + audio_info_t ai_idev, ai_odev; + int rv; + + /* if there is no input device, query the output device */ + if (lxa_state->lxas_idev_lh == NULL) + return (lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, ai)); + + /* if there is no output device, query the input device */ + if (lxa_state->lxas_odev_lh == NULL) + return (lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, ai)); + + /* + * now get the audio_info and channel information for the + * underlying output device. 
+ */ + if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, + &ai_idev)) != 0) + return (rv); + if ((rv = lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, + &ai_odev)) != 0) + return (rv); + + /* now merge the audio_info structures */ + lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, ai); + return (0); +} + +/* + * common handler for the LXA_IOC_MIXER_GET_VOL and LXA_IOC_MIXER_GET_MIC + * ioctls. copies out the play (VOL) or record (MIC) gain and balance + * taken from the merged audio_info of the underlying devices. any other + * cmd fails with EINVAL so that uninitialized stack memory is never + * copied out to the caller. + */ +static int +lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + audio_info_t ai; + int rv; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0) + return (rv); + + /* don't leak stack contents through anything we don't explicitly set */ + bzero(&lxa_ml, sizeof (lxa_ml)); + + switch (cmd) { + case LXA_IOC_MIXER_GET_VOL: + lxa_ml.lxa_ml_gain = ai.play.gain; + lxa_ml.lxa_ml_balance = ai.play.balance; + break; + case LXA_IOC_MIXER_GET_MIC: + lxa_ml.lxa_ml_gain = ai.record.gain; + lxa_ml.lxa_ml_balance = ai.record.balance; + break; + default: + /* not a request we know how to answer */ + return (EINVAL); + } + + if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + return (0); +} + +/* + * common handler for the LXA_IOC_MIXER_SET_VOL and LXA_IOC_MIXER_SET_MIC + * ioctls. copies in and validates the new levels and applies them to + * the play (VOL) or record (MIC) settings. any other cmd fails with + * EINVAL. + */ +static int +lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + audio_info_t ai; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* get the new mixer settings */ + if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + + /* sanity check the mixer settings */ + if (!LXA_MIXER_LEVELS_OK(&lxa_ml)) + return (EINVAL); + + /* initialize an audio_info struct with the new settings */ + AUDIO_INITINFO(&ai); + switch (cmd) { + case LXA_IOC_MIXER_SET_VOL: + ai.play.gain = lxa_ml.lxa_ml_gain; + ai.play.balance = lxa_ml.lxa_ml_balance; + break; + case LXA_IOC_MIXER_SET_MIC: + ai.record.gain = lxa_ml.lxa_ml_gain; + ai.record.balance = lxa_ml.lxa_ml_balance; + break; + default: + /* not a request we know how to apply */ + return (EINVAL); + } + + /* + * we're going to cheat here. normally the + * MIXERCTL_SETINFO ioctl take am_control_t and the + * AUDIO_SETINFO takes an audio_info_t. as it turns + * out the first element in a am_control_t is an + * audio_info_t. 
also, the rest of the am_control_t + * structure is normally ignored for a MIXERCTL_SETINFO + * ioctl. so here we'll try to fall back to the code + * that handles AUDIO_SETINFO ioctls. + */ + return (lxa_audio_setinfo(lxa_state, AUDIO_MIXERCTL_SETINFO, + (intptr_t)&ai, FKIOCTL)); +} + +static int +lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* simply return the cached pcm mixer settings */ + mutex_enter(&lxa_lock); + if (ddi_copyout(&lxa_state->lxas_zs->lxa_zs_pcm_levels, + (void *)arg, + sizeof (lxa_state->lxas_zs->lxa_zs_pcm_levels), mode) != 0) { + mutex_exit(&lxa_lock); + return (EFAULT); + } + mutex_exit(&lxa_lock); + return (0); +} + +static int +lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode) +{ + lxa_mixer_levels_t lxa_ml; + int rv; + + ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL); + + /* get the new mixer settings */ + if (ddi_copyin((void *)arg, &lxa_ml, sizeof (lxa_ml), mode) != 0) + return (EFAULT); + + /* sanity check the mixer settings */ + if (!LXA_MIXER_LEVELS_OK(&lxa_ml)) + return (EINVAL); + + mutex_enter(&lxa_lock); + + /* if there is an active output channel, update it */ + if (lxa_state->lxas_zs->lxa_zs_ostate != NULL) { + audio_info_t ai; + + /* initialize an audio_info struct with the new settings */ + AUDIO_INITINFO(&ai); + ai.play.gain = lxa_ml.lxa_ml_gain; + ai.play.balance = lxa_ml.lxa_ml_balance; + + if ((rv = lxa_audio_setinfo(lxa_state->lxas_zs->lxa_zs_ostate, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) { + mutex_exit(&lxa_lock); + return (rv); + } + } + + /* update the cached mixer settings */ + lxa_state->lxas_zs->lxa_zs_pcm_levels = lxa_ml; + + mutex_exit(&lxa_lock); + return (0); +} + +/* + * register a zone with the driver (LXA_IOC_ZONE_REG). validates the + * request strings, then creates the per-zone input/output device + * properties and inserts a new zone state structure into + * lxa_zstate_hash. returns EFAULT if the request can't be copied in, + * EINVAL for a malformed request, and EIO if the zone is already + * registered or the properties/hash entry can't be created. + */ +static int +lxa_zone_reg(intptr_t arg, int mode) +{ + lxa_zone_reg_t lxa_zr; + lxa_zstate_t *lxa_zs = NULL; + char *idev_name = NULL, *odev_name = NULL, *pval = NULL; + mod_hash_val_t hash_val; + int i; + + if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), 
mode) != 0) + return (EFAULT); + + /* make sure that zone_name is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++) + if (lxa_zr.lxa_zr_zone_name[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_zone_name)) + return (EINVAL); + + /* make sure that inputdev is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++) + if (lxa_zr.lxa_zr_inputdev[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_inputdev)) + return (EINVAL); + + /* make sure it's a valid inputdev property value */ + if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0) + return (EINVAL); + + /* make sure that outputdev is a valid string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++) + if (lxa_zr.lxa_zr_outputdev[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_outputdev)) + return (EINVAL); + + /* make sure it's a valid outputdev property value */ + if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0) + return (EINVAL); + + /* get the property names */ + idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_INPUTDEV); + odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_OUTPUTDEV); + + /* + * allocate and initialize a zone state structure + * since the audio device can't possibly be opened yet + * (since we're setting it up now and the zone isn't booted + * yet) assign some some resonable default pcm channel settings. + * also, default to one mixer channel. 
+ */ + lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP); + lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name); + lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN; + lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE; + + mutex_enter(&lxa_lock); + + /* + * make sure this zone isn't already registered. + * a zone is registered if properties for that zone exist + * or there is a zone state structure for that zone + */ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + idev_name, &pval) == DDI_PROP_SUCCESS) { + goto err_unlock; + } + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + odev_name, &pval) == DDI_PROP_SUCCESS) { + goto err_unlock; + } + /* + * the value found is not used, but mod_hash_find() stores a full + * mod_hash_val_t through its last argument, so it must point at an + * object of that type (an int is too small on LP64). + */ + if (mod_hash_find(lxa_zstate_hash, + (mod_hash_key_t)lxa_zs->lxa_zs_zonename, + &hash_val) == 0) + goto err_unlock; + + /* + * create the new properties and insert the zone state structure + * into the global hash + */ + if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip, + idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS) + goto err_prop_remove; + if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip, + odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS) + goto err_prop_remove; + if (mod_hash_insert(lxa_zstate_hash, + (mod_hash_key_t)lxa_zs->lxa_zs_zonename, + (mod_hash_val_t)lxa_zs) != 0) + goto err_prop_remove; + + /* success! 
*/ + lxa_registered_zones++; + mutex_exit(&lxa_lock); + + /* cleanup the temporary property name strings */ + strfree(idev_name); + strfree(odev_name); + return (0); + +err_prop_remove: + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name); + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name); + +err_unlock: + mutex_exit(&lxa_lock); + +err: + if (lxa_zs != NULL) { + strfree(lxa_zs->lxa_zs_zonename); + kmem_free(lxa_zs, sizeof (*lxa_zs)); + } + if (pval != NULL) + ddi_prop_free(pval); + if (idev_name != NULL) + strfree(idev_name); + if (odev_name != NULL) + strfree(odev_name); + return (EIO); +} + +static int +lxa_zone_unreg(intptr_t arg, int mode) +{ + lxa_zone_reg_t lxa_zr; + lxa_zstate_t *lxa_zs = NULL; + char *idev_name = NULL, *odev_name = NULL, *pval = NULL; + int rv, i; + + if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0) + return (EFAULT); + + /* make sure that zone_name is a valid (NUL terminated) string */ + for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++) + if (lxa_zr.lxa_zr_zone_name[i] == '\0') + break; + if (i == sizeof (lxa_zr.lxa_zr_zone_name)) + return (EINVAL); + + /* get the property names */ + idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_INPUTDEV); + odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name, + LXA_PROP_OUTPUTDEV); + + mutex_enter(&lxa_lock); + + if (lxa_registered_zones <= 0) { + rv = ENOENT; + goto err_unlock; + } + + /* + * make sure this zone is actually registered: both properties and + * the zone state hash entry must exist, otherwise fail with ENOENT + */ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + idev_name, &pval) != DDI_PROP_SUCCESS) { + rv = ENOENT; + goto err_unlock; + } + ddi_prop_free(pval); + pval = NULL; + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + odev_name, &pval) != DDI_PROP_SUCCESS) { + rv = ENOENT; + goto err_unlock; + } + ddi_prop_free(pval); + pval = NULL; + if (mod_hash_find(lxa_zstate_hash, + (mod_hash_key_t)lxa_zr.lxa_zr_zone_name, + (mod_hash_val_t *)&lxa_zs) != 0) { + rv = ENOENT; + goto 
err_unlock; + } + ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0); + + /* + * if the audio device is currently in use (there is an active + * input or output open for this zone) then refuse to + * unregister the zone + */ + if ((lxa_zs->lxa_zs_istate != NULL) || + (lxa_zs->lxa_zs_ostate != NULL)) { + rv = EBUSY; + goto err_unlock; + } + + /* success! cleanup zone config state */ + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name); + (void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name); + + /* + * note, the action of removing the zone state structure from the + * hash will automatically free lxa_zs->lxa_zs_zonename. + * + * the reason for this is that we used lxa_zs->lxa_zs_zonename + * as the hash key and by default mod_hash_create_strhash() uses + * mod_hash_strkey_dtor() as the hash key destructor (which + * frees the key for us). + */ + (void) mod_hash_remove(lxa_zstate_hash, + (mod_hash_key_t)lxa_zr.lxa_zr_zone_name, + (mod_hash_val_t *)&lxa_zs); + lxa_registered_zones--; + mutex_exit(&lxa_lock); + + /* cleanup */ + kmem_free(lxa_zs, sizeof (*lxa_zs)); + strfree(idev_name); + strfree(odev_name); + return (0); + +err_unlock: + mutex_exit(&lxa_lock); + +err: + if (pval != NULL) + ddi_prop_free(pval); + if (idev_name != NULL) + strfree(idev_name); + if (odev_name != NULL) + strfree(odev_name); + return (rv); +} + +static int +lxa_ioctl_devctl(int cmd, intptr_t arg, int mode) +{ + /* devctl ioctls are only allowed from the global zone */ + ASSERT(getzoneid() == 0); + if (getzoneid() != 0) + return (EINVAL); + + switch (cmd) { + case LXA_IOC_ZONE_REG: + return (lxa_zone_reg(arg, mode)); + case LXA_IOC_ZONE_UNREG: + return (lxa_zone_unreg(arg, mode)); + } + + return (EINVAL); +} + +static int +/*ARGSUSED*/ +lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp) +{ + lxa_dev_type_t open_type = LXA_TYPE_INVALID; + lxa_zstate_t *lxa_zs; + lxa_state_t *lxa_state; + minor_t minor; + int rv; + + if (getminor(*devp) == LXA_MINORNUM_DEVCTL) { + /* + * this is a devctl node, it exists to 
administer this + * pseudo driver so it doesn't actually need access to + * any underlying audio devices. hence there is nothing + * really to do here. course, this driver should + * only be administered from the global zone. + */ + ASSERT(getzoneid() == 0); + if (getzoneid() != 0) + return (EINVAL); + return (0); + } + + /* lookup the zone state structure (keyed by zone name) for the calling zone */ + if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(), + (mod_hash_val_t *)&lxa_zs) != 0) { + return (EIO); + } + + /* determine what type of device was opened from its minor number */ + switch (getminor(*devp)) { + case LXA_MINORNUM_DSP: + open_type = LXA_TYPE_AUDIO; + break; + case LXA_MINORNUM_MIXER: + open_type = LXA_TYPE_AUDIOCTL; + break; + default: + return (EINVAL); + } + ASSERT(open_type != LXA_TYPE_INVALID); + + /* all other opens are clone opens so get a new minor node */ + minor = id_alloc(lxa_minor_id); + + /* allocate and initialize the new lxa_state structure */ + lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP); + lxa_state->lxas_zs = lxa_zs; + lxa_state->lxas_dev_old = *devp; + lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor); + lxa_state->lxas_flags = flags; + lxa_state->lxas_type = open_type; + + /* initialize the input and output devices and query their features */ + if (((rv = lxa_dev_open(lxa_state)) != 0) || + ((rv = lxa_dev_getfeatures(lxa_state)) != 0)) { + lxa_state_close(lxa_state); + return (rv); + } + + /* + * save this audio statue structure into a hash indexed + * by it's minor device number. (this will provide a convient + * way to lookup the state structure on future operations.) 
+ */ + if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t)lxa_state) != 0) { + lxa_state_close(lxa_state); + return (EIO); + } + + mutex_enter(&lxa_lock); + + /* apply the currently cached zone PCM mixer levels to the output device */ + if ((lxa_state->lxas_type == LXA_TYPE_AUDIO) && + (lxa_state->lxas_odev_lh != NULL)) { + audio_info_t ai; + + AUDIO_INITINFO(&ai); + ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain; + ai.play.balance = lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance; + + if ((rv = lxa_audio_setinfo(lxa_state, + AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (rv); + } + } + + /* + * we only allow one active open of the input or output device + * per zone. check here for duplicate opens and fail them with + * EBUSY. + */ + if (lxa_state->lxas_type == LXA_TYPE_AUDIO) { + if ((lxa_state->lxas_idev_lh != NULL) && + (lxa_zs->lxa_zs_istate != NULL)) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (EBUSY); + } + if ((lxa_state->lxas_odev_lh != NULL) && + (lxa_zs->lxa_zs_ostate != NULL)) { + mutex_exit(&lxa_lock); + lxa_state_close(lxa_state); + return (EBUSY); + } + + /* not a duplicate open, record it in this zone's state structure */ + if (lxa_state->lxas_idev_lh != NULL) + lxa_zs->lxa_zs_istate = lxa_state; + if (lxa_state->lxas_odev_lh != NULL) + lxa_zs->lxa_zs_ostate = lxa_state; + } + mutex_exit(&lxa_lock); + + /* make sure to return our newly allocated dev_t */ + *devp = lxa_state->lxas_dev_new; + return (0); +} + +static int +/*ARGSUSED*/ +lxa_close(dev_t dev, int flags, int otyp, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* handle devctl minor nodes (these nodes don't have a handle) */ + if (getminor(dev) == LXA_MINORNUM_DEVCTL) + return (0); + + /* get the handle for this device; EINVAL if the minor is unknown */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) return + (EINVAL); + + lxa_state_close(lxa_state); + return (0); +} + +static 
int +/*ARGSUSED*/ +lxa_read(dev_t dev, struct uio *uiop, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* get the handle for this device; EINVAL if the minor is unknown */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + /* + * if a process has mmapped this device then we don't allow + * any more reads or writes to the device + */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EIO); + + /* we can't do a read if there is no input device */ + if (lxa_state->lxas_idev_lh == NULL) + return (EBADF); + + /* pass the request on to the underlying input device */ + return (ldi_read(lxa_state->lxas_idev_lh, uiop, kcred)); +} + +static int +/*ARGSUSED*/ +lxa_write(dev_t dev, struct uio *uiop, cred_t *credp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* get the handle for this device; EINVAL if the minor is unknown */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + /* + * if a process has mmapped this device then we don't allow + * any more reads or writes to the device + */ + if (lxa_state->lxas_umem_cookie != NULL) + return (EIO); + + /* we can't do a write if there is no output device */ + if (lxa_state->lxas_odev_lh == NULL) + return (EBADF); + + /* pass the request on to the underlying output device */ + return (ldi_write(lxa_state->lxas_odev_lh, uiop, kcred)); +} + +static int +/*ARGSUSED*/ +lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + + /* handle devctl minor nodes (these nodes don't have a handle) */ + if (getminor(dev) == LXA_MINORNUM_DEVCTL) + return (lxa_ioctl_devctl(cmd, arg, mode)); + + /* get the handle for this device */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return (EINVAL); + + ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) || + (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL)); + + switch (cmd) { 
+ case LXA_IOC_GETMINORNUM: + { + int minornum = getminor(lxa_state->lxas_dev_old); + if (ddi_copyout(&minornum, (void *)arg, + sizeof (minornum), mode) != 0) + return (EFAULT); + } + return (0); + } + + if (lxa_state->lxas_type == LXA_TYPE_AUDIO) { + /* deal with native (lx_audio specific) ioctls for audio nodes */ + switch (cmd) { + case LXA_IOC_MMAP_OUTPUT: + return (lxa_ioc_mmap_output(lxa_state, arg, mode)); + case LXA_IOC_MMAP_PTR: + return (lxa_ioc_mmap_ptr(lxa_state, arg, mode)); + case LXA_IOC_GET_FRAG_INFO: + return (lxa_ioc_get_frag_info(lxa_state, arg, mode)); + case LXA_IOC_SET_FRAG_INFO: + return (lxa_ioc_set_frag_info(lxa_state, arg, mode)); + } + + /* deal with layered ioctls (AUDIO_* requests for the underlying devices) */ + switch (cmd) { + case AUDIO_DRAIN: + return (lxa_audio_drain(lxa_state)); + case AUDIO_SETINFO: + return (lxa_audio_setinfo(lxa_state, + AUDIO_SETINFO, arg, mode)); + case AUDIO_GETINFO: + return (lxa_audio_getinfo(lxa_state, arg, mode)); + } + } + + if (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) { + /* deal with native (lx_audio specific) ioctls for mixer nodes */ + switch (cmd) { + case LXA_IOC_MIXER_GET_VOL: + return (lxa_mixer_get_common(lxa_state, + cmd, arg, mode)); + case LXA_IOC_MIXER_SET_VOL: + return (lxa_mixer_set_common(lxa_state, + cmd, arg, mode)); + case LXA_IOC_MIXER_GET_MIC: + return (lxa_mixer_get_common(lxa_state, + cmd, arg, mode)); + case LXA_IOC_MIXER_SET_MIC: + return (lxa_mixer_set_common(lxa_state, + cmd, arg, mode)); + case LXA_IOC_MIXER_GET_PCM: + return (lxa_mixer_get_pcm(lxa_state, arg, mode)); + case LXA_IOC_MIXER_SET_PCM: + return (lxa_mixer_set_pcm(lxa_state, arg, mode)); + } + + } + + return (EINVAL); +} + +static int +/*ARGSUSED*/ +lxa_devmap(dev_t dev, devmap_cookie_t dhp, + offset_t off, size_t len, size_t *maplen, uint_t model) +{ + lxa_state_t *lxa_state; + minor_t minor = getminor(dev); + ddi_umem_cookie_t umem_cookie; + void *umem_ptr; + int rv; + + /* get the handle for this device; EINVAL if the minor is unknown */ + if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor, + (mod_hash_val_t *)&lxa_state) != 0) + return 
(EINVAL); + + /* we only support mmapping of audio devices */ + if (lxa_state->lxas_type != LXA_TYPE_AUDIO) + return (EINVAL); + + /* we only support output via mmap, so the open must have included FWRITE */ + if ((lxa_state->lxas_flags & FWRITE) == 0) + return (EINVAL); + + /* + * sanity check the amount of memory the user is allocating: it + * must be non-zero, no larger than LXA_OSS_FRAG_MEM, and a + * multiple of the current fragment size + */ + if ((len == 0) || + (len > LXA_OSS_FRAG_MEM) || + ((len % lxa_state->lxas_frag_size) != 0)) + return (EINVAL); + + /* allocate and clear memory to mmap */ + umem_ptr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &umem_cookie); + if (umem_ptr == NULL) + return (ENOMEM); + bzero(umem_ptr, len); + + /* setup the memory mappings */ + rv = devmap_umem_setup(dhp, lxa_dip, NULL, umem_cookie, 0, len, + PROT_USER | PROT_READ | PROT_WRITE, 0, NULL); + if (rv != 0) { + ddi_umem_free(umem_cookie); + return (EIO); + } + + mutex_enter(&lxa_lock); + + /* we only support one mmap per open; a second attempt gets EBUSY */ + if (lxa_state->lxas_umem_cookie != NULL) { + ASSERT(lxa_state->lxas_umem_ptr != NULL); + mutex_exit(&lxa_lock); + ddi_umem_free(umem_cookie); + return (EBUSY); + } + ASSERT(lxa_state->lxas_umem_ptr == NULL); + + *maplen = len; + lxa_state->lxas_umem_len = len; + lxa_state->lxas_umem_ptr = umem_ptr; + lxa_state->lxas_umem_cookie = umem_cookie; + mutex_exit(&lxa_lock); + return (0); +} + +static int +/*ARGSUSED*/ +lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + ASSERT(instance == 0); + if (instance != 0) + return (DDI_FAILURE); + + lxa_dip = dip; + mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL); + + /* create our minor nodes (devctl, dsp and mixer) */ + if (ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR, + LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR, + LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR, + LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) 
!= DDI_SUCCESS) + return (DDI_FAILURE); + + /* allocate our data structures */ + lxa_minor_id = id_space_create("lxa_minor_id", + LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS); + lxa_state_hash = mod_hash_create_idhash("lxa_state_hash", + lxa_state_hash_size, mod_hash_null_valdtor); + lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash", + lxa_zstate_hash_size, mod_hash_null_valdtor); + + return (DDI_SUCCESS); +} + +/* + * detach entry point. only DDI_DETACH is supported, and the detach is + * refused while any zone is still registered. otherwise everything + * lxa_attach() set up (the two hashes, the minor id space and lxa_lock) + * is torn down and the global pointers are cleared. + */ +static int +/*ARGSUSED*/ +lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + ASSERT(!MUTEX_HELD(&lxa_lock)); + if (lxa_registered_zones > 0) + return (DDI_FAILURE); + + /* + * destroy each hash with the routine that matches how it was + * created: lxa_state_hash is an id hash, lxa_zstate_hash is a + * string hash. + */ + mod_hash_destroy_idhash(lxa_state_hash); + mod_hash_destroy_strhash(lxa_zstate_hash); + id_space_destroy(lxa_minor_id); + + /* lxa_attach() initialized the lock, so release it here */ + mutex_destroy(&lxa_lock); + + lxa_state_hash = NULL; + lxa_zstate_hash = NULL; + lxa_dip = NULL; + + return (DDI_SUCCESS); +} + +static int +/*ARGSUSED*/ +lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) +{ + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *resultp = lxa_dip; + return (DDI_SUCCESS); + + case DDI_INFO_DEVT2INSTANCE: + *resultp = (void *)0; + return (DDI_SUCCESS); + } + return (DDI_FAILURE); +} + +/* + * Driver flags + */ +static struct cb_ops lxa_cb_ops = { + lxa_open, /* open */ + lxa_close, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + lxa_read, /* read */ + lxa_write, /* write */ + lxa_ioctl, /* ioctl */ + lxa_devmap, /* devmap */ + nodev, /* mmap */ + ddi_devmap_segmap, /* segmap */ + nochpoll, /* chpoll */ + ddi_prop_op, /* prop_op */ + NULL, /* cb_str */ + D_NEW | D_MP | D_DEVMAP, + CB_REV, + NULL, + NULL +}; + +static struct dev_ops lxa_ops = { + DEVO_REV, + 0, + lxa_getinfo, + nulldev, + nulldev, + lxa_attach, + lxa_detach, + nodev, + &lxa_cb_ops, + NULL, + NULL +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldrv modldrv = { + &mod_driverops, /* type of module */ + "linux audio driver 'lx_audio' %I%", + &lxa_ops /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +/* + * standard module entry points: _init() installs the module with + * mod_install(), _fini() removes it with mod_remove(), and _info() + * reports on it with mod_info(). each simply returns the result of + * the call it wraps. + */ +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf new file mode 100644 index 0000000000..2eeb5eb7ee --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" + +name="lx_audio" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c new file mode 100644 index 0000000000..e4079df133 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c @@ -0,0 +1,1137 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This driver attempts to emulate some of the behaviors of + * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris. + * + * It does this by layering over the /dev/ptmx device and intercepting + * opens to it. + * + * This driver makes the following assumptions about the way the ptm/pts + * drivers on Solaris work: + * + * - all opens of the /dev/ptmx device node return a unique dev_t. + * + * - the dev_t minor node value for each open ptm instance corresponds + * to its associated slave terminal device number. i.e. the path to + * the slave terminal device associated with an open ptm instance + * whose dev_t minor node value is 5, is /dev/pts/5. 
+ * + * - the ptm driver always allocates the lowest numbered slave terminal + * device possible. + */ + +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/devops.h> +#include <sys/file.h> +#include <sys/filio.h> +#include <sys/kstr.h> +#include <sys/ldlinux.h> +#include <sys/lx_ptm.h> +#include <sys/modctl.h> +#include <sys/pathname.h> +#include <sys/ptms.h> +#include <sys/ptyvar.h> +#include <sys/stat.h> +#include <sys/stropts.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +#define LP_PTM_PATH "/dev/ptmx" +#define LP_PTS_PATH "/dev/pts/" +#define LP_PTS_DRV_NAME "pts" +#define LP_PTS_USEC_DELAY (5 * 1000) /* 5 ms */ +#define LP_PTS_USEC_DELAY_MAX (5 * MILLISEC) /* 5 ms */ + +/* + * this driver is layered on top of the ptm driver. we'd like to + * make this driver's minor name space a mirror of the ptm driver's + * namespace, but we can't actually do this. the reason is that the + * ptm driver is opened via the clone driver. therefore no minor nodes + * of the ptm driver are actually accessible via the filesystem. + * since we're not a streams device we can't be opened by the clone + * driver. therefore we need to have at least one minor node accessible + * via the filesystem so that consumers can open it. we use the device + * node with a minor number of 0 for this purpose. what this means is + * that minor node 0 can't be used to map ptm minor node 0. since this + * minor node is now reserved we need to shift our ptm minor node + * mappings by one. i.e. a ptm minor node with a value of 0 will + * correspond to our minor node with a value of 1. these mappings are + * managed with the following macros. 
+ */ +#define DEVT_TO_INDEX(x) LX_PTM_DEV_TO_PTS(x) +#define INDEX_TO_MINOR(x) ((x) + 1) + +/* + * grow our layered handle array by the same size increment that the ptm + * driver uses to grow the pty device space - PTY_MAXDELTA + */ +#define LP_PTY_INC 128 + +/* + * lx_ptm_ops contains state information about outstanding operations on the + * underlying master terminal device. Currently we only track information + * for read operations. + * + * Note that this data has not been rolled directly into the lx_ptm_handle + * structure because we can't put mutex's of condition variables into + * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle + * structures linked to from the global lx_ptm state can be resized + * dynamically, and when it's resized, the new array is at a different + * memory location and the old array memory is discarded. Mutexs and cvs + * are accessed based off their address, so if this array was re-sized while + * there were outstanding operations on any mutexs or cvs in the array + * then the system would tip over. In the future the lx_ptm_handle structure + * array should probably be replaced with either an array of pointers to + * lx_ptm_handle structures or some other kind of data structure containing + * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure + * could be folded directly into the lx_ptm_handle structures. (This will + * also require the definition of a new locking mechanism to protect the + * contents of lx_ptm_handle structures.) + */ +typedef struct lx_ptm_ops { + int lpo_rops; + kcondvar_t lpo_rops_cv; + kmutex_t lpo_rops_lock; +} lx_ptm_ops_t; + +/* + * Every open of the master terminal device in a zone results in a new + * lx_ptm_handle handle allocation. These handles are stored in an array + * hanging off the lx_ptm_state structure. + */ +typedef struct lx_ptm_handle { + /* Device handle to the underlying real /dev/ptmx master terminal. 
*/ + ldi_handle_t lph_handle; + + /* Flag to indicate if TIOCPKT mode has been enabled. */ + int lph_pktio; + + /* Number of times the slave device has been opened/closed. */ + int lph_eofed; + + /* Callback handler in the ptm driver to check if slave is open. */ + ptmptsopencb_t lph_ppocb; + + /* Pointer to state for operations on underlying device. */ + lx_ptm_ops_t *lph_lpo; +} lx_ptm_handle_t; + +/* + * Global state for the lx_ptm driver. + */ +typedef struct lx_ptm_state { + /* lx_ptm device devinfo pointer */ + dev_info_t *lps_dip; + + /* LDI ident used to open underlying real /dev/ptmx master terminals. */ + ldi_ident_t lps_li; + + /* pts drivers major number */ + major_t lps_pts_major; + + /* rw lock used to manage access and growth of lps_lh_array */ + krwlock_t lps_lh_rwlock; + + /* number of elements in lps_lh_array */ + uint_t lps_lh_count; + + /* Array of handles to underlying real /dev/ptmx master terminals. */ + lx_ptm_handle_t *lps_lh_array; +} lx_ptm_state_t; + +/* Pointer to the lx_ptm global state structure. */ +static lx_ptm_state_t lps; + +/* + * List of modules to be autopushed onto slave terminal devices when they + * are opened in an lx branded zone. + */ +static char *lx_pts_mods[] = { + "ptem", + "ldterm", + "ttcompat", + LDLINUX_MOD, + NULL +}; + +static void +lx_ptm_lh_grow(uint_t index) +{ + uint_t new_lh_count, old_lh_count; + lx_ptm_handle_t *new_lh_array, *old_lh_array; + + /* + * allocate a new array. we drop the rw lock on the array so that + * readers can still access devices in case our memory allocation + * blocks. 
+ */ + new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1); + new_lh_array = + kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP); + + /* + * double check that we still actually need to increase the size + * of the array + */ + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + if (index < lps.lps_lh_count) { + /* someone beat us to it so there's nothing more to do */ + rw_exit(&lps.lps_lh_rwlock); + kmem_free(new_lh_array, + sizeof (lx_ptm_handle_t) * new_lh_count); + return; + } + + /* copy the existing data into the new array */ + ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL)); + ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL)); + if (lps.lps_lh_count != 0) { + bcopy(lps.lps_lh_array, new_lh_array, + sizeof (lx_ptm_handle_t) * lps.lps_lh_count); + } + + /* save info on the old array */ + old_lh_array = lps.lps_lh_array; + old_lh_count = lps.lps_lh_count; + + /* install the new array */ + lps.lps_lh_array = new_lh_array; + lps.lps_lh_count = new_lh_count; + + rw_exit(&lps.lps_lh_rwlock); + + /* free the old array */ + if (old_lh_array != NULL) { + kmem_free(old_lh_array, + sizeof (lx_ptm_handle_t) * old_lh_count); + } +} + +static void +lx_ptm_lh_insert(uint_t index, ldi_handle_t lh) +{ + lx_ptm_ops_t *lpo; + + ASSERT(lh != NULL); + + /* Allocate and initialize the ops structure */ + lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP); + mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL); + + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + + /* check if we need to grow the size of the layered handle array */ + if (index >= lps.lps_lh_count) { + rw_exit(&lps.lps_lh_rwlock); + lx_ptm_lh_grow(index); + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + } + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle == NULL); + ASSERT(lps.lps_lh_array[index].lph_pktio == 0); + ASSERT(lps.lps_lh_array[index].lph_eofed == 0); + 
ASSERT(lps.lps_lh_array[index].lph_lpo == NULL); + + /* insert the new handle and return */ + lps.lps_lh_array[index].lph_handle = lh; + lps.lps_lh_array[index].lph_pktio = 0; + lps.lps_lh_array[index].lph_eofed = 0; + lps.lps_lh_array[index].lph_lpo = lpo; + + rw_exit(&lps.lps_lh_rwlock); +} + +/* + * Remove the layered handle stored at the given index, release the + * lx_ptm_ops state that was allocated for it, and return the underlying + * ldi handle to the caller (who is responsible for closing it). + */ +static ldi_handle_t +lx_ptm_lh_remove(uint_t index) +{ + ldi_handle_t lh; + + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0); + ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock)); + + /* + * tear down and free the ops structure. the cv and mutex were + * set up with cv_init()/mutex_init() when the handle was inserted, + * so they must be destroyed before their memory is released. + */ + cv_destroy(&lps.lps_lh_array[index].lph_lpo->lpo_rops_cv); + mutex_destroy(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock); + kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t)); + lps.lps_lh_array[index].lph_lpo = NULL; + + /* remove the handle and return it */ + lh = lps.lps_lh_array[index].lph_handle; + lps.lps_lh_array[index].lph_handle = NULL; + lps.lps_lh_array[index].lph_pktio = 0; + lps.lps_lh_array[index].lph_eofed = 0; + rw_exit(&lps.lps_lh_rwlock); + return (lh); +} + +/* + * Copy out the "is the slave open" callback saved for the given index. 
+ */ +static void +lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb) +{ + /* nothing in the array is modified, so a reader lock is sufficient */ + rw_enter(&lps.lps_lh_rwlock, RW_READER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + *ppocb = lps.lps_lh_array[index].lph_ppocb; + rw_exit(&lps.lps_lh_rwlock); +} + +/* + * Save the "is the slave open" callback for the given index. + */ +static void +lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb) +{ + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + lps.lps_lh_array[index].lph_ppocb = *ppocb; + rw_exit(&lps.lps_lh_rwlock); +} + +/* + * Return the ldi handle of the underlying master terminal for the given index. + */ +static ldi_handle_t +lx_ptm_lh_lookup(uint_t index) +{ + ldi_handle_t lh; + + rw_enter(&lps.lps_lh_rwlock, RW_READER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + /* return the handle */ + lh = lps.lps_lh_array[index].lph_handle; + rw_exit(&lps.lps_lh_rwlock); + return (lh); +} + +static lx_ptm_ops_t * 
+lx_ptm_lpo_lookup(uint_t index) +{ + lx_ptm_ops_t *lpo; + + rw_enter(&lps.lps_lh_rwlock, RW_READER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_lpo != NULL); + + /* return the handle */ + lpo = lps.lps_lh_array[index].lph_lpo; + rw_exit(&lps.lps_lh_rwlock); + return (lpo); +} + +static int +lx_ptm_lh_pktio_get(uint_t index) +{ + int pktio; + + rw_enter(&lps.lps_lh_rwlock, RW_READER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + /* return the pktio state */ + pktio = lps.lps_lh_array[index].lph_pktio; + rw_exit(&lps.lps_lh_rwlock); + return (pktio); +} + +static void +lx_ptm_lh_pktio_set(uint_t index, int pktio) +{ + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + /* set the pktio state */ + lps.lps_lh_array[index].lph_pktio = pktio; + rw_exit(&lps.lps_lh_rwlock); +} + +static int +lx_ptm_lh_eofed_get(uint_t index) +{ + int eofed; + + rw_enter(&lps.lps_lh_rwlock, RW_READER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + /* return the eofed state */ + eofed = lps.lps_lh_array[index].lph_eofed; + rw_exit(&lps.lps_lh_rwlock); + return (eofed); +} + +static void +lx_ptm_lh_eofed_set(uint_t index) +{ + rw_enter(&lps.lps_lh_rwlock, RW_WRITER); + + ASSERT(index < lps.lps_lh_count); + ASSERT(lps.lps_lh_array[index].lph_handle != NULL); + + /* set the eofed state */ + lps.lps_lh_array[index].lph_eofed++; + rw_exit(&lps.lps_lh_rwlock); +} + +static int +lx_ptm_read_start(dev_t dev) +{ + lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev)); + + mutex_enter(&lpo->lpo_rops_lock); + ASSERT(lpo->lpo_rops >= 0); + + /* Wait for other read operations to finish */ + while (lpo->lpo_rops != 0) { + if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) { + mutex_exit(&lpo->lpo_rops_lock); + return (-1); + } + } + + /* Start a read operation */ + 
VERIFY(++lpo->lpo_rops == 1); + mutex_exit(&lpo->lpo_rops_lock); + return (0); +} + +static void +lx_ptm_read_end(dev_t dev) +{ + lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev)); + + mutex_enter(&lpo->lpo_rops_lock); + ASSERT(lpo->lpo_rops >= 0); + + /* End a read operation */ + VERIFY(--lpo->lpo_rops == 0); + cv_signal(&lpo->lpo_rops_cv); + + mutex_exit(&lpo->lpo_rops_lock); +} + +static int +lx_ptm_pts_isopen(dev_t dev) +{ + ptmptsopencb_t ppocb; + + lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb); + return (ppocb.ppocb_func(ppocb.ppocb_arg)); +} + +static void +lx_ptm_eof_read(ldi_handle_t lh) +{ + struct uio uio; + iovec_t iov; + char junk[1]; + + /* + * We can remove any EOF message from the head of the stream by + * doing a zero byte read from the stream. + */ + iov.iov_len = 0; + iov.iov_base = junk; + uio.uio_iovcnt = 1; + uio.uio_iov = &iov; + uio.uio_resid = iov.iov_len; + uio.uio_offset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_fmode = 0; + uio.uio_extflg = 0; + uio.uio_llimit = MAXOFFSET_T; + (void) ldi_read(lh, &uio, kcred); +} + +static int +lx_ptm_eof_drop_1(dev_t dev, int *rvalp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err, msg_size, msg_count; + + *rvalp = 0; + + /* + * Check if there is an EOF message (represented by a zero length + * data message) at the head of the stream. Note that the + * I_NREAD ioctl is a streams framework ioctl so it will succeed + * even if there have been previous write errors on this stream. + */ + if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size, + FKIOCTL, kcred, &msg_count)) != 0) + return (err); + + if ((msg_count == 0) || (msg_size != 0)) { + /* No EOF message found */ + return (0); + } + + /* Record the fact that the slave device has been closed. 
*/ + lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev)); + + /* drop the EOF */ + lx_ptm_eof_read(lh); + *rvalp = 1; + return (0); +} + +/* + * Drop every EOF message queued at the head of the stream. If rvalp is + * not NULL it is set to 1 when at least one EOF was dropped, 0 otherwise. + */ +static int +lx_ptm_eof_drop(dev_t dev, int *rvalp) +{ + int rval, err; + + if (rvalp != NULL) + *rvalp = 0; + for (;;) { + if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0) + return (err); + if (rval == 0) + return (0); + if (rvalp != NULL) + *rvalp = 1; + } +} + +/* + * Set *rvalp to 1 if something is waiting to be read from the master + * terminal. With ignore_eof set only real data counts (FIONREAD byte + * count); otherwise any queued message counts (I_NREAD message count). + */ +static int +lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err; + + *rvalp = 0; + if (ignore_eof) { + int size, rval; + + if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size, + FKIOCTL, kcred, &rval)) != 0) + return (err); + if (size != 0) + *rvalp = 1; + } else { + int msg_size, msg_count; + + if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size, + FKIOCTL, kcred, &msg_count)) != 0) + return (err); + if (msg_count != 0) + *rvalp = 1; + } + return (0); +} + +/* + * Attach entry point: create our single minor node, obtain an LDI ident + * and initialize the global driver state. 
+ */ +static int +lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int err; + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR, + ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) + return (DDI_FAILURE); + + err = ldi_ident_from_dip(dip, &lps.lps_li); + if (err != 0) { + /* remove the node by the same name it was created with */ + ddi_remove_minor_node(dip, LX_PTM_MINOR_NODE); + return (DDI_FAILURE); + } + + lps.lps_dip = dip; + lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME); + + rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL); + lps.lps_lh_count = 0; + lps.lps_lh_array = NULL; + + return (DDI_SUCCESS); +} + +/* + * Detach entry point: undo everything lx_ptm_attach() set up. + */ +/*ARGSUSED*/ +static int +lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + /* remove the minor node created at attach time */ + ddi_remove_minor_node(dip, NULL); + + ldi_ident_release(lps.lps_li); + lps.lps_dip = NULL; + + /* pairs with the rw_init() done at attach time */ + rw_destroy(&lps.lps_lh_rwlock); + + ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL)); + ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL)); + if (lps.lps_lh_array != NULL) { + kmem_free(lps.lps_lh_array, + sizeof (lx_ptm_handle_t) * lps.lps_lh_count); + 
lps.lps_lh_array = NULL; + lps.lps_lh_count = 0; + } + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp) +{ + struct strioctl iocb; + ptmptsopencb_t ppocb = { NULL, NULL }; + ldi_handle_t lh; + major_t maj, our_major = getmajor(*devp); + minor_t min, lastmin; + uint_t index, anchor = 1; + dev_t ptm_dev; + int err, rval = 0; + + /* + * Don't support the FNDELAY flag and FNONBLOCK until we either + * find a Linux app that opens /dev/ptmx with the O_NDELAY + * or O_NONBLOCK flags explicitly, or until we create test cases + * to determine how reads of master terminal devices opened with + * these flags behave in different situations on Linux. Supporting + * these flags will involve enhancing our read implementation + * and changing the way it deals with EOF notifications. + */ + if (flag & (FNDELAY | FNONBLOCK)) + return (ENOTSUP); + + /* + * we're layered on top of the ptm driver so open that driver + * first. (note that we're opening /dev/ptmx in the global + * zone, not ourselves in the Linux zone.) + */ + err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li); + if (err != 0) + return (err); + + /* get the devt returned by the ptmx open */ + err = ldi_get_dev(lh, &ptm_dev); + if (err != 0) { + (void) ldi_close(lh, flag, credp); + return (err); + } + + /* + * we're a cloning driver so here we'll change the devt that we + * return. the ptmx is also a cloning driver so we'll just use + * its minor number as our minor number (it already manages its + * minor name space so no reason to duplicate the effort.) + */ + index = getminor(ptm_dev); + *devp = makedevice(our_major, INDEX_TO_MINOR(index)); + + /* Get a callback function to query if the pts device is open. 
*/ + iocb.ic_cmd = PTMPTSOPENCB; + iocb.ic_timout = 0; + iocb.ic_len = sizeof (ppocb); + iocb.ic_dp = (char *)&ppocb; + + err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval); + if ((err != 0) || (rval != 0)) { + (void) ldi_close(lh, flag, credp); + return (EIO); /* XXX return something else here? */ + } + ASSERT(ppocb.ppocb_func != NULL); + + /* + * now setup autopush for the terminal slave device. this is + * necessary so that when a Linux program opens the device we + * can push required strmod modules onto the stream. in Solaris + * this is normally done by the application that actually + * allocates the terminal. + */ + maj = lps.lps_pts_major; + min = index; + lastmin = 0; + err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin, + &anchor, lx_pts_mods); + if (err != 0) { + (void) ldi_close(lh, flag, credp); + return (EIO); /* XXX return something else here? */ + } + + /* save off this layered handle for future accesses */ + lx_ptm_lh_insert(index, lh); + lx_ptm_lh_set_ppocb(index, &ppocb); + return (0); +} + +/*ARGSUSED*/ +static int +lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp) +{ + ldi_handle_t lh; + major_t maj; + minor_t min, lastmin; + uint_t index; + int err; + + index = DEVT_TO_INDEX(dev); + + /* + * we must cleanup all the state associated with this major/minor + * terminal pair before actually closing the ptm master device. + * this is required because once the close of the ptm device is + * complete the major/minor terminal pair is immediately available for + * re-use in any zone. 
+ * + */ + + /* free up our saved reference for this layered handle */ + lh = lx_ptm_lh_remove(index); + + /* unconfigure autopush for the associated terminal slave device */ + maj = lps.lps_pts_major; + min = index; + lastmin = 0; + do { + /* + * we loop here because we don't want to release this ptm + * node if autopush can't be disabled on the associated + * slave device because then bad things could happen if + * another brand were to get this terminal allocated + * to them. + * + * XXX should we ever give up? + */ + err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin, + 0, NULL); + } while (err != 0); + + err = ldi_close(lh, flag, credp); + + /* + * note that we don't have to bother with changing the permissions + * on the associated slave device here. the reason is that no one + * can actually open the device until its associated master + * device is re-opened, which will result in the permissions on + * it being reset. + */ + return (err); +} + +static int +lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err, rval; + struct uio uio = *uiop; + + *loop = 0; + + /* + * Here's another way that Linux master terminals behave differently + * from Solaris master terminals. If you do a read on a Linux + * master terminal (that was opened without NDELAY and NONBLOCK) + * whose corresponding slave terminal is currently closed and + * has been opened and closed at least once, Linux returns -1 and + * sets errno to EIO whereas Solaris blocks. + */ + if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) { + /* Slave has been opened and closed at least once. */ + if (lx_ptm_pts_isopen(dev) == 0) { + /* + * Slave is closed. Make sure that data is available + * before attempting a read. + */ + if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0) + return (err); + + /* If there is no data available then return. 
*/ + if (rval == 0) + return (EIO); + } + } + + /* Actually do the read operation. 
*/ + if ((err = ldi_read(lh, uiop, credp)) != 0) + return (err); + + /* If read returned actual data then return. */ + if (uio.uio_resid != uiop->uio_resid) + return (0); + + /* + * This was a zero byte read (i.e., an EOF). This indicates + * that the slave terminal device has been closed. Record + * the fact that the slave device has been closed and retry + * the read operation. + */ + lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev)); + *loop = 1; + return (0); +} + +static int +lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp) +{ + int pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev)); + int err, loop; + struct uio uio; + struct iovec iovp; + + ASSERT(uiop->uio_iovcnt > 0); + + /* + * If packet mode has been enabled (via TIOCPKT) we need to pad + * all read requests with a leading byte that indicates any + * relevant control status information. + */ + if (pktio != 0) { + /* + * We'd like to write the control information into + * the current buffer but we can't yet. We don't + * want to modify userspace memory here only to have + * the read operation fail later. So instead + * what we'll do here is read one character from the + * beginning of the memory pointed to by the uio + * structure. This will advance the output pointer + * by one. Then when the read completes successfully + * we can update the byte that we passed over. Before + * we do the read make a copy of the current uiop and + * iovec structs so we can write to them later. + */ + uio = *uiop; + iovp = *uiop->uio_iov; + uio.uio_iov = &iovp; + + if (uwritec(uiop) == -1) + return (EFAULT); + } + + do { + /* + * Serialize all reads. We need to do this so that we can + * properly emulate the behavior of master terminals on Linux. + * In reality this serialization should not pose any kind of + * performance problem since it would be very strange to have + * multiple threads trying to read from the same master + * terminal device concurrently. 
+ */ + if (lx_ptm_read_start(dev) != 0) + return (EINTR); + + err = lx_ptm_read_loop(dev, uiop, credp, &loop); + lx_ptm_read_end(dev); + if (err != 0) + return (err); + } while (loop != 0); + + if (pktio != 0) { + uint8_t pktio_data = TIOCPKT_DATA; + + /* + * Note that the control status information we + * pass back is faked up in the sense that we + * don't actually report any events, we always + * report a status of 0. + */ + if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0) + return (EFAULT); + } + + return (0); +} + +static int +lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err; + + err = ldi_write(lh, uiop, credp); + + return (err); +} + +static int +lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + int err; + + /* + * here we need to make sure that we never allow the + * I_SETSIG and I_ESETSIG ioctls to pass through. we + * do this because we can't support them. + * + * the native Solaris ptm device supports these ioctls because + * they are streams framework ioctls and all streams devices + * support them by default. these ioctls cause the current + * process to be registered with a stream and receive signals + * when certain stream events occur. + * + * a problem arises with cleanup of these registrations + * for layered drivers. + * + * normally the streams framework is notified whenever a + * process closes any reference to a stream and it goes ahead + * and cleans up these registrations. but actual device drivers + * are not notified when a process performs a close operation + * unless the process is closing the last opened reference to + * the device on the entire system. 
+ * + * so while we could pass these ioctls on and allow processes + * to register for signal delivery, we would never receive + * any notification when those processes exit (or close a + * stream) and we wouldn't be able to unregister them. + * + * luckily these operations are streams specific and Linux + * doesn't support streams devices. so it doesn't actually + * seem like we need to support these ioctls. if it turns + * out that we do need to support them for some reason in + * the future, the current driver model will have to be + * enhanced to better support streams device layering. + */ + if ((cmd == I_SETSIG) || (cmd == I_ESETSIG)) + return (EINVAL); + + /* + * here we fake up support for TIOCPKT. Linux applications expect + * /dev/ptmx to support this ioctl, but on Solaris it doesn't. + * (it is supported on older bsd style ptys.) so we'll fake + * up support for it here. + * + * the reason that this ioctl is emulated here instead of in + * userland is that this ioctl affects the results returned + * from read() operations. if this ioctl was emulated in + * userland the brand library would need to intercept all + * read operations and check to see if pktio was enabled + * for the fd being read from. since this ioctl only needs + * to be supported on the ptmx device it makes more sense + * to support it here where we can easily update the results + * returned for read() operations performed on ourselves. 
+ */ + if (cmd == TIOCPKT) { + int pktio; + + if (ddi_copyin((void *)arg, &pktio, sizeof (pktio), + mode) != DDI_SUCCESS) + return (EFAULT); + + if (pktio == 0) + lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0); + else + lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1); + + return (0); + } + + err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp); + + return (err); +} + +static int +lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp, int *loop) +{ + ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev)); + short reventsp2; + int err, rval; + + *loop = 0; + + /* + * If the slave device has been opened and closed at least + * once and the slave device is currently closed, then poll + * always needs to return immediately. + */ + if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) && + (lx_ptm_pts_isopen(dev) == 0)) { + /* In this case always return POLLHUP */ + *reventsp = POLLHUP; + + /* + * Check if there really is data on the stream. + * If so set the correct return flags. + */ + if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) { + /* Something went wrong. */ + return (err); + } + if (rval != 0) + *reventsp |= (events & (POLLIN | POLLRDNORM)); + + /* + * Is the user checking for writability? Note that for ptm + * devices Linux seems to ignore the POLLWRBAND write flag. + */ + if ((events & POLLWRNORM) == 0) + return (0); + + /* + * To check if the stream is writable we have to actually + * call poll, but make sure to set anyyet to 1 to prevent + * the streams framework from setting up callbacks. + */ + if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0) + return (err); + + *reventsp |= (reventsp2 & POLLWRNORM); + } else { + int lockstate; + + /* The slave device is open, do the poll */ + if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0) + return (err); + + /* + * Drop any leading EOFs on the stream. + * + * Note that we have to use pollunlock() here to avoid + * recursive mutex enters in the poll framework. 
The + * reason is that if there is an EOF message on the stream + * then the act of reading from the queue to remove the + * message can cause the ptm drivers event service + * routine to be invoked, and if there is no open + * slave device then the ptm driver may generate + * error messages and put them on the stream. This + * in turn will generate a poll event and the poll + * framework will try to invoke any poll callbacks + * associated with the stream. In the process of + * doing that the poll framework will try to aquire + * locks that we are already holding. So we need to + * drop those locks here before we do our read. + */ + lockstate = pollunlock(); + err = lx_ptm_eof_drop(dev, &rval); + pollrelock(lockstate); + if (err) + return (err); + + /* If no EOF was dropped then return */ + if (rval == 0) + return (0); + + /* + * An EOF was removed from the stream. Retry the entire + * poll operation from the top because polls on the ptm + * device should behave differently now. + */ + *loop = 1; + } + return (0); +} + +static int +lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp) +{ + int loop, err; + + do { + /* Serialize ourself wrt read operations. 
*/ + if (lx_ptm_read_start(dev) != 0) + return (EINTR); + + err = lx_ptm_poll_loop(dev, + events, anyyet, reventsp, phpp, &loop); + lx_ptm_read_end(dev); + if (err != 0) + return (err); + } while (loop != 0); + return (0); +} + +static struct cb_ops lx_ptm_cb_ops = { + lx_ptm_open, /* open */ + lx_ptm_close, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + lx_ptm_read, /* read */ + lx_ptm_write, /* write */ + lx_ptm_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + lx_ptm_poll, /* chpoll */ + ddi_prop_op, /* prop_op */ + NULL, /* cb_str */ + D_NEW | D_MP, + CB_REV, + NULL, + NULL +}; + +static struct dev_ops lx_ptm_ops = { + DEVO_REV, + 0, + ddi_getinfo_1to1, + nulldev, + nulldev, + lx_ptm_attach, + lx_ptm_detach, + nodev, + &lx_ptm_cb_ops, + NULL, + NULL +}; + +static struct modldrv modldrv = { + &mod_driverops, + "Linux master terminal driver 'lx_ptm' %I%", + &lx_ptm_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf new file mode 100644 index 0000000000..481b4e3c74 --- /dev/null +++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +name="lx_ptm" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c new file mode 100644 index 0000000000..d993c1eefc --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -0,0 +1,836 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/thread.h> +#include <sys/systm.h> +#include <sys/syscall.h> +#include <sys/proc.h> +#include <sys/modctl.h> +#include <sys/cmn_err.h> +#include <sys/model.h> +#include <sys/exec.h> +#include <sys/lx_impl.h> +#include <sys/machbrand.h> +#include <sys/lx_syscalls.h> +#include <sys/lx_pid.h> +#include <sys/lx_futex.h> +#include <sys/lx_brand.h> +#include <sys/termios.h> +#include <sys/sunddi.h> +#include <sys/ddi.h> +#include <sys/exec.h> +#include <sys/vnode.h> +#include <sys/pathname.h> +#include <sys/machelf.h> +#include <sys/auxv.h> +#include <sys/priv.h> +#include <sys/regset.h> +#include <sys/privregs.h> +#include <sys/archsystm.h> +#include <sys/zone.h> +#include <sys/brand.h> + +int lx_debug = 0; + +void lx_setbrand(proc_t *); +int lx_getattr(zone_t *, int, void *, size_t *); +int lx_setattr(zone_t *, int, void *, size_t); +int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); +void lx_copy_procdata(proc_t *, proc_t *); + +extern void lx_setrval(klwp_t *, int, int); +extern void lx_proc_exit(proc_t *, klwp_t *); +extern void lx_exec(); +extern int lx_initlwp(klwp_t *); +extern void lx_forklwp(klwp_t *, klwp_t *); +extern void lx_exitlwp(klwp_t *); +extern void lx_freelwp(klwp_t *); +extern greg_t lx_fixsegreg(greg_t, model_t); +extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *); + +int lx_systrace_brand_enabled; + +lx_systrace_f *lx_systrace_entry_ptr; +lx_systrace_f *lx_systrace_return_ptr; + +static int lx_systrace_enabled; + +static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, + struct intpdata *idata, int level, long *execsz, int setid, + caddr_t exec_file, struct cred *cred, int brand_action); + +/* lx brand */ +struct brand_ops lx_brops = { + lx_brandsys, + lx_setbrand, + lx_getattr, + lx_setattr, + lx_copy_procdata, + lx_proc_exit, 
+ lx_exec, + lx_setrval, + lx_initlwp, + lx_forklwp, + lx_freelwp, + lx_exitlwp, + lx_elfexec +}; + +struct brand_mach_ops lx_mops = { + NULL, + lx_brand_int80_callback, + NULL, + NULL, + NULL, + lx_fixsegreg, +}; + +struct brand lx_brand = { + BRAND_VER_1, + "lx", + &lx_brops, + &lx_mops +}; + +static struct modlbrand modlbrand = { + &mod_brandops, "lx brand %I%", &lx_brand +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlbrand, NULL +}; + +void +lx_proc_exit(proc_t *p, klwp_t *lwp) +{ + zone_t *z = p->p_zone; + + ASSERT(p->p_brand != NULL); + ASSERT(p->p_brand_data != NULL); + + /* + * If init is dying and we aren't explicitly shutting down the zone + * or the system, then Solaris is about to restart init. The Linux + * init is not designed to handle a restart, which it interprets as + * a reboot. To give it a sane environment in which to run, we + * reboot the zone. + */ + if (p->p_pid == z->zone_proc_initpid) { + if (z->zone_boot_err == 0 && + z->zone_restart_init && + zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && + zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) + (void) zone_kadmin(A_REBOOT, 0, NULL, CRED()); + } else { + lx_exitlwp(lwp); + kmem_free(p->p_brand_data, sizeof (struct lx_proc_data)); + p->p_brand_data = NULL; + p->p_brand = &native_brand; + } +} + +void +lx_setbrand(proc_t *p) +{ + kthread_t *t = p->p_tlist; + int err; + + ASSERT(p->p_brand_data == NULL); + ASSERT(ttolxlwp(curthread) == NULL); + + p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP); + + /* + * This routine can only be called for single-threaded processes. + * Since lx_initlwp() can only fail if we run out of PIDs for + * multithreaded processes, we know that this can never fail. 
+ */ + err = lx_initlwp(t->t_lwp); + ASSERT(err == 0); +} + +/* ARGSUSED */ +int +lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) +{ + boolean_t val; + + if (attr == LX_ATTR_RESTART_INIT) { + if (bufsize > sizeof (boolean_t)) + return (ERANGE); + if (copyin(buf, &val, sizeof (val)) != 0) + return (EFAULT); + if (val != B_TRUE && val != B_FALSE) + return (EINVAL); + zone->zone_restart_init = val; + return (0); + } + return (EINVAL); +} + +/* ARGSUSED */ +int +lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) +{ + if (attr == LX_ATTR_RESTART_INIT) { + if (*bufsize < sizeof (boolean_t)) + return (ERANGE); + if (copyout(&zone->zone_restart_init, buf, + sizeof (boolean_t)) != 0) + return (EFAULT); + *bufsize = sizeof (boolean_t); + return (0); + } + return (-EINVAL); +} + +/* + * Enable ptrace system call tracing for the given LWP. This is done by + * both setting the flag in that LWP's brand data (in the kernel) and setting + * the process-wide trace flag (in the brand library of the traced process). + */ +static int +lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set) +{ + proc_t *p; + kthread_t *t; + klwp_t *lwp; + lx_proc_data_t *lpdp; + lx_lwp_data_t *lldp; + uintptr_t addr; + int ret, flag = 1; + + if ((p = sprlock(pid)) == NULL) + return (ESRCH); + + if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) { + sprunlock(p); + return (EPERM); + } + + if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) { + sprunlock(p); + return (ESRCH); + } + + if ((lpdp = p->p_brand_data) == NULL || + (lldp = lwp->lwp_brand) == NULL) { + sprunlock(p); + return (ESRCH); + } + + if (set) { + /* + * Enable the ptrace flag for this LWP and this process. Note + * that we will turn off the LWP's ptrace flag, but we don't + * turn off the process's ptrace flag. 
+ */ + lldp->br_ptrace = 1; + lpdp->l_ptrace = 1; + + addr = lpdp->l_traceflag; + + mutex_exit(&p->p_lock); + + /* + * This can fail only in some rare corner cases where the + * process is exiting or we're completely out of memory. In + * these cases, it's sufficient to return an error to the ptrace + * consumer and leave the process-wide flag set. + */ + ret = uwrite(p, &flag, sizeof (flag), addr); + + mutex_enter(&p->p_lock); + + /* + * If we couldn't set the trace flag, unset the LWP's ptrace + * flag as there ptrace consumer won't expect this LWP to stop. + */ + if (ret != 0) + lldp->br_ptrace = 0; + } else { + lldp->br_ptrace = 0; + ret = 0; + } + + sprunlock(p); + + if (ret != 0) + ret = EIO; + + return (ret); +} + +static void +lx_ptrace_fire(void) +{ + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + lx_lwp_data_t *lldp = lwp->lwp_brand; + + /* + * The ptrace flag only applies until the next event is encountered + * for the given LWP. If it's set, turn off the flag and poke the + * controlling process by raising a signal. + */ + if (lldp->br_ptrace) { + lldp->br_ptrace = 0; + tsignal(t, SIGTRAP); + } +} + +void +lx_brand_systrace_enable(void) +{ + extern void lx_brand_int80_enable(void); + + ASSERT(!lx_systrace_enabled); + + lx_brand_int80_enable(); + + lx_systrace_enabled = 1; +} + +void +lx_brand_systrace_disable(void) +{ + extern void lx_brand_int80_disable(void); + + ASSERT(lx_systrace_enabled); + + lx_brand_int80_disable(); + + lx_systrace_enabled = 0; +} + +/* + * Get the addresses of the user-space system call handler and attach it to + * the proc structure. Returning 0 indicates success; the value returned + * by the system call is the value stored in rval. Returning a non-zero + * value indicates a failure; the value returned is used to set errno, -1 + * is returned from the syscall and the contents of rval are ignored. To + * set errno and have the syscall return a value other than -1 we can + * manually set errno and rval and return 0. 
+ */ +int +lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + kthread_t *t = curthread; + proc_t *p = ttoproc(t); + lx_proc_data_t *pd; + int linux_call; + struct termios *termios; + uint_t termios_len; + int error; + lx_brand_registration_t reg; + + /* + * There is one operation that is suppored for non-branded + * process. B_EXEC_BRAND. This is the equilivant of an + * exec call, but the new process that is created will be + * a branded process. + */ + if (cmd == B_EXEC_BRAND) { + ASSERT(p->p_zone != NULL); + ASSERT(p->p_zone->zone_brand == &lx_brand); + return (exec_common( + (char *)arg1, (const char **)arg2, (const char **)arg3, + EBA_BRAND)); + } + + /* For all other operations this must be a branded process. */ + if (p->p_brand == NULL) + return (set_errno(ENOSYS)); + + ASSERT(p->p_brand == &lx_brand); + ASSERT(p->p_brand_data != NULL); + + switch (cmd) { + case B_REGISTER: + if (p->p_model == DATAMODEL_NATIVE) { + if (copyin((void *)arg1, ®, sizeof (reg)) != 0) { + lx_print("Failed to copyin brand registration " + "at 0x%p\n", (void *)arg1); + return (EFAULT); + } +#ifdef _LP64 + } else { + lx_brand_registration32_t reg32; + + if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) { + lx_print("Failed to copyin brand registration " + "at 0x%p\n", (void *)arg1); + return (EFAULT); + } + + reg.lxbr_version = (uint_t)reg32.lxbr_version; + reg.lxbr_handler = + (void *)(uintptr_t)reg32.lxbr_handler; + reg.lxbr_tracehandler = + (void *)(uintptr_t)reg32.lxbr_tracehandler; + reg.lxbr_traceflag = + (void *)(uintptr_t)reg32.lxbr_traceflag; +#endif + } + + if (reg.lxbr_version != LX_VERSION_1) { + lx_print("Invalid brand library version (%u)\n", + reg.lxbr_version); + return (EINVAL); + } + + lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n", + (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p); + pd = p->p_brand_data; + pd->l_handler = (uintptr_t)reg.lxbr_handler; 
+ pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler; + pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag; + *rval = 0; + return (0); + case B_TTYMODES: + /* This is necessary for emulating TCGETS ioctls. */ + if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(), + DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios, + &termios_len) != DDI_SUCCESS) + return (EIO); + + ASSERT(termios_len == sizeof (*termios)); + + if (copyout(&termios, (void *)arg1, sizeof (termios)) != 0) { + ddi_prop_free(termios); + return (EFAULT); + } + + ddi_prop_free(termios); + *rval = 0; + return (0); + + case B_ELFDATA: + pd = curproc->p_brand_data; + if (copyout(&pd->l_elf_data, (void *)arg1, + sizeof (lx_elf_data_t)) != 0) { + (void) set_errno(EFAULT); + return (*rval = -1); + } + *rval = 0; + return (0); + + case B_EXEC_NATIVE: + error = exec_common( + (char *)arg1, (const char **)arg2, (const char **)arg3, + EBA_NATIVE); + if (error) { + (void) set_errno(error); + return (*rval = -1); + } + return (*rval = 0); + + case B_LPID_TO_SPAIR: + /* + * Given a Linux pid as arg1, return the Solaris pid in arg2 and + * the Solaris LWP in arg3. We also translate pid 1 (which is + * hardcoded in many applications) to the zone's init process. 
+ */ + { + pid_t s_pid; + id_t s_tid; + + if ((pid_t)arg1 == 1) { + s_pid = p->p_zone->zone_proc_initpid; + /* handle the dead/missing init(1M) case */ + if (s_pid == -1) + s_pid = 1; + s_tid = 1; + } else if (lx_lpid_to_spair((pid_t)arg1, &s_pid, + &s_tid) < 0) + return (ESRCH); + + if (copyout(&s_pid, (void *)arg2, + sizeof (s_pid)) != 0 || + copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0) + return (EFAULT); + + *rval = 0; + return (0); + } + + case B_PTRACE_SYSCALL: + *rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2, + (int)arg3); + return (0); + + case B_SYSENTRY: + if (lx_systrace_enabled) { + uint32_t args[6]; + + ASSERT(lx_systrace_entry_ptr != NULL); + + if (copyin((void *)arg2, args, sizeof (args)) != 0) + return (EFAULT); + + (*lx_systrace_entry_ptr)(arg1, args[0], args[1], + args[2], args[3], args[4], args[5]); + } + + lx_ptrace_fire(); + + pd = p->p_brand_data; + + /* + * If neither DTrace not ptrace are interested in tracing + * this process any more, turn off the trace flag. + */ + if (!lx_systrace_enabled && !pd->l_ptrace) + (void) suword32((void *)pd->l_traceflag, 0); + + *rval = 0; + return (0); + + case B_SYSRETURN: + if (lx_systrace_enabled) { + ASSERT(lx_systrace_return_ptr != NULL); + + (*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0); + } + + lx_ptrace_fire(); + + pd = p->p_brand_data; + + /* + * If neither DTrace not ptrace are interested in tracing + * this process any more, turn off the trace flag. + */ + if (!lx_systrace_enabled && !pd->l_ptrace) + (void) suword32((void *)pd->l_traceflag, 0); + + *rval = 0; + return (0); + + case B_SET_AFFINITY_MASK: + case B_GET_AFFINITY_MASK: + /* + * Retrieve or store the CPU affinity mask for the + * requested linux pid. + * + * arg1 is a linux PID (0 means curthread). + * arg2 is the size of the given mask. + * arg3 is the address of the affinity mask. 
+ */ + return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval)); + + default: + linux_call = cmd - B_EMULATE_SYSCALL; + if (linux_call >= 0 && linux_call < LX_NSYSCALLS) { + *rval = lx_emulate_syscall(linux_call, arg1, arg2, + arg3, arg4, arg5, arg6); + return (0); + } + } + + return (EINVAL); +} + +/* + * Copy the per-process brand data from a parent proc to a child. + */ +void +lx_copy_procdata(proc_t *child, proc_t *parent) +{ + lx_proc_data_t *cpd, *ppd; + + ppd = parent->p_brand_data; + + ASSERT(ppd != NULL); + + cpd = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP); + *cpd = *ppd; + + child->p_brand_data = cpd; +} + +#if defined(_ELF32_COMPAT) +/* + * Currently, only 32-bit branded ELF executables are supported. + */ +#define elfexec elf32exec +#define mapexec_brand mapexec32_brand +#endif /* __amd64 */ + +extern int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, + intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file, + cred_t *cred, int brand_action); +extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *ehdr, Elf32_Addr *, + intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *); + +/* + * Exec routine called by elfexec() to load 32-bit Linux binaries. + */ +static int +lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, + struct intpdata *idata, int level, long *execsz, int setid, + caddr_t exec_file, struct cred *cred, int brand_action) +{ + int error; + vnode_t *nvp; + auxv32_t phdr_auxv = { AT_SUN_BRAND_PHDR, 0 }; + Ehdr ehdr; + Elf32_Addr uphdr_vaddr; + intptr_t voffset; + int interp; + int i; + struct execenv env; + struct user *up = PTOU(ttoproc(curthread)); + lx_elf_data_t *edp = + &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data; + + ASSERT(ttoproc(curthread)->p_brand == &lx_brand); + ASSERT(ttoproc(curthread)->p_brand_data != NULL); + + /* + * Set the brandname and library name for the new process so that + * elfexec() puts them onto the stack. 
+ */ + args->brandname = LX_BRANDNAME; + args->emulator = LX_LIB_PATH; + + /* + * We will exec the brand library, and map in the linux linker and the + * linux executable. + */ + if (error = lookupname(LX_LIB_PATH, UIO_SYSSPACE, FOLLOW, NULLVPP, + &nvp)) { + uprintf("%s: not found.", LX_LIB); + return (error); + } + + if (error = elfexec(nvp, uap, args, idata, level + 1, execsz, setid, + exec_file, cred, brand_action)) { + VN_RELE(nvp); + return (error); + } + VN_RELE(nvp); + + bzero(&env, sizeof (env)); + + if (error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, + exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, + &env.ex_brksize)) + return (error); + + /* + * Save off the important properties of the lx executable. The brand + * library will ask us for this data later, when it is ready to set + * things up for the lx executable. + */ + edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff : + voffset + uphdr_vaddr; + edp->ed_entry = voffset + ehdr.e_entry; + edp->ed_phent = ehdr.e_phentsize; + edp->ed_phnum = ehdr.e_phnum; + + if (interp) { + if (ehdr.e_type == ET_DYN) { + /* + * This is a shared object executable, so we need to + * pick a reasonable place to put the heap. Just don't + * use the first page. + */ + env.ex_brkbase = (caddr_t)PAGESIZE; + env.ex_bssbase = (caddr_t)PAGESIZE; + } + + /* + * If the program needs an interpreter (most do), map it in and + * store relevant information about it in the aux vector, where + * the brand library can find it. + */ + if (error = lookupname(LX_LINKER, UIO_SYSSPACE, FOLLOW, NULLVPP, + &nvp)) { + uprintf("%s: not found.", LX_LINKER); + return (error); + } + if (error = mapexec_brand(nvp, args, &ehdr, &uphdr_vaddr, + &voffset, exec_file, &interp, NULL, NULL, NULL)) { + VN_RELE(nvp); + return (error); + } + VN_RELE(nvp); + + /* + * Now that we know the base address of the brand's linker, + * place it in the aux vector. 
+ */ + edp->ed_base = voffset; + edp->ed_ldentry = voffset + ehdr.e_entry; + } else { + /* + * This program has no interpreter. The lx brand library will + * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, + * so in this case, put the entry point of the main executable + * there. + */ + if (ehdr.e_type == ET_EXEC) { + /* + * An executable with no interpreter, this must be a + * statically linked executable, which means we loaded + * it at the address specified in the elf header, in + * which case the e_entry field of the elf header is an + * absolute address. + */ + edp->ed_ldentry = ehdr.e_entry; + edp->ed_entry = ehdr.e_entry; + } else { + /* + * A shared object with no interpreter, we use the + * calculated address from above. + */ + edp->ed_ldentry = edp->ed_entry; + } + + /* + * Delay setting the brkbase until the first call to brk(); + * see elfexec() for details. + */ + env.ex_bssbase = (caddr_t)0; + env.ex_brkbase = (caddr_t)0; + env.ex_brksize = 0; + } + + env.ex_vp = vp; + setexecenv(&env); + + /* + * We don't need to copy this stuff out. It is only used by our + * tools to locate the lx linker's debug section. But we should at + * least try to keep /proc's view of the aux vector consistent with + * what's on the process stack. + */ + phdr_auxv.a_un.a_val = edp->ed_phdr; + if (copyout(&phdr_auxv, args->brand_auxp, sizeof (phdr_auxv)) == -1) + return (EFAULT); + + /* + * /proc uses the AT_ENTRY aux vector entry to deduce + * the location of the executable in the address space. The user + * structure contains a copy of the aux vector that needs to have those + * entries patched with the values of the real lx executable (they + * currently contain the values from the lx brand library that was + * elfexec'd, above). + * + * For live processes, AT_BASE is used to locate the linker segment, + * which /proc and friends will later use to find Solaris symbols + * (such as rtld_db_preinit). 
However, for core files, /proc uses + * AT_ENTRY to find the right segment to label as the executable. + * So we set AT_ENTRY to be the entry point of the linux executable, + * but leave AT_BASE to be the address of the Solaris linker. + */ + for (i = 0; i < __KERN_NAUXV_IMPL; i++) { + if (up->u_auxv[i].a_type == AT_ENTRY) + up->u_auxv[i].a_un.a_val = edp->ed_entry; + if (up->u_auxv[i].a_type == AT_SUN_BRAND_PHDR) + up->u_auxv[i].a_un.a_val = edp->ed_phdr; + } + + return (0); +} + +int +_init(void) +{ + int err = 0; + + /* pid/tid conversion hash tables */ + lx_pid_init(); + + /* for lx_futex() */ + lx_futex_init(); + + err = mod_install(&modlinkage); + if (err != 0) { + cmn_err(CE_WARN, "Couldn't install lx brand module"); + + /* + * This looks drastic, but it should never happen. These + * two data structures should be completely free-able until + * they are used by Linux processes. Since the brand + * wasn't loaded there should be no Linux processes, and + * thus no way for these data structures to be modified. + */ + if (lx_futex_fini()) + panic("lx brand module cannot be loaded or unloaded."); + } + return (err); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int err; + int futex_done = 0; + + /* + * If there are any zones using this brand, we can't allow it to be + * unloaded. + */ + if (brand_zone_count(&lx_brand)) + return (EBUSY); + + lx_pid_fini(); + + if ((err = lx_futex_fini()) != 0) + goto done; + futex_done = 1; + + err = mod_remove(&modlinkage); + +done: + if (err) { + /* + * If we can't unload the module, then we have to get it + * back into a sane state. 
+ */ + lx_pid_init(); + + if (futex_done) + lx_futex_init(); + + } + + return (err); +} diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c new file mode 100644 index 0000000000..375b99fa46 --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_misc.c @@ -0,0 +1,383 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/archsystm.h> +#include <sys/privregs.h> +#include <sys/exec.h> +#include <sys/lwp.h> +#include <sys/sem.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> +#include <sys/lx_futex.h> + +/* Linux specific functions and definitions */ +void lx_setrval(klwp_t *, int, int); +void lx_exec(); +int lx_initlwp(klwp_t *); +void lx_forklwp(klwp_t *, klwp_t *); +void lx_exitlwp(klwp_t *); +void lx_freelwp(klwp_t *); +static void lx_save(klwp_t *); +static void lx_restore(klwp_t *); +extern void lx_ptrace_free(proc_t *); + +/* + * Set the return code for the forked child, always zero + */ +/*ARGSUSED*/ +void +lx_setrval(klwp_t *lwp, int v1, int v2) +{ + lwptoregs(lwp)->r_r0 = 0; +} + +/* + * Reset process state on exec(2) + */ +void +lx_exec() +{ + klwp_t *lwp = ttolwp(curthread); + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + int err; + + /* + * There are two mutually exclusive special cases we need to + * address. First, if this was a native process prior to this + * exec(), then this lwp won't have its brand-specific data + * initialized and it won't be assigned a Linux PID yet. Second, + * if this was a multi-threaded Linux process and this lwp wasn't + * the main lwp, then we need to make its Solaris and Linux PIDS + * match. + */ + if (lwpd == NULL) { + err = lx_initlwp(lwp); + /* + * Only possible failure from this routine should be an + * inability to allocate a new PID. Since single-threaded + * processes don't need a new PID, we should never hit this + * error. 
+ */ + ASSERT(err == 0); + lwpd = lwptolxlwp(lwp); + } else if (curthread->t_tid != 1) { + lx_pid_reassign(curthread); + } + + installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save, + NULL); + + /* + * clear out the tls array + */ + bzero(lwpd->br_tls, sizeof (lwpd->br_tls)); + + /* + * reset the tls entries in the gdt + */ + kpreempt_disable(); + lx_restore(lwp); + kpreempt_enable(); +} + +void +lx_exitlwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + proc_t *p; + kthread_t *t; + sigqueue_t *sqp = NULL; + pid_t ppid; + id_t ptid; + + if (lwpd == NULL) + return; /* second time thru' */ + + if (lwpd->br_clear_ctidp != NULL) { + (void) suword32(lwpd->br_clear_ctidp, 0); + (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1, + NULL, NULL, 0); + } + + if (lwpd->br_signal != 0) { + /* + * The first thread in a process doesn't cause a signal to + * be sent when it exits. It was created by a fork(), not + * a clone(), so the parent should get signalled when the + * process exits. + */ + if (lwpd->br_ptid == -1) + goto free; + + sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + /* + * If br_ppid is 0, it means this is a CLONE_PARENT thread, + * so the signal goes to the parent process - not to a + * specific thread in this process. + */ + p = lwptoproc(lwp); + if (lwpd->br_ppid == 0) { + mutex_enter(&p->p_lock); + ppid = p->p_ppid; + t = NULL; + } else { + /* + * If we have been reparented to init or if our + * parent thread is gone, then nobody gets + * signaled. 
+ */ + if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) || + (ptid == -1)) + goto free; + + mutex_enter(&pidlock); + if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + goto free; + } + mutex_enter(&p->p_lock); + mutex_exit(&pidlock); + + if ((t = idtot(p, ptid)) == NULL) { + mutex_exit(&p->p_lock); + goto free; + } + } + + sqp->sq_info.si_signo = lwpd->br_signal; + sqp->sq_info.si_code = lwpd->br_exitwhy; + sqp->sq_info.si_status = lwpd->br_exitwhat; + sqp->sq_info.si_pid = lwpd->br_pid; + sqp->sq_info.si_uid = crgetruid(CRED()); + sigaddqa(p, t, sqp); + mutex_exit(&p->p_lock); + sqp = NULL; + } + +free: + if (sqp) + kmem_free(sqp, sizeof (sigqueue_t)); + + lx_freelwp(lwp); +} + +void +lx_freelwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(lwp); + + if (lwpd != NULL) { + (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, + NULL, NULL, lx_save, NULL); + if (lwpd->br_pid != 0) + lx_pid_rele(lwptoproc(lwp)->p_pid, + lwptot(lwp)->t_tid); + + lwp->lwp_brand = NULL; + kmem_free(lwpd, sizeof (struct lx_lwp_data)); + } +} + +int +lx_initlwp(klwp_t *lwp) +{ + struct lx_lwp_data *lwpd; + struct lx_lwp_data *plwpd; + kthread_t *tp = lwptot(lwp); + + lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP); + lwpd->br_exitwhy = CLD_EXITED; + lwpd->br_lwp = lwp; + lwpd->br_clear_ctidp = NULL; + lwpd->br_set_ctidp = NULL; + lwpd->br_signal = 0; + lwpd->br_affinitymask = 0; + + /* + * The first thread in a process has ppid set to the parent + * process's pid, and ptid set to -1. Subsequent threads in the + * process have their ppid set to the pid of the thread that + * created them, and their ptid to that thread's tid. 
+ */ + if (tp->t_next == tp) { + lwpd->br_ppid = tp->t_procp->p_ppid; + lwpd->br_ptid = -1; + } else if (ttolxlwp(curthread) != NULL) { + plwpd = ttolxlwp(curthread); + bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls)); + lwpd->br_ppid = plwpd->br_pid; + lwpd->br_ptid = curthread->t_tid; + } else { + /* + * Oddball case: the parent thread isn't a Linux process. + */ + lwpd->br_ppid = 0; + lwpd->br_ptid = -1; + } + lwp->lwp_brand = lwpd; + + if (lx_pid_assign(tp)) { + kmem_free(lwpd, sizeof (struct lx_lwp_data)); + lwp->lwp_brand = NULL; + return (-1); + } + lwpd->br_tgid = lwpd->br_pid; + + installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, + lx_save, NULL); + + return (0); +} + +/* + * There is no need to have any locking for either the source or + * destination struct lx_lwp_data structs. This is always run in the + * thread context of the source thread, and the destination thread is + * always newly created and not referred to from anywhere else. + */ +void +lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp) +{ + struct lx_lwp_data *src = srclwp->lwp_brand; + struct lx_lwp_data *dst = dstlwp->lwp_brand; + + dst->br_ppid = src->br_pid; + dst->br_ptid = lwptot(srclwp)->t_tid; + bcopy(src->br_tls, dst->br_tls, sizeof (dst->br_tls)); + + /* + * copy only these flags + */ + dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND; + dst->br_clone_args = NULL; +} + +/* + * When switching a Linux process off the CPU, clear its GDT entries. + */ +/* ARGSUSED */ +static void +lx_save(klwp_t *t) +{ + static user_desc_t null_desc; + static int inited; + user_desc_t *gdt; + int i; + + if (inited == 0) { + bzero(&null_desc, sizeof (null_desc)); + inited = 1; + } + + gdt = cpu_get_gdt(); + for (i = 0; i < LX_TLSNUM; i++) + gdt[GDT_TLSMIN + i] = null_desc; +} + +/* + * When switching a Linux process on the CPU, set its GDT entries. 
+ */ +static void +lx_restore(klwp_t *t) +{ + struct lx_lwp_data *lwpd = lwptolxlwp(t); + user_desc_t *gdt; + user_desc_t *tls; + int i; + + ASSERT(lwpd); + + gdt = cpu_get_gdt(); + tls = lwpd->br_tls; + for (i = 0; i < LX_TLSNUM; i++) + gdt[GDT_TLSMIN + i] = tls[i]; +} + +void +lx_set_gdt(int entry, user_desc_t *descrp) +{ + user_desc_t *gdt = cpu_get_gdt(); + + gdt[entry] = *descrp; +} + +void +lx_clear_gdt(int entry) +{ + user_desc_t *gdt = cpu_get_gdt(); + + bzero(&gdt[entry], sizeof (user_desc_t)); +} + +longlong_t +lx_nosys() +{ + return (set_errno(ENOSYS)); +} + +longlong_t +lx_opnotsupp() +{ + return (set_errno(EOPNOTSUPP)); +} + +/* + * Brand-specific routine to check if given non-Solaris standard segment + * register values should be used as-is or if they should be modified to other + * values. + */ +/*ARGSUSED*/ +greg_t +lx_fixsegreg(greg_t sr, model_t datamodel) +{ + struct lx_lwp_data *lxlwp = ttolxlwp(curthread); + + /* + * If the segreg is the same as the %gs the brand callback was last + * entered with, allow it to be used unmodified. + */ + ASSERT(sr == (sr & 0xffff)); + + if (sr == (lxlwp->br_ugs & 0xffff)) + return (sr); + + /* + * Force the SR into the LDT in ring 3 for 32-bit processes. + * + * 64-bit processes get the null GDT selector since they are not + * allowed to have a private LDT. + */ +#if defined(__amd64) + return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0); +#elif defined(__i386) + datamodel = datamodel; /* datamodel currently unused for 32-bit */ + return (sr | SEL_TI_LDT | SEL_UPL); +#endif /* __amd64 */ +} diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c new file mode 100644 index 0000000000..4f22efd1ee --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_pid.c @@ -0,0 +1,348 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysmacros.h> +#include <sys/bitmap.h> +#include <sys/var.h> +#include <sys/thread.h> +#include <sys/proc.h> +#include <sys/brand.h> +#include <sys/zone.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> + +#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */ +static int hash_len = 4; /* desired average hash chain length */ +static int hash_size; /* no of buckets in the hash table */ + +static struct lx_pid **stol_pid_hash; +static struct lx_pid **ltos_pid_hash; + +#define LTOS_HASH(pid) ((pid) & (hash_size - 1)) +#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1)) + +static kmutex_t hash_lock; + +static void +lx_pid_insert_hash(struct lx_pid *lpidp) +{ + int shash = STOL_HASH(lpidp->s_pid, lpidp->s_tid); + int lhash = LTOS_HASH(lpidp->l_pid); + + ASSERT(MUTEX_HELD(&hash_lock)); + + lpidp->stol_next = stol_pid_hash[shash]; + stol_pid_hash[shash] = lpidp; + + lpidp->ltos_next = ltos_pid_hash[lhash]; + ltos_pid_hash[lhash] = lpidp; +} + +static struct lx_pid * +lx_pid_remove_hash(pid_t pid, id_t tid) +{ + struct lx_pid **hpp; + struct lx_pid *lpidp; + + 
ASSERT(MUTEX_HELD(&hash_lock)); + + hpp = &stol_pid_hash[STOL_HASH(pid, tid)]; + while (*hpp) { + if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) { + lpidp = *hpp; + *hpp = (*hpp)->stol_next; + break; + } + hpp = &(*hpp)->stol_next; + } + + /* + * when called during error recovery the pid may already + * be released + */ + if (lpidp == NULL) + return (NULL); + + hpp = <os_pid_hash[LTOS_HASH(lpidp->l_pid)]; + while (*hpp) { + if (*hpp == lpidp) { + *hpp = lpidp->ltos_next; + break; + } + hpp = &(*hpp)->ltos_next; + } + + return (lpidp); +} + +/* + * given a solaris pid/tid pair, create a linux pid + */ +int +lx_pid_assign(kthread_t *t) +{ + proc_t *p = ttoproc(t); + pid_t s_pid = p->p_pid; + id_t s_tid = t->t_tid; + struct pid *pidp; + struct lx_pid *lpidp; + lx_lwp_data_t *lwpd = ttolxlwp(t); + pid_t newpid; + + if (p->p_lwpcnt > 0) { + /* + * Allocate a pid for any thread other than the first + */ + if ((newpid = pid_allocate(p, 0)) < 0) + return (-1); + + pidp = pid_find(newpid); + } else { + pidp = NULL; + newpid = s_pid; + } + + lpidp = kmem_alloc(sizeof (struct lx_pid), KM_SLEEP); + lpidp->l_pid = newpid; + lpidp->s_pid = s_pid; + lpidp->s_tid = s_tid; + lpidp->l_pidp = pidp; + lpidp->l_start = t->t_start; + + /* + * now put the pid into the linux-solaris and solaris-linux + * conversion hash tables + */ + mutex_enter(&hash_lock); + lx_pid_insert_hash(lpidp); + mutex_exit(&hash_lock); + + lwpd->br_pid = newpid; + + return (0); +} + +/* + * If we are exec()ing the process, this thread's tid is about to be reset + * to 1. Make sure the Linux PID bookkeeping reflects that change. + */ +void +lx_pid_reassign(kthread_t *t) +{ + proc_t *p = ttoproc(t); + struct pid *old_pidp; + struct lx_pid *lpidp; + + ASSERT(p->p_lwpcnt == 1); + + mutex_enter(&hash_lock); + + /* + * Clean up all the traces of this thread's 'fake' Linux PID. 
+ */ + lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid); + ASSERT(lpidp != NULL); + old_pidp = lpidp->l_pidp; + lpidp->l_pidp = NULL; + + /* + * Now register this thread as (pid, 1). + */ + lpidp->l_pid = p->p_pid; + lpidp->s_pid = p->p_pid; + lpidp->s_tid = 1; + lx_pid_insert_hash(lpidp); + + mutex_exit(&hash_lock); + + if (old_pidp) + (void) pid_rele(old_pidp); +} + +/* + * release a solaris pid/tid pair + */ +void +lx_pid_rele(pid_t pid, id_t tid) +{ + struct lx_pid *lpidp; + + mutex_enter(&hash_lock); + lpidp = lx_pid_remove_hash(pid, tid); + mutex_exit(&hash_lock); + + if (lpidp) { + if (lpidp->l_pidp) + (void) pid_rele(lpidp->l_pidp); + + kmem_free(lpidp, sizeof (*lpidp)); + } +} + +/* + * given a linux pid, return the solaris pid/tid pair + */ +int +lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid) +{ + struct lx_pid *hp; + + mutex_enter(&hash_lock); + for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) + if (l_pid == hp->l_pid) { + if (s_pid) + *s_pid = hp->s_pid; + if (s_tid) + *s_tid = hp->s_tid; + break; + } + mutex_exit(&hash_lock); + + return (hp ? 0 : -1); +} + +/* + * Given an lwp, return the Linux pid of its parent. If the caller + * wants them, we return the Solaris (pid, tid) as well. + */ +pid_t +lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp) +{ + lx_lwp_data_t *lwpd = lwptolxlwp(lwp); + proc_t *p = lwptoproc(lwp); + struct lx_pid *hp; + pid_t zoneinit = curproc->p_zone->zone_proc_initpid; + pid_t lppid, ppid; + + /* + * Be sure not to return a parent pid that should be invisible + * within this zone. + */ + ppid = ((p->p_flag & SZONETOP) + ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * If the parent process's pid is the zone's init process, force it + * to the Linux init pid value of 1. 
+ */ + if (ppid == zoneinit) + ppid = 1; + + /* + * There are two cases in which the Linux definition of a 'parent' + * matches that of Solaris: + * + * - if our tgid is the same as our PID, then we are either the + * first thread in the process or a CLONE_THREAD thread. + * + * - if the brand lwp value for ppid is 0, then we are either the + * child of a differently-branded process or a CLONE_PARENT thread. + */ + if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) { + if (ppidp != NULL) + *ppidp = ppid; + if (ptidp != NULL) + *ptidp = -1; + return (ppid); + } + + /* + * Set the default Linux parent pid to be the pid of the zone's init + * process; this will get converted back to the Linux default of 1 + * later. + */ + lppid = zoneinit; + + /* + * If the process's parent isn't init, try and look up the Linux "pid" + * corresponding to the process's parent. + */ + if (ppid != 1) { + /* + * In all other cases, we are looking for the parent of this + * specific thread, which in Linux refers to the thread that + * clone()d it. We stashed that thread's PID away when this + * thread was created. + */ + mutex_enter(&hash_lock); + for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp; + hp = hp->ltos_next) { + if (lwpd->br_ppid == hp->l_pid) { + /* + * We found the PID we were looking for, but + * since we cached its value in this LWP's brand + * structure, it has exited and been reused by + * another process. 
+ */ + if (hp->l_start > lwptot(lwp)->t_start) + break; + + lppid = lwpd->br_ppid; + if (ppidp != NULL) + *ppidp = hp->s_pid; + if (ptidp != NULL) + *ptidp = hp->s_tid; + + break; + } + } + mutex_exit(&hash_lock); + } + + if (lppid == zoneinit) { + lppid = 1; + + if (ppidp != NULL) + *ppidp = lppid; + if (ptidp != NULL) + *ptidp = -1; + } + + return (lppid); +} + +void +lx_pid_init(void) +{ + hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR)); + + stol_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size, + KM_SLEEP); + ltos_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size, + KM_SLEEP); + + mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL); +} + +/* + * Tear down the state created by lx_pid_init(): free both pid conversion + * hash tables (sized by hash_size) and destroy hash_lock. + */ +void +lx_pid_fini(void) +{ + kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size); + kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size); + + /* pairs with the mutex_init() in lx_pid_init() */ + mutex_destroy(&hash_lock); +} diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c new file mode 100644 index 0000000000..686afea458 --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -0,0 +1,409 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/thread.h> +#include <sys/systm.h> +#include <sys/syscall.h> +#include <sys/proc.h> +#include <sys/modctl.h> +#include <sys/cmn_err.h> +#include <sys/model.h> +#include <sys/brand.h> +#include <sys/machbrand.h> +#include <sys/lx_syscalls.h> +#include <sys/lx_brand.h> +#include <sys/lx_impl.h> + +/* + * Some system calls return either a 32-bit or a 64-bit value, depending + * on the datamodel. + */ +#ifdef _LP64 +#define V_RVAL SE_64RVAL +#else +#define V_RVAL SE_32RVAL1 +#endif + +/* + * Define system calls that return a native 'long' quantity i.e. a 32-bit + * or 64-bit integer - depending on how the kernel is itself compiled + * e.g. read(2) returns 'ssize_t' in the kernel and in userland. + */ +#define LX_CL(name, call, narg) \ + { V_RVAL, (name), (llfcn_t)(call), (narg) } + +/* + * Returns a 32 bit quantity regardless of datamodel + */ +#define LX_CI(name, call, narg) \ + { SE_32RVAL1, (name), (llfcn_t)(call), (narg) } + +extern longlong_t lx_nosys(void); +#define LX_NOSYS(name) \ + {SE_64RVAL, (name), (llfcn_t)lx_nosys, 0} + +lx_sysent_t lx_sysent[] = +{ + LX_NOSYS("lx_nosys"), /* 0 */ + LX_NOSYS("exit"), /* 1 */ + LX_NOSYS("lx_fork"), + LX_NOSYS("read"), + LX_NOSYS("write"), + LX_NOSYS("open"), + LX_NOSYS("close"), + LX_NOSYS("waitpid"), + LX_NOSYS("creat"), + LX_NOSYS("link"), + LX_NOSYS("unlink"), /* 10 */ + LX_NOSYS("exec"), + LX_NOSYS("chdir"), + LX_NOSYS("gtime"), + LX_NOSYS("mknod"), + LX_NOSYS("chmod"), + LX_NOSYS("lchown16"), + LX_NOSYS("break"), + LX_NOSYS("stat"), + LX_NOSYS("lseek"), + LX_CL("getpid", lx_getpid, 0), /* 20 */ + LX_NOSYS("mount"), + LX_NOSYS("umount"), + LX_NOSYS("setuid16"), + LX_NOSYS("getuid16"), + LX_NOSYS("stime"), + LX_NOSYS("ptrace"), + LX_NOSYS("alarm"), + LX_NOSYS("fstat"), + LX_NOSYS("pause"), + LX_NOSYS("utime"), /* 30 */ + LX_NOSYS("stty"), + LX_NOSYS("gtty"), + LX_NOSYS("access"),
+ LX_NOSYS("nice"), + LX_NOSYS("ftime"), + LX_NOSYS("sync"), + LX_CL("kill", lx_kill, 2), + LX_NOSYS("rename"), + LX_NOSYS("mkdir"), + LX_NOSYS("rmdir"), /* 40 */ + LX_NOSYS("dup"), + LX_NOSYS("pipe"), + LX_NOSYS("times"), + LX_NOSYS("prof"), + LX_CL("brk", lx_brk, 1), + LX_NOSYS("setgid16"), + LX_NOSYS("getgid16"), + LX_NOSYS("signal"), + LX_NOSYS("geteuid16"), + LX_NOSYS("getegid16"), /* 50 */ + LX_NOSYS("sysacct"), + LX_NOSYS("umount2"), + LX_NOSYS("lock"), + LX_NOSYS("ioctl"), + LX_NOSYS("fcntl"), + LX_NOSYS("mpx"), + LX_NOSYS("setpgid"), + LX_NOSYS("ulimit"), + LX_NOSYS("olduname"), + LX_NOSYS("umask"), /* 60 */ + LX_NOSYS("chroot"), + LX_NOSYS("ustat"), + LX_NOSYS("dup2"), + LX_CL("getppid", lx_getppid, 0), + LX_NOSYS("pgrp"), + LX_NOSYS("setsid"), + LX_NOSYS("sigaction"), + LX_NOSYS("sgetmask"), + LX_NOSYS("ssetmask"), + LX_NOSYS("setreuid16"), /* 70 */ + LX_NOSYS("setregid16"), + LX_NOSYS("sigsuspend"), + LX_NOSYS("sigpending"), + LX_NOSYS("sethostname"), + LX_NOSYS("setrlimit"), + LX_NOSYS("old_getrlimit"), + LX_NOSYS("getrusage"), + LX_NOSYS("gettimeofday"), + LX_NOSYS("settimeofday"), + LX_NOSYS("getgroups16"), /* 80 */ + LX_NOSYS("setgroups16"), + LX_NOSYS("old_select"), + LX_NOSYS("symlink"), + LX_NOSYS("oldlstat"), + LX_NOSYS("readlink"), + LX_NOSYS("uselib"), + LX_NOSYS("swapon"), + LX_NOSYS("reboot"), + LX_NOSYS("old_readdir"), + LX_NOSYS("old_mmap"), /* 90 */ + LX_NOSYS("munmap"), + LX_NOSYS("truncate"), + LX_NOSYS("ftruncate"), + LX_NOSYS("fchmod"), + LX_NOSYS("fchown16"), + LX_NOSYS("getpriority"), + LX_NOSYS("setpriority"), + LX_NOSYS("profil"), + LX_NOSYS("statfs"), + LX_NOSYS("fstatfs"), /* 100 */ + LX_NOSYS("ioperm"), + LX_NOSYS("socketcall"), + LX_NOSYS("syslog"), + LX_NOSYS("setitimer"), + LX_NOSYS("getitimer"), + LX_NOSYS("newstat"), + LX_NOSYS("newsltat"), + LX_NOSYS("newsftat"), + LX_NOSYS("uname"), + LX_NOSYS("oldiopl"), /* 110 */ + LX_NOSYS("oldvhangup"), + LX_NOSYS("idle"), + LX_NOSYS("vm86old"), + LX_NOSYS("wait4"), + 
LX_NOSYS("swapoff"), + LX_CL("sysinfo", lx_sysinfo, 1), + LX_NOSYS("ipc"), + LX_NOSYS("fsync"), + LX_NOSYS("sigreturn"), + LX_CL("clone", lx_clone, 5), /* 120 */ + LX_NOSYS("setdomainname"), + LX_NOSYS("newuname"), + LX_CL("modify_ldt", lx_modify_ldt, 3), + LX_NOSYS("adjtimex"), + LX_NOSYS("mprotect"), + LX_NOSYS("sigprocmask"), + LX_NOSYS("create_module"), + LX_NOSYS("init_module"), + LX_NOSYS("delete_module"), + LX_NOSYS("get_kernel_syms"), /* 130 */ + LX_NOSYS("quotactl"), + LX_NOSYS("getpgid"), + LX_NOSYS("fchdir"), + LX_NOSYS("bdflush"), + LX_NOSYS("sysfs"), + LX_NOSYS("personality"), + LX_NOSYS("afs_syscall"), + LX_NOSYS("setfsuid16"), + LX_NOSYS("setfsgid16"), + LX_NOSYS("llseek"), /* 140 */ + LX_NOSYS("getdents"), + LX_NOSYS("select"), + LX_NOSYS("flock"), + LX_NOSYS("msync"), + LX_NOSYS("readv"), + LX_NOSYS("writev"), + LX_NOSYS("getsid"), + LX_NOSYS("fdatasync"), + LX_NOSYS("sysctl"), + LX_NOSYS("mlock"), /* 150 */ + LX_NOSYS("munlock"), + LX_NOSYS("mlockall"), + LX_NOSYS("munlockall"), + LX_CL("sched_setparam", lx_sched_setparam, 2), + LX_CL("sched_getparam", lx_sched_getparam, 2), + LX_NOSYS("sched_setscheduler"), + LX_NOSYS("sched_getscheduler"), + LX_NOSYS("yield"), + LX_NOSYS("sched_get_priority_max"), + LX_NOSYS("sched_get_priority_min"), /* 160 */ + LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2), + LX_NOSYS("nanosleep"), + LX_NOSYS("mremap"), + LX_CL("setresuid16", lx_setresuid16, 3), + LX_NOSYS("getresuid16"), + LX_NOSYS("vm86"), + LX_NOSYS("query_module"), + LX_NOSYS("poll"), + LX_NOSYS("nfsserctl"), + LX_CL("setresgid16", lx_setresgid16, 3), /* 170 */ + LX_NOSYS("getresgid16"), + LX_NOSYS("prctl"), + LX_NOSYS("rt_sigreturn"), + LX_NOSYS("rt_sigaction"), + LX_NOSYS("rt_sigprocmask"), + LX_NOSYS("rt_sigpending"), + LX_NOSYS("rt_sigtimedwait"), + LX_NOSYS("rt_sigqueueinfo"), + LX_NOSYS("rt_sigsuspend"), + LX_NOSYS("pread64"), /* 180 */ + LX_NOSYS("pwrite64"), + LX_NOSYS("chown16"), + LX_NOSYS("getcwd"), + LX_NOSYS("capget"), + 
LX_NOSYS("capset"), + LX_NOSYS("sigaltstack"), + LX_NOSYS("sendfile"), + LX_NOSYS("getpmsg"), + LX_NOSYS("putpmsg"), + LX_NOSYS("vfork"), /* 190 */ + LX_NOSYS("getrlimit"), + LX_NOSYS("mmap2"), + LX_NOSYS("truncate64"), + LX_NOSYS("ftruncate64"), + LX_NOSYS("stat64"), + LX_NOSYS("lstat64"), + LX_NOSYS("fstat64"), + LX_NOSYS("lchown"), + LX_NOSYS("getuid"), + LX_NOSYS("getgid"), /* 200 */ + LX_NOSYS("geteuid"), + LX_NOSYS("getegid"), + LX_NOSYS("setreuid"), + LX_NOSYS("setregid"), + LX_NOSYS("getgroups"), + LX_CL("setgroups", lx_setgroups, 2), + LX_NOSYS("fchown"), + LX_CL("setresuid", lx_setresuid, 3), + LX_NOSYS("getresuid"), + LX_CL("setresgid", lx_setresgid, 3), /* 210 */ + LX_NOSYS("getresgid"), + LX_NOSYS("chown"), + LX_NOSYS("setuid"), + LX_NOSYS("setgid"), + LX_NOSYS("setfsuid"), + LX_NOSYS("setfsgid"), + LX_NOSYS("pivot_root"), + LX_NOSYS("mincore"), + LX_NOSYS("madvise"), + LX_NOSYS("getdents64"), /* 220 */ + LX_NOSYS("fcntl64"), + LX_NOSYS("lx_nosys"), + LX_NOSYS("security"), + LX_CL("gettid", lx_gettid, 0), + LX_NOSYS("readahead"), + LX_NOSYS("setxattr"), + LX_NOSYS("lsetxattr"), + LX_NOSYS("fsetxattr"), + LX_NOSYS("getxattr"), + LX_NOSYS("lgetxattr"), /* 230 */ + LX_NOSYS("fgetxattr"), + LX_NOSYS("listxattr"), + LX_NOSYS("llistxattr"), + LX_NOSYS("flistxattr"), + LX_NOSYS("removexattr"), + LX_NOSYS("lremovexattr"), + LX_NOSYS("fremovexattr"), + LX_CL("tkill", lx_tkill, 2), + LX_NOSYS("sendfile64"), + LX_CL("futex", lx_futex, 6), /* 240 */ + LX_NOSYS("sched_setaffinity"), + LX_NOSYS("sched_getaffinity"), + LX_CL("set_thread_area", lx_set_thread_area, 1), + LX_CL("get_thread_area", lx_get_thread_area, 1), + LX_NOSYS("io_setup"), + LX_NOSYS("io_destroy"), + LX_NOSYS("io_getevents"), + LX_NOSYS("io_submit"), + LX_NOSYS("io_cancel"), + LX_NOSYS("fadvise64"), /* 250 */ + LX_NOSYS("lx_nosys"), + LX_NOSYS("exit_group"), + LX_NOSYS("lookup_dcookie"), + LX_NOSYS("epoll_create"), + LX_NOSYS("epoll_ctl"), + LX_NOSYS("epoll_wait"), + LX_NOSYS("remap_file_pages"), + 
LX_CL("set_tid_address", lx_set_tid_address, 1), + LX_NOSYS("timer_create"), + LX_NOSYS("timer_settime"), /* 260 */ + LX_NOSYS("timer_gettime"), + LX_NOSYS("timer_getoverrun"), + LX_NOSYS("timer_delete"), + LX_NOSYS("clock_settime"), + LX_NOSYS("clock_gettime"), + LX_NOSYS("clock_getres"), + LX_NOSYS("clock_nanosleep"), + LX_NOSYS("statfs64"), + LX_NOSYS("fstatfs64"), + LX_NOSYS("tgkill"), /* 270 */ + LX_NOSYS("utimes"), + LX_NOSYS("fadvise64_64"), + LX_NOSYS("vserver"), + NULL /* NULL-termination is required for lx_systrace */ +}; + +int64_t +lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + struct lx_sysent *jsp; + int64_t rval; + + rval = (int64_t)0; + + jsp = &(lx_sysent[num]); + + switch (jsp->sy_narg) { + case 0: { + lx_print("--> %s()\n", jsp->sy_name); + rval = (int64_t)jsp->sy_callc(); + break; + } + case 1: { + lx_print("--> %s(0x%lx)\n", jsp->sy_name, arg1); + rval = (int64_t)jsp->sy_callc(arg1); + break; + } + case 2: { + lx_print("--> %s(0x%lx, 0x%lx)\n", jsp->sy_name, arg1, arg2); + rval = (int64_t)jsp->sy_callc(arg1, arg2); + break; + } + case 3: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3); + break; + } + case 4: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4); + break; + } + case 5: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4, arg5); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5); + break; + } + case 6: { + lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx," + " 0x%lx, 0x%lx)\n", + jsp->sy_name, arg1, arg2, arg3, arg4, arg5, arg6); + rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5, + arg6); + break; + } + default: + panic("Invalid syscall entry: #%d at 0x%p\n", num, jsp); + } + lx_print("----------> return 
(0x%llx)\n", (long long)rval); + return (rval); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h new file mode 100644 index 0000000000..c79e3fa590 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h @@ -0,0 +1,233 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LXPROC_H +#define _LXPROC_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * lxproc.h: declarations, data structures and macros for lxprocfs + */ + + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/policy.h> +#include <sys/debug.h> +#include <sys/dirent.h> +#include <sys/errno.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/pathname.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/var.h> +#include <sys/user.h> +#include <sys/t_lock.h> +#include <sys/sysmacros.h> +#include <sys/cred_impl.h> +#include <sys/vnode.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <sys/cmn_err.h> +#include <sys/zone.h> +#include <sys/uio.h> +#include <sys/utsname.h> +#include <sys/dnlc.h> +#include <sys/atomic.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <vm/as.h> +#include <vm/anon.h> + +/* + * Convert a vnode into an lxpr_mnt_t + */ +#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data) + +/* + * convert a vnode into an lxpr_node + */ +#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data) + +/* + * convert a lxprnode into a vnode + */ +#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode) + +/* + * convert a lxpr_node into zone for fs + */ +#define LXPTOZ(lxpnp) \ + (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone) + +#define LXPNSIZ 256 /* max size of lx /proc file name entries */ + +/* + * Pretend that a directory entry takes 16 bytes + */ +#define LXPR_SDSIZE 16 + +/* + * Node/file types for lx /proc files + * (directories and files contained therein). 
+ */ +typedef enum lxpr_nodetype { + LXPR_PROCDIR, /* /proc */ + LXPR_PIDDIR, /* /proc/<pid> */ + LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */ + LXPR_PID_CPU, /* /proc/<pid>/cpu */ + LXPR_PID_CURDIR, /* /proc/<pid>/cwd */ + LXPR_PID_ENV, /* /proc/<pid>/environ */ + LXPR_PID_EXE, /* /proc/<pid>/exe */ + LXPR_PID_MAPS, /* /proc/<pid>/maps */ + LXPR_PID_MEM, /* /proc/<pid>/mem */ + LXPR_PID_ROOTDIR, /* /proc/<pid>/root */ + LXPR_PID_STAT, /* /proc/<pid>/stat */ + LXPR_PID_STATM, /* /proc/<pid>/statm */ + LXPR_PID_STATUS, /* /proc/<pid>/status */ + LXPR_PID_FDDIR, /* /proc/<pid>/fd */ + LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */ + LXPR_CMDLINE, /* /proc/cmdline */ + LXPR_CPUINFO, /* /proc/cpuinfo */ + LXPR_DEVICES, /* /proc/devices */ + LXPR_DMA, /* /proc/dma */ + LXPR_FILESYSTEMS, /* /proc/filesystems */ + LXPR_INTERRUPTS, /* /proc/interrupts */ + LXPR_IOPORTS, /* /proc/ioports */ + LXPR_KCORE, /* /proc/kcore */ + LXPR_KMSG, /* /proc/kmsg */ + LXPR_LOADAVG, /* /proc/loadavg */ + LXPR_MEMINFO, /* /proc/meminfo */ + LXPR_MOUNTS, /* /proc/mounts */ + LXPR_NETDIR, /* /proc/net */ + LXPR_NET_ARP, /* /proc/net/arp */ + LXPR_NET_DEV, /* /proc/net/dev */ + LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */ + LXPR_NET_IGMP, /* /proc/net/igmp */ + LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */ + LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */ + LXPR_NET_MCFILTER, /* /proc/net/mcfilter */ + LXPR_NET_NETSTAT, /* /proc/net/netstat */ + LXPR_NET_RAW, /* /proc/net/raw */ + LXPR_NET_ROUTE, /* /proc/net/route */ + LXPR_NET_RPC, /* /proc/net/rpc */ + LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */ + LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */ + LXPR_NET_SNMP, /* /proc/net/snmp */ + LXPR_NET_STAT, /* /proc/net/stat */ + LXPR_NET_TCP, /* /proc/net/tcp */ + LXPR_NET_UDP, /* /proc/net/udp */ + LXPR_NET_UNIX, /* /proc/net/unix */ + LXPR_PARTITIONS, /* /proc/partitions */ + LXPR_SELF, /* /proc/self */ + LXPR_STAT, /* /proc/stat */ + LXPR_UPTIME, /* /proc/uptime */ + LXPR_VERSION, /* /proc/version */ + 
LXPR_NFILES /* number of lx /proc file types */ +} lxpr_nodetype_t; + + +/* + * Number of fds allowed for in the inode number calculation + * per process (if a process has more fds then inode numbers + * may be duplicated) + */ +#define LXPR_FD_PERPROC 2000 + +/* + * external dirent characteristics + */ +#define LXPRMAXNAMELEN 14 +typedef struct { + lxpr_nodetype_t d_type; + char d_name[LXPRMAXNAMELEN]; +} lxpr_dirent_t; + +/* + * This is the lxprocfs private data object + * which is attached to v_data in the vnode structure + */ +typedef struct lxpr_node { + lxpr_nodetype_t lxpr_type; /* type of this node */ + vnode_t *lxpr_vnode; /* vnode for the node */ + vnode_t *lxpr_parent; /* parent directory */ + vnode_t *lxpr_realvp; /* real vnode, file in dirs */ + timestruc_t lxpr_time; /* creation etc time for file */ + mode_t lxpr_mode; /* file mode bits */ + uid_t lxpr_uid; /* file owner */ + gid_t lxpr_gid; /* file group owner */ + pid_t lxpr_pid; /* pid of proc referred to */ + ino_t lxpr_ino; /* node id */ + ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */ +} lxpr_node_t; + +struct zone; /* forward declaration */ + +/* + * This is the lxprocfs private data object + * which is attached to vfs_data in the vfs structure + */ +typedef struct lxpr_mnt { + lxpr_node_t *lxprm_node; /* node at root of proc mount */ + struct zone *lxprm_zone; /* zone for this mount */ + ldi_ident_t lxprm_li; /* ident for ldi */ +} lxpr_mnt_t; + +extern vnodeops_t *lxpr_vnodeops; +extern int nproc_highbit; /* highbit(v.v_nproc) */ + +typedef struct mounta mounta_t; + +extern void lxpr_initnodecache(); +extern void lxpr_fininodecache(); +extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *); +extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int); +extern ino_t lxpr_parentinode(lxpr_node_t *); +extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int); +extern void lxpr_freenode(lxpr_node_t *); + +typedef struct lxpr_uiobuf lxpr_uiobuf_t; +extern lxpr_uiobuf_t 
*lxpr_uiobuf_new(uio_t *); +extern void lxpr_uiobuf_free(lxpr_uiobuf_t *); +extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *); +extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t); +extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t); +extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...); +extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int); + +proc_t *lxpr_lock(pid_t); +void lxpr_unlock(proc_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LXPROC_H */ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c new file mode 100644 index 0000000000..5d252207fb --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c @@ -0,0 +1,494 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lxprsubr.c: Various functions for the /lxproc vnodeops. 
+ */ + +#include <sys/varargs.h> + +#include <sys/cpuvar.h> +#include <sys/mman.h> +#include <sys/vmsystm.h> +#include <sys/prsystm.h> + +#include "lx_proc.h" + +#define LXPRCACHE_NAME "lxpr_cache" + +static int lxpr_node_constructor(void*, void*, int); +static void lxpr_node_destructor(void*, void*); + +static kmem_cache_t *lxpr_node_cache; + +struct lxpr_uiobuf { + uio_t *uiop; + char *buffer; + uint32_t buffsize; + char *pos; + size_t beg; + int error; +}; + +#define BUFSIZE 4000 + +struct lxpr_uiobuf * +lxpr_uiobuf_new(uio_t *uiop) +{ + /* Allocate memory for both lxpr_uiobuf and output buffer */ + struct lxpr_uiobuf *uiobuf = + kmem_alloc(sizeof (struct lxpr_uiobuf) + BUFSIZE, KM_SLEEP); + + uiobuf->uiop = uiop; + uiobuf->buffer = (char *)&uiobuf[1]; + uiobuf->buffsize = BUFSIZE; + uiobuf->pos = uiobuf->buffer; + uiobuf->beg = 0; + uiobuf->error = 0; + + return (uiobuf); +} + +void +lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf) +{ + ASSERT(uiobuf != NULL); + ASSERT(uiobuf->pos == uiobuf->buffer); + + kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize); +} + +void +lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset) +{ + uiobuf->uiop->uio_offset = offset; +} + +void +lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err) +{ + ASSERT(uiobuf->error == 0); + + uiobuf->error = err; +} + +int +lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf) +{ + off_t off = uiobuf->uiop->uio_offset; + caddr_t uaddr = uiobuf->buffer; + size_t beg = uiobuf->beg; + + size_t size = uiobuf->pos - uaddr; + + if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) { + ASSERT(off >= beg); + + if (beg+size > off && off >= 0) + uiobuf->error = + uiomove(uaddr+(off-beg), size-(off-beg), + UIO_READ, uiobuf->uiop); + + uiobuf->beg += size; + } + + uiobuf->pos = uaddr; + + return (uiobuf->error); +} + +void +lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size) +{ + /* While we can still carry on */ + while (uiobuf->error == 0 && uiobuf->uiop->uio_resid 
!= 0) { + uint_t remain + = uiobuf->buffsize-(uiobuf->pos-uiobuf->buffer); + + /* Enough space in buffer? */ + if (remain >= size) { + bcopy(buf, uiobuf->pos, size); + uiobuf->pos += size; + return; + } + + /* Not enough space, so copy all we can and try again */ + bcopy(buf, uiobuf->pos, remain); + uiobuf->pos += remain; + (void) lxpr_uiobuf_flush(uiobuf); + buf += remain; + size -= remain; + } +} + +#define TYPBUFFSIZE 256 +void +lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...) +{ + va_list args; + char buff[TYPBUFFSIZE]; + int len; + char *buffer; + + /* Can we still do any output */ + if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0) + return; + + va_start(args, fmt); + + /* Try using stack allocated buffer */ + len = vsnprintf(buff, TYPBUFFSIZE, fmt, args); + if (len < TYPBUFFSIZE) { + va_end(args); + lxpr_uiobuf_write(uiobuf, buff, len); + return; + } + + /* Not enough space in pre-allocated buffer */ + buffer = kmem_alloc(len+1, KM_SLEEP); + + /* + * We know we allocated the correct amount of space + * so no check on the return value + */ + (void) vsnprintf(buffer, len+1, fmt, args); + lxpr_uiobuf_write(uiobuf, buffer, len); + va_end(args); + kmem_free(buffer, len+1); +} + +/* + * lxpr_lock(): + * + * Lookup process from pid and return with p_plock and P_PR_LOCK held. + */ +proc_t * +lxpr_lock(pid_t pid) +{ + proc_t *p; + kmutex_t *mp; + + ASSERT(!MUTEX_HELD(&pidlock)); + + for (;;) { + mutex_enter(&pidlock); + + /* + * If the pid is 1, we really want the zone's init process + */ + p = prfind((pid == 1) ? + curproc->p_zone->zone_proc_initpid : pid); + + if (p == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + return (NULL); + } + /* + * p_lock is persistent, but p itself is not -- it could + * vanish during cv_wait(). Load p->p_lock now so we can + * drop it after cv_wait() without referencing p. 
+ */ + mp = &p->p_lock; + mutex_enter(mp); + + mutex_exit(&pidlock); + + if (!(p->p_proc_flag & P_PR_LOCK)) + break; + + cv_wait(&pr_pid_cv[p->p_slot], mp); + mutex_exit(mp); + } + p->p_proc_flag |= P_PR_LOCK; + THREAD_KPRI_REQUEST(); + return (p); +} + +/* + * lxpr_unlock() + * + * Unlock locked process + */ +void +lxpr_unlock(proc_t *p) +{ + ASSERT(p->p_proc_flag & P_PR_LOCK); + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(!MUTEX_HELD(&pidlock)); + + cv_signal(&pr_pid_cv[p->p_slot]); + p->p_proc_flag &= ~P_PR_LOCK; + mutex_exit(&p->p_lock); + THREAD_KPRI_RELEASE(); +} + +void +lxpr_initnodecache() +{ + lxpr_node_cache = + kmem_cache_create(LXPRCACHE_NAME, + sizeof (lxpr_node_t), 0, + lxpr_node_constructor, lxpr_node_destructor, NULL, + NULL, NULL, 0); +} + +void +lxpr_fininodecache() +{ + kmem_cache_destroy(lxpr_node_cache); +} + +/* ARGSUSED */ +static int +lxpr_node_constructor(void *buf, void *un, int kmflags) +{ + lxpr_node_t *lxpnp = buf; + vnode_t *vp; + + vp = lxpnp->lxpr_vnode = vn_alloc(KM_SLEEP); + + (void) vn_setops(vp, lxpr_vnodeops); + vp->v_data = (caddr_t)lxpnp; + + return (0); +} + +/* ARGSUSED */ +static void +lxpr_node_destructor(void *buf, void *un) +{ + lxpr_node_t *lxpnp = buf; + + vn_free(LXPTOV(lxpnp)); +} + +/* + * Calculate an inode number + * + * This takes various bits of info and munges them + * to give the inode number for an lxproc node + */ +ino_t +lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd) +{ + if (pid == 1) + pid = curproc->p_zone->zone_proc_initpid; + + switch (type) { + case LXPR_PIDDIR: + return (pid + 1); + case LXPR_PROCDIR: + return (maxpid + 2); + case LXPR_PID_FD_FD: + return (maxpid + 2 + + (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) + + LXPR_NFILES + fd); + default: + return (maxpid + 2 + + (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) + + type); + } +} + +/* + * Return inode number of parent (directory) + */ +ino_t +lxpr_parentinode(lxpr_node_t *lxpnp) +{ + /* + * If the input node is the root then the parent inode + * is 
the mounted on inode so just return our inode number + */ + if (lxpnp->lxpr_type != LXPR_PROCDIR) + return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino); + else + return (lxpnp->lxpr_ino); +} + +/* + * Allocate a new lxproc node + * + * This also allocates the vnode associated with it + */ +lxpr_node_t * +lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd) +{ + lxpr_node_t *lxpnp; + vnode_t *vp; + user_t *up; + timestruc_t now; + + /* + * Allocate a new node. It is deallocated in vop_inactive + */ + lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP); + + /* + * Set defaults (may be overridden below) + */ + gethrestime(&now); + lxpnp->lxpr_type = type; + lxpnp->lxpr_realvp = NULL; + lxpnp->lxpr_parent = dp; + VN_HOLD(dp); + if (p != NULL) { + lxpnp->lxpr_pid = ((p->p_pid == + curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid); + + lxpnp->lxpr_time = PTOU(p)->u_start; + lxpnp->lxpr_uid = crgetruid(p->p_cred); + lxpnp->lxpr_gid = crgetrgid(p->p_cred); + lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd); + } else { + /* Pretend files without a proc belong to sched */ + lxpnp->lxpr_pid = 0; + lxpnp->lxpr_time = now; + lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0; + lxpnp->lxpr_ino = lxpr_inode(type, 0, 0); + } + + /* initialize the vnode data */ + vp = lxpnp->lxpr_vnode; + vn_reinit(vp); + vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; + vp->v_vfsp = dp->v_vfsp; + + /* + * Do node specific stuff + */ + switch (type) { + case LXPR_PROCDIR: + vp->v_flag |= VROOT; + vp->v_type = VDIR; + lxpnp->lxpr_mode = 0555; /* read-search by everyone */ + break; + + case LXPR_PID_CURDIR: + ASSERT(p != NULL); + up = PTOU(p); + lxpnp->lxpr_realvp = up->u_cdir; + ASSERT(lxpnp->lxpr_realvp != NULL); + VN_HOLD(lxpnp->lxpr_realvp); + vp->v_type = VLNK; + lxpnp->lxpr_mode = 0777; /* anyone does anything ! */ + break; + + case LXPR_PID_ROOTDIR: + ASSERT(p != NULL); + up = PTOU(p); + lxpnp->lxpr_realvp = up->u_rdir != NULL ?
up->u_rdir : rootdir; + ASSERT(lxpnp->lxpr_realvp != NULL); + VN_HOLD(lxpnp->lxpr_realvp); + vp->v_type = VLNK; + lxpnp->lxpr_mode = 0777; /* anyone does anything ! */ + break; + + case LXPR_PID_EXE: + ASSERT(p != NULL); + lxpnp->lxpr_realvp = p->p_exec; + if (lxpnp->lxpr_realvp != NULL) { + VN_HOLD(lxpnp->lxpr_realvp); + } + vp->v_type = VLNK; + lxpnp->lxpr_mode = 0777; + break; + + case LXPR_SELF: + vp->v_type = VLNK; + lxpnp->lxpr_mode = 0777; /* anyone does anything ! */ + break; + + case LXPR_PID_FD_FD: + ASSERT(p != NULL); + /* lxpr_realvp is set after we return */ + vp->v_type = VLNK; + lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */ + break; + + case LXPR_PID_FDDIR: + ASSERT(p != NULL); + vp->v_type = VDIR; + lxpnp->lxpr_mode = 0500; /* read-search by owner only */ + break; + + case LXPR_PIDDIR: + ASSERT(p != NULL); + vp->v_type = VDIR; + lxpnp->lxpr_mode = 0511; + break; + + case LXPR_NETDIR: + vp->v_type = VDIR; + lxpnp->lxpr_mode = 0555; /* read-search by all */ + break; + + case LXPR_PID_ENV: + case LXPR_PID_MEM: + ASSERT(p != NULL); + /*FALLTHRU*/ + case LXPR_KCORE: + vp->v_type = VREG; + lxpnp->lxpr_mode = 0400; /* read-only by owner only */ + break; + + default: + vp->v_type = VREG; + lxpnp->lxpr_mode = 0444; /* read-only by all */ + break; + } + + return (lxpnp); +} + + +/* + * Free the storage obtained from lxpr_getnode(). + */ +void +lxpr_freenode(lxpr_node_t *lxpnp) +{ + ASSERT(lxpnp != NULL); + ASSERT(LXPTOV(lxpnp) != NULL); + + /* + * delete any association with realvp + */ + if (lxpnp->lxpr_realvp != NULL) + VN_RELE(lxpnp->lxpr_realvp); + + /* + * delete any association with parent vp + */ + if (lxpnp->lxpr_parent != NULL) + VN_RELE(lxpnp->lxpr_parent); + + /* + * Release the lxprnode. 
+ */ + kmem_cache_free(lxpr_node_cache, lxpnp); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c new file mode 100644 index 0000000000..44891dc612 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c @@ -0,0 +1,373 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lxprvfsops.c: vfs operations for /lxprocfs. 
+ */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/cmn_err.h> +#include <sys/cred.h> +#include <sys/debug.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/var.h> +#include <sys/vfs.h> +#include <sys/vnode.h> +#include <sys/mode.h> +#include <sys/signal.h> +#include <sys/user.h> +#include <sys/mount.h> +#include <sys/bitmap.h> +#include <sys/kmem.h> +#include <sys/policy.h> +#include <sys/modctl.h> +#include <sys/sunddi.h> +#include <sys/sunldi.h> +#include <sys/lx_impl.h> + +#include "lx_proc.h" + +/* Module level parameters */ +static int lxprocfstype; +static dev_t lxprocdev; +static kmutex_t lxpr_mount_lock; + +int nproc_highbit; /* highbit(v.v_nproc) */ + +static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *); +static int lxpr_unmount(vfs_t *, int, cred_t *); +static int lxpr_root(vfs_t *, vnode_t **); +static int lxpr_statvfs(vfs_t *, statvfs64_t *); +static int lxpr_init(int, char *); + +static vfsdef_t vfw = { + VFSDEF_VERSION, + "lx_proc", + lxpr_init, + 0, + NULL +}; + +/* + * Module linkage information for the kernel. 
+ */ +extern struct mod_ops mod_fsops; + +static struct modlfs modlfs = { + &mod_fsops, "generic linux procfs", &vfw +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlfs, NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int retval; + + /* + * attempt to unload the module + */ + if ((retval = mod_remove(&modlinkage)) != 0) + goto done; + + /* + * destroy lxpr_node cache + */ + lxpr_fininodecache(); + + /* + * clean out the vfsops and vnodeops + */ + (void) vfs_freevfsops_by_type(lxprocfstype); + vn_freevnodeops(lxpr_vnodeops); + + mutex_destroy(&lxpr_mount_lock); +done: + return (retval); +} + +static int +lxpr_init(int fstype, char *name) +{ + static const fs_operation_def_t lxpr_vfsops_template[] = { + VFSNAME_MOUNT, lxpr_mount, + VFSNAME_UNMOUNT, lxpr_unmount, + VFSNAME_ROOT, lxpr_root, + VFSNAME_STATVFS, lxpr_statvfs, + NULL, NULL + }; + extern const fs_operation_def_t lxpr_vnodeops_template[]; + int error; + major_t dev; + + nproc_highbit = highbit(v.v_proc); + lxprocfstype = fstype; + ASSERT(lxprocfstype != 0); + + mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL); + + /* + * Associate VFS ops vector with this fstype. + */ + error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL); + if (error != 0) { + cmn_err(CE_WARN, "lxpr_init: bad vfs ops template"); + return (error); + } + + /* + * Set up vnode ops vector too. + */ + error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops); + if (error != 0) { + (void) vfs_freevfsops_by_type(fstype); + cmn_err(CE_WARN, "lxpr_init: bad vnode ops template"); + return (error); + } + + /* + * Assign a unique "device" number (used by stat(2)). 
+ */ + if ((dev = getudev()) == (major_t)-1) { + cmn_err(CE_WARN, "lxpr_init: can't get unique device number"); + dev = 0; + } + + /* + * Make the pseudo device + */ + lxprocdev = makedevice(dev, 0); + + /* + * Initialise cache for lxpr_nodes + */ + lxpr_initnodecache(); + + return (0); +} + +static int +lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr) +{ + lxpr_mnt_t *lxpr_mnt; + zone_t *zone = curproc->p_zone; + ldi_ident_t li; + int err; + + /* + * must be root to mount + */ + if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) + return (EPERM); + + /* + * mount point must be a directory + */ + if (mvp->v_type != VDIR) + return (ENOTDIR); + + if (zone == global_zone) { + zone_t *mntzone; + + mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); + zone_rele(mntzone); + if (zone != mntzone) + return (EBUSY); + } + + /* + * Having the resource be anything but "lxproc" doesn't make sense + */ + vfs_setresource(vfsp, "lxproc"); + + lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP); + + if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) { + kmem_free(lxpr_mnt, sizeof (*lxpr_mnt)); + return (err); + } + + lxpr_mnt->lxprm_li = li; + + mutex_enter(&lxpr_mount_lock); + + /* + * Ensure we don't allow overlaying mounts + */ + mutex_enter(&mvp->v_lock); + if ((uap->flags & MS_OVERLAY) == 0 && + (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { + mutex_exit(&mvp->v_lock); + mutex_exit(&lxpr_mount_lock); + kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt))); + return (EBUSY); + } + mutex_exit(&mvp->v_lock); + + /* + * allocate the first vnode + */ + zone_hold(lxpr_mnt->lxprm_zone = zone); + + /* Arbitrarily set the parent vnode to the mounted over directory */ + lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0); + + /* Correctly set the fs for the root node */ + lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp; + + vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype); + vfsp->vfs_bsize = DEV_BSIZE; + vfsp->vfs_fstype = lxprocfstype; + vfsp->vfs_data = 
(caddr_t)lxpr_mnt; + vfsp->vfs_dev = lxprocdev; + + mutex_exit(&lxpr_mount_lock); + + return (0); +} + +static int +lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr) +{ + lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data; + vnode_t *vp; + int count; + + ASSERT(lxpr_mnt != NULL); + vp = LXPTOV(lxpr_mnt->lxprm_node); + + mutex_enter(&lxpr_mount_lock); + + /* + * must be root to unmount + */ + if (secpolicy_fs_unmount(cr, vfsp) != 0) { + mutex_exit(&lxpr_mount_lock); + return (EPERM); + } + + /* + * forced unmount is not supported by this file system + */ + if (flag & MS_FORCE) { + mutex_exit(&lxpr_mount_lock); + return (ENOTSUP); + } + + /* + * Ensure that no vnodes are in use on this mount point. + */ + mutex_enter(&vp->v_lock); + count = vp->v_count; + mutex_exit(&vp->v_lock); + if (count > 1) { + mutex_exit(&lxpr_mount_lock); + return (EBUSY); + } + + + /* + * purge the dnlc cache for vnode entries + * associated with this file system + */ + count = dnlc_purge_vfsp(vfsp, 0); + + /* + * free up the lxprnode + */ + lxpr_freenode(lxpr_mnt->lxprm_node); + zone_rele(lxpr_mnt->lxprm_zone); + kmem_free(lxpr_mnt, sizeof (*lxpr_mnt)); + + mutex_exit(&lxpr_mount_lock); + + return (0); +} + +static int +lxpr_root(vfs_t *vfsp, vnode_t **vpp) +{ + lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node; + vnode_t *vp = LXPTOV(lxpnp); + + VN_HOLD(vp); + *vpp = vp; + return (0); +} + +static int +lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp) +{ + int n; + dev32_t d32; + extern uint_t nproc; + + n = v.v_proc - nproc; + + bzero((caddr_t)sp, sizeof (*sp)); + sp->f_bsize = DEV_BSIZE; + sp->f_frsize = DEV_BSIZE; + sp->f_blocks = (fsblkcnt64_t)0; + sp->f_bfree = (fsblkcnt64_t)0; + sp->f_bavail = (fsblkcnt64_t)0; + sp->f_files = (fsfilcnt64_t)v.v_proc + 2; + sp->f_ffree = (fsfilcnt64_t)n; + sp->f_favail = (fsfilcnt64_t)n; + (void) cmpldev(&d32, vfsp->vfs_dev); + sp->f_fsid = d32; + /* It is guaranteed that vsw_name will fit in f_basetype */ + (void) strcpy(sp->f_basetype, 
vfssw[lxprocfstype].vsw_name); + sp->f_flag = vf_to_stf(vfsp->vfs_flag); + sp->f_namemax = 64; /* quite arbitrary */ + bzero(sp->f_fstr, sizeof (sp->f_fstr)); + + /* We know f_fstr is 32 chars */ + (void) strcpy(sp->f_fstr, "/proc"); + (void) strcpy(&sp->f_fstr[6], "/proc"); + + return (0); +} diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c new file mode 100644 index 0000000000..45bff38e16 --- /dev/null +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -0,0 +1,2951 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * lxpr_vnops.c: Vnode operations for the lx /proc file system + * + * Assumptions and Gotchas: + * + * In order to preserve Solaris' security policy. This file system's + * functionality does not override Solaris' security policies even if + * that means breaking Linux compatability. + * + * Linux has no concept of lwps so we only implement procs here as in the + * old /proc interface. 
+ */ + +#include <sys/cpupart.h> +#include <sys/cpuvar.h> +#include <sys/session.h> +#include <sys/vmparam.h> +#include <sys/mman.h> +#include <vm/rm.h> +#include <vm/seg_vn.h> +#include <sys/sdt.h> +#include <lx_signum.h> +#include <sys/strlog.h> +#include <sys/stropts.h> +#include <sys/cmn_err.h> +#include <sys/lx_brand.h> +#include <sys/x86_archext.h> +#include <sys/archsystm.h> +#include <sys/fp.h> +#include <sys/pool_pset.h> +#include <sys/pset.h> +#include <sys/zone.h> + +/* Dependent on the Solaris procfs */ +extern kthread_t *prchoose(proc_t *); + +#include "lx_proc.h" + +extern pgcnt_t swapfs_minfree; +extern volatile clock_t lbolt; +extern time_t boot_time; + +/* + * Pointer to the vnode ops vector for this fs. + * This is instantiated in lxprinit() in lxpr_vfsops.c + */ +vnodeops_t *lxpr_vnodeops; + +static int lxpr_open(vnode_t **, int, cred_t *); +static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *); +static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); +static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *); +static int lxpr_access(vnode_t *, int, int, cred_t *); +static int lxpr_lookup(vnode_t *, char *, vnode_t **, + pathname_t *, int, vnode_t *, cred_t *); +static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *); +static int lxpr_readlink(vnode_t *, uio_t *); +static int lxpr_cmp(vnode_t *, vnode_t *); +static int lxpr_realvp(vnode_t *, vnode_t **); +static int lxpr_sync(void); +static void lxpr_inactive(vnode_t *, cred_t *); + +static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); +static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); +static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); +static vnode_t *lxpr_lookup_fddir(vnode_t *, char *); +static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); + +static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); 
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); +static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); + +static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); + +static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); + +static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_rpc(lxpr_node_t *, 
lxpr_uiobuf_t *); +static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *); +static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *); + +/* + * Simple conversion + */ +#define btok(x) ((x) >> 10) /* bytes to kbytes */ +#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */ + +/* + * The lx /proc vnode operations vector + */ +const fs_operation_def_t lxpr_vnodeops_template[] = { + VOPNAME_OPEN, lxpr_open, + VOPNAME_CLOSE, lxpr_close, + VOPNAME_READ, lxpr_read, + VOPNAME_GETATTR, lxpr_getattr, + VOPNAME_ACCESS, lxpr_access, + VOPNAME_LOOKUP, lxpr_lookup, + VOPNAME_READDIR, lxpr_readdir, + VOPNAME_READLINK, lxpr_readlink, + VOPNAME_FSYNC, lxpr_sync, + VOPNAME_SEEK, lxpr_sync, + VOPNAME_INACTIVE, (fs_generic_func_p) lxpr_inactive, + VOPNAME_CMP, lxpr_cmp, + VOPNAME_REALVP, lxpr_realvp, + NULL, NULL +}; + + +/* + * file contents of an lx /proc directory. + */ +static lxpr_dirent_t lx_procdir[] = { + { LXPR_CMDLINE, "cmdline" }, + { LXPR_CPUINFO, "cpuinfo" }, + { LXPR_DEVICES, "devices" }, + { LXPR_DMA, "dma" }, + { LXPR_FILESYSTEMS, "filesystems" }, + { LXPR_INTERRUPTS, "interrupts" }, + { LXPR_IOPORTS, "ioports" }, + { LXPR_KCORE, "kcore" }, + { LXPR_KMSG, "kmsg" }, + { LXPR_LOADAVG, "loadavg" }, + { LXPR_MEMINFO, "meminfo" }, + { LXPR_MOUNTS, "mounts" }, + { LXPR_NETDIR, "net" }, + { LXPR_PARTITIONS, "partitions" }, + { LXPR_SELF, "self" }, + { LXPR_STAT, "stat" }, + { LXPR_UPTIME, "uptime" }, + { LXPR_VERSION, "version" } +}; + +#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) + +/* + * Contents of an lx /proc/<pid> directory. 
+ */ +static lxpr_dirent_t piddir[] = { + { LXPR_PID_CMDLINE, "cmdline" }, + { LXPR_PID_CPU, "cpu" }, + { LXPR_PID_CURDIR, "cwd" }, + { LXPR_PID_ENV, "environ" }, + { LXPR_PID_EXE, "exe" }, + { LXPR_PID_MAPS, "maps" }, + { LXPR_PID_MEM, "mem" }, + { LXPR_PID_ROOTDIR, "root" }, + { LXPR_PID_STAT, "stat" }, + { LXPR_PID_STATM, "statm" }, + { LXPR_PID_STATUS, "status" }, + { LXPR_PID_FDDIR, "fd" } +}; + +#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0])) + +/* + * contents of lx /proc/net directory + */ +static lxpr_dirent_t netdir[] = { + { LXPR_NET_ARP, "arp" }, + { LXPR_NET_DEV, "dev" }, + { LXPR_NET_DEV_MCAST, "dev_mcast" }, + { LXPR_NET_IGMP, "igmp" }, + { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, + { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, + { LXPR_NET_MCFILTER, "mcfilter" }, + { LXPR_NET_NETSTAT, "netstat" }, + { LXPR_NET_RAW, "raw" }, + { LXPR_NET_ROUTE, "route" }, + { LXPR_NET_RPC, "rpc" }, + { LXPR_NET_RT_CACHE, "rt_cache" }, + { LXPR_NET_SOCKSTAT, "sockstat" }, + { LXPR_NET_SNMP, "snmp" }, + { LXPR_NET_STAT, "stat" }, + { LXPR_NET_TCP, "tcp" }, + { LXPR_NET_UDP, "udp" }, + { LXPR_NET_UNIX, "unix" } +}; + +#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0])) + +/* + * lxpr_open(): Vnode operation for VOP_OPEN() + */ +static int +lxpr_open(vnode_t **vpp, int flag, cred_t *cr) +{ + vnode_t *vp = *vpp; + lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + vnode_t *rvp; + int error = 0; + + /* + * We only allow reading in this file systrem + */ + if (flag & FWRITE) + return (EROFS); + + /* + * If we are opening an underlying file only allow regular files + * reject the open for anything but a regular file. + * Just do it if we are opening the current or root directory. + */ + if (lxpnp->lxpr_realvp != NULL) { + rvp = lxpnp->lxpr_realvp; + + if (type == LXPR_PID_FD_FD && rvp->v_type != VREG) + error = EACCES; + else { + /* + * Need to hold rvp since VOP_OPEN() may release it. 
+ */ + VN_HOLD(rvp); + error = VOP_OPEN(&rvp, flag, cr); + if (error) { + VN_RELE(rvp); + } else { + *vpp = rvp; + VN_RELE(vp); + } + } + } + + if (type == LXPR_KMSG) { + ldi_ident_t li = VTOLXPM(vp)->lxprm_li; + struct strioctl str; + int rv; + + /* + * Open the zone's console device using the layered driver + * interface. + */ + if ((error = ldi_open_by_name("/dev/log", FREAD, cr, + &lxpnp->lxpr_cons_ldih, li)) != 0) + return (error); + + /* + * Send an ioctl to the underlying console device, letting it + * know we're interested in getting console messages. + */ + str.ic_cmd = I_CONSLOG; + str.ic_timout = 0; + str.ic_len = 0; + str.ic_dp = NULL; + if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR, + (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) + return (error); + } + + return (error); +} + + +/* + * lxpr_close(): Vnode operation for VOP_CLOSE() + */ +/* ARGSUSED */ +static int +lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) +{ + lxpr_node_t *lxpr = VTOLXP(vp); + lxpr_nodetype_t type = lxpr->lxpr_type; + int err; + + /* + * we should never get here because the close is done on the realvp + * for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR && + type != LXPR_PID_EXE); + + if (type == LXPR_KMSG) { + if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0) + return (err); + } + + return (0); +} + +static void (*lxpr_read_function[LXPR_NFILES])() = { + lxpr_read_isdir, /* /proc */ + lxpr_read_isdir, /* /proc/<pid> */ + lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */ + lxpr_read_empty, /* /proc/<pid>/cpu */ + lxpr_read_invalid, /* /proc/<pid>/cwd */ + lxpr_read_empty, /* /proc/<pid>/environ */ + lxpr_read_invalid, /* /proc/<pid>/exe */ + lxpr_read_pid_maps, /* /proc/<pid>/maps */ + lxpr_read_empty, /* /proc/<pid>/mem */ + lxpr_read_invalid, /* /proc/<pid>/root */ + lxpr_read_pid_stat, /* /proc/<pid>/stat */ + lxpr_read_pid_statm, /* /proc/<pid>/statm */ + lxpr_read_pid_status, /* 
/proc/<pid>/status */ + lxpr_read_isdir, /* /proc/<pid>/fd */ + lxpr_read_fd, /* /proc/<pid>/fd/nn */ + lxpr_read_empty, /* /proc/cmdline */ + lxpr_read_cpuinfo, /* /proc/cpuinfo */ + lxpr_read_empty, /* /proc/devices */ + lxpr_read_empty, /* /proc/dma */ + lxpr_read_empty, /* /proc/filesystems */ + lxpr_read_empty, /* /proc/interrupts */ + lxpr_read_empty, /* /proc/ioports */ + lxpr_read_empty, /* /proc/kcore */ + lxpr_read_kmsg, /* /proc/kmsg */ + lxpr_read_loadavg, /* /proc/loadavg */ + lxpr_read_meminfo, /* /proc/meminfo */ + lxpr_read_mounts, /* /proc/mounts */ + lxpr_read_isdir, /* /proc/net */ + lxpr_read_net_arp, /* /proc/net/arp */ + lxpr_read_net_dev, /* /proc/net/dev */ + lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */ + lxpr_read_net_igmp, /* /proc/net/igmp */ + lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */ + lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */ + lxpr_read_net_mcfilter, /* /proc/net/mcfilter */ + lxpr_read_net_netstat, /* /proc/net/netstat */ + lxpr_read_net_raw, /* /proc/net/raw */ + lxpr_read_net_route, /* /proc/net/route */ + lxpr_read_net_rpc, /* /proc/net/rpc */ + lxpr_read_net_rt_cache, /* /proc/net/rt_cache */ + lxpr_read_net_sockstat, /* /proc/net/sockstat */ + lxpr_read_net_snmp, /* /proc/net/snmp */ + lxpr_read_net_stat, /* /proc/net/stat */ + lxpr_read_net_tcp, /* /proc/net/tcp */ + lxpr_read_net_udp, /* /proc/net/udp */ + lxpr_read_net_unix, /* /proc/net/unix */ + lxpr_read_partitions, /* /proc/partitions */ + lxpr_read_invalid, /* /proc/self */ + lxpr_read_stat, /* /proc/stat */ + lxpr_read_uptime, /* /proc/uptime */ + lxpr_read_version, /* /proc/version */ +}; + +/* + * Array of lookup functions, indexed by lx /proc file type. 
+ */ +static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = { + lxpr_lookup_procdir, /* /proc */ + lxpr_lookup_piddir, /* /proc/<pid> */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/root */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/status */ + lxpr_lookup_fddir, /* /proc/<pid>/fd */ + lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */ + lxpr_lookup_not_a_dir, /* /proc/cmdline */ + lxpr_lookup_not_a_dir, /* /proc/cpuinfo */ + lxpr_lookup_not_a_dir, /* /proc/devices */ + lxpr_lookup_not_a_dir, /* /proc/dma */ + lxpr_lookup_not_a_dir, /* /proc/filesystems */ + lxpr_lookup_not_a_dir, /* /proc/interrupts */ + lxpr_lookup_not_a_dir, /* /proc/ioports */ + lxpr_lookup_not_a_dir, /* /proc/kcore */ + lxpr_lookup_not_a_dir, /* /proc/kmsg */ + lxpr_lookup_not_a_dir, /* /proc/loadavg */ + lxpr_lookup_not_a_dir, /* /proc/meminfo */ + lxpr_lookup_not_a_dir, /* /proc/mounts */ + lxpr_lookup_netdir, /* /proc/net */ + lxpr_lookup_not_a_dir, /* /proc/net/arp */ + lxpr_lookup_not_a_dir, /* /proc/net/dev */ + lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */ + lxpr_lookup_not_a_dir, /* /proc/net/igmp */ + lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */ + lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */ + lxpr_lookup_not_a_dir, /* /proc/net/netstat */ + lxpr_lookup_not_a_dir, /* /proc/net/raw */ + lxpr_lookup_not_a_dir, /* /proc/net/route */ + lxpr_lookup_not_a_dir, /* /proc/net/rpc */ + lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */ + lxpr_lookup_not_a_dir, /* /proc/net/sockstat */ + lxpr_lookup_not_a_dir, /* 
/proc/net/snmp */ + lxpr_lookup_not_a_dir, /* /proc/net/stat */ + lxpr_lookup_not_a_dir, /* /proc/net/tcp */ + lxpr_lookup_not_a_dir, /* /proc/net/udp */ + lxpr_lookup_not_a_dir, /* /proc/net/unix */ + lxpr_lookup_not_a_dir, /* /proc/partitions */ + lxpr_lookup_not_a_dir, /* /proc/self */ + lxpr_lookup_not_a_dir, /* /proc/stat */ + lxpr_lookup_not_a_dir, /* /proc/uptime */ + lxpr_lookup_not_a_dir, /* /proc/version */ +}; + +/* + * Array of readdir functions, indexed by /proc file type. + */ +static int (*lxpr_readdir_function[LXPR_NFILES])() = { + lxpr_readdir_procdir, /* /proc */ + lxpr_readdir_piddir, /* /proc/<pid> */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/root */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/status */ + lxpr_readdir_fddir, /* /proc/<pid>/fd */ + lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */ + lxpr_readdir_not_a_dir, /* /proc/cmdline */ + lxpr_readdir_not_a_dir, /* /proc/cpuinfo */ + lxpr_readdir_not_a_dir, /* /proc/devices */ + lxpr_readdir_not_a_dir, /* /proc/dma */ + lxpr_readdir_not_a_dir, /* /proc/filesystems */ + lxpr_readdir_not_a_dir, /* /proc/interrupts */ + lxpr_readdir_not_a_dir, /* /proc/ioports */ + lxpr_readdir_not_a_dir, /* /proc/kcore */ + lxpr_readdir_not_a_dir, /* /proc/kmsg */ + lxpr_readdir_not_a_dir, /* /proc/loadavg */ + lxpr_readdir_not_a_dir, /* /proc/meminfo */ + lxpr_readdir_not_a_dir, /* /proc/mounts */ + lxpr_readdir_netdir, /* /proc/net */ + lxpr_readdir_not_a_dir, /* /proc/net/arp */ + lxpr_readdir_not_a_dir, /* /proc/net/dev */ + lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */ + 
lxpr_readdir_not_a_dir, /* /proc/net/igmp */ + lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ + lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ + lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ + lxpr_readdir_not_a_dir, /* /proc/net/netstat */ + lxpr_readdir_not_a_dir, /* /proc/net/raw */ + lxpr_readdir_not_a_dir, /* /proc/net/route */ + lxpr_readdir_not_a_dir, /* /proc/net/rpc */ + lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */ + lxpr_readdir_not_a_dir, /* /proc/net/sockstat */ + lxpr_readdir_not_a_dir, /* /proc/net/snmp */ + lxpr_readdir_not_a_dir, /* /proc/net/stat */ + lxpr_readdir_not_a_dir, /* /proc/net/tcp */ + lxpr_readdir_not_a_dir, /* /proc/net/udp */ + lxpr_readdir_not_a_dir, /* /proc/net/unix */ + lxpr_readdir_not_a_dir, /* /proc/partitions */ + lxpr_readdir_not_a_dir, /* /proc/self */ + lxpr_readdir_not_a_dir, /* /proc/stat */ + lxpr_readdir_not_a_dir, /* /proc/uptime */ + lxpr_readdir_not_a_dir, /* /proc/version */ +}; + + +/* + * lxpr_read(): Vnode operation for VOP_READ() + * + * As the format of all the files that can be read in the lx procfs is human + * readable and not binary structures there do not have to be different + * read variants depending on whether the reading process model is 32 or 64 bits + * (at least in general, and certainly the difference is unlikely to be enough + * to justify have different routines for 32 and 64 bit reads + */ +/* ARGSUSED */ +static int +lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, + caller_context_t *ct) +{ + lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); + int error; + + ASSERT(type >= 0 && type < LXPR_NFILES); + + lxpr_read_function[type](lxpnp, uiobuf); + + error = lxpr_uiobuf_flush(uiobuf); + lxpr_uiobuf_free(uiobuf); + + return (error); +} + + +/* + * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() + * + * Various special case reads: + * - trying to read a directory + * - invalid file (used 
to mean a file that should be implemented, + * but isn't yet) + * - empty file + * - wait to be able to read a file that will never have anything to read + */ +/* ARGSUSED */ +static void +lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_seterr(uiobuf, EISDIR); +} + +/* ARGSUSED */ +static void +lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_seterr(uiobuf, EINVAL); +} + +/* ARGSUSED */ +static void +lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* + * lxpr_read_pid_cmdline(): + * + * This is not precisely compatible with linux: + * + * The linux cmdline returns argv with the correct separation + * using \0 between the arguments, we cannot do that without + * copying the real argv from the correct process context. + * This is too difficult to attempt so we pretend that the + * entire cmdline is just argv[0]. This is good enough for + * ps to display correctly, but might cause some other things + * not to work correctly. 
+ */ +static void +lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + if (PTOU(p)->u_argv != 0) { + char *buff = PTOU(p)->u_psargs; + int len = strlen(buff); + lxpr_unlock(p); + lxpr_uiobuf_write(uiobuf, buff, len+1); + } else { + lxpr_unlock(p); + } +} + + +/* + * lxpr_read_pid_maps(): memory map file + */ +static void +lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + struct as *as; + struct seg *seg; + char *buf; + int buflen = MAXPATHLEN; + struct print_data { + caddr_t saddr; + caddr_t eaddr; + int type; + char prot[5]; + uint32_t offset; + vnode_t *vp; + struct print_data *next; + } *print_head = NULL; + struct print_data **print_tail = &print_head; + struct print_data *pbuf; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + as = p->p_as; + + if (as == &kas) { + lxpr_unlock(p); + return; + } + + mutex_exit(&p->p_lock); + + /* Iterate over all segments in the address space */ + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { + vnode_t *vp; + uint_t protbits; + + pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP); + + pbuf->saddr = seg->s_base; + pbuf->eaddr = seg->s_base+seg->s_size; + pbuf->type = SEGOP_GETTYPE(seg, seg->s_base); + + /* + * Cheat and only use the protection bits of the first page + * in the segment + */ + (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot)); + (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits); + + if (protbits & PROT_READ) pbuf->prot[0] = 'r'; + if (protbits & PROT_WRITE) pbuf->prot[1] = 'w'; + if (protbits & PROT_EXEC) pbuf->prot[2] = 'x'; + if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's'; + else if (pbuf->type & MAP_PRIVATE) 
pbuf->prot[3] = 'p'; + + if (seg->s_ops == &segvn_ops && + SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && + vp != NULL && vp->v_type == VREG) { + VN_HOLD(vp); + pbuf->vp = vp; + } else { + pbuf->vp = NULL; + } + + pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr); + + pbuf->next = NULL; + *print_tail = pbuf; + print_tail = &pbuf->next; + } + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + lxpr_unlock(p); + + buf = kmem_alloc(buflen, KM_SLEEP); + + /* print the data we've extracted */ + pbuf = print_head; + while (pbuf != NULL) { + struct print_data *pbuf_next; + vattr_t vattr; + + int maj = 0; + int min = 0; + int inode = 0; + + *buf = '\0'; + if (pbuf->vp != NULL) { + vattr.va_mask = AT_FSID | AT_NODEID; + if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED()) == 0) { + maj = getmajor(vattr.va_fsid); + min = getminor(vattr.va_fsid); + inode = vattr.va_nodeid; + } + (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED()); + VN_RELE(pbuf->vp); + } + + if (*buf != '\0') { + lxpr_uiobuf_printf(uiobuf, + "%08x-%08x %s %08x %02d:%03d %d %s\n", + pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, + maj, min, inode, buf); + } else { + lxpr_uiobuf_printf(uiobuf, + "%08x-%08x %s %08x %02d:%03d %d\n", + pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, + maj, min, inode); + } + + pbuf_next = pbuf->next; + kmem_free(pbuf, sizeof (*pbuf)); + pbuf = pbuf_next; + } + + kmem_free(buf, buflen); +} + +/* + * lxpr_read_pid_statm(): memory status file + */ +static void +lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + struct as *as; + size_t vsize; + size_t rss; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + as = p->p_as; + + mutex_exit(&p->p_lock); + + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = btopr(rm_assize(as)); + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + + mutex_enter(&p->p_lock); + lxpr_unlock(p); + + 
lxpr_uiobuf_printf(uiobuf, + "%lu %lu %lu %lu %lu %lu %lu\n", + vsize, rss, 0l, rss, 0l, 0l, 0l); +} + +/* + * lxpr_read_pid_status(): status file + */ +static void +lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + kthread_t *t; + user_t *up; + cred_t *cr; + const gid_t *groups; + int ngroups; + struct as *as; + char *status; + pid_t pid, ppid; + size_t vsize; + size_t rss; + k_sigset_t current, ignore, handle; + int i, lx_sig; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + pid = p->p_pid; + + /* + * Convert pid to the Linux default of 1 if we're the zone's init + * process + */ + if (pid == curproc->p_zone->zone_proc_initpid) { + pid = 1; + ppid = 0; /* parent pid for init is 0 */ + } else { + /* + * Make sure not to reference parent PIDs that reside outside + * the zone + */ + ppid = ((p->p_flag & SZONETOP) + ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * Convert ppid to the Linux default of 1 if our parent is the + * zone's init process + */ + if (ppid == curproc->p_zone->zone_proc_initpid) + ppid = 1; + } + + t = prchoose(p); + if (t != NULL) { + switch (t->t_state) { + case TS_SLEEP: + status = "S (sleeping)"; + break; + case TS_RUN: + case TS_ONPROC: + status = "R (running)"; + break; + case TS_ZOMB: + status = "Z (zombie)"; + break; + case TS_STOPPED: + status = "T (stopped)"; + break; + default: + status = "! (unknown)"; + break; + } + thread_unlock(t); + } else { + /* + * there is a hole in the exit code, where a proc can have + * no threads but it is yet to be flagged SZOMB. 
We will + * assume we are about to become a zombie + */ + status = "Z (zombie)"; + } + + up = PTOU(p); + mutex_enter(&p->p_crlock); + crhold(cr = p->p_cred); + mutex_exit(&p->p_crlock); + + lxpr_uiobuf_printf(uiobuf, + "Name:\t%s\n" + "State:\t%s\n" + "Tgid:\t%d\n" + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n" + "FDSize:\t%d\n" + "Groups:\t", + up->u_comm, + status, + pid, /* thread group id - same as pid until we map lwps to procs */ + pid, + ppid, + 0, + crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr), + crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr), + p->p_fno_ctl); + + ngroups = crgetngroups(cr); + groups = crgetgroups(cr); + for (i = 0; i < ngroups; i++) { + lxpr_uiobuf_printf(uiobuf, + "%d ", + groups[i]); + } + crfree(cr); + + as = p->p_as; + if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) { + mutex_exit(&p->p_lock); + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = rm_assize(as); + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + + lxpr_uiobuf_printf(uiobuf, + "\n" + "VmSize:\t%8lu kB\n" + "VmLck:\t%8lu kB\n" + "VmRSS:\t%8lu kB\n" + "VmData:\t%8lu kB\n" + "VmStk:\t%8lu kB\n" + "VmExe:\t%8lu kB\n" + "VmLib:\t%8lu kB", + btok(vsize), + 0l, + ptok(rss), + 0l, + btok(p->p_stksize), + ptok(rss), + 0l); + } + + sigemptyset(¤t); + sigemptyset(&ignore); + sigemptyset(&handle); + + for (i = 1; i < MAXSIG; i++) { + lx_sig = stol_signo[i]; + + if ((lx_sig > 0) && (lx_sig < MAXSIG)) { + if (sigismember(&p->p_sig, i)) + sigaddset(¤t, lx_sig); + + if (up->u_signal[i] == SIG_IGN) + sigaddset(&ignore, lx_sig); + else if (up->u_signal[i] != SIG_DFL) + sigaddset(&handle, lx_sig); + } + } + + lxpr_uiobuf_printf(uiobuf, + "\n" + "SigPnd:\t%08x%08x\n" + "SigBlk:\t%08x%08x\n" + "SigIgn:\t%08x%08x\n" + "SigCgt:\t%08x%08x\n" + "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + current.__sigbits[1], current.__sigbits[0], + 0, 0, /* 
signals blocked on per thread basis */ + ignore.__sigbits[1], ignore.__sigbits[0], + handle.__sigbits[1], handle.__sigbits[0], + /* Can't do anything with linux capabilities */ + 0, + 0, + 0); + + lxpr_unlock(p); +} + + +/* + * lxpr_read_pid_stat(): pid stat file + */ +static void +lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + proc_t *p; + kthread_t *t; + struct as *as; + char stat; + pid_t pid, ppid, pgpid, spid; + gid_t psgid; + dev_t psdev; + size_t rss, vsize; + int nice, pri; + caddr_t wchan; + processorid_t cpu; + + ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT); + + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) { + lxpr_uiobuf_seterr(uiobuf, EINVAL); + return; + } + + pid = p->p_pid; + + /* + * Set Linux defaults if we're the zone's init process + */ + if (pid == curproc->p_zone->zone_proc_initpid) { + pid = 1; /* PID for init */ + ppid = 0; /* parent PID for init is 0 */ + pgpid = 0; /* process group for init is 0 */ + psgid = -1; /* credential GID for init is -1 */ + spid = 0; /* session id for init is 0 */ + psdev = 0; /* session device for init is 0 */ + } else { + /* + * Make sure not to reference parent PIDs that reside outside + * the zone + */ + ppid = ((p->p_flag & SZONETOP) + ? 
curproc->p_zone->zone_zsched->p_pid : p->p_ppid); + + /* + * Convert ppid to the Linux default of 1 if our parent is the + * zone's init process + */ + if (ppid == curproc->p_zone->zone_proc_initpid) + ppid = 1; + + pgpid = p->p_pgrp; + + mutex_enter(&p->p_splock); + mutex_enter(&p->p_sessp->s_lock); + spid = p->p_sessp->s_sid; + /* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */ + psdev = p->p_sessp->s_dev; + if (p->p_sessp->s_cred) + psgid = crgetgid(p->p_sessp->s_cred); + else + psgid = crgetgid(p->p_cred); + + mutex_exit(&p->p_sessp->s_lock); + mutex_exit(&p->p_splock); + } + + t = prchoose(p); + if (t != NULL) { + switch (t->t_state) { + case TS_SLEEP: + stat = 'S'; break; + case TS_RUN: + case TS_ONPROC: + stat = 'R'; break; + case TS_ZOMB: + stat = 'Z'; break; + case TS_STOPPED: + stat = 'T'; break; + default: + stat = '!'; break; + } + + if (CL_DONICE(t, NULL, 0, &nice) != 0) + nice = 0; + + pri = v.v_maxsyspri - t->t_pri; + wchan = t->t_wchan; + cpu = t->t_cpu->cpu_seqid; + thread_unlock(t); + } else { + /* Only zombies have no threads */ + stat = 'Z'; + nice = 0; + pri = 0; + wchan = 0; + cpu = 0; + } + as = p->p_as; + mutex_exit(&p->p_lock); + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + vsize = rm_assize(as); + rss = rm_asrss(as); + AS_LOCK_EXIT(as, &as->a_lock); + mutex_enter(&p->p_lock); + + lxpr_uiobuf_printf(uiobuf, + "%d (%s) %c %d %d %d %d %d " + "%lu %lu %lu %lu %lu " + "%lu %lu %ld %ld " + "%d %d " + "0 " + "%ld %lu " + "%lu %ld %llu " + "%lu %lu %u " + "%lu %lu " + "%lu %lu %lu %lu " + "%lu " + "%lu %lu " + "%d " + "%d" + "\n", + pid, + PTOU(p)->u_comm, + stat, + ppid, pgpid, + spid, psdev, psgid, + 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */ + p->p_utime, p->p_stime, p->p_cutime, p->p_cstime, + pri, nice, + 0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */ + vsize, rss, p->p_vmem_ctl, + 0l, 0l, USRSTACK, /* startcode, endcode, startstack */ + 0l, 0l, /* kstkesp, kstkeip */ + 0l, 0l, 0l, 0l, /* 
signal, blocked, sigignore, sigcatch */ + wchan, + 0l, 0l, /* nswap, cnswap */ + 0, /* exit_signal */ + cpu); + + lxpr_unlock(p); +} + +/* ARGSUSED */ +static void +lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_printf(uiobuf, "Inter-| Receive " + " | Transmit\n"); + lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo" + " frame compressed multicast|bytes packets errs drop fifo" + " colls carrier compressed\n"); + + /* + * XXX: data about each interface should go here, but we'll wait to + * see if anybody wants to use it. + */ +} + +/* ARGSUSED */ +static void +lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t 
*uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* ARGSUSED */ +static void +lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ +} + +/* + * lxpr_read_kmsg(): read the contents of the kernel message queue. We + * translate this into the reception of console messages for this lx zone; each + * read copies out a single zone console message, or blocks until the next one + * is produced. + */ + +#define LX_KMSG_PRI "<0>" + +static void +lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf) +{ + ldi_handle_t lh = lxpnp->lxpr_cons_ldih; + mblk_t *mp; + + if (ldi_getmsg(lh, &mp, NULL) == 0) { + /* + * lx procfs doesn't like successive reads to the same file + * descriptor unless we do an explicit rewind each time. + */ + lxpr_uiobuf_seek(uiobuf, 0); + + lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, + mp->b_cont->b_rptr); + + freemsg(mp); + } +} + +/* + * lxpr_read_loadavg(): read the contents of the "loadavg" file. + * + * Just enough for uptime to work + */ +extern int nthread; + +static void +lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + ulong_t avenrun1; + ulong_t avenrun5; + ulong_t avenrun15; + ulong_t avenrun1_cs; + ulong_t avenrun5_cs; + ulong_t avenrun15_cs; + int loadavg[3]; + int *loadbuf; + cpupart_t *cp; + + uint_t nrunnable = 0; + rctl_qty_t nlwps; + + ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG); + + mutex_enter(&cpu_lock); + + /* + * Need to add up values over all CPU partitions. If pools are active, + * only report the values of the zone's partition, which by definition + * includes the current CPU. 
+ */ + if (pool_pset_enabled()) { + psetid_t psetid = zone_pset_get(curproc->p_zone); + + ASSERT(curproc->p_zone != &zone0); + cp = CPU->cpu_part; + + nrunnable = cp->cp_nrunning + cp->cp_nrunnable; + (void) cpupart_get_loadavg(psetid, &loadavg[0], 3); + loadbuf = &loadavg[0]; + + /* + * We'll report the total number of lwps in the zone for the + * "nproc" parameter of /proc/loadavg; good enough for lx. + */ + nlwps = curproc->p_zone->zone_nlwps; + } else { + cp = cp_list_head; + do { + nrunnable += cp->cp_nrunning + cp->cp_nrunnable; + } while ((cp = cp->cp_next) != cp_list_head); + + loadbuf = &avenrun[0]; + + /* + * This will report kernel threads as well as user lwps, but it + * should be good enough for lx consumers. + */ + nlwps = nthread; + } + + mutex_exit(&cpu_lock); + + avenrun1 = loadbuf[0] >> FSHIFT; + avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT; + avenrun5 = loadbuf[1] >> FSHIFT; + avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT; + avenrun15 = loadbuf[2] >> FSHIFT; + avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT; + + lxpr_uiobuf_printf(uiobuf, + "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n", + avenrun1, avenrun1_cs, + avenrun5, avenrun5_cs, + avenrun15, avenrun15_cs, + nrunnable, nlwps, 0); +} + +/* + * lxpr_read_meminfo(): read the contents of the "meminfo" file. 
+ */ +static void +lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + long total_mem = physmem * PAGESIZE; + long free_mem = freemem * PAGESIZE; + long total_swap = k_anoninfo.ani_max * PAGESIZE; + long used_swap = k_anoninfo.ani_phys_resv * PAGESIZE; + + ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO); + + lxpr_uiobuf_printf(uiobuf, + " total: used: free: shared: buffers: cached:\n" + "Mem: %8lu %8lu %8lu %8u %8u %8u\n" + "Swap: %8lu %8lu %8lu\n" + "MemTotal: %8lu kB\n" + "MemFree: %8lu kB\n" + "MemShared: %8u kB\n" + "Buffers: %8u kB\n" + "Cached: %8u kB\n" + "SwapCached:%8u kB\n" + "Active: %8u kB\n" + "Inactive: %8u kB\n" + "HighTotal: %8u kB\n" + "HighFree: %8u kB\n" + "LowTotal: %8u kB\n" + "LowFree: %8u kB\n" + "SwapTotal: %8lu kB\n" + "SwapFree: %8lu kB\n", + total_mem, total_mem - free_mem, free_mem, 0, 0, 0, + total_swap, used_swap, total_swap - used_swap, + btok(total_mem), /* MemTotal */ + btok(free_mem), /* MemFree */ + 0, /* MemShared */ + 0, /* Buffers */ + 0, /* Cached */ + 0, /* SwapCached */ + 0, /* Active */ + 0, /* Inactive */ + 0, /* HighTotal */ + 0, /* HighFree */ + btok(total_mem), /* LowTotal */ + btok(free_mem), /* LowFree */ + btok(total_swap), /* SwapTotal */ + btok(total_swap - used_swap)); /* SwapFree */ +} + +/* + * lxpr_read_mounts(): + */ +/* ARGSUSED */ +static void +lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + struct vfs *vfsp; + struct vfs *vfslist; + zone_t *zone = LXPTOZ(lxpnp); + struct print_data { + refstr_t *vfs_mntpt; + refstr_t *vfs_resource; + uint_t vfs_flag; + int vfs_fstype; + struct print_data *next; + } *print_head = NULL; + struct print_data **print_tail = &print_head; + struct print_data *printp; + + vfs_list_read_lock(); + + if (zone == global_zone) { + vfsp = vfslist = rootvfs; + } else { + vfsp = vfslist = zone->zone_vfslist; + /* + * If the zone has a root entry, it will be the first in + * the list. If it doesn't, we conjure one up. 
+ */ + if (vfslist == NULL || + strcmp(refstr_value(vfsp->vfs_mntpt), + zone->zone_rootpath) != 0) { + struct vfs *tvfsp; + /* + * The root of the zone is not a mount point. The vfs + * we want to report is that of the zone's root vnode. + */ + tvfsp = zone->zone_rootvp->v_vfsp; + + lxpr_uiobuf_printf(uiobuf, + "/ / %s %s 0 0\n", + vfssw[tvfsp->vfs_fstype].vsw_name, + tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); + + } + if (vfslist == NULL) { + vfs_list_unlock(); + return; + } + } + + /* + * Later on we have to do a lookupname, which can end up causing + * another vfs_list_read_lock() to be called. Which can lead to a + * deadlock. To avoid this, we extract the data we need into a local + * list, then we can run this list without holding vfs_list_read_lock() + * We keep the list in the same order as the vfs_list + */ + do { + /* Skip mounts we shouldn't show */ + if (vfsp->vfs_flag & VFS_NOMNTTAB) { + goto nextfs; + } + + printp = kmem_alloc(sizeof (*printp), KM_SLEEP); + refstr_hold(vfsp->vfs_mntpt); + printp->vfs_mntpt = vfsp->vfs_mntpt; + refstr_hold(vfsp->vfs_resource); + printp->vfs_resource = vfsp->vfs_resource; + printp->vfs_flag = vfsp->vfs_flag; + printp->vfs_fstype = vfsp->vfs_fstype; + printp->next = NULL; + + *print_tail = printp; + print_tail = &printp->next; + +nextfs: + vfsp = (zone == global_zone) ? 
+ vfsp->vfs_next : vfsp->vfs_zone_next; + + } while (vfsp != vfslist); + + vfs_list_unlock(); + + /* + * now we can run through what we've extracted without holding + * vfs_list_read_lock() + */ + printp = print_head; + while (printp != NULL) { + struct print_data *printp_next; + const char *resource; + char *mntpt; + struct vnode *vp; + int error; + + mntpt = (char *)refstr_value(printp->vfs_mntpt); + resource = refstr_value(printp->vfs_resource); + + if (mntpt != NULL && mntpt[0] != '\0') + mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); + else + mntpt = "-"; + + error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); + + if (error != 0) + goto nextp; + + if (!(vp->v_flag & VROOT)) { + VN_RELE(vp); + goto nextp; + } + VN_RELE(vp); + + if (resource != NULL && resource[0] != '\0') { + if (resource[0] == '/') { + resource = ZONE_PATH_VISIBLE(resource, zone) ? + ZONE_PATH_TRANSLATE(resource, zone) : + mntpt; + } + } else { + resource = "-"; + } + + lxpr_uiobuf_printf(uiobuf, + "%s %s %s %s 0 0\n", + resource, mntpt, vfssw[printp->vfs_fstype].vsw_name, + printp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); + +nextp: + printp_next = printp->next; + refstr_rele(printp->vfs_mntpt); + refstr_rele(printp->vfs_resource); + kmem_free(printp, sizeof (*printp)); + printp = printp_next; + + } +} + +/* + * lxpr_read_partitions(): + * + * We don't support partitions in a local zone because it requires access to + * physical devices. But we need to fake up enough of the file to show that we + * have no partitions. + */ +/* ARGSUSED */ +static void +lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_printf(uiobuf, + "major minor #blocks name rio rmerge rsect ruse " + "wio wmerge wsect wuse running use aveq\n\n"); +} + +/* + * lxpr_read_version(): read the contents of the "version" file. 
+ */ +/* ARGSUSED */ +static void +lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + lxpr_uiobuf_printf(uiobuf, + "%s version %s (%s version %d.%d.%d) " + "#%s SMP %s\n", + LX_UNAME_SYSNAME, LX_UNAME_RELEASE, +#if defined(__GNUC__) + "gcc", + __GNUC__, + __GNUC_MINOR__, + __GNUC_PATCHLEVEL__, +#else + "Sun C", + __SUNPRO_C / 0x100, + (__SUNPRO_C & 0xff) / 0x10, + __SUNPRO_C & 0xf, +#endif + LX_UNAME_VERSION, + __TIME__ " " __DATE__); +} + + +/* + * lxpr_read_stat(): read the contents of the "stat" file. + * + */ +/* ARGSUSED */ +static void +lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + cpu_t *cp, *cpstart; + int pools_enabled; + ulong_t idle_cum = 0; + ulong_t sys_cum = 0; + ulong_t user_cum = 0; + ulong_t pgpgin_cum = 0; + ulong_t pgpgout_cum = 0; + ulong_t pgswapout_cum = 0; + ulong_t pgswapin_cum = 0; + ulong_t intr_cum = 0; + ulong_t pswitch_cum = 0; + ulong_t forks_cum = 0; + hrtime_t msnsecs[NCMSTATES]; + + ASSERT(lxpnp->lxpr_type == LXPR_STAT); + + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + /* Calculate cumulative stats */ + cp = cpstart = CPU; + do { + int i; + + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. 
+ */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + get_cpu_mstate(cp, msnsecs); + + idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]); + sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); + user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]); + + pgpgin_cum += CPU_STATS(cp, vm.pgpgin); + pgpgout_cum += CPU_STATS(cp, vm.pgpgout); + pgswapin_cum += CPU_STATS(cp, vm.pgswapin); + pgswapout_cum += CPU_STATS(cp, vm.pgswapout); + + for (i = 0; i < PIL_MAX; i++) + intr_cum += CPU_STATS(cp, sys.intr[i]); + + pswitch_cum += CPU_STATS(cp, sys.pswitch); + forks_cum += CPU_STATS(cp, sys.sysfork); + forks_cum += CPU_STATS(cp, sys.sysvfork); + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + lxpr_uiobuf_printf(uiobuf, + "cpu %ld %ld %ld %ld\n", + user_cum, 0, sys_cum, idle_cum); + + /* Do per processor stats */ + do { + ulong_t idle_ticks; + ulong_t sys_ticks; + ulong_t user_ticks; + + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. + */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + get_cpu_mstate(cp, msnsecs); + + idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]); + sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); + user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]); + + lxpr_uiobuf_printf(uiobuf, + "cpu%d %ld %ld %ld %ld\n", + cp->cpu_id, + user_ticks, 0, sys_ticks, idle_ticks); + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + mutex_exit(&cpu_lock); + + lxpr_uiobuf_printf(uiobuf, + "page %lu %lu\n" + "swap %lu %lu\n" + "intr %lu\n" + "ctxt %lu\n" + "btime %lu\n" + "processes %lu\n", + pgpgin_cum, pgpgout_cum, + pgswapin_cum, pgswapout_cum, + intr_cum, + pswitch_cum, + boot_time, + forks_cum); +} + + +/* + * lxpr_read_uptime(): read the contents of the "uptime" file. 
+ * + * format is: "%.2lf, %.2lf",uptime_secs, idle_secs + * Use fixed point arithmetic to get 2 decimal places + */ +/* ARGSUSED */ +static void +lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + cpu_t *cp, *cpstart; + int pools_enabled; + ulong_t idle_cum = 0; + ulong_t cpu_count = 0; + ulong_t idle_s; + ulong_t idle_cs; + ulong_t up_s; + ulong_t up_cs; + + ASSERT(lxpnp->lxpr_type == LXPR_UPTIME); + + /* Calculate cumulative stats */ + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + cp = cpstart = CPU; + do { + /* + * Don't count CPUs that aren't even in the system + * or aren't up yet. + */ + if ((cp->cpu_flags & CPU_EXISTS) == 0) { + continue; + } + + idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); + idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); + cpu_count += 1; + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + mutex_exit(&cpu_lock); + + /* Capture lbolt in case it changes */ + up_cs = lbolt; + up_s = up_cs / hz; + up_cs %= hz; + up_cs *= 100; + up_cs /= hz; + + ASSERT(cpu_count > 0); + idle_cum /= cpu_count; + idle_s = idle_cum / hz; + idle_cs = idle_cum % hz; + idle_cs *= 100; + idle_cs /= hz; + + lxpr_uiobuf_printf(uiobuf, + "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); +} + +static const char *amd_x_edx[] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", + "nx", NULL, "mmxext", NULL, + NULL, NULL, NULL, NULL, + NULL, "lm", "3dnowext", "3dnow" +}; + +static const char *amd_x_ecx[] = { + "lahf_lm", NULL, "svm", NULL, + "altmovcr8" +}; + +static const char *tm_x_edx[] = { + "recovery", "longrun", NULL, "lrti" +}; + +/* + * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 
+ */ +static const char *intc_x_edx[] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "nx", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "lm", NULL, NULL +}; + +static const char *intc_edx[] = { + "fpu", "vme", "de", "pse", + "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", + "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", + NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", + "ht", "tm", "ia64", "pbe" +}; + +/* + * "sse3" on linux is called "pni" (Prescott New Instructions). + */ +static const char *intc_ecx[] = { + "pni", NULL, NULL, "monitor", + "ds_cpl", NULL, NULL, "est", + "tm2", NULL, "cid", NULL, + NULL, "cx16", "xtpr" +}; + +static void +lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + int i; + uint32_t bits; + cpu_t *cp, *cpstart; + int pools_enabled; + const char **fp; + char brandstr[CPU_IDSTRLEN]; + struct cpuid_regs cpr; + int maxeax; + int std_ecx, std_edx, ext_ecx, ext_edx; + + ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO); + + mutex_enter(&cpu_lock); + pools_enabled = pool_pset_enabled(); + + cp = cpstart = CPU; + do { + /* + * This returns the maximum eax value for standard cpuid + * functions in eax. + */ + cpr.cp_eax = 0; + (void) cpuid_insn(cp, &cpr); + maxeax = cpr.cp_eax; + + /* + * Get standard x86 feature flags. + */ + cpr.cp_eax = 1; + (void) cpuid_insn(cp, &cpr); + std_ecx = cpr.cp_ecx; + std_edx = cpr.cp_edx; + + /* + * Now get extended feature flags. 
+ */ + cpr.cp_eax = 0x80000001; + (void) cpuid_insn(cp, &cpr); + ext_ecx = cpr.cp_ecx; + ext_edx = cpr.cp_edx; + + (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN); + + lxpr_uiobuf_printf(uiobuf, + "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n" + "stepping\t: %d\n" + "cpu MHz\t\t: %u.%03u\n", + cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp), + cpuid_getmodel(cp), brandstr, cpuid_getstep(cp), + (uint32_t)(cpu_freq_hz / 1000000), + ((uint32_t)(cpu_freq_hz / 1000)) % 1000); + + lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n", + getl2cacheinfo(cp, NULL, NULL, NULL) / 1024); + + if (x86_feature & X86_HTT) { + /* + * 'siblings' is used for HT-style threads + */ + lxpr_uiobuf_printf(uiobuf, + "physical id\t: %lu\n" + "siblings\t: %u\n", chip_plat_get_chipid(cp), + cpuid_get_ncpu_per_chip(cp)); + } + + /* + * Since we're relatively picky about running on older hardware, + * we can be somewhat cavalier about the answers to these ones. + * + * In fact, given the hardware we support, we just say: + * + * fdiv_bug : no (if we're on a 64-bit kernel) + * hlt_bug : no + * f00f_bug : no + * coma_bug : no + * wp : yes (write protect in supervsr mode) + */ + lxpr_uiobuf_printf(uiobuf, + "fdiv_bug\t: %s\n" + "hlt_bug \t: no\n" + "f00f_bug\t: no\n" + "coma_bug\t: no\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "flags\t\t:", +#if defined(__i386) + fpu_pentium_fdivbug ? "yes" : "no", +#else + "no", +#endif /* __i386 */ + fpu_exists ? "yes" : "no", fpu_exists ? 
"yes" : "no", + maxeax); + + for (bits = std_edx, fp = intc_edx, i = 0; + i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + /* + * name additional features where appropriate + */ + switch (x86_vendor) { + case X86_VENDOR_Intel: + for (bits = ext_edx, fp = intc_x_edx, i = 0; + i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + + case X86_VENDOR_AMD: + for (bits = ext_edx, fp = amd_x_edx, i = 0; + i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + for (bits = ext_ecx, fp = amd_x_ecx, i = 0; + i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + + case X86_VENDOR_TM: + for (bits = ext_edx, fp = tm_x_edx, i = 0; + i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]); + fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + break; + default: + break; + } + + for (bits = std_ecx, fp = intc_ecx, i = 0; + i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++) + if ((bits & (1 << i)) != 0 && *fp) + lxpr_uiobuf_printf(uiobuf, " %s", *fp); + + lxpr_uiobuf_printf(uiobuf, "\n\n"); + + if (pools_enabled) + cp = cp->cpu_next_part; + else + cp = cp->cpu_next; + } while (cp != cpstart); + + mutex_exit(&cpu_lock); +} + +/* ARGSUSED */ +static void +lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) +{ + ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD); + lxpr_uiobuf_seterr(uiobuf, EFAULT); +} + + + +/* + * lxpr_getattr(): Vnode operation for VOP_GETATTR() + */ +static int +lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) +{ + register lxpr_node_t *lxpnp = VTOLXP(vp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + extern uint_t nproc; + int error; + + /* + * Return 
attributes of underlying vnode if ATTR_REAL + * + * but keep fd files with the symlink permissions + */ + if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) { + vnode_t *rvp = lxpnp->lxpr_realvp; + + /* + * withold attribute information to owner or root + */ + if ((error = VOP_ACCESS(rvp, 0, 0, cr)) != 0) { + return (error); + } + + /* + * now its attributes + */ + if ((error = VOP_GETATTR(rvp, vap, flags, cr)) != 0) { + return (error); + } + + /* + * if it's a file in lx /proc/pid/fd/xx then set its + * mode and keep it looking like a symlink + */ + if (type == LXPR_PID_FD_FD) { + vap->va_mode = lxpnp->lxpr_mode; + vap->va_type = vp->v_type; + vap->va_size = 0; + vap->va_nlink = 1; + } + return (0); + } + + /* Default attributes, that may be overridden below */ + bzero(vap, sizeof (*vap)); + vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time; + vap->va_nlink = 1; + vap->va_type = vp->v_type; + vap->va_mode = lxpnp->lxpr_mode; + vap->va_fsid = vp->v_vfsp->vfs_dev; + vap->va_blksize = DEV_BSIZE; + vap->va_uid = lxpnp->lxpr_uid; + vap->va_gid = lxpnp->lxpr_gid; + vap->va_nodeid = lxpnp->lxpr_ino; + + switch (type) { + case LXPR_PROCDIR: + vap->va_nlink = nproc + 2 + PROCDIRFILES; + vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE; + break; + case LXPR_PIDDIR: + vap->va_nlink = PIDDIRFILES; + vap->va_size = PIDDIRFILES * LXPR_SDSIZE; + break; + case LXPR_SELF: + vap->va_uid = crgetruid(curproc->p_cred); + vap->va_gid = crgetrgid(curproc->p_cred); + break; + default: + break; + } + + vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size); + return (0); +} + + +/* + * lxpr_access(): Vnode operation for VOP_ACCESS() + */ +static int +lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr) +{ + lxpr_node_t *lxpnp = VTOLXP(vp); + int shift = 0; + proc_t *tp; + + /* lx /proc is a read only file system */ + if (mode & VWRITE) + return (EROFS); + + /* + * If this is a restricted file, check access permissions. 
+ */ + switch (lxpnp->lxpr_type) { + case LXPR_PIDDIR: + return (0); + case LXPR_PID_CURDIR: + case LXPR_PID_ENV: + case LXPR_PID_EXE: + case LXPR_PID_MAPS: + case LXPR_PID_MEM: + case LXPR_PID_ROOTDIR: + case LXPR_PID_FDDIR: + case LXPR_PID_FD_FD: + if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) + return (ENOENT); + if (tp != curproc && secpolicy_proc_access(cr) != 0 && + priv_proc_cred_perm(cr, tp, NULL, mode) != 0) { + lxpr_unlock(tp); + return (EACCES); + } + lxpr_unlock(tp); + default: + break; + } + + if (lxpnp->lxpr_realvp != NULL) { + /* + * For these we use the underlying vnode's accessibility. + */ + return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr)); + } + + /* + * Access check is based on only + * one of owner, group, public. + * If not owner, then check group. + * If not a member of the group, then + * check public access. + */ + if (crgetuid(cr) != lxpnp->lxpr_uid) { + shift += 3; + if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) + shift += 3; + } + + mode &= ~(lxpnp->lxpr_mode << shift); + + if (mode == 0) + return (0); + + return (EACCES); +} + + + + +/* ARGSUSED */ +static vnode_t * +lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) +{ + return (NULL); +} + + +/* + * lxpr_lookup(): Vnode operation for VOP_LOOKUP() + */ +/* ARGSUSED */ +static int +lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, + int flags, vnode_t *rdir, cred_t *cr) +{ + lxpr_node_t *lxpnp = VTOLXP(dp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + int error; + + ASSERT(dp->v_type == VDIR); + ASSERT(type >= 0 && type < LXPR_NFILES); + + /* + * we should never get here because the lookup + * is done on the realvp for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR); + + /* + * restrict lookup permission to owner or root + */ + if ((error = lxpr_access(dp, VEXEC, 0, cr)) != 0) { + return (error); + } + + /* + * Just return the parent vnode + * if thats where we are trying to go + */ + if (strcmp(comp, 
"..") == 0) { + VN_HOLD(lxpnp->lxpr_parent); + *vpp = lxpnp->lxpr_parent; + return (0); + } + + /* + * Special handling for directory searches + * Note: null component name is synonym for + * current directory being searched. + */ + if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { + VN_HOLD(dp); + *vpp = dp; + return (0); + } + + *vpp = (lxpr_lookup_function[type](dp, comp)); + return ((*vpp == NULL) ? ENOENT : 0); +} + +/* + * Do a sequential search on the given directory table + */ +static vnode_t * +lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, + lxpr_dirent_t *dirtab, int dirtablen) +{ + lxpr_node_t *lxpnp; + int count; + + for (count = 0; count < dirtablen; count++) { + if (strcmp(dirtab[count].d_name, comp) == 0) { + lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + return (dp); + } + } + return (NULL); +} + + +static vnode_t * +lxpr_lookup_piddir(vnode_t *dp, char *comp) +{ + proc_t *p; + + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); + + p = lxpr_lock(VTOLXP(dp)->lxpr_pid); + if (p == NULL) + return (NULL); + + dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); + + lxpr_unlock(p); + + return (dp); +} + + +/* + * Lookup one of the process's open files. 
+ */ +static vnode_t * +lxpr_lookup_fddir(vnode_t *dp, char *comp) +{ + lxpr_node_t *dlxpnp = VTOLXP(dp); + lxpr_node_t *lxpnp; + vnode_t *vp = NULL; + proc_t *p; + file_t *fp; + uint_t fd; + int c; + uf_entry_t *ufp; + uf_info_t *fip; + + ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR); + + /* + * convert the string rendition of the filename + * to a file descriptor + */ + fd = 0; + while ((c = *comp++) != '\0') { + int ofd; + if (c < '0' || c > '9') + return (NULL); + + ofd = fd; + fd = 10*fd + c - '0'; + /* integer overflow */ + if (fd / 10 != ofd) + return (NULL); + } + + /* + * get the proc to work with and lock it + */ + p = lxpr_lock(dlxpnp->lxpr_pid); + if ((p == NULL)) + return (NULL); + + /* + * If the process is a zombie or system process + * it can't have any open files. + */ + if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { + lxpr_unlock(p); + return (NULL); + } + + /* + * get us a fresh node/vnode + */ + lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd); + + /* + * get open file info + */ + fip = (&(p)->p_user.u_finfo); + mutex_enter(&fip->fi_lock); + + /* + * got the fd data so now done with this proc + */ + lxpr_unlock(p); + + if (fd < fip->fi_nfiles) { + UF_ENTER(ufp, fip, fd); + /* + * ensure the fd is still kosher. + * it may have gone between the readdir and + * the lookup + */ + if (fip->fi_list[fd].uf_file == NULL) { + mutex_exit(&fip->fi_lock); + UF_EXIT(ufp); + lxpr_freenode(lxpnp); + return (NULL); + } + + if ((fp = ufp->uf_file) != NULL) + vp = fp->f_vnode; + UF_EXIT(ufp); + } + mutex_exit(&fip->fi_lock); + + if (vp == NULL) { + lxpr_freenode(lxpnp); + return (NULL); + } else { + /* + * Fill in the lxpr_node so future references will + * be able to find the underlying vnode. + * The vnode is held on the realvp.
+ */ + lxpnp->lxpr_realvp = vp; + VN_HOLD(lxpnp->lxpr_realvp); + } + + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + + return (dp); +} + + +/* + * Lookup one of the fixed entries of the net directory (the netdir table). + * Returns NULL if no entry is named comp. + */ +static vnode_t * +lxpr_lookup_netdir(vnode_t *dp, char *comp) +{ + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); + + dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); + + return (dp); +} + + +/* + * Lookup an entry in the root directory of the lx procfs: either a pid + * directory (a name made up entirely of decimal digits) or one of the + * fixed names in the lx_procdir table. Returns NULL if nothing matches. + */ +static vnode_t * +lxpr_lookup_procdir(vnode_t *dp, char *comp) +{ + ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); + + /* + * We know all the names of files & dirs in our + * file system structure except those that are pid names. + * These change as pids are created/deleted etc. + * So just look for a number as the first char to see if we + * are doing a pid lookup. + * + * Don't need to check for "self" as it is implemented as a symlink + */ + if (*comp >= '0' && *comp <= '9') { + pid_t pid = 0; + lxpr_node_t *lxpnp = NULL; + proc_t *p; + int c; + + /* + * The whole name must be a decimal number: reject names + * such as "12abc" and values that overflow, using the same + * checks as the fd name parsing in lxpr_lookup_fddir(). + */ + while ((c = *comp++) != '\0') { + pid_t opid; + + if (c < '0' || c > '9') + return (NULL); + + opid = pid; + pid = 10*pid + c - '0'; + /* integer overflow */ + if (pid / 10 != opid) + return (NULL); + } + + /* + * Can't continue if the process is still loading + * or it doesn't really exist yet (or maybe it just died!)
+ */ + p = lxpr_lock(pid); + if (p == NULL) + return (NULL); + + if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { + lxpr_unlock(p); + return (NULL); + } + + /* + * allocate and fill in a new lx /proc node + */ + lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); + + lxpr_unlock(p); + + dp = LXPTOV(lxpnp); + ASSERT(dp != NULL); + + return (dp); + + } + + /* Lookup fixed names */ + return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); +} + + + + +/* + * lxpr_readdir(): Vnode operation for VOP_READDIR() + */ +/* ARGSUSED */ +static int +lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp) +{ + lxpr_node_t *lxpnp = VTOLXP(dp); + lxpr_nodetype_t type = lxpnp->lxpr_type; + ssize_t uresid; + off_t uoffset; + int error; + + ASSERT(dp->v_type == VDIR); + ASSERT(type >= 0 && type < LXPR_NFILES); + + /* + * we should never get here because the readdir + * is done on the realvp for these nodes + */ + ASSERT(type != LXPR_PID_FD_FD && + type != LXPR_PID_CURDIR && + type != LXPR_PID_ROOTDIR); + + /* + * restrict readdir permission to owner or root + */ + if ((error = lxpr_access(dp, VREAD, 0, cr)) != 0) + return (error); + + uoffset = uiop->uio_offset; + uresid = uiop->uio_resid; + + /* can't do negative reads */ + if (uoffset < 0 || uresid <= 0) + return (EINVAL); + + /* can't read directory entries that don't exist!
*/ + if (uoffset % LXPR_SDSIZE) + return (ENOENT); + + return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp)); +} + + +/* + * readdir handler that unconditionally fails with ENOTDIR. + * NOTE(review): presumably the lxpr_readdir_function[] slot for node types + * that are not directories - confirm against the table definition. + */ +/* ARGSUSED */ +static int +lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + return (ENOTDIR); +} + +/* + * This has the common logic for returning directory entries + * + * Emits "." at offset 0, ".." at offset LXPR_SDSIZE and then one entry per + * slot of dirtab. Directory offsets advance in units of LXPR_SDSIZE, + * independent of the size of the record copied out. If eofp is non-NULL, + * *eofp is set to 1 once the table has been exhausted and to 0 otherwise. + */ +static int +lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp, + lxpr_dirent_t *dirtab, int dirtablen) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + + oresid = uiop->uio_resid; + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* + * Satisfy user request + */ + while ((uresid = uiop->uio_resid) > 0) { + int dirindex; + off_t uoffset; + int reclen; + int error; + + uoffset = uiop->uio_offset; + /* table index; the first two slots are "." and ".." */ + dirindex = (uoffset / LXPR_SDSIZE) - 2; + + if (uoffset == 0) { + + dirent->d_ino = lxpnp->lxpr_ino; + dirent->d_name[0] = '.'; + dirent->d_name[1] = '\0'; + reclen = DIRENT64_RECLEN(1); + + } else if (uoffset == LXPR_SDSIZE) { + + dirent->d_ino = lxpr_parentinode(lxpnp); + dirent->d_name[0] = '.'; + dirent->d_name[1] = '.'; + dirent->d_name[2] = '\0'; + reclen = DIRENT64_RECLEN(2); + + } else if (dirindex < dirtablen) { + int slen = strlen(dirtab[dirindex].d_name); + + dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type, + lxpnp->lxpr_pid, 0); + + ASSERT(slen < LXPNSIZ); + (void) strcpy(dirent->d_name, dirtab[dirindex].d_name); + reclen = DIRENT64_RECLEN(slen); + + } else { + /* Run out of table entries */ + if (eofp) { + *eofp = 1; + } + return (0); + } + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + /* + * if the size of the data to transfer is greater + * than that requested then we can't do it this transfer. + */ + if (reclen > uresid) { + /* + * Error if no entries have been returned yet.
+ */ + if (uresid == oresid) { + return (EINVAL); + } + break; + } + + /* + * uiomove() updates both uiop->uio_resid and + * uiop->uio_offset by the same amount. But we want + * uiop->uio_offset to change in increments + * of LXPR_SDSIZE, which is different from the number of bytes + * being returned to the user. + * So we set uiop->uio_offset separately, ignoring what + * uiomove() does. + */ + if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) { + return (error); + } + + uiop->uio_offset = uoffset + LXPR_SDSIZE; + } + + /* Have run out of space, but could have just done last table entry */ + if (eofp) { + *eofp = + (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0; + } + return (0); +} + + +static int +lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + off_t uoffset; + zoneid_t zoneid; + pid_t pid; + int error; + int ceof; + + ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR); + + oresid = uiop->uio_resid; + zoneid = LXPTOZ(lxpnp)->zone_id; + + /* + * We return directory entries in the order: + * "." and ".." then the unique lx procfs files, then the + * directories corresponding to the running processes. + * + * This is a good order because it allows us to more easily + * keep track of where we are betwen calls to getdents(). + * If the number of processes changes between calls then we + * can't lose track of where we are in the lx procfs files. 
+ */ + + /* Do the fixed entries */ + error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir, + PROCDIRFILES); + + /* Finished if we got an error or if we couldn't do all the table */ + if (error != 0 || ceof == 0) + return (error); + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* Do the process entries */ + while ((uresid = uiop->uio_resid) > 0) { + proc_t *p; + int len; + int reclen; + int i; + + uoffset = uiop->uio_offset; + + /* + * Stop when entire proc table has been examined. + */ + i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES; + if (i >= v.v_proc) { + /* Run out of table entries */ + if (eofp) { + *eofp = 1; + } + return (0); + } + mutex_enter(&pidlock); + + /* + * Skip indices for which there is no pid_entry, PIDs for + * which there is no corresponding process, the zched process, + * a PID of 0, and anything the security policy doesn't allow + * us to look at. + */ + if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL || + p->p_pid == curproc->p_zone->zone_zsched->p_pid || + p->p_pid == 0 || + secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { + mutex_exit(&pidlock); + goto next; + } + mutex_exit(&pidlock); + + /* + * Convert pid to the Linux default of 1 if we're the zone's + * init process, otherwise use the value from the proc + * structure + */ + pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ? + p->p_pid : 1); + + /* + * If this /proc was mounted in the global zone, view + * all procs; otherwise, only view zone member procs. + */ + if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) { + goto next; + } + + ASSERT(p->p_stat != 0); + + dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0); + len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid); + ASSERT(len < LXPNSIZ); + reclen = DIRENT64_RECLEN(len); + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + /* + * if the size of the data to transfer is greater + * that that requested then we can't do it this transfer. 
+ */ + if (reclen > uresid) { + /* + * Error if no entries have been returned yet. + */ + if (uresid == oresid) + return (EINVAL); + break; + } + + /* + * uiomove() updates both uiop->uio_resid and + * uiop->uio_offset by the same amount. But we want + * uiop->uio_offset to change in increments + * of LXPR_SDSIZE, which is different from the number of bytes + * being returned to the user. + * So we set uiop->uio_offset separately, in the + * increment of this for loop, ignoring what uiomove() does. + */ + if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) + return (error); + +next: + uiop->uio_offset = uoffset + LXPR_SDSIZE; + } + + if (eofp) + *eofp = + (uiop->uio_offset >= + ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0; + + return (0); +} + + +static int +lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + proc_t *p; + + ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR); + + /* can't read its contents if it died */ + mutex_enter(&pidlock); + + p = prfind((lxpnp->lxpr_pid == 1) ? 
+ curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid); + + if (p == NULL || p->p_stat == SIDL) { + mutex_exit(&pidlock); + return (ENOENT); + } + mutex_exit(&pidlock); + + return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES)); +} + + +static int +lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + ASSERT(lxpnp->lxpr_type == LXPR_NETDIR); + return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); +} + + +static int +lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) +{ + /* bp holds one dirent64 structure */ + longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; + dirent64_t *dirent = (dirent64_t *)bp; + ssize_t oresid; /* save a copy for testing later */ + ssize_t uresid; + off_t uoffset; + int error; + int ceof; + proc_t *p; + int fddirsize; + uf_info_t *fip; + + + ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR); + + oresid = uiop->uio_resid; + + /* can't read its contents if it died */ + p = lxpr_lock(lxpnp->lxpr_pid); + if (p == NULL) + return (ENOENT); + + /* Get open file info */ + fip = (&(p)->p_user.u_finfo); + + if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) + fddirsize = 0; + else + fddirsize = fip->fi_nfiles; + + mutex_enter(&fip->fi_lock); + lxpr_unlock(p); + + /* Do the fixed entries (in this case just "." & "..") */ + error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); + + /* Finished if we got an error or if we couldn't do all the table */ + if (error != 0 || ceof == 0) + return (error); + + /* clear out the dirent buffer */ + bzero(bp, sizeof (bp)); + + /* + * Loop until user's request is satisfied or until + * all file descriptors have been examined. 
+ */ + for (; (uresid = uiop->uio_resid) > 0; + uiop->uio_offset = uoffset + LXPR_SDSIZE) { + int reclen; + int fd; + int len; + + uoffset = uiop->uio_offset; + + /* + * Stop at the end of the fd list + */ + fd = (uoffset / LXPR_SDSIZE) - 2; + if (fd >= fddirsize) { + if (eofp) { + *eofp = 1; + } + goto out; + } + + if (fip->fi_list[fd].uf_file == NULL) + continue; + + dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); + len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); + ASSERT(len < LXPNSIZ); + reclen = DIRENT64_RECLEN(len); + + dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); + dirent->d_reclen = (ushort_t)reclen; + + if (reclen > uresid) { + /* + * Error if no entries have been returned yet. + */ + if (uresid == oresid) + error = EINVAL; + goto out; + } + + if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) + goto out; + } + + if (eofp) + *eofp = + (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0; + +out: + mutex_exit(&fip->fi_lock); + return (error); +} + + +/* + * lxpr_readlink(): Vnode operation for VOP_READLINK() + */ +static int +lxpr_readlink(vnode_t *vp, uio_t *uiop) +{ + char bp[MAXPATHLEN + 1]; + size_t buflen = sizeof (bp); + lxpr_node_t *lxpnp = VTOLXP(vp); + pid_t pid; + int error = 0; + + /* must be a symbolic link file */ + if (vp->v_type != VLNK) + return (EINVAL); + + /* + * Try to produce a symlink name for anything that's really a regular + * file or directory (but not for anything else) + */ + if (lxpnp->lxpr_realvp != NULL && (lxpnp->lxpr_realvp->v_type == VDIR || + lxpnp->lxpr_realvp->v_type == VREG)) { + if ((error = lxpr_access(vp, VREAD, 0, CRED())) != 0) + return (error); + error = vnodetopath(NULL, lxpnp->lxpr_realvp, bp, buflen, + CRED()); + if (error != 0) + return (error); + } else { + switch (lxpnp->lxpr_type) { + case LXPR_SELF: + /* + * Don't need to check result as every possible int + * will fit within MAXPATHLEN bytes + */ + + /* + * Convert pid to the Linux default of 1 if we're the + * 
zone's init process + */ + pid = ((curproc->p_pid != + curproc->p_zone->zone_proc_initpid) + ? curproc->p_pid : 1); + + (void) snprintf(bp, buflen, "%d", pid); + break; + default: + /* + * Need to return error so that nothing thinks + * that the symlink is empty and hence "." + */ + return (EINVAL); + } + } + + /* copy the link data to user space */ + return (uiomove(bp, strlen(bp), UIO_READ, uiop)); +} + + +/* + * lxpr_inactive(): Vnode operation for VOP_INACTIVE() + * Vnode is no longer referenced, deallocate the file + * and all its resources. + */ +/* ARGSUSED */ +static void +lxpr_inactive(vnode_t *vp, cred_t *cr) +{ + lxpr_freenode(VTOLXP(vp)); +} + + +/* + * lxpr_sync(): Vnode operation for VOP_SYNC() + */ +static int +lxpr_sync() +{ + /* + * nothing to sync but this + * function must never fail + */ + return (0); +} + + +/* + * lxpr_cmp(): Vnode operation for VOP_CMP() + * + * Each vnode is first replaced by its underlying real vnode for as long as + * it is an lx procfs vnode with lxpr_realvp set. If either result is still + * an lx procfs vnode the two are compared by pointer; otherwise the + * comparison is handed to VOP_CMP() on the real vnodes. + */ +static int +lxpr_cmp(vnode_t *vp1, vnode_t *vp2) +{ + vnode_t *rvp; + + while (vn_matchops(vp1, lxpr_vnodeops) && + (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) + vp1 = rvp; + while (vn_matchops(vp2, lxpr_vnodeops) && + (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) + vp2 = rvp; + if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops)) + return (vp1 == vp2); + return (VOP_CMP(vp1, vp2)); +} + + +/* + * lxpr_realvp(): Vnode operation for VOP_REALVP() + * + * Stores in *vpp the node's lxpr_realvp (or, when VOP_REALVP() on that + * vnode succeeds, the vnode it reports); if the node has no lxpr_realvp, + * vp itself is stored. Always returns 0. + */ +static int +lxpr_realvp(vnode_t *vp, vnode_t **vpp) +{ + vnode_t *rvp; + + if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) { + vp = rvp; + if (VOP_REALVP(vp, &rvp) == 0) + vp = rvp; + } + + *vpp = vp; + return (0); +} diff --git a/usr/src/uts/common/brand/lx/sys/ldlinux.h b/usr/src/uts/common/brand/lx/sys/ldlinux.h new file mode 100644 index 0000000000..b259c05d97 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/ldlinux.h @@ -0,0 +1,117 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LDLINUX_H +#define _SYS_LDLINUX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The ldlinux streams module is only intended for use in lx branded zones. + * This streams module implements the following ioctls: + * TIOCSETLD and TIOCGETLD + * + * These ioctls are special ioctls supported only by the ldlinux streams + * module and invoked only by the lx brand emulation library. These ioctls + * do not exist on native Linux systems. + * + * The TIOCSETLD ioctl is used when emulating the following Linux ioctls: + * TCSETS/TCSETSW/TCSETSF + * TCSETA/TCSETAW/TCSETAF + * + * The TIOCGETLD ioctl is used when emulating the following Linux ioctls: + * TCGETS/TCGETA + * + * This module is needed to emulate these ioctls because the following arrays: + * termio.c_cc + * termios.c_cc + * which are parameters for the following ioctls: + * TCSETS/TCSETSW/TCSETSF + * TCSETA/TCSETAW/TCSETAF + * TCGETS/TCGETA + * + * are defined differently on Solaris and Linux. + * + * According to the termio(7I) man page on Solaris the following is true of + * the members of the c_cc array: + * The VMIN element is the same element as the VEOF element. 
+ * The VTIME element is the same element as the VEOL element. + * + * But on Linux the termios(3) man page states: + * These symbolic subscript values are all different, except that + * VTIME, VMIN may have the same value as VEOL, VEOF, respectively. + * + * While the man page indicates that these values may be the same, empirical + * tests show them to be different. Since these values are different on + * Linux systems it's possible that applications could set the members of + * the c_cc array to different values and then later expect to be able to + * read back those same separate values. The ldlinux module exists to provide + * a per-stream storage area where the lx_brand emulation library can save + * these values. The values are set and retrieved via the TIOCSETLD and + * TIOCGETLD ioctls respectively. + */ + +#include <sys/termios.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define LDLINUX_MOD "ldlinux" + +#ifdef _KERNEL + +/* + * LDLINUX_MODID - This should be a unique number associated with + * this particular module. Unfortunately there is no authority responsible + * for administering this name space, hence there's no real guarantee that + * whatever number we choose will be unique. Luckily, this constant + * is not really used anywhere by the system. It is used by some + * kernel subsystems to check for the presence of certain streams + * modules with known id values. Since no other kernel subsystem + * checks for the presence of this module we'll just set the id to 0.
+ */ +#define LDLINUX_MODID 0 + +/* + * Storage for the VEOF/VEOL/VMIN/VTIME c_cc values, which are kept + * separately for the reasons given in the block comment at the top of + * this file. + */ +struct ldlinux { + int state; /* state information */ + /* Linux expects the next four c_cc values */ + /* to be distinct, whereas solaris (legally) */ + /* overlaps their storage */ + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +/* NOTE(review): presumably a flag bit for ldlinux.state - confirm in the module source */ +#define ISPTSTTY 0x01 + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LDLINUX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h new file mode 100644 index 0000000000..cbb3431c4b --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h @@ -0,0 +1,130 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifndef _LX_AUDIO_H +#define _LX_AUDIO_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zone.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * name for this driver + */ +#define LX_AUDIO_DRV "lx_audio" + +/* + * names for the minor nodes this driver exports + */ +#define LXA_MINORNAME_DEVCTL "lx_devctl" +#define LXA_MINORNAME_DSP "lx_dsp" +#define LXA_MINORNAME_MIXER "lx_mixer" + +/* + * minor numbers for the minor nodes this driver exports + */ +#define LXA_MINORNUM_DEVCTL 0 +#define LXA_MINORNUM_DSP 1 +#define LXA_MINORNUM_MIXER 2 +#define LXA_MINORNUM_COUNT 3 + +/* + * driver ioctls + * + * note that we're layering on top of solaris audio devices so we want + * to make sure that our ioctl namespace doesn't conflict with theirs. + * looking in sys/audioio.h and sys/mixer.h we see that they seem to + * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a'. + */ + +/* + * administrative ioctls. + * these ioctls are only supported on the DEVCTL minor node + */ +#define LXA_IOC_ZONE_REG (_IOR('a', 0, lxa_zone_reg_t)) +#define LXA_IOC_ZONE_UNREG (_IOR('a', 1, lxa_zone_reg_t)) + + +/* + * audio and mixer device ioctls + * these ioctls are supported on DSP and MIXER minor nodes. + */ +#define LXA_IOC_GETMINORNUM (_IOR('a', 20, int)) + +/* + * audio device ioctls. + * these ioctls are supported on DSP minor nodes. + */ +#define LXA_IOC_MMAP_OUTPUT (_IOR('a', 41, int)) +#define LXA_IOC_MMAP_PTR (_IOR('a', 42, int)) +#define LXA_IOC_GET_FRAG_INFO (_IOR('a', 43, lxa_frag_info_t)) +#define LXA_IOC_SET_FRAG_INFO (_IOR('a', 44, lxa_frag_info_t)) + +/* + * mixer device ioctls. + * these ioctls are supported on MIXER minor nodes.
+ */ +#define LXA_IOC_MIXER_GET_VOL (_IOR('a', 60, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_VOL (_IOR('a', 61, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_GET_MIC (_IOR('a', 62, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_MIC (_IOR('a', 63, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_GET_PCM (_IOR('a', 64, lxa_mixer_levels_t)) +#define LXA_IOC_MIXER_SET_PCM (_IOR('a', 65, lxa_mixer_levels_t)) + +/* command structure for LXA_IOC_ZONE_REG and LXA_IOC_ZONE_UNREG */ +#define LXA_INTSTRLEN 32 +typedef struct lxa_zone_reg { + char lxa_zr_zone_name[ZONENAME_MAX]; + char lxa_zr_inputdev[LXA_INTSTRLEN]; + char lxa_zr_outputdev[LXA_INTSTRLEN]; +} lxa_zone_reg_t; + +/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */ +typedef struct lxa_frag_info { + int lxa_fi_size; + int lxa_fi_cnt; +} lxa_frag_info_t; + +/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */ +typedef struct lxa_mixer_levels { + int lxa_ml_gain; + int lxa_ml_balance; +} lxa_mixer_levels_t; + +/* + * verify that a solaris mixer level structure has valid values + * + * x is a pointer to an lxa_mixer_levels_t. The AUDIO_* limits are not + * defined by this header, which only includes sys/zone.h; whoever uses + * this macro must include the header that provides them. + * NOTE(review): presumably sys/audioio.h - confirm. + */ +#define LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \ + ((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \ + ((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \ + ((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE)) + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUDIO_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h new file mode 100644 index 0000000000..4436226deb --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h @@ -0,0 +1,334 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_AUTOFS_H +#define _LX_AUTOFS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * The lx_autofs filesystem exists to emulate the Linux autofs filesystem + * and provide support for the Linux "automount" automounter. + * + * + * + * +++ Linux automounter background. + * + * Linux has two automounters: "amd" and "automount" + * + * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem + * at an automount point, and it acts as the NFS server for the mount. When + * an access is done to that NFS filesystem, the access is redirected by the + * kernel to the "amd" process via rpc. "amd" then looks up any information + * required to resolve the requests, mounts real NFS filesystems if + * necessary, and returns. "amd" has its own strange configuration + * mechanism that doesn't seem to be very compatible with Solaris's network + * based automounter map support. + * + * 2) "automount" is the other Linux automounter. It utilizes a kernel + * filesystem (autofs) to provide its functionality. Basically, it mounts + * the autofs filesystem at any automounter controlled mount point. This + * filesystem then intercepts and redirects lookup operations (and only + * lookup ops) to the userland automounter process via a pipe. (The + * pipe to the automounter is established via mount options when the autofs + * filesystem is mounted.)
When the automounter receives a request via this + * pipe, it does lookups to whatever backing store it's configured to use, + * does mkdir operations on the autofs filesystem, mounts remote NFS + * filesystems on any leaf directories it just created, and signals the + * autofs filesystem via an ioctl to let it know that the lookup can + * continue. + * + * + * + * +++ Linux autofs (and automount daemon) notes + * + * Since we're mimicking the behavior of the Linux autofs filesystem it's + * important to document some of its observed behavior here since there's + * no doubt that in the future this behavior will change. These comments + * apply to the behavior of the automounter as observed on a system + * running Linux v2.4.21 (autofs is bundled with the Linux kernel). + * + * A) Autofs allows root owned, non-automounter processes to create + * directories in the autofs filesystem. The autofs filesystem treats the + * automounter's process group as special, but it doesn't prevent root + * processes outside of the automounter's process group from creating new + * directories in the autofs filesystem. + * + * B) Autofs doesn't allow creation of any non-directory entries in the + * autofs filesystem. No entity can create files (e.g. /bin/touch or + * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within + * the autofs filesystem are directories. + * + * C) Autofs only intercepts vop lookup operations. Notably, it does _not_ + * intercept and re-direct vop readdir operations. This means that the + * observed behavior of the Linux automounter can be considerably different + * from that of the Solaris automounter. Specifically, on Solaris if an autofs + * mount point is mounted _without_ the -nobrowse option then if a user does + * an ls operation (which translates into a vop readdir operation) then the + * automounter will intercept that operation and list all the possible + * directories and mount points without actually mounting any filesystems.
+ * Essentially, all automounter managed mount points on Linux will behave + * like "-nobrowse" mount points on Solaris. Here's an example to + * illustrate this. If /ws was mounted on Solaris without the -nobrowse + * option and an auto_ws yp map was set up as the backing store for this + * mount point, then an "ls /ws" would list all the keys in the map as + * valid directories, but an "ls /ws" on Linux would list an empty + * directory. + * + * D) NFS mounts are performed by the automount process. When the automount + * process gets a redirected lookup request, it determines _all_ the + * possible remote mount points for that request, creates directory paths + * via mkdir, and mounts the remote filesystems on the newly created paths. + * So for example, if a machine called mcescher exported /var/crash and + * /var/core, an "ls /net/mcescher" would result in the following actions + * being done by the automounter: + * mkdir /net/mcescher + * mkdir /net/mcescher/var + * mkdir /net/mcescher/var/crash + * mkdir /net/mcescher/var/core + * mount mcescher:/var/crash /net/mcescher/var/crash + * mount mcescher:/var/core /net/mcescher/var/core + * Once the automounter completed the work above it would signal the autofs + * filesystem (via an ioctl) that the lookup could continue. + * + * E.1) Autofs only redirects vop lookup operations for path entries that + * don't already exist in the autofs filesystem. So for the example above, + * an initial (after the start of the automounter) "ls /net/mcescher" would + * result in a request to the automounter. A subsequent "ls /net/mcescher" + * would not result in a request to the automounter. Even if + * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted + * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher" + * would not result in a new request to the automounter. + * + * E.2) Autofs lookup requests that are sent to the automounter only include + * the root directory path component.
So for example, after starting up + * the automounter if a user were to do a "ls /net/mcescher/var/crash", the + * lookup request actually sent to the automounter would just be for + * "mcescher". (The same request as if the user had done "ls /net/mcescher".) + * + * E.3) The two statements above aren't entirely true. The Linux + * autofs filesystem will also redirect lookup operations for leaf + * directories that don't have a filesystem mounted on them. Using the + * example above, if a user did a "ls /net/mcescher", then manually + * unmounted /net/mcescher/var/crash, and then did an "ls + * /net/mcescher/var/crash", this would result in a request for + * "mcescher/var/crash" being sent to the automounter. The strange thing + * (a Linux bug perhaps) is that the automounter won't do anything with this + * request and the lookup will fail. + * + * F) The autofs filesystem communication protocol (what ioctls it supports + * and what data it passes to the automount process) is versioned. The + * source for the userland automount daemon (I looked at version v3.1.7) + * seemed to support two versions of the Linux kernel autofs implementation. + * Both versions supported communication with a pipe and the format of the + * structure passed via this pipe was the same. The difference between the + * two versions was in the functionality supported. (The v3 version has + * additional ioctls to support automount timeouts.) + * + * + * + * +++ lx_autofs notes + * + * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the + * Linux autofs filesystem with the following exceptions: + * + * 1.1) We don't bother to implement the E.3 functionality listed above + * since it doesn't appear to be of any use. + * + * 1.2) We only implement v2 of the automounter protocol since + * implementing v3 would take a _lot_ more work. If this proves to be a + * problem we can re-visit this decision later.
(More details about v3 + * support are included in comments below.) + * + * 2) In general, the approach taken for lx_autofs is to keep it as simple + * as possible and to minimize its memory usage. To do this all information + * about the contents of the lx_autofs filesystem is mirrored in the + * underlying filesystem that lx_autofs is mounted on and most vop operations + * are simply passed on to this underlying filesystem. This means we don't + * have to implement most of the complex operations that a full filesystem + * normally has to implement. It also means that most of our filesystem state + * (wrt the contents of the filesystem) doesn't actually have to be stored + * in memory; we can simply go to the underlying filesystem to get it when + * it's requested. For the purposes of discussion, we'll call the underlying + * filesystem the "backing store." + * + * The backing store is actually a directory called ".lx_afs" which is created in + * the directory where the lx_autofs filesystem is mounted. When the lx_autofs + * filesystem is unmounted this backing store directory is deleted. If this + * directory exists at mount time (perhaps the system crashed while a previous + * lx_autofs instance was mounted at the same location) it will be deleted. + * There are a few implications of using a backing store worth mentioning. + * + * 2.1) lx_autofs can't be mounted on a read only filesystem. If this + * proves to be a problem we can probably move the location of the + * backing store. + * + * 2.2) If the backing store filesystem runs out of space then the + * automounter process won't be able to create more directories and mount + * new filesystems. Of course, strange failures usually happen when + * filesystems run out of space. + * + * 3) Why aren't we using gfs? gfs has two different usage models. + * + * 3.1) I'm my own filesystem but I'm using gfs to help with managing + * readdir operations.
+ * + * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes + * + * We're not using the 3.1 interfaces because we don't implement readdir + * ourselves. We pass all readdir operations onto the backing store + * filesystem and utilize its readdir implementation. + * + * We're not using the 3.2 interfaces because they are really designed for + * in memory filesystems where all of the filesystem state is stored in + * memory. They don't lend themselves to filesystems where part of the + * state is in memory and part of the state is on disk. + * + * For more information on gfs take a look at the block comments in the + * top of gfs.c + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Note that the name of the actual Solaris filesystem is lx_afs and not + * lx_autofs. This is because filesystem names are stupidly limited to 8 + * characters. + */ +#define LX_AUTOFS_NAME "lx_afs" + +/* + * Mount options supported. + */ +#define LX_MNTOPT_FD "fd" +#define LX_MNTOPT_PGRP "pgrp" +#define LX_MNTOPT_MINPROTO "minproto" +#define LX_MNTOPT_MAXPROTO "maxproto" + +/* Version of the Linux kernel automount protocol we support. */ +#define LX_AUTOFS_PROTO_VERSION 2 + +/* + * Command structure sent to automount process from lx_autofs via a pipe. + * This structure is the same for v2 and v3 of the automount protocol + * (the communication pipe is established at mount time). + */ +typedef struct lx_autofs_pkt { + int lap_protover; /* protocol version number */ + int lap_constant; /* always set to 0 */ + int lap_id; /* every pkt must have a unique id */ + int lap_name_len; /* don't include newline or NULL */ + char lap_name[256]; /* path component to lookup */ +} lx_autofs_pkt_t; + +/* + * Ioctls supported (v2 protocol). + */ +#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */ +#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */ +#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */ + +/* + * Ioctls not supported (v3 protocol).
+ * + * Initially we're only going to support v2 of the Linux kernel automount + * protocol. This means that we don't support the following ioctls. + * + * 1) The protocol version ioctl (by not supporting it the automounter + * will assume version 2). + * + * 2) Automounter timeout ioctls. For v3 and later the automounter can + * be started with a timeout option. It will notify the filesystem of + * this timeout and, if any automounter filesystem root directory entry + * is not in use, the filesystem will notify the automounter via the LX_AUTOFS_IOC_EXPIRE + * ioctl. For example, if the timeout is 60 seconds, the Linux + * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for + * timeouts more often than that. (v3.1.7 of the automount daemon would + * perform this ioctl every <timeout>/4 seconds.) The autofs + * filesystem will then + * report top level directories that aren't in use to the automounter + * via this ioctl. If /net was managed by the automounter and + * there were the following mount points: + * /net/jurassic/var/crash + * /net/mcescher/var/crash + * and no one was looking at any crash dumps on mcescher but someone + * was analyzing a crash dump on jurassic, then after <timeout> seconds + * had passed the autofs filesystem would let the automounter know that + * "mcescher" could be unmounted. (Note the granularity of notification + * is directories in the root of the autofs filesystem.) Here are two + * ideas for how this functionality could be implemented on Solaris: + * + * 2.1) The easy incomplete way. Don't do any in-use detection. Simply + * tell the automounter it can try to unmount the filesystem every time + * the specified timeout passes. If the filesystem is in use then the + * unmount will fail. This would break down for remote hosts with multiple + * mounts.
For example, if the automounter had mounted the following + * filesystems: + * /net/jurassic/var/crash + * /net/jurassic/var/core + * and the user was looking at a core file, and the timeout expired, the + * automounter would receive notification to unmount "jurassic". Then + * it would unmount crash (which would succeed) and then try to unmount + * core (which would fail). After that (since the automounter only + * performs mounts for failed lookups in the root autofs directory) + * future access to /net/jurassic/var/crash would result in access + * to an empty autofs directory. We might be able to work around + * this by caching which root autofs directories we've timed out, + * then any access to paths that contain those directories could be + * stalled and we could resend another request to the automounter. + * This could work if the automounter ignores mount failures. + * + * 2.2) The hard correct way. The real difficulty here is detecting + * files in use on other filesystems (say NFS) that have been mounted + * on top of autofs. (Detecting in use autofs vnodes should be easy.) + * To do this we would probably have to create a new brand op to intercept + * mount/umount filesystem operations. Then using this entry point we + * could detect mounts of other filesystems on top of lx_autofs. When + * a successful mount finishes we would use the FEM (file event + * monitoring) framework to push a module onto that filesystem and + * intercept VOP operations that allocate/free vnodes in that filesystem. + * (We would also then have to track mount operations on top of that + * filesystem, etc.) This would allow us to properly detect any + * usage of subdirectories of an autofs directory.
+ */ +#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */ +#define LX_AUTOFS_IOC_EXPIRE 0x81109365 /* arg: lx_autofs_expire * */ +#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */ + +typedef struct lx_autofs_expire { + int lap_protover; /* protocol version number */ + int lap_constant; /* always set to 1 */ + int lap_name_len; /* don't include newline or NULL */ + char lap_name[256]; /* path component that has timed out */ +} lx_autofs_expire_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUTOFS_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h new file mode 100644 index 0000000000..9c5517b8d5 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms.
+ */ + +#ifndef _LX_AUTOFS_IMPL_H +#define _LX_AUTOFS_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/file.h> +#include <sys/id_space.h> +#include <sys/modhash.h> +#include <sys/vnode.h> + +#include <sys/lx_autofs.h> + +/* + * Space key. + * Used to persist data across lx_autofs filesystem module unloads. + */ +#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev" + +/* + * Name of the backing store directory. + */ +#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME + +#define LX_AUTOFS_VFS_ID_HASH_SIZE 15 +#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15 +#define LX_AUTOFS_VFS_VN_HASH_SIZE 15 + +/* + * VFS data object. + */ +typedef struct lx_autofs_vfs { + /* Info about the underlying filesystem and backing store. */ + vnode_t *lav_mvp; + char *lav_bs_name; + vnode_t *lav_bs_vp; + + /* Info about the automounter process managing this filesystem. */ + int lav_fd; + pid_t lav_pgrp; + file_t *lav_fifo_wr; + file_t *lav_fifo_rd; + + /* Each automount request needs a unique id. */ + id_space_t *lav_ids; + + /* All remaining structure members are protected by lav_lock. */ + kmutex_t lav_lock; + + /* Hashes to keep track of outstanding automounter requests. */ + mod_hash_t *lav_path_hash; + mod_hash_t *lav_id_hash; + + /* We need to keep track of all our vnodes. */ + vnode_t *lav_root; + mod_hash_t *lav_vn_hash; +} lx_autofs_vfs_t; + +/* + * Structure to keep track of requests sent to the automounter. + */ +typedef struct lx_autofs_lookup_req { + /* Packet that gets sent to the automounter. */ + lx_autofs_pkt_t lalr_pkt; + + /* Reference count. Always updated atomically. */ + uint_t lalr_ref; + + /* + * Fields to keep track of and sync threads waiting on a lookup. + * Fields are protected by lalr_lock. + */ + kmutex_t lalr_lock; + kcondvar_t lalr_cv; + int lalr_complete; +} lx_autofs_lookup_req_t; + +/* + * Generic stack structure.
+ */ +typedef struct stack_elem { + list_node_t se_list; + caddr_t se_ptr1; + caddr_t se_ptr2; + caddr_t se_ptr3; +} stack_elem_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_AUTOFS_IMPL_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h new file mode 100644 index 0000000000..4cbcda48bf --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -0,0 +1,210 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _LX_BRAND_H +#define _LX_BRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifndef _ASM +#include <sys/types.h> +#include <sys/cpuvar.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_BRANDNAME "lx" + +/* + * Brand uname info + */ +#define LX_UNAME_SYSNAME "Linux" +#define LX_UNAME_RELEASE "2.4.21" +#define LX_UNAME_VERSION "BrandZ fake linux" +#define LX_UNAME_MACHINE "i686" + +#define LX_LINKER_NAME "ld-linux.so.2" +#define LX_LINKER "/lib/" LX_LINKER_NAME +#define LX_LIBC_NAME "libc.so.6" +#define LIB_PATH "/native/usr/lib/" +#define LX_LIB "lx_brand.so.1" +#define LX_LIB_PATH LIB_PATH LX_LIB + +#define LX_NSYSCALLS 270 + +/* + * brand(2) subcommands + * + * Everything >= 128 is a brand-specific subcommand. + * 192 to 462 are reserved for system calls, although most of that space is + * unused. + */ +#define B_LPID_TO_SPAIR 128 +#define B_SYSENTRY 129 +#define B_SYSRETURN 130 +#define B_PTRACE_SYSCALL 131 +#define B_SET_AFFINITY_MASK 132 +#define B_GET_AFFINITY_MASK 133 + +#define B_EMULATE_SYSCALL 192 + +#define LX_VERSION_1 1 +#define LX_VERSION LX_VERSION_1 + +#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS + +#ifndef _ASM + +typedef struct lx_brand_registration { + uint_t lxbr_version; /* version number */ + void *lxbr_handler; /* base address of handler */ + void *lxbr_tracehandler; /* base address of trace handler */ + void *lxbr_traceflag; /* address of trace flag */ +} lx_brand_registration_t; + +#ifdef _SYSCALL32 +typedef struct lx_brand_registration32 { + uint32_t lxbr_version; /* version number */ + caddr32_t lxbr_handler; /* base address of handler */ + caddr32_t lxbr_tracehandler; /* base address of trace handler */ + caddr32_t lxbr_traceflag; /* address of trace flag */ +} lx_brand_registration32_t; +#endif + +typedef struct lx_regs { + long lxr_gs; + long lxr_edi; + long lxr_esi; + long lxr_ebp; + long lxr_esp; + long lxr_ebx; + long lxr_edx; + long lxr_ecx; + long lxr_eax; + long lxr_eip; + + long 
lxr_orig_eax; +} lx_regs_t; + +#endif /* _ASM */ + +/* + * GDT usage + */ +#define GDT_TLSMIN (GDT_BRANDMIN) +#define GDT_TLSMAX (GDT_TLSMIN + 2) +#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN) + +#ifndef _ASM + +/* + * Stores information needed by the lx linker to launch the main + * lx executable. + */ +typedef struct lx_elf_data { + int ed_phdr; + int ed_phent; + int ed_phnum; + int ed_entry; + int ed_base; + int ed_ldentry; +} lx_elf_data_t; + +#ifdef _KERNEL + +typedef struct lx_proc_data { + uintptr_t l_handler; /* address of user-space handler */ + uintptr_t l_tracehandler; /* address of user-space traced handler */ + uintptr_t l_traceflag; /* address of 32-bit tracing flag */ + void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */ + pid_t l_ppid; /* pid of originating parent proc */ + uint64_t l_ptrace; /* process being observed with ptrace */ + lx_elf_data_t l_elf_data; /* ELF data for linux executable */ +} lx_proc_data_t; + +#ifdef __amd64 +typedef uint64_t lx_affmask_t; /* Tolerates NCPU up to 64 */ +#else +typedef uint32_t lx_affmask_t; /* Tolerates NCPU up to 32 */ +#endif /* __amd64 */ + +/* + * lx-specific data in the klwp_t + */ +typedef struct lx_lwp_data { + uint_t br_lwp_flags; /* misc. 
flags */ + klwp_t *br_lwp; /* back pointer to container lwp */ + int br_signal; /* signal to send to parent when */ + /* clone()'ed child terminates */ + int br_exitwhy; /* reason for thread (process) exit */ + int br_exitwhat; /* exit code / killing signal */ + lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */ + struct user_desc br_tls[LX_TLSNUM]; + /* descriptors used by libc for TLS */ + pid_t br_pid; /* converted pid for this thread */ + pid_t br_tgid; /* thread group ID for this thread */ + pid_t br_ppid; /* parent pid for this thread */ + id_t br_ptid; /* parent tid for this thread */ + void *br_clear_ctidp; /* clone thread id ptr */ + void *br_set_ctidp; /* clone thread id ptr */ + + /* + * The following struct is used by lx_clone() + * to pass info into fork() + */ + void *br_clone_args; + + /* + * Space to save off userland Linux %gs pointer so we can restore it + * before calling signal handlers. + */ + greg_t br_ugs; + + uint_t br_ptrace; /* ptrace is active for this LWP */ +} lx_lwp_data_t; + +#define BR_CPU_BOUND 0x0001 + +#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) +#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l)) +#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data) + +void lx_brand_int80_callback(void); +int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t); + +extern int lx_debug; +/* + * Debug printf: lx_print(fmt, ...) calls printf() only when lx_debug is + * non-zero. It is written as a conditional expression rather than a bare + * "if" so that an "else" following a call to lx_print() cannot bind to a + * hidden "if" inside the macro. It must be used as a statement. + */ +#define lx_print (!lx_debug) ? (void)0 : (void) printf + +#endif /* _KERNEL */ +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_BRAND_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h new file mode 100644 index 0000000000..ac963b015b --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_FUTEX_H +#define _SYS_LX_FUTEX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 +#define FUTEX_CMP_REQUEUE 4 +#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE + +extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout, + uintptr_t addr2, int val2); +extern void lx_futex_init(void); +extern int lx_futex_fini(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_FUTEX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h new file mode 100644 index 0000000000..12f1aab2b3 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _LX_IMPL_H +#define _LX_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int64_t (*llfcn_t)(); + +typedef struct lx_sysent { + int sy_flags; + char *sy_name; + llfcn_t sy_callc; + char sy_narg; +} lx_sysent_t; + +typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t, + ulong_t, ulong_t); + + +extern lx_sysent_t lx_sysent[]; + +extern lx_systrace_f *lx_systrace_entry_ptr; +extern lx_systrace_f *lx_systrace_return_ptr; + +extern void lx_brand_systrace_enable(void); +extern void lx_brand_systrace_disable(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_IMPL_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h new file mode 100644 index 0000000000..5080c3adae --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LINUX_LDT_H +#define _SYS_LINUX_LDT_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/segments.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct ldt_info { + uint_t entry_number; + uint_t base_addr; + uint_t limit; + uint_t seg_32bit:1, + contents:2, + read_exec_only:1, + limit_in_pages:1, + seg_not_present:1, + useable:1; +}; + +#define LDT_INFO_EMPTY(info) \ + ((info)->base_addr == 0 && (info)->limit == 0 && \ + (info)->contents == 0 && (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && (info)->useable == 0) + +#if defined(__amd64) +#define SETMODE(desc) (desc)->usd_long = SDP_SHORT; +#else +#define SETMODE(desc) +#endif + +#define LDT_INFO_TO_DESC(info, desc) { \ + USEGD_SETBASE(desc, (info)->base_addr); \ + USEGD_SETLIMIT(desc, (info)->limit); \ + (desc)->usd_type = ((info)->contents << 2) | \ + ((info)->read_exec_only ^ 1) << 1 | 0x10; \ + (desc)->usd_dpl = SEL_UPL; \ + (desc)->usd_p = (info)->seg_not_present ^ 1; \ + (desc)->usd_def32 = (info)->seg_32bit; \ + (desc)->usd_gran = (info)->limit_in_pages; \ + (desc)->usd_avl = (info)->useable; \ + SETMODE(desc); \ +} + +#define DESC_TO_LDT_INFO(desc, info) { \ + bzero((info), sizeof (*(info))); \ + (info)->base_addr = USEGD_GETBASE(desc); \ + (info)->limit = USEGD_GETLIMIT(desc); \ + (info)->seg_not_present = (desc)->usd_p ^ 1; \ + (info)->contents = ((desc)->usd_type >> 2) & 3; \ + (info)->read_exec_only = (((desc)->usd_type 
>> 1) & 1) ^ 1; \ + (info)->seg_32bit = (desc)->usd_def32; \ + (info)->limit_in_pages = (desc)->usd_gran; \ + (info)->useable = (desc)->usd_avl; \ +} + +extern void lx_set_gdt(int, user_desc_t *); +extern void lx_clear_gdt(int); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_LDT_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h new file mode 100644 index 0000000000..80c8079f0b --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LX_PID_H +#define _SYS_LX_PID_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/note.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL +struct lx_pid { + pid_t s_pid; /* the solaris pid and ... */ + id_t s_tid; /* ... 
tid pair */ + pid_t l_pid; /* the corresponding linux pid */ + time_t l_start; /* birthday of this pid */ + struct pid *l_pidp; + struct lx_pid *stol_next; /* link in stol hash table */ + struct lx_pid *ltos_next; /* link in ltos hash table */ +}; + +extern int lx_pid_assign(kthread_t *); +extern void lx_pid_reassign(kthread_t *); +extern void lx_pid_rele(pid_t, id_t); +extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *); +extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *); +extern void lx_pid_init(void); +extern void lx_pid_fini(void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LX_PID_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h new file mode 100644 index 0000000000..74bbc939a3 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_PTM_LINUX_H +#define _SYS_PTM_LINUX_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define LX_PTM_DRV "lx_ptm" +#define LX_PTM_MINOR_NODE "lx_ptmajor" + +#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_PTM_LINUX_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h new file mode 100644 index 0000000000..b0ae748f3c --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_LINUX_SCHED_H +#define _SYS_LINUX_SCHED_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/procset.h> +#include <sys/priocntl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Linux scheduler policies. 
+ */ +#define LX_SCHED_OTHER 0 +#define LX_SCHED_FIFO 1 +#define LX_SCHED_RR 2 + +#define LX_PRI_MAX 99 + +typedef int l_pid_t; + +struct lx_sched_param { + int lx_sched_prio; +}; + +extern int sched_setprocset(procset_t *, l_pid_t); +extern long do_priocntlsys(int, procset_t *, void *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_SCHED_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h new file mode 100644 index 0000000000..b4d41d5241 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_LINUX_SYSCALLS_H +#define _SYS_LINUX_SYSCALLS_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +extern long lx_brk(); +extern long lx_getpid(); +extern long lx_getppid(); +extern long lx_clone(); +extern long lx_kill(); +extern long lx_tkill(); +extern long lx_modify_ldt(); +extern long lx_gettid(); +extern long lx_futex(); +extern long lx_get_thread_area(); +extern long lx_sched_getparam(); +extern long lx_sched_getscheduler(); +extern long lx_sched_rr_get_interval(); +extern long lx_sched_setparam(); +extern long lx_sched_setscheduler(); +extern long lx_set_thread_area(); +extern long lx_set_tid_address(); +extern long lx_setresgid(); +extern long lx_setresgid16(); +extern long lx_setresuid(); +extern long lx_setresuid16(); +extern long lx_sysinfo(); +extern long lx_setgroups(); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LINUX_SYSCALLS_H */ diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c new file mode 100644 index 0000000000..25a719986e --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> + +/* + * The brk() system call needs to be in-kernel because Linux expects a call to + * brk(0) to return the current breakpoint. In Solaris, the process breakpoint + * is setup and managed by libc. Due to the way we link our libraries and the + * need for Linux to manage its own breakpoint, this has to remain in the + * kernel. + */ +extern int brk(caddr_t); + +/* + * Linux brk(2) emulation. + * + * nva: requested new break address; 0 means "just report the current + * break" and leaves the address space untouched. + * + * Returns the current end of the break region (p_brkbase + p_brksize) of + * the calling process, whether or not the native brk() call succeeded. + */ +long +lx_brk(caddr_t nva) +{ + proc_t *p = curproc; + klwp_t *lwp = ttolwp(curthread); + + if (nva != 0) { + /* The result is deliberately ignored; see below. */ + (void) brk(nva); + + /* + * Despite claims to the contrary in the manpage, when Linux + * brk() fails, errno is left unchanged. + */ + lwp->lwp_errno = 0; + } + + return ((long)(p->p_brkbase + p->p_brksize)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c new file mode 100644 index 0000000000..2af3c00bae --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 + * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Bit layout of the flags argument to lx_clone().  The low byte
+ * (LX_CSIGNAL) is a mask that extracts a signal number; the remaining
+ * values are individual option bits tested with '&'.
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+/*
+ * Our lwp has already been created at this point, so this routine is
+ * responsible for setting up all the state needed to track this as a
+ * linux cloned thread.
+ */ +/* ARGSUSED */ +long +lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp) +{ + struct lx_lwp_data *lwpd = ttolxlwp(curthread); + struct ldt_info info; + struct user_desc descr; + int tls_index; + int entry = -1; + int signo; + + signo = flags & LX_CSIGNAL; + if (signo < 0 || signo > MAXSIG) + return (set_errno(EINVAL)); + + if (flags & LX_CLONE_SETTLS) { + if (copyin((caddr_t)ldtinfo, &info, sizeof (info))) + return (set_errno(EFAULT)); + + if (LDT_INFO_EMPTY(&info)) + return (set_errno(EINVAL)); + + entry = info.entry_number; + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + tls_index = entry - GDT_TLSMIN; + + /* + * Convert the user-space structure into a real x86 + * descriptor and copy it into this LWP's TLS array. We + * also load it into the GDT. + */ + LDT_INFO_TO_DESC(&info, &descr); + bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr)); + lx_set_gdt(entry, &lwpd->br_tls[tls_index]); + } else { + tls_index = -1; + bzero(&descr, sizeof (descr)); + } + + lwpd->br_clear_ctidp = + (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL; + + if (signo && ! 
(flags & LX_CLONE_DETACH)) + lwpd->br_signal = signo; + else + lwpd->br_signal = 0; + + if (flags & LX_CLONE_THREAD) + lwpd->br_tgid = curthread->t_procp->p_pid; + + if (flags & LX_CLONE_PARENT) + lwpd->br_ppid = 0; + + if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) && + (suword32(ctidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } + if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) && + (suword32(ptidp, lwpd->br_pid) != 0)) { + if (entry >= 0) + lx_clear_gdt(entry); + return (set_errno(EFAULT)); + } + + return (lwpd->br_pid); +} + +long +lx_set_tid_address(int *tidp) +{ + struct lx_lwp_data *lwpd = ttolxlwp(curthread); + + lwpd->br_clear_ctidp = tidp; + + return (lwpd->br_pid); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c new file mode 100644 index 0000000000..ceb6f330aa --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c @@ -0,0 +1,471 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
 + */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/lx_futex.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper. This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex is itself a 4-byte integer, which must be 4-byte aligned. The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations. The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call. There are 5 different operations
+ * that can be performed on a futex, using this system call. Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user. Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ *	long sys_futex(void *futex1, int cmd, int val1,
+ *	    struct timespec *timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ *	Atomically verify that futex1 contains the value val1. If it
+ *	doesn't, return EWOULDBLOCK. If it does contain the expected
+ *	value, the thread will sleep until somebody performs a FUTEX_WAKE
+ *	on the futex.
 The caller may also specify a timeout, indicating
+ *	the maximum time the thread should sleep. If the timer expires,
+ *	the call returns ETIMEDOUT. If the thread is awoken with a signal,
+ *	the call returns EINTR. Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ *	Wake up val1 processes that are waiting on futex1. The call
+ *	returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ *	If the value stored in futex1 matches that passed in val2, wake
+ *	up val1 processes that are waiting on futex1. Otherwise, return
+ *	EAGAIN.
+ *
+ *	If there are more than val1 threads waiting on the futex, remove
+ *	the remaining threads from this futex, and requeue them on futex2.
+ *	The caller can limit the number of threads being requeued by
+ *	encoding an integral numerical value in the position usually used
+ *	for the timeout pointer.
+ *
+ *	The call returns the number of blocked threads that were woken up
+ *	or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ *	Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ *	This command has been declared broken and obsolete, but we still
+ *	need to support it.
+ *
+ * FUTEX_FD
+ *
+ *	Return a file descriptor, which can be used to refer to the futex.
+ *	We don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex. There is one fwaiter_t for each blocked thread. We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket. If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ */ +typedef struct fwaiter { + memid_t fw_memid; /* memid of the user-space futex */ + kcondvar_t fw_cv; /* cond var */ + struct fwaiter *fw_next; /* hash queue */ + struct fwaiter *fw_prev; /* hash queue */ + volatile int fw_woken; +} fwaiter_t; + +#define MEMID_COPY(s, d) \ + { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; } +#define MEMID_EQUAL(s, d) \ + ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1]) + +/* Borrowed from the page freelist hash code. */ +#define HASH_SHIFT_SZ 7 +#define HASH_SIZE (1 << HASH_SHIFT_SZ) +#define HASH_FUNC(id) \ + ((((uintptr_t)((id)->val[1]) >> PAGESHIFT) + \ + ((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) + \ + ((uintptr_t)((id)->val[0]) >> 3) + \ + ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \ + ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \ + (HASH_SIZE - 1)) + +static fwaiter_t *futex_hash[HASH_SIZE]; +static kmutex_t futex_hash_lock[HASH_SIZE]; + +static void +futex_hashin(fwaiter_t *fwp) +{ + int index; + + index = HASH_FUNC(&fwp->fw_memid); + ASSERT(MUTEX_HELD(&futex_hash_lock[index])); + + fwp->fw_prev = NULL; + fwp->fw_next = futex_hash[index]; + if (fwp->fw_next) + fwp->fw_next->fw_prev = fwp; + futex_hash[index] = fwp; +} + +static void +futex_hashout(fwaiter_t *fwp) +{ + int index; + + index = HASH_FUNC(&fwp->fw_memid); + ASSERT(MUTEX_HELD(&futex_hash_lock[index])); + + if (fwp->fw_prev) + fwp->fw_prev->fw_next = fwp->fw_next; + if (fwp->fw_next) + fwp->fw_next->fw_prev = fwp->fw_prev; + if (futex_hash[index] == fwp) + futex_hash[index] = fwp->fw_next; + + fwp->fw_prev = NULL; + fwp->fw_next = NULL; +} + +/* + * Go to sleep until somebody does a WAKE operation on this futex, we get a + * signal, or the timeout expires. 
+ */ +static int +futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout) +{ + int err, ret; + int32_t curval; + fwaiter_t fw; + int index; + + fw.fw_woken = 0; + MEMID_COPY(memid, &fw.fw_memid); + cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL); + + index = HASH_FUNC(&fw.fw_memid); + mutex_enter(&futex_hash_lock[index]); + + if (fuword32(addr, (uint32_t *)&curval)) { + err = set_errno(EFAULT); + goto out; + } + if (curval != val) { + err = set_errno(EWOULDBLOCK); + goto out; + } + + futex_hashin(&fw); + + err = 0; + while ((fw.fw_woken == 0) && (err == 0)) { + ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index], + timeout, timechanged); + if (ret < 0) + err = set_errno(ETIMEDOUT); + else if (ret == 0) + err = set_errno(EINTR); + } + + /* + * The futex is normally hashed out in wakeup. If we timed out or + * got a signal, we need to hash it out here instead. + */ + if (fw.fw_woken == 0) + futex_hashout(&fw); + +out: + mutex_exit(&futex_hash_lock[index]); + + return (err); +} + +/* + * Wake up to wake_threads threads that are blocked on the futex at memid. + */ +static int +futex_wake(memid_t *memid, int wake_threads) +{ + fwaiter_t *fwp, *next; + int index; + int ret = 0; + + index = HASH_FUNC(memid); + + mutex_enter(&futex_hash_lock[index]); + + for (fwp = futex_hash[index]; fwp && ret < wake_threads; fwp = next) { + next = fwp->fw_next; + if (MEMID_EQUAL(&fwp->fw_memid, memid)) { + futex_hashout(fwp); + fwp->fw_woken = 1; + cv_signal(&fwp->fw_cv); + ret++; + } + } + + mutex_exit(&futex_hash_lock[index]); + + return (ret); +} + +/* + * Wake up to wake_threads waiting on the futex at memid. If there are + * more than that many threads waiting, requeue the remaining threads on + * the futex at requeue_memid. 
+ */ +static int +futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads, + ulong_t requeue_threads, caddr_t addr, int *cmpval) +{ + fwaiter_t *fwp, *next; + int index1, index2; + int ret = 0; + int32_t curval; + kmutex_t *l1, *l2; + + /* + * To ensure that we don't miss a wakeup if the value of cmpval + * changes, we need to grab locks on both the original and new hash + * buckets. To avoid deadlock, we always grab the lower-indexed + * lock first. + */ + index1 = HASH_FUNC(memid); + index2 = HASH_FUNC(requeue_memid); + + if (index1 == index2) { + l1 = &futex_hash_lock[index1]; + l2 = NULL; + } else if (index1 < index2) { + l1 = &futex_hash_lock[index1]; + l2 = &futex_hash_lock[index2]; + } else { + l1 = &futex_hash_lock[index2]; + l2 = &futex_hash_lock[index1]; + } + + mutex_enter(l1); + if (l2 != NULL) + mutex_enter(l2); + + if (cmpval != NULL) { + if (fuword32(addr, (uint32_t *)&curval)) { + ret = -EFAULT; + goto out; + } + if (curval != *cmpval) { + ret = -EAGAIN; + goto out; + } + } + + for (fwp = futex_hash[index1]; fwp; fwp = next) { + next = fwp->fw_next; + if (!MEMID_EQUAL(&fwp->fw_memid, memid)) + continue; + + futex_hashout(fwp); + if (ret++ < wake_threads) { + fwp->fw_woken = 1; + cv_signal(&fwp->fw_cv); + } else { + MEMID_COPY(requeue_memid, &fwp->fw_memid); + futex_hashin(fwp); + + if ((ret - wake_threads) >= requeue_threads) + break; + } + } + +out: + if (l2 != NULL) + mutex_exit(l2); + mutex_exit(l1); + + if (ret < 0) + return (set_errno(-ret)); + return (ret); +} + +/* + * Copy in the relative timeout provided by the application and convert it + * to an absolute timeout. 
+ */ +static int +get_timeout(void *lx_timeout, timestruc_t *timeout) +{ + timestruc_t now; + + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyin(lx_timeout, timeout, sizeof (timestruc_t))) + return (EFAULT); + } +#ifdef _SYSCALL32_IMPL + else { + timestruc32_t timeout32; + if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t))) + return (EFAULT); + timeout->tv_sec = (time_t)timeout32.tv_sec; + timeout->tv_nsec = timeout32.tv_nsec; + } +#endif + gethrestime(&now); + + if (itimerspecfix(timeout)) + return (EINVAL); + + timespecadd(timeout, &now); + return (0); +} + +long +lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout, + uintptr_t addr2, int val2) +{ + struct as *as = curproc->p_as; + memid_t memid, requeue_memid; + timestruc_t timeout; + timestruc_t *tptr = NULL; + int requeue_threads; + int *requeue_cmp = NULL; + int rval = 0; + + /* must be aligned on int boundary */ + if (addr & 0x3) + return (set_errno(EINVAL)); + + /* Sanity check the futex command */ + if (cmd < 0 || cmd > FUTEX_MAX_CMD) + return (set_errno(EINVAL)); + + /* Copy in the timeout structure from userspace. */ + if (cmd == FUTEX_WAIT && lx_timeout != NULL) { + rval = get_timeout((timespec_t *)lx_timeout, &timeout); + if (rval != 0) + return (set_errno(rval)); + tptr = &timeout; + } + + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) { + if (cmd == FUTEX_CMP_REQUEUE) + requeue_cmp = &val2; + + /* + * lx_timeout is nominally a pointer to a userspace + * address. For these two commands, it actually contains + * an integer which indicates the maximum number of threads + * to requeue. This is horrible, and I'm sorry. + */ + requeue_threads = (int)lx_timeout; + } + + /* + * Translate the process-specific, user-space futex virtual + * address(es) to universal memid. 
+ */ + rval = as_getmemid(as, (void *)addr, &memid); + if (rval != 0) + return (set_errno(rval)); + + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) { + rval = as_getmemid(as, (void *)addr2, &requeue_memid); + if (rval) + return (set_errno(rval)); + } + + switch (cmd) { + case FUTEX_WAIT: + rval = futex_wait(&memid, (void *)addr, val, tptr); + break; + + case FUTEX_WAKE: + rval = futex_wake(&memid, val); + break; + + case FUTEX_CMP_REQUEUE: + case FUTEX_REQUEUE: + rval = futex_requeue(&memid, &requeue_memid, val, + requeue_threads, (void *)addr2, requeue_cmp); + + break; + } + + return (rval); +} + +void +lx_futex_init(void) +{ + int i; + + for (i = 0; i < HASH_SIZE; i++) + mutex_init(&futex_hash_lock[i], NULL, MUTEX_DEFAULT, NULL); + bzero(futex_hash, sizeof (futex_hash)); +} + +int +lx_futex_fini(void) +{ + int i, err; + + err = 0; + for (i = 0; (err == 0) && (i < HASH_SIZE); i++) { + mutex_enter(&futex_hash_lock[i]); + if (futex_hash[i] != NULL) + err = EBUSY; + mutex_exit(&futex_hash_lock[i]); + } + return (err); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c new file mode 100644 index 0000000000..91dc24c6d6 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c @@ -0,0 +1,72 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/zone.h> +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/thread.h> +#include <sys/cpuvar.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> + +/* + * return the pid + */ +long +lx_getpid() +{ + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + long rv; + + if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) + rv = 1; + else + rv = lwpd->br_tgid; + + return (rv); +} + +/* + * return the parent pid + */ +long +lx_getppid(void) +{ + return (lx_lwp_ppid(ttolwp(curthread), NULL, NULL)); +} + +/* + * return the thread id + */ +long +lx_gettid(void) +{ + lx_lwp_data_t *lwpd = ttolxlwp(curthread); + + return (lwpd->br_pid); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c new file mode 100644 index 0000000000..077194ee25 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c @@ -0,0 +1,297 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
 + * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+/* Linux id types: 16-bit for the legacy calls, 32-bit otherwise. */
+typedef ushort_t l_uid16_t;
+typedef ushort_t l_gid16_t;
+typedef uint_t l_uid_t;
+typedef uint_t l_gid_t;
+
+/*
+ * Widen a 16-bit id to 32 bits, mapping the 16-bit "-1" (leave unchanged)
+ * sentinel to the 32-bit "-1" sentinel rather than to 0xffff.
+ */
+#define LINUX_UID16_TO_UID32(uid16) \
+	(((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+#define LINUX_GID16_TO_GID32(gid16) \
+	(((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+#define LX_NGROUPS_MAX 32
+extern int setgroups(int, gid_t *);
+
+/*
+ * This function is based on setreuid in common/syscall/uid.c and exists
+ * because Solaris does not have a way to explicitly set the saved uid (suid)
+ * from any other system call.
+ *
+ * Each of ruid, euid and suid may be -1 to leave that id unchanged.
+ * Returns 0 on success, or the value of set_errno() for EINVAL (id out of
+ * range) or EPERM (id is none of the current real/effective/saved uids
+ * and secpolicy_allow_setid() refuses it).
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+	proc_t *p;
+	int error = 0;
+	int do_nocd = 0;
+	int uidchge = 0;
+	uid_t oldruid = ruid;
+	cred_t *cr, *newcr;
+	zoneid_t zoneid = getzoneid();
+
+	/* NOTE(review): the ids are unsigned, so the "< 0" tests never fire. */
+	if ((ruid != -1 && (ruid < 0 || ruid > MAXUID)) ||
+	    (euid != -1 && (euid < 0 || euid > MAXUID)) ||
+	    (suid != -1 && (suid < 0 || suid > MAXUID))) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Need to pre-allocate the new cred structure before grabbing
+	 * the p_crlock mutex.
+	 */
+	newcr = cralloc();
+
+	p = ttoproc(curthread);
+
+retry:
+	mutex_enter(&p->p_crlock);
+	cr = p->p_cred;
+
+	if (ruid != -1 &&
+	    ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+	    ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+		error = EPERM;
+	} else if (euid != -1 &&
+	    euid != cr->cr_ruid && euid != cr->cr_uid &&
+	    euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+		error = EPERM;
+	} else if (suid != -1 &&
+	    suid != cr->cr_ruid && suid != cr->cr_uid &&
+	    suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+		error = EPERM;
+	} else {
+		if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+			/*
+			 * The ruid of the process is going to change. In order
+			 * to avoid a race condition involving the
+			 * process count associated with the newly given ruid,
+			 * we increment the count before assigning the
+			 * credential to the process.
+			 * To do that, we'll have to take pidlock, so we first
+			 * release p_crlock.
+			 */
+			mutex_exit(&p->p_crlock);
+			uidchge = 1;
+			mutex_enter(&pidlock);
+			upcount_inc(ruid, zoneid);
+			mutex_exit(&pidlock);
+			/*
+			 * As we released p_crlock we can't rely on the cr
+			 * we read. So retry the whole thing.
+			 */
+			goto retry;
+		}
+		/* Install a copy of the current cred, then edit the copy. */
+		crhold(cr);
+		crcopy_to(cr, newcr);
+		p->p_cred = newcr;
+
+		if (euid != -1)
+			newcr->cr_uid = euid;
+		if (suid != -1)
+			newcr->cr_suid = suid;
+		if (ruid != -1) {
+			oldruid = newcr->cr_ruid;
+			newcr->cr_ruid = ruid;
+			ASSERT(ruid != oldruid ? uidchge : 1);
+		}
+
+		/*
+		 * A process that gives up its privilege
+		 * must be marked to produce no core dump.
+		 */
+		if ((cr->cr_uid != newcr->cr_uid ||
+		    cr->cr_ruid != newcr->cr_ruid ||
+		    cr->cr_suid != newcr->cr_suid))
+			do_nocd = 1;
+
+		crfree(cr);
+	}
+	mutex_exit(&p->p_crlock);
+
+	/*
+	 * We decrement the number of processes associated with the oldruid
+	 * to match the increment above, even if the ruid of the process
+	 * did not change or an error occurred (oldruid == uid).
+	 */
+	if (uidchge) {
+		ASSERT(oldruid != -1 && ruid != -1);
+		mutex_enter(&pidlock);
+		upcount_dec(oldruid, zoneid);
+		mutex_exit(&pidlock);
+	}
+
+	if (error == 0) {
+		if (do_nocd) {
+			mutex_enter(&p->p_lock);
+			p->p_flag |= SNOCD;
+			mutex_exit(&p->p_lock);
+		}
+		crset(p, newcr); /* broadcast to process threads */
+		goto done;
+	}
+	/* Failure after allocation: the pre-allocated cred was never used. */
+	crfree(newcr);
+done:
+	if (error)
+		return (set_errno(error));
+	else
+		return (0);
+}
+
+/*
+ * 16-bit variant: widen each id (preserving the -1 sentinel) and defer to
+ * lx_setresuid().
+ */
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+	long rval;
+
+	rval = lx_setresuid(
+	    LINUX_UID16_TO_UID32(ruid16),
+	    LINUX_UID16_TO_UID32(euid16),
+	    LINUX_UID16_TO_UID32(suid16));
+
+	return (rval);
+}
+
+/*
+ * This function is based on setregid in common/syscall/gid.c
+ *
+ * Each of rgid, egid and sgid may be -1 to leave that id unchanged.
+ * Returns 0 on success, or the value of set_errno() for EINVAL or EPERM.
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+	proc_t *p;
+	int error = 0;
+	int do_nocd = 0;
+	cred_t *cr, *newcr;
+
+	/* The range check reuses MAXUID as the upper bound for group ids. */
+	if ((rgid != -1 && (rgid < 0 || rgid > MAXUID)) ||
+	    (egid != -1 && (egid < 0 || egid > MAXUID)) ||
+	    (sgid != -1 && (sgid < 0 || sgid > MAXUID))) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Need to pre-allocate the new cred structure before grabbing
+	 * the p_crlock mutex.
+	 */
+	newcr = cralloc();
+
+	p = ttoproc(curthread);
+	mutex_enter(&p->p_crlock);
+	cr = p->p_cred;
+
+	if (rgid != -1 &&
+	    rgid != cr->cr_rgid && rgid != cr->cr_gid &&
+	    rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else if (egid != -1 &&
+	    egid != cr->cr_rgid && egid != cr->cr_gid &&
+	    egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else if (sgid != -1 &&
+	    sgid != cr->cr_rgid && sgid != cr->cr_gid &&
+	    sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else {
+		/* Install a copy of the current cred, then edit the copy. */
+		crhold(cr);
+		crcopy_to(cr, newcr);
+		p->p_cred = newcr;
+
+		if (egid != -1)
+			newcr->cr_gid = egid;
+		if (sgid != -1)
+			newcr->cr_sgid = sgid;
+		if (rgid != -1)
+			newcr->cr_rgid = rgid;
+
+		/*
+		 * A process that gives up its privilege
+		 * must be marked to produce no core dump.
+		 */
+		if ((cr->cr_gid != newcr->cr_gid ||
+		    cr->cr_rgid != newcr->cr_rgid ||
+		    cr->cr_sgid != newcr->cr_sgid))
+			do_nocd = 1;
+
+		crfree(cr);
+	}
+	mutex_exit(&p->p_crlock);
+
+	if (error == 0) {
+		if (do_nocd) {
+			mutex_enter(&p->p_lock);
+			p->p_flag |= SNOCD;
+			mutex_exit(&p->p_lock);
+		}
+		crset(p, newcr); /* broadcast to process threads */
+		goto done;
+	}
+	/* Failure after allocation: the pre-allocated cred was never used. */
+	crfree(newcr);
+done:
+	if (error)
+		return (set_errno(error));
+	else
+		return (0);
+}
+
+/*
+ * 16-bit variant: widen each id (preserving the -1 sentinel) and defer to
+ * lx_setresgid().
+ */
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+	long rval;
+
+	rval = lx_setresgid(
+	    LINUX_GID16_TO_GID32(rgid16),
+	    LINUX_GID16_TO_GID32(egid16),
+	    LINUX_GID16_TO_GID32(sgid16));
+
+	return (rval);
+}
+
+/*
+ * Linux defines NGROUPS_MAX to be 32, but on Solaris it is only 16. We employ
+ * the terrible hack below so that tests may proceed, if only on DEBUG kernels.
+ */ +long +lx_setgroups(int ngroups, gid_t *grouplist) +{ +#ifdef DEBUG + if (ngroups > ngroups_max && ngroups <= LX_NGROUPS_MAX) + ngroups = ngroups_max; +#endif /* DEBUG */ + + return (setgroups(ngroups, grouplist)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c new file mode 100644 index 0000000000..d86d50f4e6 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c @@ -0,0 +1,249 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/zone.h> +#include <sys/thread.h> +#include <sys/signal.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_pid.h> +#include <lx_signum.h> + +extern int kill(pid_t, int); + +/* + * Check if it is legal to send this signal to the init process. Linux + * kill(2) semantics dictate that no _unhandled_ signal may be sent to pid + * 1. 
+ */ +static int +init_sig_check(int sig, pid_t pid) +{ + proc_t *p; + int rv = 0; + + mutex_enter(&pidlock); + + if (((p = prfind(pid)) == NULL) || (p->p_stat == SIDL)) + rv = ESRCH; + else if (sig && (sigismember(&cantmask, sig) || + (PTOU(p)->u_signal[sig-1] == SIG_DFL) || + (PTOU(p)->u_signal[sig-1] == SIG_IGN))) + rv = EPERM; + + mutex_exit(&pidlock); + + return (rv); +} + +long +lx_tkill(pid_t pid, int lx_sig) +{ + kthread_t *t; + proc_t *pp; + pid_t initpid; + sigqueue_t *sqp; + struct lx_lwp_data *br = ttolxlwp(curthread); + int tid = 1; /* default tid */ + int sig, rv; + + /* + * Unlike kill(2), Linux tkill(2) doesn't allow signals to + * be sent to process IDs <= 0 as it doesn't overlay any special + * semantics on the pid. + */ + if ((pid <= 0) || ((lx_sig < 0) || (lx_sig >= LX_NSIG)) || + ((sig = ltos_signo[lx_sig]) < 0)) + return (set_errno(EINVAL)); + + /* + * If the Linux pid is 1, translate the pid to the actual init + * pid for the zone. Note that Linux dictates that no unhandled + * signals may be sent to init, so check for that, too. + * + * Otherwise, extract the tid and real pid from the Linux pid. + */ + initpid = curproc->p_zone->zone_proc_initpid; + if (pid == 1) + pid = initpid; + if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0)) + return (set_errno(rv)); + else if (lx_lpid_to_spair(pid, &pid, &tid) < 0) + return (set_errno(ESRCH)); + + sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + + /* + * Find the process for the passed pid... 
+ */ + mutex_enter(&pidlock); + if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) { + mutex_exit(&pidlock); + rv = set_errno(ESRCH); + goto free_and_exit; + } + mutex_enter(&pp->p_lock); + mutex_exit(&pidlock); + + /* + * Deny permission to send the signal if either of the following + * is true: + * + * + The signal is SIGCONT and the target pid is not in the same + * session as the sender + * + * + prochasprocperm() shows the user lacks sufficient permission + * to send the signal to the target pid + */ + if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) || + (!prochasprocperm(pp, curproc, CRED()))) { + mutex_exit(&pp->p_lock); + rv = set_errno(EPERM); + goto free_and_exit; + } + + /* check for the tid */ + if ((t = idtot(pp, tid)) == NULL) { + mutex_exit(&pp->p_lock); + rv = set_errno(ESRCH); + goto free_and_exit; + } + + /* a signal of 0 means just check for the existence of the thread */ + if (lx_sig == 0) { + mutex_exit(&pp->p_lock); + rv = 0; + goto free_and_exit; + } + + sqp->sq_info.si_signo = sig; + sqp->sq_info.si_code = SI_LWP; + sqp->sq_info.si_pid = br->br_pid; + sqp->sq_info.si_uid = crgetruid(CRED()); + sigaddqa(pp, t, sqp); + + mutex_exit(&pp->p_lock); + + return (0); + +free_and_exit: + kmem_free(sqp, sizeof (sigqueue_t)); + return (rv); +} + +long +lx_kill(pid_t lx_pid, int lx_sig) +{ + pid_t s_pid, initpid; + sigsend_t v; + zone_t *zone = curproc->p_zone; + struct proc *p; + int err, sig, nfound; + + if ((lx_sig < 0) || (lx_sig >= LX_NSIG) || + ((sig = ltos_signo[lx_sig]) < 0)) + return (set_errno(EINVAL)); + + /* + * Since some linux apps rely on init(1M) having PID 1, we + * transparently translate 1 to the real init(1M)'s pid. We then + * check to be sure that it is legal for this process to send this + * signal to init(1M). 
+ */ + initpid = zone->zone_proc_initpid; + if (lx_pid == 1 || lx_pid == -1) { + s_pid = initpid; + } else if (lx_pid == 0) { + s_pid = 0; + } else { + if (lx_pid < 0) + err = lx_lpid_to_spair(-lx_pid, &s_pid, NULL); + else + err = lx_lpid_to_spair(lx_pid, &s_pid, NULL); + + /* + * If we didn't find this pid in our hash table, it either + * means that the process doesn't exist, that it exists but + * isn't a Linux process, or that it is a zombie process. + * In each of these cases, assuming that the Linux pid is + * the same as the Solaris pid will get us the correct + * behavior. + */ + if (err < 0) + s_pid = lx_pid; + } + + if ((s_pid == initpid) && ((err = init_sig_check(sig, s_pid)) != 0)) + return (set_errno(err)); + + /* + * For individual processes, kill() semantics are the same between + * Solaris and Linux. + */ + if (lx_pid >= 0) + return (kill(s_pid, sig)); + + /* + * In Solaris, sending a signal to -pid means "send a signal to + * everyone in process group pid." In Linux it means "send a + * signal to everyone in the group other than init." Sending a + * signal to -1 means "send a signal to every process except init + * and myself." + */ + + bzero(&v, sizeof (v)); + v.sig = sig; + v.checkperm = 1; + v.sicode = SI_USER; + err = 0; + + mutex_enter(&pidlock); + + p = (lx_pid == -1) ? practive : pgfind(s_pid); + nfound = 0; + while (err == 0 && p != NULL) { + if ((p->p_zone == zone) && (p->p_stat != SIDL) && + (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) { + nfound++; + err = sigsendproc(p, &v); + } + + p = (lx_pid == -1) ? p->p_next : p->p_pglink; + } + mutex_exit(&pidlock); + if (nfound == 0) + err = ESRCH; + else if (err == 0 && v.perm == 0) + err = EPERM; + return (err ? 
set_errno(err) : 0); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c new file mode 100644 index 0000000000..aa6e12a7d8 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c @@ -0,0 +1,121 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/segments.h> +#include <sys/archsystm.h> +#include <sys/proc.h> +#include <sys/sysi86.h> +#include <sys/cmn_err.h> +#include <sys/lx_ldt.h> + +/* + * Read the ldt_info structure in from the Linux app, convert it to an ssd + * structure, and then call setdscr() to do all the heavy lifting. 
+ */ +static int +write_ldt(void *data, ulong_t count) +{ + user_desc_t usd; + struct ssd ssd; + struct ldt_info ldt_inf; + proc_t *pp = curthread->t_procp; + int err; + + if (count != sizeof (ldt_inf)) + return (set_errno(EINVAL)); + + if (copyin(data, &ldt_inf, sizeof (ldt_inf))) + return (set_errno(EFAULT)); + + if (ldt_inf.entry_number >= MAXNLDT) + return (set_errno(EINVAL)); + + LDT_INFO_TO_DESC(&ldt_inf, &usd); + usd_to_ssd(&usd, &ssd, SEL_LDT(ldt_inf.entry_number)); + + /* + * Get everyone into a safe state before changing the LDT. + */ + if (!holdlwps(SHOLDFORK1)) + return (set_errno(EINTR)); + + err = setdscr(&ssd); + + /* + * Release the hounds! + */ + mutex_enter(&pp->p_lock); + continuelwps(pp); + mutex_exit(&pp->p_lock); + + return (err ? set_errno(err) : 0); +} + +static int +read_ldt(void *uptr, ulong_t count) +{ + proc_t *pp = curproc; + int bytes; + + if (pp->p_ldt == NULL) + return (0); + + bytes = (pp->p_ldtlimit + 1) * sizeof (user_desc_t); + if (bytes > count) + bytes = count; + + if (copyout(pp->p_ldt, uptr, bytes)) + return (set_errno(EFAULT)); + + return (bytes); +} + +long +lx_modify_ldt(int op, void *data, ulong_t count) +{ + int rval; + + switch (op) { + case 0: + rval = read_ldt(data, count); + break; + + case 1: + rval = write_ldt(data, count); + break; + + default: + rval = set_errno(ENOSYS); + break; + } + + return (rval); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c new file mode 100644 index 0000000000..7b1cd49f37 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c @@ -0,0 +1,513 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/proc.h> +#include <sys/cpu.h> +#include <sys/rtpriocntl.h> +#include <sys/tspriocntl.h> +#include <sys/processor.h> +#include <sys/brand.h> +#include <sys/lx_pid.h> +#include <sys/lx_sched.h> +#include <sys/lx_brand.h> + +extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t); + +int +lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp, + int64_t *rval) +{ + pid_t s_pid; + id_t s_tid; + kthread_t *t = curthread; + lx_lwp_data_t *lx_lwp; + + if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK) + return (set_errno(EINVAL)); + + /* + * The caller wants to know how large the mask should be. + */ + if (cmd == B_GET_AFFINITY_MASK && len == 0) { + *rval = sizeof (lx_affmask_t); + return (0); + } + + /* + * Otherwise, ensure they have a large enough mask. + */ + if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) { + *rval = -1; + return (set_errno(EINVAL)); + } + + if (pid == 0) { + s_pid = curproc->p_pid; + s_tid = curthread->t_tid; + } else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) { + return (set_errno(ESRCH)); + } + + /* + * For now, we only support manipulating threads in the + * same process. 
+ */ + if (curproc->p_pid != s_pid) + return (set_errno(EPERM)); + + /* + * We must hold the process lock so that the thread list + * doesn't change while we're looking at it. We'll hold + * the lock until we no longer reference the + * corresponding lwp. + */ + + mutex_enter(&curproc->p_lock); + + do { + if (t->t_tid == s_tid) + break; + t = t->t_forw; + } while (t != curthread); + + /* + * If the given PID is in the current thread's process, + * then we _must_ find it in the process's thread list. + */ + ASSERT(t->t_tid == s_tid); + + lx_lwp = t->t_lwp->lwp_brand; + + if (cmd == B_SET_AFFINITY_MASK) { + if (copyin_nowatch((void *)maskp, &lx_lwp->br_affinitymask, + sizeof (lx_affmask_t)) != 0) { + mutex_exit(&curproc->p_lock); + return (set_errno(EFAULT)); + } + + *rval = 0; + } else { + if (copyout_nowatch(&lx_lwp->br_affinitymask, (void *)maskp, + sizeof (lx_affmask_t)) != 0) { + mutex_exit(&curproc->p_lock); + return (set_errno(EFAULT)); + } + + *rval = sizeof (lx_affmask_t); + } + + mutex_exit(&curproc->p_lock); + return (0); +} + +long +lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + procset_t procset_cid; + pcparms_t pcparm; + pcinfo_t pcinfo; + struct lx_sched_param sched_param; + tsparms_t *tsp; + int prio, maxupri; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if (rv = sched_setprocset(&procset, pid)) + return (rv); + + if (copyin(param, &sched_param, sizeof (sched_param))) + return (set_errno(EFAULT)); + + prio = sched_param.lx_sched_prio; + + if (policy < 0) { + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the current policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if 
(strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR; + else + return (set_errno(EINVAL)); + } + + bzero(&pcinfo, sizeof (pcinfo)); + bzero(&pcparm, sizeof (pcparm)); + setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0); + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (prio < 0 || + prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri) + return (set_errno(EINVAL)); + pcparm.pc_cid = pcinfo.pc_cid; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = + policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF; + break; + + case LX_SCHED_OTHER: + (void) strcpy(pcinfo.pc_clname, "TS"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri; + if (prio > maxupri || prio < -maxupri) + return (set_errno(EINVAL)); + + pcparm.pc_cid = pcinfo.pc_cid; + tsp = (tsparms_t *)pcparm.pc_clparms; + tsp->ts_upri = prio; + tsp->ts_uprilim = TS_NOCHANGE; + break; + + default: + return (set_errno(EINVAL)); + } + + /* + * finally set scheduling policy and parameters + */ + (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + + return (0); +} + +long +lx_sched_getscheduler(l_pid_t pid) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + int policy; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if (rv = sched_setprocset(&procset, pid)) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify 
the equivalent linux policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR; + else + policy = set_errno(EINVAL); + + return (policy); +} + +long +lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + procset_t procset; + procset_t procset_cid; + pcparms_t pcparm; + pcinfo_t pcinfo; + struct lx_sched_param sched_param; + tsparms_t *tsp; + int policy; + int prio, maxupri; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if (rv = sched_setprocset(&procset, pid)) + return (rv); + + if (copyin(param, &sched_param, sizeof (sched_param))) + return (set_errno(EFAULT)); + + prio = sched_param.lx_sched_prio; + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the current policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (strcmp(pcinfo.pc_clname, "TS") == 0) + policy = LX_SCHED_OTHER; + else if (strcmp(pcinfo.pc_clname, "RT") == 0) + policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs == + RT_TQINF ? 
LX_SCHED_FIFO : LX_SCHED_RR; + else + return (set_errno(EINVAL)); + + bzero(&pcinfo, sizeof (pcinfo)); + bzero(&pcparm, sizeof (pcparm)); + setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0); + switch (policy) { + case LX_SCHED_FIFO: + case LX_SCHED_RR: + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (prio < 0 || + prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri) + return (set_errno(EINVAL)); + pcparm.pc_cid = pcinfo.pc_cid; + ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio; + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs = + policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF; + break; + + case LX_SCHED_OTHER: + (void) strcpy(pcinfo.pc_clname, "TS"); + (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri; + if (prio > maxupri || prio < -maxupri) + return (set_errno(EINVAL)); + + pcparm.pc_cid = pcinfo.pc_cid; + tsp = (tsparms_t *)pcparm.pc_clparms; + tsp->ts_upri = prio; + tsp->ts_uprilim = TS_NOCHANGE; + break; + + default: + return (set_errno(EINVAL)); + } + + /* + * finally set scheduling policy and parameters + */ + (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm); + + return (0); +} + +long +lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param) +{ + klwp_t *lwp = ttolwp(curthread); + struct lx_sched_param local_param; + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + tsinfo_t *tsi; + int prio, scale; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if (rv = sched_setprocset(&procset, pid)) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify the equivalent linux policy + */ + bzero(&pcinfo, sizeof (pcinfo)); + pcinfo.pc_cid = pcparm.pc_cid; + 
(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + bzero(&local_param, sizeof (local_param)); + if (strcmp(pcinfo.pc_clname, "TS") == 0) { + /* + * I don't know if we need to do this, coz it can't be + * changed from zero anyway..... + */ + tsi = (tsinfo_t *)pcinfo.pc_clinfo; + prio = ((tsparms_t *)pcparm.pc_clparms)->ts_upri; + scale = tsi->ts_maxupri; + if (scale == 0) + local_param.lx_sched_prio = 0; + else + local_param.lx_sched_prio = -(prio * 20) / scale; + } else if (strcmp(pcinfo.pc_clname, "RT") == 0) + local_param.lx_sched_prio = + ((rtparms_t *)pcparm.pc_clparms)->rt_pri; + else + rv = set_errno(EINVAL); + + if (rv == 0) + if (copyout(&local_param, param, sizeof (local_param))) + return (set_errno(EFAULT)); + + return (rv); +} + +long +lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival) +{ + klwp_t *lwp = ttolwp(curthread); + struct timespec interval; + procset_t procset; + pcparms_t pcparm; + pcinfo_t pcinfo; + int rv; + + if (pid < 0) + return (set_errno(ESRCH)); + + if (rv = sched_setprocset(&procset, pid)) + return (rv); + + /* + * get the class id + */ + pcparm.pc_cid = PC_CLNULL; + (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + /* + * get the class info and identify the equivalent linux policy + */ + setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0); + bzero(&pcinfo, sizeof (pcinfo)); + (void) strcpy(pcinfo.pc_clname, "RT"); + (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo); + if (lwp->lwp_errno) + return (lwp->lwp_errno); + + if (pcparm.pc_cid == pcinfo.pc_cid && + ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) { + interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs; + interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs; + + if (copyout(&interval, ival, sizeof (interval))) + return (set_errno(EFAULT)); + + return (0); + } + + return (set_errno(EINVAL)); +} + +int 
+sched_setprocset(procset_t *procset, l_pid_t pid) +{ + id_t lid, rid; + idtype_t lidtype, ridtype; + + /* + * define the target lwp + */ + if (pid == 0) { + ridtype = P_ALL; + lidtype = P_PID; + rid = 0; + lid = P_MYID; + } else { + if (lx_lpid_to_spair(pid, &pid, &lid) < 0) + return (set_errno(ESRCH)); + if (pid != curproc->p_pid) + return (set_errno(ESRCH)); + rid = 0; + ridtype = P_ALL; + lidtype = P_LWPID; + } + setprocset(procset, POP_AND, lidtype, lid, ridtype, rid); + + return (0); +} + +long +do_priocntlsys(int cmd, procset_t *procset, void *arg) +{ + return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0, + UIO_SYSSPACE)); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c new file mode 100644 index 0000000000..9fdb734805 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <vm/anon.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/zone.h> +#include <sys/time.h> + +struct lx_sysinfo { + int32_t si_uptime; /* Seconds since boot */ + uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */ + uint32_t si_totalram; /* Total memory size */ + uint32_t si_freeram; /* Available memory */ + uint32_t si_sharedram; /* Shared memory */ + uint32_t si_bufferram; /* Buffer memory */ + uint32_t si_totalswap; /* Total swap space */ + uint32_t si_freeswap; /* Avail swap space */ + uint16_t si_procs; /* Process count */ + uint32_t si_totalhigh; /* High memory size */ + uint32_t si_freehigh; /* Avail high memory */ + uint32_t si_mem_unit; /* Unit size of memory fields */ +}; + +long +lx_sysinfo(struct lx_sysinfo *sip) +{ + struct lx_sysinfo si; + hrtime_t birthtime; + zone_t *zone = curthread->t_procp->p_zone; + proc_t *init_proc; + + /* + * We don't record the time a zone was booted, so we use the + * birthtime of that zone's init process instead. + */ + mutex_enter(&pidlock); + init_proc = prfind(zone->zone_proc_initpid); + if (init_proc != NULL) + birthtime = init_proc->p_mstart; + else + birthtime = p0.p_mstart; + mutex_exit(&pidlock); + si.si_uptime = (gethrtime() - birthtime) / NANOSEC; + + /* + * We scale down the load in avenrun to allow larger load averages + * to fit in 32 bits. Linux doesn't, so we remove the scaling + * here. + */ + si.si_loads[0] = avenrun[0] << FSHIFT; + si.si_loads[1] = avenrun[1] << FSHIFT; + si.si_loads[2] = avenrun[2] << FSHIFT; + + /* + * In linux each thread looks like a process, so we conflate the + * two in this stat as well. + */ + si.si_procs = (int32_t)zone->zone_nlwps; + + /* + * If the maximum memory stat is less than 1^20 pages (i.e. 4GB), + * then we report the result in bytes. Otherwise we use pages. + * Once we start supporting >1TB x86 systems, we'll need a third + * option. 
+ */ + if (MAX(physmem, k_anoninfo.ani_max) < 1024 * 1024) { + si.si_totalram = physmem * PAGESIZE; + si.si_freeram = freemem * PAGESIZE; + si.si_totalswap = k_anoninfo.ani_max * PAGESIZE; + si.si_freeswap = k_anoninfo.ani_free * PAGESIZE; + si.si_mem_unit = 1; + } else { + si.si_totalram = physmem; + si.si_freeram = freemem; + si.si_totalswap = k_anoninfo.ani_max; + si.si_freeswap = k_anoninfo.ani_free; + si.si_mem_unit = PAGESIZE; + } + si.si_bufferram = 0; + si.si_sharedram = 0; + + /* + * These two stats refer to high physical memory. If an + * application running in a Linux zone cares about this, then + * either it or we are broken. + */ + si.si_totalhigh = 0; + si.si_freehigh = 0; + + if (copyout(&si, sip, sizeof (si)) != 0) + return (set_errno(EFAULT)); + return (0); +} diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c new file mode 100644 index 0000000000..f9751819f9 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/cpuvar.h> +#include <sys/archsystm.h> +#include <sys/proc.h> +#include <sys/brand.h> +#include <sys/lx_brand.h> +#include <sys/lx_ldt.h> + +long +lx_get_thread_area(struct ldt_info *inf) +{ + struct lx_lwp_data *jlwp = ttolxlwp(curthread); + struct ldt_info ldt_inf; + user_desc_t *dscrp; + int entry; + + if (fuword32(&inf->entry_number, (uint32_t *)&entry)) + return (set_errno(EFAULT)); + + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + dscrp = jlwp->br_tls + entry - GDT_TLSMIN; + + /* + * convert the solaris ldt to the linux format expected by the + * caller + */ + DESC_TO_LDT_INFO(dscrp, &ldt_inf); + ldt_inf.entry_number = entry; + + if (copyout(&ldt_inf, inf, sizeof (struct ldt_info))) + return (set_errno(EFAULT)); + + return (0); +} + +long +lx_set_thread_area(struct ldt_info *inf) +{ + struct lx_lwp_data *jlwp = ttolxlwp(curthread); + struct ldt_info ldt_inf; + user_desc_t *dscrp; + int entry; + int i; + + if (copyin(inf, &ldt_inf, sizeof (ldt_inf))) + return (set_errno(EFAULT)); + + entry = ldt_inf.entry_number; + if (entry == -1) { + /* + * find an empty entry in the tls for this thread + */ + for (i = 0, dscrp = jlwp->br_tls; + i < LX_TLSNUM; i++, dscrp++) + if (((unsigned long *)dscrp)[0] == 0 && + ((unsigned long *)dscrp)[1] == 0) + break; + + if (i < LX_TLSNUM) { + /* + * found one + */ + entry = i + GDT_TLSMIN; + if (suword32(&inf->entry_number, entry)) + return (set_errno(EFAULT)); + } else { + return (set_errno(ESRCH)); + } + } + + if (entry < GDT_TLSMIN || entry > GDT_TLSMAX) + return (set_errno(EINVAL)); + + /* + * convert the linux ldt info to standard intel descriptor + */ + dscrp = jlwp->br_tls + entry - GDT_TLSMIN; + + if (LDT_INFO_EMPTY(&ldt_inf)) { + ((unsigned long *)dscrp)[0] = 0; + ((unsigned long *)dscrp)[1] = 0; + } else { + LDT_INFO_TO_DESC(&ldt_inf, dscrp); + } + + /* + * 
update the gdt with the new descriptor + */ + kpreempt_disable(); + + for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++) + lx_set_gdt(GDT_TLSMIN + i, dscrp); + + kpreempt_enable(); + + return (0); +} diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c new file mode 100644 index 0000000000..a46ea3c979 --- /dev/null +++ b/usr/src/uts/common/brand/sn1/sn1_brand.c @@ -0,0 +1,288 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/errno.h> +#include <sys/exec.h> +#include <sys/kmem.h> +#include <sys/modctl.h> +#include <sys/model.h> +#include <sys/proc.h> +#include <sys/syscall.h> +#include <sys/systm.h> +#include <sys/thread.h> +#include <sys/cmn_err.h> +#include <sys/archsystm.h> + +#include <sys/machbrand.h> +#include <sys/brand.h> +#include "sn1_brand.h" + +char *sn1_emulation_table = NULL; + +void sn1_setbrand(proc_t *); +int sn1_getattr(zone_t *, int, void *, size_t *); +int sn1_setattr(zone_t *, int, void *, size_t); +int sn1_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); +void sn1_copy_procdata(proc_t *, proc_t *); +void sn1_proc_exit(struct proc *, klwp_t *); +void sn1_exec(); +int sn1_initlwp(klwp_t *); +void sn1_forklwp(klwp_t *, klwp_t *); +void sn1_freelwp(klwp_t *); +void sn1_lwpexit(klwp_t *); +int sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, + long *, int, caddr_t, cred_t *, int); + +/* sn1 brand */ +struct brand_ops sn1_brops = { + sn1_brandsys, + sn1_setbrand, + sn1_getattr, + sn1_setattr, + sn1_copy_procdata, + sn1_proc_exit, + sn1_exec, + lwp_setrval, + sn1_initlwp, + sn1_forklwp, + sn1_freelwp, + sn1_lwpexit, + sn1_elfexec +}; + +#ifdef sparc + +struct brand_mach_ops sn1_mops = { + sn1_brand_syscall_callback, + sn1_brand_syscall_callback +}; + +#else /* sparc */ + +#ifdef __amd64 + +struct brand_mach_ops sn1_mops = { + sn1_brand_sysenter_callback, + NULL, + sn1_brand_int91_callback, + sn1_brand_syscall_callback, + sn1_brand_syscall32_callback, + NULL +}; + +#else /* ! 
__amd64 */ + +struct brand_mach_ops sn1_mops = { + sn1_brand_sysenter_callback, + NULL, + NULL, + sn1_brand_syscall_callback, + NULL, + NULL +}; +#endif /* __amd64 */ + +#endif /* _sparc */ + +struct brand sn1_brand = { + BRAND_VER_1, + "sn1", + &sn1_brops, + &sn1_mops +}; + +static struct modlbrand modlbrand = { + &mod_brandops, "Solaris N-1 Brand %I%", &sn1_brand +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlbrand, NULL +}; + +void +sn1_setbrand(proc_t *p) +{ + p->p_brand_data = NULL; + p->p_brand = &sn1_brand; +} + +/* ARGSUSED */ +int +sn1_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) +{ + return (EINVAL); +} + +/* ARGSUSED */ +int +sn1_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) +{ + return (EINVAL); +} + +/* + * Get the address of the user-space system call handler from the user + * process and attach it to the proc structure. + */ +/*ARGSUSED*/ +int +sn1_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + proc_t *p = curproc; + *rval = 0; + + if (cmd == B_REGISTER) { + p->p_brand = &sn1_brand; + p->p_brand_data = (void *) arg1; + return (0); + } + + ASSERT(p->p_brand == &sn1_brand); + + return (EINVAL); +} + +/* + * Copy the per-process brand data from a parent proc to a child. In the + * sn1 brand, the only per-process state is the address of the user-space + * handler. 
+ */ +void +sn1_copy_procdata(proc_t *child, proc_t *parent) +{ + child->p_brand_data = parent->p_brand_data; +} + +/*ARGSUSED*/ +void +sn1_proc_exit(struct proc *p, klwp_t *l) +{ + p->p_brand_data = NULL; + p->p_brand = &native_brand; +} + +void +sn1_exec() +{ + curproc->p_brand_data = NULL; +} + +/*ARGSUSED*/ +int +sn1_initlwp(klwp_t *l) +{ + return (0); +} + +/*ARGSUSED*/ +void +sn1_forklwp(klwp_t *p, klwp_t *c) +{ +} + +/*ARGSUSED*/ +void +sn1_freelwp(klwp_t *l) +{ +} + +/*ARGSUSED*/ +void +sn1_lwpexit(klwp_t *l) +{ +} + +int +sn1_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, + int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, + int brand_action) +{ + args->brandname = "sn1"; + return ((args->execswp->exec_func)(vp, uap, args, idatap, level + 1, + execsz, setid, exec_file, cred, brand_action)); +} + + +int +_init(void) +{ + int err; + +#if defined(sparc) && !defined(DEBUG) + cmn_err(CE_WARN, "The sn1 brand is only supported on DEBUG kernels."); + return (ENOTSUP); +#else + + /* + * Set up the table indicating which system calls we want to + * interpose on. We should probably build this automatically from + * a list of system calls that is shared with the user-space + * library. + */ + sn1_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); + sn1_emulation_table[SYS_uname] = 1; + sn1_emulation_table[SYS_fork1] = 1; + + err = mod_install(&modlinkage); + if (err) { + cmn_err(CE_WARN, "Couldn't install brand module"); + kmem_free(sn1_emulation_table, NSYSCALL); + } + + return (err); +#endif +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int err; + + /* + * If there are any zones using this brand, we can't allow it to be + * unloaded. 
+ */ + if (brand_zone_count(&sn1_brand)) + return (EBUSY); + + kmem_free(sn1_emulation_table, NSYSCALL); + sn1_emulation_table = NULL; + + err = mod_remove(&modlinkage); + if (err) + cmn_err(CE_WARN, "Couldn't unload sn1 brand module"); + + return (err); +} diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.h b/usr/src/uts/common/brand/sn1/sn1_brand.h new file mode 100644 index 0000000000..a4efca189b --- /dev/null +++ b/usr/src/uts/common/brand/sn1/sn1_brand.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SN1_BRAND_H +#define _SN1_BRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +void sn1_brand_syscall_callback(void); +void sn1_brand_sysenter_callback(void); +void sn1_brand_int91_callback(void); +#ifdef __amd64 +void sn1_brand_syscall32_callback(void); +#endif + +extern struct brand *sbrand; + +#ifdef __cplusplus +} +#endif + +#endif /* _SN1_BRAND_H */ diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c index 4ee95e1728..b45da7bf26 100644 --- a/usr/src/uts/common/c2/audit_event.c +++ b/usr/src/uts/common/c2/audit_event.c @@ -158,6 +158,7 @@ static void aus_sigqueue(struct t_audit_data *); static void aus_p_online(struct t_audit_data *); static void aus_processor_bind(struct t_audit_data *); static void aus_inst_sync(struct t_audit_data *); +static void aus_brandsys(struct t_audit_data *); static void auf_accept(struct t_audit_data *, int, rval_t *); @@ -270,7 +271,7 @@ aui_null, AUE_FSTATFS, aus_fstatfs, /* 38 fstatfs */ auf_null, S2E_PUB, aui_null, AUE_SETPGRP, aus_null, /* 39 setpgrp */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 40 (loadable) was cxenix */ +aui_null, AUE_NULL, aus_null, /* 40 uucopystr */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 41 dup */ auf_null, 0, @@ -564,7 +565,7 @@ aui_null, AUE_NULL, aus_null, /* 175 llseek */ aui_null, AUE_INST_SYNC, aus_inst_sync, /* 176 (loadable) */ /* aus_inst_sync */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 177 (loadable) */ +aui_null, AUE_BRANDSYS, aus_brandsys, /* 177 brandsys */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 178 (loadable) */ auf_null, 0, @@ -718,8 +719,7 @@ aui_null, AUE_NULL, aus_null, /* 252 lwp_mutex_init */ auf_null, 0, aui_null, AUE_NULL, aus_null, /* 253 cladm */ auf_null, 0, -aui_null, AUE_NULL, aus_null, /* 254 (loadable) */ - /* was lwp_sigtimedwait */ +aui_null, AUE_NULL, aus_null, /* 254 uucopy */ auf_null, 0, aui_null, AUE_UMOUNT2, aus_umount2, /* 255 umount2 */ auf_null, 0 @@ 
-4706,6 +4706,40 @@ aus_inst_sync(struct t_audit_data *tad) /*ARGSUSED*/ static void +aus_brandsys(struct t_audit_data *tad) +{ + klwp_t *clwp = ttolwp(curthread); + + struct a { + long cmd; + long arg1; + long arg2; + long arg3; + long arg4; + long arg5; + long arg6; + } *uap = (struct a *)clwp->lwp_ap; + + au_uwrite(au_to_arg32(1, "cmd", (uint_t)uap->cmd)); +#ifdef _LP64 + au_uwrite(au_to_arg64(2, "arg1", (uint64_t)uap->arg1)); + au_uwrite(au_to_arg64(3, "arg2", (uint64_t)uap->arg2)); + au_uwrite(au_to_arg64(4, "arg3", (uint64_t)uap->arg3)); + au_uwrite(au_to_arg64(5, "arg4", (uint64_t)uap->arg4)); + au_uwrite(au_to_arg64(6, "arg5", (uint64_t)uap->arg5)); + au_uwrite(au_to_arg64(7, "arg6", (uint64_t)uap->arg6)); +#else + au_uwrite(au_to_arg32(2, "arg1", (uint32_t)uap->arg1)); + au_uwrite(au_to_arg32(3, "arg2", (uint32_t)uap->arg2)); + au_uwrite(au_to_arg32(4, "arg3", (uint32_t)uap->arg3)); + au_uwrite(au_to_arg32(5, "arg4", (uint32_t)uap->arg4)); + au_uwrite(au_to_arg32(6, "arg5", (uint32_t)uap->arg5)); + au_uwrite(au_to_arg32(7, "arg6", (uint32_t)uap->arg6)); +#endif +} + +/*ARGSUSED*/ +static void aus_p_online(struct t_audit_data *tad) { struct a { diff --git a/usr/src/uts/common/c2/audit_kevents.h b/usr/src/uts/common/c2/audit_kevents.h index 942887ae72..4a2e5b27db 100644 --- a/usr/src/uts/common/c2/audit_kevents.h +++ b/usr/src/uts/common/c2/audit_kevents.h @@ -330,9 +330,10 @@ extern "C" { #define AUE_MODADDPRIV 291 /* =ad modctl(2) */ #define AUE_CRYPTOADM 292 /* =as kernel cryptographic framework */ #define AUE_CONFIGKSSL 293 /* =as kernel SSL */ +#define AUE_BRANDSYS 294 /* =ot */ /* NOTE: update MAX_KEVENTS below if events are added. 
*/ -#define MAX_KEVENTS 293 +#define MAX_KEVENTS 294 #ifdef __cplusplus diff --git a/usr/src/uts/common/disp/class.c b/usr/src/uts/common/disp/class.c index b5b2674d89..8e83a839ee 100644 --- a/usr/src/uts/common/disp/class.c +++ b/usr/src/uts/common/disp/class.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -325,7 +324,8 @@ parmsset(pcparms_t *parmsp, kthread_id_t targtp) * The parameters are specified by a key. */ int -vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp) +vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp, + uio_seg_t seg) { char *clname; @@ -348,7 +348,8 @@ vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp) return (EINVAL); clname = sclass[prmsp->pc_cid].cl_name; - if (copyout(clname, (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm, + if ((seg == UIO_USERSPACE ? 
copyout : kcopy)(clname, + (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm, MIN(strlen(clname) + 1, PC_CLNMSZ))) return (EFAULT); diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c index 3c1a271155..3bb90cf1fa 100644 --- a/usr/src/uts/common/disp/priocntl.c +++ b/usr/src/uts/common/disp/priocntl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -81,7 +80,7 @@ struct stprmargs { * between the 64-bit kernel ABI and the 32-bit user ABI. */ static int -copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap) +copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg) { pc_vaparms32_t vaparms32; pc_vaparm32_t *src; @@ -90,7 +89,8 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap) ASSERT(get_udatamodel() == DATAMODEL_ILP32); - if (copyin(arg, &vaparms32, sizeof (vaparms32))) + if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32, + sizeof (vaparms32))) return (EFAULT); vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt; @@ -104,13 +104,13 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap) return (0); } -#define COPYIN_VAPARMS(arg, vap, size) \ +#define COPYIN_VAPARMS(arg, vap, size, seg) \ (get_udatamodel() == DATAMODEL_NATIVE ? 
\ - copyin(arg, vap, size) : copyin_vaparms32(arg, vap)) + (*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg)) #else -#define COPYIN_VAPARMS(arg, vap, size) copyin(arg, vap, size) +#define COPYIN_VAPARMS(arg, vap, size, seg) (*copyinfn)(arg, vap, size) #endif @@ -123,7 +123,8 @@ extern int threadcmp(struct pcmpargs *, kthread_id_t); * The priocntl system call. */ long -priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) +priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg, + caddr_t arg2, uio_seg_t seg) { pcinfo_t pcinfo; pcparms_t pcparms; @@ -144,6 +145,8 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) int rv = 0; pid_t saved_pid; id_t classid; + int (*copyinfn)(const void *, void *, size_t); + int (*copyoutfn)(const void *, void *, size_t); /* * First just check the version number. Right now there is only @@ -157,6 +160,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) if (pc_version != PC_VERSION) return (set_errno(EINVAL)); + if (seg == UIO_USERSPACE) { + copyinfn = copyin; + copyoutfn = copyout; + } else { + copyinfn = kcopy; + copyoutfn = kcopy; + } + switch (cmd) { case PC_GETCID: /* @@ -171,7 +182,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) rv = loaded_classes; break; } else { - if (copyin(arg, &pcinfo, sizeof (pcinfo))) + if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo))) return (set_errno(EFAULT)); } @@ -204,7 +215,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) if (error) return (set_errno(error)); - if (copyout(&pcinfo, arg, sizeof (pcinfo))) + if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo))) return (set_errno(EFAULT)); rv = loaded_classes; @@ -221,7 +232,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) rv = loaded_classes; break; } else { - if (copyin(arg, &pcinfo, sizeof (pcinfo))) + if ((*copyinfn)(arg, &pcinfo, 
sizeof (pcinfo))) return (set_errno(EFAULT)); } @@ -245,7 +256,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) if (error) return (set_errno(error)); - if (copyout(&pcinfo, arg, sizeof (pcinfo))) + if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo))) return (set_errno(EFAULT)); rv = loaded_classes; @@ -259,13 +270,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) * because it's done on a per thread basis by parmsset(). */ if (cmd == PC_SETPARMS) { - if (copyin(arg, &pcparms, sizeof (pcparms))) + if ((*copyinfn)(arg, &pcparms, sizeof (pcparms))) return (set_errno(EFAULT)); error = parmsin(&pcparms, NULL); } else { - if (copyin(arg, clname, PC_CLNMSZ) || - COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms))) + if ((*copyinfn)(arg, clname, PC_CLNMSZ) || + COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms), + seg)) return (set_errno(EFAULT)); clname[PC_CLNMSZ-1] = '\0'; @@ -281,7 +293,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) /* * Get the procset from the user. 
*/ - if (copyin(psp, &procset, sizeof (procset))) + if ((*copyinfn)(psp, &procset, sizeof (procset))) return (set_errno(EFAULT)); /* @@ -372,11 +384,11 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) case PC_GETPARMS: case PC_GETXPARMS: if (cmd == PC_GETPARMS) { - if (copyin(arg, &pcparms, sizeof (pcparms))) + if ((*copyinfn)(arg, &pcparms, sizeof (pcparms))) return (set_errno(EFAULT)); } else { if (arg != NULL) { - if (copyin(arg, clname, PC_CLNMSZ)) + if ((*copyinfn)(arg, clname, PC_CLNMSZ)) return (set_errno(EFAULT)); clname[PC_CLNMSZ-1] = '\0'; @@ -385,7 +397,9 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) return (set_errno(EINVAL)); } else pcparms.pc_cid = PC_CLNULL; - if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms))) + + if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms), + seg)) return (set_errno(EFAULT)); } @@ -393,7 +407,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL)) return (set_errno(EINVAL)); - if (copyin(psp, &procset, sizeof (procset))) + if ((*copyinfn)(psp, &procset, sizeof (procset))) return (set_errno(EFAULT)); /* @@ -590,9 +604,10 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) return (set_errno(error)); if (cmd == PC_GETPARMS) { - if (copyout(&pcparms, arg, sizeof (pcparms))) + if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms))) return (set_errno(EFAULT)); - } else if ((error = vaparmsout(arg, &pcparms, &vaparms)) != 0) + } else if ((error = vaparmsout(arg, &pcparms, &vaparms, + seg)) != 0) return (set_errno(error)); /* @@ -603,14 +618,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) case PC_ADMIN: if (get_udatamodel() == DATAMODEL_NATIVE) { - if (copyin(arg, &pcadmin, sizeof (pcadmin_t))) + if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t))) return (set_errno(EFAULT)); #ifdef _SYSCALL32_IMPL } else { /* pcadmin 
struct from ILP32 callers */ pcadmin32_t pcadmin32; - if (copyin(arg, &pcadmin32, sizeof (pcadmin32_t))) + if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t))) return (set_errno(EFAULT)); pcadmin.pc_cid = pcadmin32.pc_cid; pcadmin.pc_cladmin = (caddr_t)(uintptr_t) @@ -632,7 +647,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) break; case PC_GETPRIRANGE: - if (copyin(arg, &pcpri, sizeof (pcpri_t))) + if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t))) return (set_errno(EFAULT)); if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0) @@ -640,7 +655,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri); if (!error) { - if (copyout(&pcpri, arg, sizeof (pcpri))) + if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri))) return (set_errno(EFAULT)); } break; @@ -649,14 +664,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) /* * Get pcnice and procset structures from the user. */ - if (copyin(arg, &pcnice, sizeof (pcnice)) || - copyin(psp, &procset, sizeof (procset))) + if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) || + (*copyinfn)(psp, &procset, sizeof (procset))) return (set_errno(EFAULT)); error = donice(&procset, &pcnice); if (!error && (pcnice.pc_op == PC_GETNICE)) { - if (copyout(&pcnice, arg, sizeof (pcnice))) + if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice))) return (set_errno(EFAULT)); } break; @@ -684,6 +699,12 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) return (error ? 
(set_errno(error)) : rv); } +long +priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2) +{ + return (priocntl_common(pc_version, psp, cmd, arg, arg2, + UIO_USERSPACE)); +} /* * The proccmp() function is part of the implementation of the @@ -844,7 +865,7 @@ setparms(proc_t *targpp, struct stprmargs *stprmp) return (0); } -static int +int setthreadnice(pcnice_t *pcnice, kthread_t *tp) { int error = 0; @@ -889,7 +910,7 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp) return (error); } -static int +int setprocnice(proc_t *pp, pcnice_t *pcnice) { kthread_t *tp; diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index 91b4db8103..5f352b2203 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -64,6 +64,7 @@ #include <sys/spl.h> #include <sys/copyops.h> #include <sys/rctl.h> +#include <sys/brand.h> #include <sys/pool.h> #include <sys/zone.h> #include <sys/tsol/label.h> @@ -186,6 +187,7 @@ thread_init(void) rctl_init(); project_init(); + brand_init(); zone_init(); task_init(); tcache_init(); diff --git a/usr/src/uts/common/disp/ts.c b/usr/src/uts/common/disp/ts.c index a190297100..738a2e47b4 100644 --- a/usr/src/uts/common/disp/ts.c +++ b/usr/src/uts/common/disp/ts.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1269,14 +1268,14 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) tspp->ts_flags |= TSIASET; thread_unlock(tx); } - TTY_HOLD(p->p_sessp); + mutex_enter(&p->p_sessp->s_lock); sess_held = 1; if ((pid == sid) && (p->p_sessp->s_vp != NULL) && ((stp = p->p_sessp->s_vp->v_stream) != NULL)) { if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) { pgid = stp->sd_pgidp->pid_id; sess_held = 0; - TTY_RELE(p->p_sessp); + mutex_exit(&p->p_sessp->s_lock); if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) { off = 0; @@ -1292,7 +1291,7 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) } } if (sess_held) - TTY_RELE(p->p_sessp); + mutex_exit(&p->p_sessp->s_lock); thread_lock(tx); @@ -2130,14 +2129,14 @@ ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid) * that do not have focus and are changing the process group * attatched to the tty, e.g. a process that is exiting */ - TTY_HOLD(leader->p_sessp); + mutex_enter(&leader->p_sessp->s_lock); if (!(tspp->ts_flags & TSIASET) || (leader->p_sessp->s_vp == NULL) || (leader->p_sessp->s_vp->v_stream == NULL)) { - TTY_RELE(leader->p_sessp); + mutex_exit(&leader->p_sessp->s_lock); return; } - TTY_RELE(leader->p_sessp); + mutex_exit(&leader->p_sessp->s_lock); /* * If we're already holding the leader's p_lock, we should use diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c index 5c7b6b1773..4e814b339b 100644 --- a/usr/src/uts/common/exec/aout/aout.c +++ b/usr/src/uts/common/exec/aout/aout.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -56,19 +55,19 @@ static int aoutexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, int setid, - caddr_t exec_file, cred_t *cred); + caddr_t exec_file, cred_t *cred, int brand_action); static int get_aout_head(struct vnode **vpp, struct exdata *edp, long *execsz, int *isdyn); static int aoutcore(vnode_t *vp, proc_t *pp, cred_t *credp, rlim64_t rlimit, int sig, core_content_t content); #ifdef _LP64 extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - long *, int, caddr_t, cred_t *); + long *, int, caddr_t, cred_t *, int); extern int elf32core(vnode_t *, proc_t *, cred_t *, rlim64_t, int, core_content_t); #else /* _LP64 */ extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - long *, int, caddr_t, cred_t *); + long *, int, caddr_t, cred_t *, int); extern int elfcore(vnode_t *, proc_t *, cred_t *, rlim64_t, int, core_content_t); #endif /* _LP64 */ @@ -141,7 +140,7 @@ _info(struct modinfo *modinfop) static int aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args, struct intpdata *idatap, int level, long *execsz, int setid, - caddr_t exec_file, cred_t *cred) + caddr_t exec_file, cred_t *cred, int brand_action) { int error; struct exdata edp, edpout; @@ -201,10 +200,10 @@ aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args, } #ifdef _LP64 if (error = elf32exec(nvp, uap, args, idatap, level, execsz, - setid, exec_file, cred)) + setid, exec_file, cred, brand_action)) #else /* _LP64 */ if (error = elfexec(nvp, uap, args, idatap, level, execsz, - setid, exec_file, cred)) + setid, exec_file, cred, brand_action)) #endif /* _LP64 */ { VN_RELE(nvp); diff --git 
a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index 33e3cc9b8e..6508cdae85 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -62,8 +62,11 @@ #include <sys/shm_impl.h> #include <sys/archsystm.h> #include <sys/fasttrap.h> +#include <sys/brand.h> #include "elf_impl.h" +#include <sys/sdt.h> + extern int at_flags; #define ORIGIN_STR "ORIGIN" @@ -77,7 +80,7 @@ static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *, static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *); static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t, Phdr **, Phdr **, Phdr **, Phdr **, Phdr *, - caddr_t *, caddr_t *, intptr_t *, size_t, long *, size_t *); + caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *); typedef enum { STR_CTF, @@ -160,10 +163,83 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base) return (0); } +/* + * Map in the executable pointed to by vp. Returns 0 on success. + */ +int +mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Elf32_Addr *uphdr_vaddr, + intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase, + caddr_t *brkbase, size_t *brksize) +{ + size_t len; + struct vattr vat; + caddr_t phdrbase = NULL; + ssize_t phdrsize; + int nshdrs, shstrndx, nphdrs; + int error = 0; + Phdr *uphdr = NULL; + Phdr *junk = NULL; + Phdr *dynphdr = NULL; + Phdr *dtrphdr = NULL; + uintptr_t lddata; + long execsz; + intptr_t minaddr; + + if (error = execpermissions(vp, &vat, args)) { + uprintf("%s: Cannot execute %s\n", exec_file, args->pathname); + return (error); + } + + if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx, + &nphdrs)) != 0 || + (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase, + &phdrsize)) != 0) { + uprintf("%s: Cannot read %s\n", exec_file, args->pathname); + return (error); + } + + if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) { + uprintf("%s: Nothing to load in %s", exec_file, args->pathname); + 
kmem_free(phdrbase, phdrsize); + return (ENOEXEC); + } + + if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr, + &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr, + len, &execsz, brksize)) { + uprintf("%s: Cannot map %s\n", exec_file, args->pathname); + kmem_free(phdrbase, phdrsize); + return (error); + } + + /* + * Inform our caller if the executable needs an interpreter. + */ + *interp = (dynphdr == NULL) ? 0 : 1; + + /* + * If this is a statically linked executable, voffset should indicate + * the address of the executable itself (it normally holds the address + * of the interpreter). + */ + if (ehdr->e_type == ET_EXEC && *interp == 0) + *voffset = minaddr; + + if (uphdr != NULL) { + *uphdr_vaddr = uphdr->p_vaddr; + } else { + *uphdr_vaddr = (Elf32_Addr)-1; + } + + kmem_free(phdrbase, phdrsize); + return (error); +} + /*ARGSUSED*/ int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, - int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred) + int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, + int brand_action) { caddr_t phdrbase = NULL; caddr_t bssbase = 0; @@ -175,10 +251,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, ssize_t resid; int fd = -1; intptr_t voffset; - Phdr *dyphdr = NULL; - Phdr *stphdr = NULL; - Phdr *uphdr = NULL; - Phdr *junk = NULL; + Phdr *dyphdr = NULL; + Phdr *stphdr = NULL; + Phdr *uphdr = NULL; + Phdr *junk = NULL; size_t len; ssize_t phdrsize; int postfixsize = 0; @@ -189,6 +265,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int hasu = 0; int hasauxv = 0; int hasdy = 0; + int branded = 0; struct proc *p = ttoproc(curthread); struct user *up = PTOU(p); @@ -209,6 +286,13 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64); + if ((level < 2) && + (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { + return 
(BROP(p)->b_elfexec(vp, uap, args, + idatap, level + 1, execsz, setid, exec_file, cred, + brand_action)); + } + bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP); ehdrp = &bigwad->ehdr; dlnp = bigwad->dl_name; @@ -353,6 +437,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, } else args->auxsize = 0; + /* + * If this binary is using an emulator, we need to add an + * AT_SUN_EMULATOR aux entry. + */ + if (args->emulator != NULL) + args->auxsize += sizeof (aux_entry_t); + + if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { + branded = 1; + /* + * We will be adding 2 entries to the aux vector. One for + * the branded binary's phdr and one for the brandname. + */ + args->auxsize += 2 * sizeof (aux_entry_t); + } + aux = bigwad->elfargs; /* * Move args to the user's stack. @@ -364,6 +464,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, } goto out; } + /* we're single threaded after this point */ /* * If this is an ET_DYN executable (shared object), @@ -377,8 +478,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, dtrphdr = NULL; if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr, - &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, len, - execsz, &brksize)) != 0) + &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL, + len, execsz, &brksize)) != 0) goto bad; if (uphdr != NULL && dyphdr == NULL) @@ -542,8 +643,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, dtrphdr = NULL; error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk, - &junk, &dtrphdr, NULL, NULL, NULL, &voffset, len, execsz, - NULL); + &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len, + execsz, NULL); if (error || junk != NULL) { VN_RELE(nvp); uprintf("%s: Cannot map %s\n", exec_file, dlnp); @@ -601,6 +702,16 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, #else ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap) #endif + if (branded) { + 
/* + * Reserve space for the brand-private aux vector entry, + * and record the user addr of that space. + */ + args->brand_auxp = (auxv32_t *)((char *)args->stackend + + ((char *)&aux->a_type - (char *)bigwad->elfargs)); + ADDAUX(aux, AT_SUN_BRAND_PHDR, 0) + } + ADDAUX(aux, AT_NULL, 0) postfixsize = (char *)aux - (char *)bigwad->elfargs; ASSERT(postfixsize == args->auxsize); @@ -639,6 +750,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, /* * Copy auxv to the process's user structure for use by /proc. + * If this is a branded process, the brand's exec routine will + * copy it's private entries to the user structure later. It + * relies on the fact that the blank entries are at the end. */ num_auxv = postfixsize / sizeof (aux_entry_t); ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t)); @@ -968,6 +1082,7 @@ mapelfexec( caddr_t *bssbase, caddr_t *brkbase, intptr_t *voffset, + intptr_t *minaddr, size_t len, long *execsz, size_t *brksize) @@ -980,6 +1095,7 @@ mapelfexec( int page; off_t offset; int hsize = ehdr->e_phentsize; + caddr_t mintmp = (caddr_t)-1; if (ehdr->e_type == ET_DYN) { /* @@ -1010,6 +1126,14 @@ mapelfexec( prot |= PROT_EXEC; addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset); + + /* + * Keep track of the segment with the lowest starting + * address. 
+ */ + if (addr < mintmp) + mintmp = addr; + zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz; offset = phdr->p_offset; @@ -1110,6 +1234,12 @@ mapelfexec( } phdr = (Phdr *)((caddr_t)phdr + hsize); } + + if (minaddr != NULL) { + ASSERT(mintmp != (caddr_t)-1); + *minaddr = (intptr_t)mintmp; + } + return (0); bad: if (error == 0) @@ -1850,13 +1980,14 @@ static struct execsw esw = { }; static struct modlexec modlexec = { - &mod_execops, "exec module for elf", &esw + &mod_execops, "exec module for elf %I%", &esw }; #ifdef _LP64 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int level, long *execsz, - int setid, caddr_t exec_file, cred_t *cred); + int setid, caddr_t exec_file, cred_t *cred, + int brand_action); extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig, core_content_t content); diff --git a/usr/src/uts/common/exec/elf/elf_impl.h b/usr/src/uts/common/exec/elf/elf_impl.h index 52094e3794..010d5e6256 100644 --- a/usr/src/uts/common/exec/elf/elf_impl.h +++ b/usr/src/uts/common/exec/elf/elf_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -80,6 +79,7 @@ typedef struct { #define elfexec elf32exec #define elfnote elf32note #define elfcore elf32core +#define mapexec_brand mapexec32_brand #define setup_note_header setup_note_header32 #define write_elfnotes write_elfnotes32 #define setup_old_note_header setup_old_note_header32 diff --git a/usr/src/uts/common/exec/intp/intp.c b/usr/src/uts/common/exec/intp/intp.c index 6c6c98246d..4d5c04dfd4 100644 --- a/usr/src/uts/common/exec/intp/intp.c +++ b/usr/src/uts/common/exec/intp/intp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -45,6 +44,7 @@ #include <sys/disp.h> #include <sys/exec.h> #include <sys/kmem.h> +#include <sys/note.h> /* * This is the loadable module wrapper. 
@@ -166,8 +166,10 @@ intpexec( long *execsz, int setid, caddr_t exec_file, - struct cred *cred) + struct cred *cred, + int brand_action) { + _NOTE(ARGUNUSED(brand_action)) vnode_t *nvp; int error = 0; struct intpdata idata; @@ -223,8 +225,8 @@ intpexec( args->fname = devfd; } - error = gexec(&nvp, uap, args, &idata, ++level, - execsz, exec_file, cred); + error = gexec(&nvp, uap, args, &idata, ++level, execsz, exec_file, cred, + EBA_NONE); done: VN_RELE(nvp); args->pathname = opath; diff --git a/usr/src/uts/common/exec/java/java.c b/usr/src/uts/common/exec/java/java.c index 0e8c3996e7..bcf61453c9 100644 --- a/usr/src/uts/common/exec/java/java.c +++ b/usr/src/uts/common/exec/java/java.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -88,7 +87,7 @@ char *jexec_arg = "-jar"; static int javaexec(vnode_t *vp, struct execa *uap, struct uarg *args, struct intpdata *idatap, int level, long *execsz, int setid, - caddr_t execfile, cred_t *cred) + caddr_t execfile, cred_t *cred, int brand_action) { struct intpdata idata; int error; @@ -162,8 +161,8 @@ javaexec(vnode_t *vp, struct execa *uap, struct uarg *args, args->pathname = resolvepn.pn_path; /* don't free resolvepn until we are done with args */ pn_free(&lookpn); - error = gexec(&nvp, - uap, args, &idata, level + 1, execsz, execfile, cred); + error = gexec(&nvp, uap, args, &idata, level + 1, execsz, execfile, + cred, EBA_NONE); VN_RELE(nvp); args->pathname = opath; pn_free(&resolvepn); diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c index 3ee72c9124..8767999322 100644 --- a/usr/src/uts/common/fs/fifofs/fifosubr.c +++ b/usr/src/uts/common/fs/fifofs/fifosubr.c @@ -304,7 +304,8 @@ static void fifo_reinit_vp(vnode_t *vp) { vn_reinit(vp); vp->v_type = VFIFO; - vp->v_flag = VNOMAP | VNOSWAP; + vp->v_flag &= VROOT; + vp->v_flag |= VNOMAP | VNOSWAP; } /* @@ -470,6 +471,7 @@ fifovp(vnode_t *vp, cred_t *crp) fifo_reinit_vp(newvp); newvp->v_vfsp = vp->v_vfsp; newvp->v_rdev = vp->v_rdev; + newvp->v_flag |= (vp->v_flag & VROOT); fifoinsert(fnp); mutex_exit(&ftable_lock); diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c index cab88019ff..34f731af1e 100644 --- a/usr/src/uts/common/fs/fifofs/fifovnops.c +++ b/usr/src/uts/common/fs/fifofs/fifovnops.c @@ -77,6 +77,8 @@ static int fifo_setattr(vnode_t *, vattr_t *, int, cred_t *, caller_context_t *); static int fifo_realvp(vnode_t *, vnode_t **); static int fifo_access(vnode_t *, int, int, cred_t *); +static int fifo_create(struct vnode *, char *, vattr_t *, enum vcexcl, + int, struct vnode **, struct cred *, int); static int fifo_fid(vnode_t *, fid_t *); static int fifo_fsync(vnode_t *, int, cred_t *); static int 
fifo_seek(vnode_t *, offset_t, offset_t *); @@ -116,6 +118,7 @@ const fs_operation_def_t fifo_vnodeops_template[] = { VOPNAME_GETATTR, fifo_getattr, VOPNAME_SETATTR, fifo_setattr, VOPNAME_ACCESS, fifo_access, + VOPNAME_CREATE, fifo_create, VOPNAME_FSYNC, fifo_fsync, VOPNAME_INACTIVE, (fs_generic_func_p) fifo_inactive, VOPNAME_FID, fifo_fid, @@ -1542,6 +1545,27 @@ fifo_access(vnode_t *vp, int mode, int flags, cred_t *crp) } /* + * This can be called if creat or an open with O_CREAT is done on the root + * of a lofs mount where the mounted entity is a fifo. + */ +/*ARGSUSED*/ +static int +fifo_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl, + int mode, struct vnode **vpp, struct cred *cr, int flag) +{ + int error; + + ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0'); + if (excl == NONEXCL) { + if (mode && (error = fifo_access(dvp, mode, 0, cr))) + return (error); + VN_HOLD(dvp); + return (0); + } + return (EEXIST); +} + +/* * If shadowing a vnode, apply the VOP_FSYNC to it. * Otherwise, return 0. 
*/ diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c index 9278fe03da..2a6505ccf9 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c @@ -1451,7 +1451,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, } else mutex_exit(&mi->mi_lock); - if (*doqueue && curproc->p_sessp->s_vp != NULL) { + if (*doqueue && nfs_has_ctty()) { *doqueue = 0; if (!(mi->mi_flags & MI4_NOPRINT)) nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi, @@ -1481,7 +1481,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, bufp = clnt_sperror(client, svp->sv_hostname); zprintf(zoneid, "NFS%d %s failed for %s\n", mi->mi_vers, mi->mi_rfsnames[which], bufp); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) { if (!(mi->mi_flags & MI4_NOPRINT)) { uprintf("NFS%d %s failed for %s\n", mi->mi_vers, mi->mi_rfsnames[which], @@ -1494,7 +1494,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, "NFS %s failed for server %s: error %d (%s)\n", mi->mi_rfsnames[which], svp->sv_hostname, status, clnt_sperrno(status)); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) { if (!(mi->mi_flags & MI4_NOPRINT)) { uprintf( "NFS %s failed for server %s: error %d (%s)\n", diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c index 05e70935be..bf1beb1275 100644 --- a/usr/src/uts/common/fs/nfs/nfs_subr.c +++ b/usr/src/uts/common/fs/nfs/nfs_subr.c @@ -1235,7 +1235,7 @@ failoverretry: #endif } else mutex_exit(&mi->mi_lock); - if (*douprintf && curproc->p_sessp->s_vp != NULL) { + if (*douprintf && nfs_has_ctty()) { *douprintf = 0; if (!(mi->mi_flags & MI_NOPRINT)) #ifdef DEBUG @@ -1292,7 +1292,7 @@ failoverretry: bufp = clnt_sperror(client, svp->sv_hostname); zprintf(zoneid, "NFS%d %s failed for %s\n", mi->mi_vers, mi->mi_rfsnames[which], bufp); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) 
{ if (!(mi->mi_flags & MI_NOPRINT)) { uprintf("NFS%d %s failed for %s\n", mi->mi_vers, mi->mi_rfsnames[which], @@ -1305,7 +1305,7 @@ failoverretry: "NFS %s failed for server %s: error %d (%s)\n", mi->mi_rfsnames[which], svp->sv_hostname, status, clnt_sperrno(status)); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) { if (!(mi->mi_flags & MI_NOPRINT)) { uprintf( "NFS %s failed for server %s: error %d (%s)\n", @@ -1821,7 +1821,7 @@ failoverretry: #endif } else mutex_exit(&mi->mi_lock); - if (*douprintf && curproc->p_sessp->s_vp != NULL) { + if (*douprintf && nfs_has_ctty()) { *douprintf = 0; if (!(mi->mi_flags & MI_NOPRINT)) #ifdef DEBUG @@ -1886,7 +1886,7 @@ failoverretry: bufp = clnt_sperror(client, svp->sv_hostname); zprintf(zoneid, "NFS_ACL%d %s failed for %s\n", mi->mi_vers, mi->mi_aclnames[which], bufp); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) { if (!(mi->mi_flags & MI_NOPRINT)) { uprintf("NFS_ACL%d %s failed for %s\n", mi->mi_vers, mi->mi_aclnames[which], @@ -1899,7 +1899,7 @@ failoverretry: "NFS %s failed for server %s: error %d (%s)\n", mi->mi_aclnames[which], svp->sv_hostname, status, clnt_sperrno(status)); - if (curproc->p_sessp->s_vp != NULL) { + if (nfs_has_ctty()) { if (!(mi->mi_flags & MI_NOPRINT)) uprintf( "NFS %s failed for server %s: error %d (%s)\n", @@ -5117,3 +5117,13 @@ out: label_rele(zlabel); return (retv); } + +boolean_t +nfs_has_ctty(void) +{ + boolean_t rv; + mutex_enter(&curproc->p_splock); + rv = (curproc->p_sessp->s_vp != NULL); + mutex_exit(&curproc->p_splock); + return (rv); +} diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c index 6a2d6f73d0..24c7ffedab 100644 --- a/usr/src/uts/common/fs/specfs/specvnops.c +++ b/usr/src/uts/common/fs/specfs/specvnops.c @@ -680,13 +680,16 @@ streams_open: /* STREAMS devices don't have a size */ sp->s_size = csp->s_size = 0; - /* - * try to allocate it as a controlling terminal - */ - if ((stp->sd_flag & STRISTTY) && 
!(flag & FNOCTTY)) - stralloctty(stp); + if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY)) + return (0); - return (0); + /* try to allocate it as a controlling terminal */ + if (strctty(stp) != EINTR) + return (0); + + /* strctty() was interrupted by a signal */ + (void) spec_close(vp, flag, 1, 0, cr); + return (EINTR); } /* diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c index 7c64462314..49bde7abeb 100644 --- a/usr/src/uts/common/fs/vnode.c +++ b/usr/src/uts/common/fs/vnode.c @@ -943,7 +943,7 @@ top: * Do remaining checks for FNOFOLLOW and FNOLINKS. */ if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) { - error = EINVAL; + error = ELOOP; goto out; } if (filemode & FNOLINKS) { diff --git a/usr/src/uts/common/io/gentty.c b/usr/src/uts/common/io/gentty.c index 9cb3e23b87..431e80245d 100644 --- a/usr/src/uts/common/io/gentty.c +++ b/usr/src/uts/common/io/gentty.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 1990-1992,1996,1998-2003 Sun Microsystems, Inc. + * Copyright 2006 Sun Microsystems, Inc. * All rights reserved. * Use is subject to license terms. 
*/ @@ -198,17 +197,20 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr) { dev_t ttyd; vnode_t *ttyvp; - sess_t *sp = curproc->p_sessp; + sess_t *sp; int error; - if ((ttyd = sp->s_dev) == NODEV) + if ((sp = tty_hold()) == NULL) + return (EINTR); + + if (sp->s_dev == NODEV) { + tty_rele(sp); return (ENXIO); - TTY_HOLD(sp); - if ((ttyvp = sp->s_vp) == NULL) { - TTY_RELE(sp); - return (EIO); } + ttyd = sp->s_dev; + ttyvp = sp->s_vp; + /* * Open the control terminal. The control terminal may be * opened multiple times and it is closed in freectty(). @@ -237,10 +239,12 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr) ASSERT(vn_matchops(ttyvp, spec_getvnodeops())); csp = VTOS(VTOS(ttyvp)->s_commonvp); mutex_enter(&csp->s_lock); + ASSERT(csp->s_count > 1); csp->s_count--; mutex_exit(&csp->s_lock); } - TTY_RELE(sp); + + tty_rele(sp); return (error); } @@ -255,41 +259,41 @@ syclose(dev_t dev, int flag, int otyp, struct cred *cr) int syread(dev_t dev, struct uio *uiop, struct cred *cr) { - vnode_t *ttyvp; - sess_t *sp = curproc->p_sessp; + sess_t *sp; int error; - if (sp->s_dev == NODEV) + if ((sp = tty_hold()) == NULL) + return (EINTR); + + if (sp->s_dev == NODEV) { + tty_rele(sp); return (ENXIO); - TTY_HOLD(sp); - if ((ttyvp = sp->s_vp) == NULL) { - TTY_RELE(sp); - return (EIO); } - error = VOP_READ(ttyvp, uiop, 0, cr, NULL); - TTY_RELE(sp); - return (error); + error = VOP_READ(sp->s_vp, uiop, 0, cr, NULL); + + tty_rele(sp); + return (error); } /* ARGSUSED */ int sywrite(dev_t dev, struct uio *uiop, struct cred *cr) { - vnode_t *ttyvp; - sess_t *sp = curproc->p_sessp; + sess_t *sp; int error; - if (sp->s_dev == NODEV) + if ((sp = tty_hold()) == NULL) + return (EINTR); + + if (sp->s_dev == NODEV) { + tty_rele(sp); return (ENXIO); - TTY_HOLD(sp); - if ((ttyvp = sp->s_vp) == NULL) { - TTY_RELE(sp); - return (EIO); } - error = VOP_WRITE(ttyvp, uiop, 0, cr, NULL); - TTY_RELE(sp); + error = VOP_WRITE(sp->s_vp, uiop, 0, cr, NULL); + + tty_rele(sp); return 
(error); } @@ -299,19 +303,32 @@ int syioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cr, int *rvalp) { - vnode_t *ttyvp; - sess_t *sp = curproc->p_sessp; + sess_t *sp; int error; - if (sp->s_dev == NODEV) + if (cmd == TIOCNOTTY) { + /* + * we can't allow this ioctl. the reason is that it + * attempts to remove the ctty for a session. to do + * this the ctty can't be in use but we grab a hold on + * the current ctty (via tty_hold) to perform this ioctl. + * if we were to allow this ioctl to pass through we + * would deadlock with ourselves. + */ + return (EINVAL); + } + + if ((sp = tty_hold()) == NULL) + return (EINTR); + + if (sp->s_dev == NODEV) { + tty_rele(sp); return (ENXIO); - TTY_HOLD(sp); - if ((ttyvp = sp->s_vp) == NULL) { - TTY_RELE(sp); - return (EIO); } - error = VOP_IOCTL(ttyvp, cmd, arg, mode, cr, rvalp); - TTY_RELE(sp); + + error = VOP_IOCTL(sp->s_vp, cmd, arg, mode, cr, rvalp); + + tty_rele(sp); return (error); } @@ -322,18 +339,19 @@ int sypoll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { - vnode_t *ttyvp; - sess_t *sp = curproc->p_sessp; + sess_t *sp; int error; - if (sp->s_dev == NODEV) + if ((sp = tty_hold()) == NULL) + return (EINTR); + + if (sp->s_dev == NODEV) { + tty_rele(sp); return (ENXIO); - TTY_HOLD(sp); - if ((ttyvp = sp->s_vp) == NULL) { - TTY_RELE(sp); - return (EIO); } - error = VOP_POLL(ttyvp, events, anyyet, reventsp, phpp); - TTY_RELE(sp); + + error = VOP_POLL(sp->s_vp, events, anyyet, reventsp, phpp); + + tty_rele(sp); return (error); } diff --git a/usr/src/uts/common/io/l_strplumb.c b/usr/src/uts/common/io/l_strplumb.c index 287ad1f08f..3997874684 100644 --- a/usr/src/uts/common/io/l_strplumb.c +++ b/usr/src/uts/common/io/l_strplumb.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -228,7 +227,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, li = ldi_ident_from_anon(); if (op == SET_AUTOPUSH || op == CLR_AUTOPUSH) { error = ldi_open_by_name(SAD_ADM, FREAD|FWRITE, - CRED(), &lh, li); + kcred, &lh, li); if (error) { printf("kstr_autopush: open failed error %d\n", error); ldi_ident_release(li); @@ -236,7 +235,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, } } else { error = ldi_open_by_name(SAD_USR, FREAD|FWRITE, - CRED(), &lh, li); + kcred, &lh, li); if (error) { printf("kstr_autopush: open failed error %d\n", error); ldi_ident_release(li); @@ -253,11 +252,11 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, push.sap_minor = *min; error = ldi_ioctl(lh, SAD_GAP, (intptr_t)&push, - FKIOCTL, CRED(), &rval); + FKIOCTL, kcred, &rval); if (error) { printf("kstr_autopush: ioctl failed, error %d\n", error); - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, kcred); return (error); } switch (push.sap_cmd) { @@ -288,7 +287,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, (void) strcpy(mods[i], push.sap_list[i]); mods[i] = NULL; } - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, kcred); return (0); case CLR_AUTOPUSH: @@ -299,12 +298,12 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, push.sap_major = *maj; error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push, - 
FKIOCTL, CRED(), &rval); + FKIOCTL, kcred, &rval); if (error) { printf("kstr_autopush: ioctl failed, error %d\n", error); } - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, kcred); return (error); case SET_AUTOPUSH: @@ -338,16 +337,16 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin, push.sap_list[i][0] = '\0'; error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push, - FKIOCTL, CRED(), &rval); + FKIOCTL, kcred, &rval); if (error) { printf("kstr_autopush: ioctl failed, error %d\n", error); } - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, kcred); return (error); default: - (void) ldi_close(lh, FREAD|FWRITE, CRED()); + (void) ldi_close(lh, FREAD|FWRITE, kcred); return (EINVAL); } } diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c index bd4dc10511..7910b58cc8 100644 --- a/usr/src/uts/common/io/ptm.c +++ b/usr/src/uts/common/io/ptm.c @@ -449,6 +449,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp) return (0); } +static boolean_t +ptmptsopencb(ptmptsopencb_arg_t arg) +{ + struct pt_ttys *ptmp = (struct pt_ttys *)arg; + boolean_t rval; + + PT_ENTER_READ(ptmp); + rval = (ptmp->pt_nullmsg != NULL); + PT_EXIT_READ(ptmp); + return (rval); +} + /* * The wput procedure will only handle ioctl and flush messages. 
*/ @@ -572,6 +584,41 @@ ptmwput(queue_t *qp, mblk_t *mp) miocack(qp, mp, 0, 0); break; } + case PTMPTSOPENCB: + { + mblk_t *dp; /* ioctl reply data */ + ptmptsopencb_t *ppocb; + + /* only allow the kernel to invoke this ioctl */ + if (iocp->ioc_cr != kcred) { + miocnak(qp, mp, 0, EINVAL); + break; + } + + /* we don't support transparent ioctls */ + ASSERT(iocp->ioc_count != TRANSPARENT); + if (iocp->ioc_count == TRANSPARENT) { + miocnak(qp, mp, 0, EINVAL); + break; + } + + /* allocate a response message */ + dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED); + if (dp == NULL) { + miocnak(qp, mp, 0, EAGAIN); + break; + } + + /* initialize the ioctl results */ + ppocb = (ptmptsopencb_t *)dp->b_rptr; + ppocb->ppocb_func = ptmptsopencb; + ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp; + + /* send the reply data */ + mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0); + qreply(qp, mp); + break; + } } break; @@ -643,6 +690,13 @@ ptmwsrv(queue_t *qp) ASSERT(qp->q_ptr); ptmp = (struct pt_ttys *)qp->q_ptr; + + if ((mp = getq(qp)) == NULL) { + /* If there are no messages there's nothing to do. */ + DBG(("leaving ptmwsrv (no messages)\n")); + return; + } + PT_ENTER_READ(ptmp); if ((ptmp->pt_state & PTLOCK) || (ptmp->pts_rdq == NULL)) { DBG(("in master write srv proc but no slave\n")); @@ -652,12 +706,12 @@ ptmwsrv(queue_t *qp) * the user process waiting for ACK/NAK from * the ioctl invocation */ - while ((mp = getq(qp)) != NULL) { + do { if (mp->b_datap->db_type == M_IOCTL) miocnak(qp, mp, 0, EINVAL); else freemsg(mp); - } + } while ((mp = getq(qp)) != NULL); flushq(qp, FLUSHALL); mp = mexchange(NULL, NULL, 2, M_ERROR, -1); @@ -672,7 +726,7 @@ ptmwsrv(queue_t *qp) /* * while there are messages on this write queue... */ - while ((mp = getq(qp)) != NULL) { + do { /* * if don't have control message and cannot put * msg. 
on slave's read queue, put it back on @@ -689,7 +743,7 @@ ptmwsrv(queue_t *qp) */ DBG(("send message to slave\n")); putnext(ptmp->pts_rdq, mp); - } + } while ((mp = getq(qp)) != NULL); DBG(("leaving ptmwsrv\n")); PT_EXIT_READ(ptmp); } diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h index eda293574e..03c32254b7 100644 --- a/usr/src/uts/common/nfs/nfs.h +++ b/usr/src/uts/common/nfs/nfs.h @@ -931,6 +931,7 @@ extern void nfsauth_fini(); extern int nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *args); extern int nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr, struct knetconfig *knconf, cred_t *cr); +extern boolean_t nfs_has_ctty(void); extern void nfs_srv_stop_all(void); extern void nfs_srv_quiesce_all(void); extern void (*nfs_srv_quiesce_func)(void); diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c new file mode 100644 index 0000000000..15d82871bf --- /dev/null +++ b/usr/src/uts/common/os/brand.c @@ -0,0 +1,323 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/kmem.h> +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/cmn_err.h> +#include <sys/brand.h> +#include <sys/machbrand.h> +#include <sys/modctl.h> +#include <sys/rwlock.h> +#include <sys/zone.h> + +#define SUPPORTED_BRAND_VERSION BRAND_VER_1 + +#if defined(__sparcv9) +struct brand_mach_ops native_mach_ops = { + NULL, NULL +}; +#else +struct brand_mach_ops native_mach_ops = { + NULL, NULL, NULL, NULL, NULL, NULL +}; +#endif + +brand_t native_brand = { + BRAND_VER_1, + "native", + NULL, + &native_mach_ops +}; + +/* + * Used to maintain a list of all the brands currently loaded into the + * kernel. + */ +struct brand_list { + int bl_refcnt; + struct brand_list *bl_next; + brand_t *bl_brand; +}; + +static struct brand_list *brand_list = NULL; + +/* + * This lock protects the integrity of the brand list. + */ +static kmutex_t brand_list_lock; + +void +brand_init() +{ + mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL); + p0.p_brand = &native_brand; +} + +int +brand_register(brand_t *brand) +{ + struct brand_list *list, *scan; + + if (brand == NULL) + return (EINVAL); + + if (is_system_labeled()) { + cmn_err(CE_WARN, + "Branded zones are not allowed on labeled systems."); + return (EINVAL); + } + + if (brand->b_version != SUPPORTED_BRAND_VERSION) { + if (brand->b_version < SUPPORTED_BRAND_VERSION) { + cmn_err(CE_WARN, + "brand '%s' was built to run on older versions " + "of Solaris.", + brand->b_name); + } else { + cmn_err(CE_WARN, + "brand '%s' was built to run on a newer version " + "of Solaris.", + brand->b_name); + } + return (EINVAL); + } + + /* Sanity checks */ + if (brand->b_name == NULL || brand->b_ops == NULL || + brand->b_ops->b_brandsys == NULL) { + cmn_err(CE_WARN, "Malformed brand"); + return (EINVAL); + } + + list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP); + + /* Add the brand to the list of loaded brands. 
*/ + mutex_enter(&brand_list_lock); + + /* + * Check to be sure we haven't already registered this brand. + */ + for (scan = brand_list; scan != NULL; scan = scan->bl_next) { + if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) { + cmn_err(CE_WARN, + "Invalid attempt to load a second instance of " + "brand %s", brand->b_name); + mutex_exit(&brand_list_lock); + kmem_free(list, sizeof (struct brand_list)); + return (EINVAL); + } + } + + list->bl_brand = brand; + list->bl_refcnt = 0; + list->bl_next = brand_list; + brand_list = list; + mutex_exit(&brand_list_lock); + + return (0); +} + +/* + * The kernel module implementing this brand is being unloaded, so remove + * it from the list of active brands. + */ +int +brand_unregister(brand_t *brand) +{ + struct brand_list *list, *prev; + + /* Sanity checks */ + if (brand == NULL || brand->b_name == NULL) { + cmn_err(CE_WARN, "Malformed brand"); + return (EINVAL); + } + + prev = NULL; + mutex_enter(&brand_list_lock); + + for (list = brand_list; list != NULL; list = list->bl_next) { + if (list->bl_brand == brand) + break; + prev = list; + } + + if (list == NULL) { + cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name); + mutex_exit(&brand_list_lock); + return (EINVAL); + } + + if (list->bl_refcnt > 0) { + cmn_err(CE_WARN, "Unregistering brand %s which is still in use", + brand->b_name); + mutex_exit(&brand_list_lock); + return (EBUSY); + } + + /* Remove brand from the list */ + if (prev != NULL) + prev->bl_next = list->bl_next; + else + brand_list = list->bl_next; + + mutex_exit(&brand_list_lock); + + kmem_free(list, sizeof (struct brand_list)); + + return (0); +} + +/* + * Record that a zone of this brand has been instantiated. If the kernel + * module implementing this brand's functionality is not present, this + * routine attempts to load the module as a side effect. 
+ */ +brand_t * +brand_register_zone(struct brand_attr *attr) +{ + struct brand_list *l = NULL; + ddi_modhandle_t hdl = NULL; + char *modname; + int err = 0; + + if (is_system_labeled()) { + cmn_err(CE_WARN, + "Branded zones are not allowed on labeled systems."); + return (NULL); + } + + /* + * We make at most two passes through this loop. The first time + * through, we're looking to see if this is a new user of an + * already loaded brand. If the brand hasn't been loaded, we + * call ddi_modopen() to force it to be loaded and then make a + * second pass through the list of brands. If we don't find the + * brand the second time through it means that the modname + * specified in the brand_attr structure doesn't provide the brand + * specified in the brandname field. This would suggest a bug in + * the brand's config.xml file. We close the module and return + * 'NULL' to the caller. + */ + for (;;) { + /* + * Search list of loaded brands + */ + mutex_enter(&brand_list_lock); + for (l = brand_list; l != NULL; l = l->bl_next) + if (strcmp(attr->ba_brandname, + l->bl_brand->b_name) == 0) + break; + if ((l != NULL) || (hdl != NULL)) + break; + mutex_exit(&brand_list_lock); + + /* + * We didn't find that the requested brand has been loaded + * yet, so we trigger the load of the appropriate kernel + * module and search the list again. + */ + modname = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) strcpy(modname, "brand/"); + (void) strcat(modname, attr->ba_modname); + hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err); + kmem_free(modname, MAXPATHLEN); + + if (err != 0) + return (NULL); + } + + /* + * If we found the matching brand, bump its reference count. + */ + if (l != NULL) + l->bl_refcnt++; + + mutex_exit(&brand_list_lock); + + if (hdl != NULL) + (void) ddi_modclose(hdl); + + return ((l != NULL) ? l->bl_brand : NULL); +} + +/* + * Return the number of zones currently using this brand. 
+ */ +int +brand_zone_count(struct brand *bp) +{ + struct brand_list *l; + int cnt = 0; + + mutex_enter(&brand_list_lock); + for (l = brand_list; l != NULL; l = l->bl_next) + if (l->bl_brand == bp) { + cnt = l->bl_refcnt; + break; + } + mutex_exit(&brand_list_lock); + + return (cnt); +} + +void +brand_unregister_zone(struct brand *bp) +{ + struct brand_list *list; + + mutex_enter(&brand_list_lock); + for (list = brand_list; list != NULL; list = list->bl_next) { + if (list->bl_brand == bp) { + ASSERT(list->bl_refcnt > 0); + list->bl_refcnt--; + break; + } + } + mutex_exit(&brand_list_lock); +} + +void +brand_setbrand(proc_t *p) +{ + brand_t *bp = p->p_zone->zone_brand; + + ASSERT(bp != NULL); + ASSERT(p->p_brand == &native_brand); + + /* + * We should only be called from exec(), when we know the process + * is single-threaded. + */ + ASSERT(p->p_tlist == p->p_tlist->t_forw); + + p->p_brand = bp; + if (PROC_IS_BRANDED(p)) { + BROP(p)->b_setbrand(p); + lwp_attach_brand_hdlrs(p->p_tlist->t_lwp); + } +} diff --git a/usr/src/uts/common/os/ddi.c b/usr/src/uts/common/os/ddi.c index ec12f51f37..6a0b6ace80 100644 --- a/usr/src/uts/common/os/ddi.c +++ b/usr/src/uts/common/os/ddi.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -293,14 +292,15 @@ WR(queue_t *q) int drv_getparm(unsigned int parm, void *valuep) { - time_t now; + proc_t *p = curproc; + time_t now; switch (parm) { case UPROCP: - *(proc_t **)valuep = ttoproc(curthread); + *(proc_t **)valuep = p; break; case PPGRP: - *(pid_t *)valuep = ttoproc(curthread)->p_pgrp; + *(pid_t *)valuep = p->p_pgrp; break; case LBOLT: *(clock_t *)valuep = lbolt; @@ -317,10 +317,12 @@ drv_getparm(unsigned int parm, void *valuep) } break; case PPID: - *(pid_t *)valuep = ttoproc(curthread)->p_pid; + *(pid_t *)valuep = p->p_pid; break; case PSID: - *(pid_t *)valuep = ttoproc(curthread)->p_sessp->s_sid; + mutex_enter(&p->p_splock); + *(pid_t *)valuep = p->p_sessp->s_sid; + mutex_exit(&p->p_splock); break; case UCRED: *(cred_t **)valuep = CRED(); diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index a3cd19e423..3b01993465 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -65,6 +65,7 @@ #include <sys/lwpchan_impl.h> #include <sys/pool.h> #include <sys/sdt.h> +#include <sys/brand.h> #include <c2/audit.h> @@ -89,7 +90,6 @@ uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */ #endif int exec_lpg_disable = 0; - #define PSUIDFLAGS (SNOCD|SUGID) /* @@ -109,12 +109,13 @@ exece(const char *fname, const char **argp, const char **envp) { int error; - error = exec_common(fname, argp, envp); + error = exec_common(fname, argp, envp, EBA_NONE); return (error ? (set_errno(error)) : 0); } int -exec_common(const char *fname, const char **argp, const char **envp) +exec_common(const char *fname, const char **argp, const char **envp, + int brand_action) { vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL; proc_t *p = ttoproc(curthread); @@ -136,6 +137,7 @@ exec_common(const char *fname, const char **argp, const char **envp) lwpdir_t **old_tidhash; uint_t old_tidhash_sz; lwpent_t *lep; + int brandme = 0; /* * exec() is not supported for the /proc agent lwp. 
@@ -146,6 +148,35 @@ exec_common(const char *fname, const char **argp, const char **envp) if ((error = secpolicy_basic_exec(CRED())) != 0) return (error); + if (brand_action != EBA_NONE) { + /* + * Brand actions are not supported for processes that are not + * running in a branded zone. + */ + if (!ZONE_IS_BRANDED(p->p_zone)) + return (ENOTSUP); + + if (brand_action == EBA_NATIVE) { + /* Only branded processes can be unbranded */ + if (!PROC_IS_BRANDED(p)) + return (ENOTSUP); + } else { + /* Only unbranded processes can be branded */ + if (PROC_IS_BRANDED(p)) + return (ENOTSUP); + brandme = 1; + } + } else { + /* + * If this is a native zone, or if the process is already + * branded, then we don't need to do anything. If this is + * a native process in a branded zone, we need to brand the + * process as it exec()s the new binary. + */ + if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p)) + brandme = 1; + } + /* * Inform /proc that an exec() has started. * Hold signals that are ignored by default so that we will @@ -237,8 +268,14 @@ exec_common(const char *fname, const char **argp, const char **envp) ua.argp = argp; ua.envp = envp; + /* If necessary, brand this process before we start the exec. */ + if (brandme != 0) + brand_setbrand(p); + if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz, - exec_file, p->p_cred)) != 0) { + exec_file, p->p_cred, brand_action)) != 0) { + if (brandme != 0) + BROP(p)->b_proc_exit(p, lwp); VN_RELE(vp); if (dir != NULL) VN_RELE(dir); @@ -351,6 +388,12 @@ exec_common(const char *fname, const char **argp, const char **envp) */ close_exec(P_FINFO(p)); TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up); + + /* Unbrand ourself if requested. 
*/ + if (brand_action == EBA_NATIVE) + BROP(p)->b_proc_exit(p, lwp); + ASSERT((brand_action != EBA_NATIVE) || !PROC_IS_BRANDED(p)); + setregs(&args); /* Mark this as an executable vnode */ @@ -376,6 +419,9 @@ exec_common(const char *fname, const char **argp, const char **envp) lep = kmem_zalloc(sizeof (*lep), KM_SLEEP); } + if (PROC_IS_BRANDED(p)) + BROP(p)->b_exec(); + mutex_enter(&p->p_lock); prbarrier(p); @@ -411,6 +457,7 @@ exec_common(const char *fname, const char **argp, const char **envp) lep->le_start = curthread->t_start; lwp_hash_in(p, lep); } + /* * Restore the saved signal mask and * inform /proc that the exec() has finished. @@ -422,6 +469,7 @@ exec_common(const char *fname, const char **argp, const char **envp) kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t)); kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *)); } + ASSERT(error == 0); DTRACE_PROC(exec__success); return (0); @@ -451,7 +499,8 @@ gexec( int level, long *execsz, caddr_t exec_file, - struct cred *cred) + struct cred *cred, + int brand_action) { struct vnode *vp; proc_t *pp = ttoproc(curthread); @@ -593,7 +642,7 @@ gexec( setidfl |= EXECSETID_PRIVS; error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz, - setidfl, exec_file, cred); + setidfl, exec_file, cred, brand_action); rw_exit(eswp->exec_lock); if (error != 0) { if (newcred != NULL) @@ -1016,17 +1065,44 @@ execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen, } if (zfodlen) { + struct as *as = curproc->p_as; + struct seg *seg; + uint_t zprot = 0; + end = (size_t)addr + len; zfodbase = (caddr_t)roundup(end, PAGESIZE); zfoddiff = (uintptr_t)zfodbase - end; if (zfoddiff) { + /* + * Before we go to zero the remaining space on the last + * page, make sure we have write permission. 
+ */ + + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + seg = as_segat(curproc->p_as, (caddr_t)end); + if (seg != NULL) + SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1, + &zprot); + AS_LOCK_EXIT(as, &as->a_lock); + + if (seg != NULL && (zprot & PROT_WRITE) == 0) { + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot | PROT_WRITE); + } + if (on_fault(&ljb)) { no_fault(); + if (seg != NULL && (zprot & PROT_WRITE) == 0) + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot); error = EFAULT; goto bad; } uzero((void *)end, zfoddiff); no_fault(); + if (seg != NULL && (zprot & PROT_WRITE) == 0) + (void) as_setprot(as, (caddr_t)end, + zfoddiff - 1, zprot); } if (zfodlen > zfoddiff) { struct segvn_crargs crargs = @@ -1326,13 +1402,22 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) args->ne = args->na - argc; /* - * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack. + * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and + * AT_SUN_EMULATOR strings to the stack. */ if (auxvpp != NULL && *auxvpp != NULL) { if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0) return (error); if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0) return (error); + if (args->brandname != NULL && + (error = stk_add(args, args->brandname, + UIO_SYSSPACE)) != 0) + return (error); + if (args->emulator != NULL && + (error = stk_add(args, args->emulator, + UIO_SYSSPACE)) != 0) + return (error); } /* @@ -1438,19 +1523,32 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up) /* * Fill in the aux vector now that we know the user stack addresses - * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings. + * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and + * AT_SUN_EMULATOR strings. 
*/ if (auxvpp != NULL && *auxvpp != NULL) { if (args->to_model == DATAMODEL_NATIVE) { auxv_t **a = (auxv_t **)auxvpp; ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp]) ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp]) + if (args->brandname != NULL) + ADDAUX(*a, + AT_SUN_BRANDNAME, (long)&ustrp[*--offp]) + if (args->emulator != NULL) + ADDAUX(*a, + AT_SUN_EMULATOR, (long)&ustrp[*--offp]) } else { auxv32_t **a = (auxv32_t **)auxvpp; ADDAUX(*a, AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp]) ADDAUX(*a, - AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]); + AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]) + if (args->brandname != NULL) + ADDAUX(*a, AT_SUN_BRANDNAME, + (int)(uintptr_t)&ustrp[*--offp]) + if (args->emulator != NULL) + ADDAUX(*a, AT_SUN_EMULATOR, + (int)(uintptr_t)&ustrp[*--offp]) } } diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index 70061a7d3e..3063e5717f 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -73,6 +73,7 @@ #include <sys/pool.h> #include <sys/sdt.h> #include <sys/corectl.h> +#include <sys/brand.h> /* * convert code/data pair into old style wait status @@ -158,7 +159,6 @@ restart_init(int what, int why) user_t *up = PTOU(p); vnode_t *oldcd, *oldrd; - sess_t *sp; int i, err; char reason_buf[64]; @@ -257,17 +257,9 @@ restart_init(int what, int why) if (oldcd != NULL) VN_RELE(oldcd); - /* - * Free the controlling tty. - */ - mutex_enter(&pidlock); - sp = p->p_sessp; - if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { - mutex_exit(&pidlock); - freectty(sp); - } else { - mutex_exit(&pidlock); - } + /* Free the controlling tty. (freectty() always assumes curproc.) */ + ASSERT(p == curproc); + (void) freectty(B_TRUE); /* * Now exec() the new init(1M) on top of the current process. 
If we @@ -343,7 +335,6 @@ proc_exit(int why, int what) timeout_id_t tmp_id; int rv; proc_t *q; - sess_t *sp; task_t *tk; vnode_t *exec_vp, *execdir_vp, *cdir, *rdir; sigqueue_t *sqp; @@ -367,6 +358,14 @@ proc_exit(int why, int what) DTRACE_PROC1(exit, int, why); /* + * Will perform any brand specific proc exit processing, since this + * is always the last lwp, will also perform lwp_exit and free brand + * data + */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_proc_exit(p, lwp); + + /* * Don't let init exit unless zone_start_init() failed its exec, or * we are shutting down the zone or the machine. * @@ -377,6 +376,7 @@ proc_exit(int why, int what) if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && + z->zone_restart_init == B_TRUE && restart_init(what, why) == 0) return (0); /* @@ -523,13 +523,9 @@ proc_exit(int why, int what) closeall(P_FINFO(p)); - mutex_enter(&pidlock); - sp = p->p_sessp; - if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) { - mutex_exit(&pidlock); - freectty(sp); - } else - mutex_exit(&pidlock); + /* Free the controlling tty. (freectty() always assumes curproc.) 
*/ + ASSERT(p == curproc); + (void) freectty(B_TRUE); #if defined(__sparc) if (p->p_utraps != NULL) diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c index c7c400246d..fbda5b8c4a 100644 --- a/usr/src/uts/common/os/fork.c +++ b/usr/src/uts/common/os/fork.c @@ -80,6 +80,7 @@ #include <sys/sdt.h> #include <sys/class.h> #include <sys/corectl.h> +#include <sys/brand.h> static int64_t cfork(int, int); static int getproc(proc_t **, int); @@ -461,8 +462,10 @@ cfork(int isvfork, int isfork1) mutex_exit(&p->p_lock); } - /* set return values for child */ - lwp_setrval(clone, p->p_pid, 1); + if (PROC_IS_BRANDED(p)) + BROP(p)->b_lwp_setrval(clone, p->p_pid, 1); + else + lwp_setrval(clone, p->p_pid, 1); /* set return values for parent */ r.r_val1 = (int)cp->p_pid; @@ -873,6 +876,7 @@ getproc(proc_t **cpp, int kernel) /* * Make proc entry for child process */ + mutex_init(&cp->p_splock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&cp->p_crlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&cp->p_pflock, NULL, MUTEX_DEFAULT, NULL); #if defined(__x86) @@ -882,7 +886,7 @@ getproc(proc_t **cpp, int kernel) cp->p_stat = SIDL; cp->p_mstart = gethrtime(); - if ((newpid = pid_assign(cp)) == -1) { + if ((newpid = pid_allocate(cp, PID_ALLOC_PROC)) == -1) { if (nproc == v.v_proc) { CPU_STATS_ADDQ(CPU, sys, procovf, 1); cmn_err(CE_WARN, "out of processes"); @@ -926,10 +930,13 @@ getproc(proc_t **cpp, int kernel) cp->p_siginfo = pp->p_siginfo; cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD); cp->p_sessp = pp->p_sessp; - SESS_HOLD(pp->p_sessp); + sess_hold(pp); cp->p_exec = pp->p_exec; cp->p_execdir = pp->p_execdir; cp->p_zone = pp->p_zone; + cp->p_brand = pp->p_brand; + if (PROC_IS_BRANDED(pp)) + BROP(pp)->b_copy_procdata(cp, pp); cp->p_bssbase = pp->p_bssbase; cp->p_brkbase = pp->p_brkbase; @@ -1198,6 +1205,7 @@ try_again: if (p->p_segacct) shmexit(p); + /* * We grab p_lock for the benefit of /proc */ diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c index 
dbccf77b9e..26a12c805e 100644 --- a/usr/src/uts/common/os/lwp.c +++ b/usr/src/uts/common/os/lwp.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -59,6 +58,7 @@ #include <sys/cpc_impl.h> #include <sys/sdt.h> #include <sys/cmn_err.h> +#include <sys/brand.h> void *segkp_lwp; /* cookie for pool of segkp resources */ @@ -87,6 +87,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p, uint_t old_hashsz = 0; int i; int rctlfail = 0; + boolean_t branded = 0; mutex_enter(&p->p_lock); mutex_enter(&p->p_zone->zone_nlwps_lock); @@ -448,6 +449,19 @@ grow: break; } while (lwp_hash_lookup(p, t->t_tid) != NULL); } + + /* + * If this is a branded process, let the brand do any necessary lwp + * initialization. 
+ */ + if (PROC_IS_BRANDED(p)) { + if (BROP(p)->b_initlwp(lwp)) { + err = 1; + goto error; + } + branded = 1; + } + p->p_lwpcnt++; t->t_waitfor = -1; @@ -540,6 +554,9 @@ error: if (cid != NOCLASS && bufp != NULL) CL_FREE(cid, bufp); + if (branded) + BROP(p)->b_freelwp(lwp); + mutex_exit(&p->p_lock); t->t_state = TS_FREE; thread_rele(t); @@ -673,6 +690,13 @@ lwp_exit(void) if (t->t_upimutex != NULL) upimutex_cleanup(); + /* + * Perform any brand specific exit processing, then release any + * brand data associated with the lwp + */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_lwpexit(lwp); + mutex_enter(&p->p_lock); lwp_cleanup(); @@ -1565,6 +1589,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) proc_t *p = lwptoproc(lwp); int cid; void *bufp; + void *brand_data; int val; ASSERT(p == curproc); @@ -1578,6 +1603,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) if (t == curthread) /* copy args out of registers first */ (void) save_syscall_args(); + clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt, NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid); if (clwp == NULL) @@ -1591,14 +1617,16 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct = clwp->lwp_thread; tregs = clwp->lwp_regs; tfpu = clwp->lwp_fpu; + brand_data = clwp->lwp_brand; /* copy parent lwp to child lwp */ *clwp = *lwp; /* fix up child's lwp */ - clwp->lwp_pcb.pcb_flags = 0; -#if defined(__sparc) +#if defined(__i386) || defined(__amd64) + clwp->lwp_pcb.pcb_flags = clwp->lwp_pcb.pcb_flags & RUPDATE_PENDING; +#elif defined(__sparc) clwp->lwp_pcb.pcb_step = STEP_NONE; #endif clwp->lwp_cursig = 0; @@ -1608,6 +1636,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct->t_sysnum = t->t_sysnum; clwp->lwp_regs = tregs; clwp->lwp_fpu = tfpu; + clwp->lwp_brand = brand_data; clwp->lwp_ap = clwp->lwp_arg; clwp->lwp_procp = cp; bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer)); @@ -1640,6 +1669,10 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid) ct->t_proc_flag |= TP_MSACCT; mutex_exit(&cp->p_lock); + 
/* Allow brand to propagate brand-specific state */ + if (PROC_IS_BRANDED(p)) + BROP(p)->b_forklwp(lwp, clwp); + retry: cid = t->t_cid; diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c index 958bbf96c8..ec9fc6c3e3 100644 --- a/usr/src/uts/common/os/main.c +++ b/usr/src/uts/common/os/main.c @@ -70,6 +70,7 @@ #include <sys/errorq.h> #include <sys/class.h> #include <sys/stack.h> +#include <sys/brand.h> #include <vm/as.h> #include <vm/seg_kmem.h> @@ -124,6 +125,7 @@ cluster_wrapper(void) char initname[INITNAME_SZ] = "/sbin/init"; /* also referenced by zone0 */ char initargs[BOOTARGS_MAX] = ""; /* also referenced by zone0 */ +extern int64_t lwp_sigmask(int, uint_t, uint_t); /* * Construct a stack for init containing the arguments to it, then @@ -144,6 +146,7 @@ exec_init(const char *initpath, const char *args) int error = 0, count = 0; proc_t *p = ttoproc(curthread); klwp_t *lwp = ttolwp(curthread); + int brand_action; if (args == NULL) args = ""; @@ -247,9 +250,17 @@ exec_init(const char *initpath, const char *args) curthread->t_post_sys = 1; curthread->t_sysnum = SYS_execve; + /* + * If we are executing init from zsched, we may have inherited its + * parent process's signal mask. Clear it now so that we behave in + * the same way as when started from the global zone. + */ + (void) lwp_sigmask(SIG_UNBLOCK, 0xffffffff, 0xffffffff); + + brand_action = ZONE_IS_BRANDED(p->p_zone) ? 
EBA_BRAND : EBA_NONE; again: error = exec_common((const char *)(uintptr_t)exec_fnamep, - (const char **)(uintptr_t)uap, NULL); + (const char **)(uintptr_t)uap, NULL, brand_action); /* * Normally we would just set lwp_argsaved and t_post_sys and diff --git a/usr/src/uts/common/os/modconf.c b/usr/src/uts/common/os/modconf.c index 2992567207..3e662fac7d 100644 --- a/usr/src/uts/common/os/modconf.c +++ b/usr/src/uts/common/os/modconf.c @@ -55,6 +55,7 @@ #include <ipp/ipp.h> #include <sys/strsubr.h> #include <sys/kcpc.h> +#include <sys/brand.h> #include <sys/cpc_pcbe.h> #include <sys/kstat.h> #include <sys/fs/sdev_node.h> @@ -237,6 +238,16 @@ struct mod_ops mod_pcbeops = { mod_installpcbe, mod_removepcbe, mod_infonull }; +/* + * Brand modules. + */ +static int mod_installbrand(struct modlbrand *, struct modlinkage *); +static int mod_removebrand(struct modlbrand *, struct modlinkage *); + +struct mod_ops mod_brandops = { + mod_installbrand, mod_removebrand, mod_infonull +}; + static struct sysent *mod_getsysent(struct modlinkage *, struct sysent *); static char uninstall_err[] = "Cannot uninstall %s; not installed"; @@ -496,6 +507,23 @@ mod_removepcbe(struct modlpcbe *modl, struct modlinkage *modlp) } /* + * Manage BrandZ modules. + */ +/*ARGSUSED*/ +static int +mod_installbrand(struct modlbrand *modl, struct modlinkage *modlp) +{ + return (brand_register(modl->brand_branddef)); +} + +/*ARGSUSED*/ +static int +mod_removebrand(struct modlbrand *modl, struct modlinkage *modlp) +{ + return (brand_unregister(modl->brand_branddef)); +} + +/* * manage /dev fs modules */ /*ARGSUSED*/ @@ -1075,8 +1103,10 @@ mod_removefs(struct modlfs *modl, struct modlinkage *modlp) return (EBUSY); } - /* XXX - Shouldn't the refcount be sufficient? 
*/ - + /* + * A mounted filesystem could still have vsw_count = 0 + * so we must check whether anyone is actually using our ops + */ if (vfs_opsinuse(&vswp->vsw_vfsops)) { vfs_unrefvfssw(vswp); WUNLOCK_VFSSW(); diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c index 66cfed74b4..88b0258afe 100644 --- a/usr/src/uts/common/os/pid.c +++ b/usr/src/uts/common/os/pid.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -21,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -115,6 +114,18 @@ pid_lookup(pid_t pid) return (pidp); } +struct pid * +pid_find(pid_t pid) +{ + struct pid *pidp; + + mutex_enter(&pidlinklock); + pidp = pid_lookup(pid); + mutex_exit(&pidlinklock); + + return (pidp); +} + void pid_setmin(void) { @@ -154,14 +165,13 @@ pid_getlockslot(int prslot) } /* - * This function assigns a pid for use in a fork request. It allocates - * a pid structure, tries to find an empty slot in the proc table, - * and selects the process id. + * This function allocates a pid structure, a free pid, and optionally a + * slot in the proc table for it. * - * pid_assign() returns the new pid on success, -1 on failure. + * pid_allocate() returns the new pid on success, -1 on failure. 
*/ pid_t -pid_assign(proc_t *prp) +pid_allocate(proc_t *prp, int flags) { struct pid *pidp; union procent *pep; @@ -170,7 +180,7 @@ pid_assign(proc_t *prp) pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP); mutex_enter(&pidlinklock); - if ((pep = procentfree) == NULL) { + if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) { /* * ran out of /proc directory entries */ @@ -190,10 +200,6 @@ pid_assign(proc_t *prp) goto failed; } - procentfree = pep->pe_next; - pep->pe_proc = prp; - prp->p_pidp = pidp; - /* * Put pid into the pid hash table. */ @@ -201,8 +207,17 @@ pid_assign(proc_t *prp) HASHPID(newpid) = pidp; pidp->pid_ref = 1; pidp->pid_id = newpid; - pidp->pid_prslot = pep - procdir; - prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; + + if (flags & PID_ALLOC_PROC) { + procentfree = pep->pe_next; + pidp->pid_prslot = pep - procdir; + pep->pe_proc = prp; + prp->p_pidp = pidp; + prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)]; + } else { + pidp->pid_prslot = 0; + } + mutex_exit(&pidlinklock); return (newpid); @@ -264,7 +279,7 @@ pid_exit(proc_t *prp) if (prp->p_pgidp != NULL) pgexit(prp); - SESS_RELE(prp->p_sessp); + sess_rele(prp->p_sessp, B_TRUE); pidp = prp->p_pidp; diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c index 603da31b62..a50bfa0db9 100644 --- a/usr/src/uts/common/os/printf.c +++ b/usr/src/uts/common/os/printf.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. 
All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -142,21 +141,15 @@ retry: if (sl & SL_USER) { ssize_t resid; - sess_t *sessp; - - mutex_enter(&pidlock); - sessp = curproc->p_sessp; - SESS_HOLD(sessp); - TTY_HOLD(sessp); - mutex_exit(&pidlock); - if (sessp->s_vp) - (void) vn_rdwr(UIO_WRITE, sessp->s_vp, - body, len, 0LL, UIO_SYSSPACE, - FAPPEND, (rlim64_t)LOG_HIWAT, kcred, &resid); - mutex_enter(&pidlock); - TTY_RELE(sessp); - SESS_RELE(sessp); - mutex_exit(&pidlock); + sess_t *sp; + + if ((sp = tty_hold()) != NULL) { + if (sp->s_vp != NULL) + (void) vn_rdwr(UIO_WRITE, sp->s_vp, body, + len, 0LL, UIO_SYSSPACE, FAPPEND, + (rlim64_t)LOG_HIWAT, kcred, &resid); + tty_rele(sp); + } } if (on_intr && !panicstr) { diff --git a/usr/src/uts/common/os/procset.c b/usr/src/uts/common/os/procset.c index 7a675c604e..ae5473847e 100644 --- a/usr/src/uts/common/os/procset.c +++ b/usr/src/uts/common/os/procset.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -290,8 +289,10 @@ procinset(proc_t *pp, procset_t *psp) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_lid) loperand++; + mutex_exit(&pp->p_splock); break; case P_CID: @@ -380,8 +381,10 @@ procinset(proc_t *pp, procset_t *psp) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_rid) roperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -533,8 +536,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_lid) loperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -617,8 +622,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done) break; case P_SID: + mutex_enter(&pp->p_splock); if (pp->p_sessp->s_sid == psp->p_rid) roperand++; + mutex_exit(&pp->p_splock); break; case P_TASKID: @@ -756,6 +763,7 @@ getmyid(idtype_t idtype) proc_t *pp; uid_t uid; gid_t gid; + pid_t sid; pp = ttoproc(curthread); @@ -773,7 +781,10 @@ getmyid(idtype_t idtype) return (pp->p_pgrp); case P_SID: - return (pp->p_sessp->s_sid); + mutex_enter(&pp->p_splock); + sid = pp->p_sessp->s_sid; + mutex_exit(&pp->p_splock); + return (sid); case P_TASKID: return (pp->p_task->tk_tkid); diff --git a/usr/src/uts/common/os/session.c b/usr/src/uts/common/os/session.c index 972677f7dc..7790a09094 100644 --- a/usr/src/uts/common/os/session.c +++ b/usr/src/uts/common/os/session.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -47,102 +46,614 @@ #include <sys/kmem.h> #include <sys/cmn_err.h> #include <sys/strsubr.h> +#include <sys/fs/snode.h> sess_t session0 = { - 1, /* s_ref */ - NODEV, /* s_dev */ - NULL, /* s_vp */ - &pid0, /* s_sidp */ - NULL /* s_cred */ + &pid0, /* s_sidp */ + {0}, /* s_lock */ + 1, /* s_ref */ + B_FALSE, /* s_sighuped */ + B_FALSE, /* s_exit */ + 0, /* s_exit_cv */ + 0, /* s_cnt */ + 0, /* s_cnt_cv */ + NODEV, /* s_dev */ + NULL, /* s_vp */ + NULL /* s_cred */ }; void -sess_rele(sess_t *sp) +sess_hold(proc_t *p) { - ASSERT(MUTEX_HELD(&pidlock)); + ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&p->p_splock)); + mutex_enter(&p->p_sessp->s_lock); + p->p_sessp->s_ref++; + mutex_exit(&p->p_sessp->s_lock); +} + +void +sess_rele(sess_t *sp, boolean_t pidlock_held) +{ + ASSERT(MUTEX_HELD(&pidlock) || !pidlock_held); + + mutex_enter(&sp->s_lock); ASSERT(sp->s_ref != 0); - if (--sp->s_ref == 0) { - if (sp == &session0) - panic("sp == &session0"); - PID_RELE(sp->s_sidp); - mutex_destroy(&sp->s_lock); - cv_destroy(&sp->s_wait_cv); - kmem_free(sp, sizeof (sess_t)); + if (--sp->s_ref > 0) { + mutex_exit(&sp->s_lock); + return; } + ASSERT(sp->s_ref == 0); + + /* + * It's ok to free this session structure now because we know + * that no one else can have a pointer to it. We know this + * to be true because the only time that s_ref can possibly + * be incremented is when pidlock or p_splock is held AND there + * is a proc_t that points to that session structure. In that + * case we are guaranteed that the s_ref is at least 1 since there + * is a proc_t that points to it. 
So when s_ref finally drops to + * zero then no one else has a reference (and hence pointer) to + * this session structure and there is no valid proc_t pointing + * to this session structure anymore so, no one can acquire a + * reference (and pointer) to this session structure so it's + * ok to free it here. + */ + + if (sp == &session0) + panic("sp == &session0"); + + /* make sure there are no outstanding holds */ + ASSERT(sp->s_cnt == 0); + + /* make sure there is no exit in progress */ + ASSERT(!sp->s_exit); + + /* make sure someone already freed any ctty */ + ASSERT(sp->s_vp == NULL); + ASSERT(sp->s_dev == NODEV); + + if (!pidlock_held) + mutex_enter(&pidlock); + PID_RELE(sp->s_sidp); + if (!pidlock_held) + mutex_exit(&pidlock); + + mutex_destroy(&sp->s_lock); + cv_destroy(&sp->s_cnt_cv); + kmem_free(sp, sizeof (sess_t)); +} + +sess_t * +tty_hold(void) +{ + proc_t *p = curproc; + sess_t *sp; + boolean_t got_sig = B_FALSE; + + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT(MUTEX_NOT_HELD(&pidlock)); + + for (;;) { + mutex_enter(&p->p_splock); /* protect p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protect sp->* */ + + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT((sp->s_vp == NULL) || + MUTEX_NOT_HELD(&sp->s_vp->v_stream->sd_lock)); + + /* + * If the session leader process is not exiting (and hence + * not trying to release the session's ctty) then we can + * safely grab a hold on the current session structure + * and return it. If on the other hand the session leader + * process is exiting and clearing the ctty then we'll + * wait till it's done before we loop around and grab a + * hold on the session structure. 
+ */ + if (!sp->s_exit) + break; + + /* need to hold the session so it can't be freed */ + sp->s_ref++; + mutex_exit(&p->p_splock); + + /* Wait till the session leader is done */ + if (!cv_wait_sig(&sp->s_exit_cv, &sp->s_lock)) + got_sig = B_TRUE; + + /* + * Now we need to drop our hold on the session structure, + * but we can't hold any locks when we do this because + * sess_rele() may need to acquire pidlock. + */ + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); + + if (got_sig) + return (NULL); + } + + /* whew, we finally got a hold */ + sp->s_cnt++; + sp->s_ref++; + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + return (sp); } void -sess_create(void) +tty_rele(sess_t *sp) { - proc_t *pp; - sess_t *sp; + /* make sure the caller isn't holding locks they shouldn't */ + ASSERT(MUTEX_NOT_HELD(&pidlock)); - pp = ttoproc(curthread); + mutex_enter(&sp->s_lock); + if ((--sp->s_cnt) == 0) + cv_broadcast(&sp->s_cnt_cv); + mutex_exit(&sp->s_lock); + + sess_rele(sp, B_FALSE); +} + +void +sess_create(void) +{ + proc_t *p = curproc; + sess_t *sp, *old_sp; sp = kmem_zalloc(sizeof (sess_t), KM_SLEEP); mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&sp->s_wait_cv, NULL, CV_DEFAULT, NULL); + cv_init(&sp->s_cnt_cv, NULL, CV_DEFAULT, NULL); + /* + * we need to grab p_lock to protect p_pgidp because + * /proc looks at p_pgidp while holding only p_lock. + * + * we don't need to hold p->p_sessp->s_lock or get a hold on the + * session structure since we're not actually updating any of + * the contents of the old session structure.
+ */ mutex_enter(&pidlock); + mutex_enter(&p->p_lock); + mutex_enter(&p->p_splock); + + pgexit(p); + + sp->s_sidp = p->p_pidp; + sp->s_ref = 1; + sp->s_dev = NODEV; + + old_sp = p->p_sessp; + p->p_sessp = sp; + + pgjoin(p, p->p_pidp); + PID_HOLD(p->p_pidp); + + mutex_exit(&p->p_splock); + mutex_exit(&p->p_lock); + mutex_exit(&pidlock); + sess_rele(old_sp, B_FALSE); +} + +/* + * Note that sess_ctty_clear() resets all the fields in the session + * structure but doesn't release any holds or free any objects + * that the session structure might currently point to. it is the + * callers responsibility to do this. + */ +static void +sess_ctty_clear(sess_t *sp, stdata_t *stp) +{ /* - * We need to protect p_pgidp with p_lock because - * /proc looks at it while holding only p_lock. + * Assert that we hold all the necessary locks. We also need + * to be holding proc_t->p_splock for the process associated + * with this session, but since we don't have a proc pointer + * passed in we can't assert this here. */ - mutex_enter(&pp->p_lock); - pgexit(pp); - SESS_RELE(pp->p_sessp); + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&sp->s_lock)); - sp->s_sidp = pp->p_pidp; - sp->s_ref = 1; + /* reset the session structure members to defaults */ + sp->s_sighuped = B_FALSE; sp->s_dev = NODEV; + sp->s_vp = NULL; + sp->s_cred = NULL; + + /* reset the stream session and group pointers */ + stp->sd_pgidp = NULL; + stp->sd_sidp = NULL; +} + +static void +sess_ctty_set(proc_t *p, sess_t *sp, stdata_t *stp) +{ + cred_t *crp; + + /* Assert that we hold all the necessary locks. 
*/ + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock)); + + /* get holds on structures */ + mutex_enter(&p->p_crlock); + crhold(crp = p->p_cred); + mutex_exit(&p->p_crlock); + PID_HOLD(sp->s_sidp); /* requires pidlock */ + PID_HOLD(sp->s_sidp); /* requires pidlock */ + + /* update the session structure members */ + sp->s_vp = makectty(stp->sd_vnode); + sp->s_dev = sp->s_vp->v_rdev; + sp->s_cred = crp; + + /* update the stream members */ + stp->sd_flag |= STRISTTY; /* just to be sure */ + stp->sd_sidp = sp->s_sidp; + stp->sd_pgidp = sp->s_sidp; +} + +int +strctty(stdata_t *stp) +{ + sess_t *sp; + proc_t *p = curproc; + boolean_t got_sig = B_FALSE; + + /* + * We are going to try to make stp the default ctty for the session + * associated with curproc. Not only does this require holding a + * bunch of locks but it also requires waiting for any outstanding + * holds on the session structure (acquired via tty_hold()) to be + * released. Hence, we have the following for(;;) loop that will + * acquire our locks, do some sanity checks, and wait for the hold + * count on the session structure to hit zero. If we get a signal + * while waiting for outstanding holds to be released then we abort + * the operation and return. + */ + for (;;) { + mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */ + mutex_enter(&pidlock); /* protects p_pidp */ + mutex_enter(&p->p_splock); /* protects p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protects sp->* */ + + if (((stp->sd_flag & (STRHUP|STRDERR|STWRERR|STPLEX)) != 0) || + (stp->sd_sidp != NULL) || /* stp already ctty? */ + (p->p_pidp != sp->s_sidp) || /* we're not leader? */ + (sp->s_vp != NULL)) { /* session has ctty? */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + return (ENOTTY); + } + + /* sanity check.
we can't be exiting right now */ + ASSERT(!sp->s_exit); + + /* + * If no one else has a hold on this session structure + * then we now have exclusive access to it, so break out + * of this loop and update the session structure. + */ + if (sp->s_cnt == 0) + break; + + /* need to hold the session so it can't be freed */ + sp->s_ref++; - pp->p_sessp = sp; + /* ain't locking order fun? */ + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); - pgjoin(pp, pp->p_pidp); - mutex_exit(&pp->p_lock); + if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) + got_sig = B_TRUE; + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); - PID_HOLD(sp->s_sidp); + if (got_sig) + return (EINTR); + } + + /* set the session ctty bindings */ + sess_ctty_set(p, sp, stp); + + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + return (0); } -void -freectty(sess_t *sp) +/* + * freectty_lock() attempts to aquire the army of locks required to free + * the ctty associated with a given session leader process. If it returns + * successfully the following locks will be held: + * sd_lock, pidlock, p_splock, s_lock + * + * as a secondary bit of convience, freectty_lock() will also return + * pointers to the session, ctty, and ctty stream associated with the + * specified session leader process. + */ +static boolean_t +freectty_lock(proc_t *p, sess_t **spp, vnode_t **vpp, stdata_t **stpp, + boolean_t at_exit) { - vnode_t *vp = sp->s_vp; - cred_t *cred = sp->s_cred; + sess_t *sp; + vnode_t *vp; + stdata_t *stp; - strfreectty(vp->v_stream); + mutex_enter(&pidlock); /* protect p_pidp */ + mutex_enter(&p->p_splock); /* protect p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protect sp->* */ - mutex_enter(&sp->s_lock); - while (sp->s_cnt > 0) { - cv_wait(&sp->s_wait_cv, &sp->s_lock); + if ((sp->s_sidp != p->p_pidp) || /* we're not leader? */ + (sp->s_vp == NULL)) { /* no ctty? 
*/ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + return (B_FALSE); + } + + vp = sp->s_vp; + stp = sp->s_vp->v_stream; + + if (at_exit) { + /* stop anyone else calling tty_hold() */ + sp->s_exit = B_TRUE; + } else { + /* + * due to locking order we have to grab stp->sd_lock before + * grabbing all the other proc/session locks. but after we + * drop all our current locks it's possible that someone + * could come in and change our current session or close + * the current ctty (vp) there by making sp or stp invalid. + * (a VN_HOLD on vp won't protect stp because that only + * prevents the vnode from being freed not closed.) so + * to prevent this we bump s_ref and s_cnt here. + * + * course this doesn't matter if we're the last thread in + * an exiting process that is the session leader, since no + * one else can change our session or free our ctty. + */ + sp->s_ref++; /* hold the session structure */ + sp->s_cnt++; /* protect vp and stp */ + } + + /* drop our session locks */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + + /* grab locks in the right order */ + mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */ + mutex_enter(&pidlock); /* protect p_pidp */ + mutex_enter(&p->p_splock); /* protects p->p_sessp */ + mutex_enter(&sp->s_lock); /* protects sp->* */ + + /* if the session has changed, abort mission */ + if (sp != p->p_sessp) { + /* + * this can't happen during process exit since we're the + * only thread in the process and we sure didn't change + * our own session at this point. + */ + ASSERT(!at_exit); + + /* release our locks and holds */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + tty_rele(sp); + return (B_FALSE); } - ASSERT(sp->s_cnt == 0); - ASSERT(vp->v_count >= 1); - sp->s_vp = NULL; - sp->s_cred = NULL; /* - * It is possible for the VOP_CLOSE below to call stralloctty() - * and reallocate a new tty vnode. 
To prevent that the - * session is marked as closing here. + * sanity checks. none of this should have changed since we had + * holds on the current ctty. */ + ASSERT(sp->s_sidp == p->p_pidp); /* we're the leader */ + ASSERT(sp->s_vp != NULL); /* a ctty exists */ + ASSERT(vp == sp->s_vp); + ASSERT(stp == sp->s_vp->v_stream); + + /* release our holds */ + if (!at_exit) { + if ((--(sp)->s_cnt) == 0) + cv_broadcast(&sp->s_cnt_cv); + sp->s_ref--; + ASSERT(sp->s_ref > 0); + } + + /* return our pointers */ + *spp = sp; + *vpp = vp; + *stpp = stp; - sp->s_flag = SESS_CLOSE; + return (B_TRUE); +} + +/* + * Returns B_FALSE if no signal is sent to the process group associated with + * this ctty. Returns B_TRUE if a signal is sent to the process group. + * If it return B_TRUE it also means that all the locks we were holding + * were dropped so that we could send the signal. + */ +static boolean_t +freectty_signal(proc_t *p, sess_t *sp, stdata_t *stp, boolean_t at_exit) +{ + /* Assert that we hold all the necessary locks. */ + ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) && + MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock)); + + /* check if we already signaled this group */ + if (sp->s_sighuped) + return (B_FALSE); + + sp->s_sighuped = B_TRUE; + + if (!at_exit) { + /* + * once again, we're about to drop our army of locks and we + * don't want sp or stp to be freed. 
(see the comment in + * freectty_lock()) + */ + sp->s_ref++; /* hold the session structure */ + sp->s_cnt++; /* protect vp and stp */ + } + + /* can't hold these locks while calling pgsignal() */ mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + + /* signal anyone in the foreground process group */ + pgsignal(stp->sd_pgidp, SIGHUP); + + /* signal anyone blocked in poll on this stream */ + if (!(stp->sd_flag & STRHUP)) + strhup(stp); + + mutex_exit(&stp->sd_lock); + + /* release our holds */ + if (!at_exit) + tty_rele(sp); + + return (B_TRUE); +} + +int +freectty(boolean_t at_exit) +{ + proc_t *p = curproc; + stdata_t *stp; + vnode_t *vp; + cred_t *cred; + sess_t *sp; + struct pid *pgidp, *sidp; + boolean_t got_sig = B_FALSE; /* - * This will be the only thread with access to - * this vnode, from this point on. + * If the current process is a session leader we are going to + * try to release the ctty associated our current session. To + * do this we need to aquire a bunch of locks, signal any + * processes in the forground that are associated with the ctty, + * and make sure no one has any outstanding holds on the current + * session * structure (aquired via tty_hold()). Hence, we have + * the following for(;;) loop that will do all this work for + * us and break out when the hold count on the session structure + * hits zero. */ + for (;;) { + if (!freectty_lock(p, &sp, &vp, &stp, at_exit)) + return (EIO); + + if (freectty_signal(p, sp, stp, at_exit)) { + /* loop around to re-aquire locks */ + continue; + } + + /* + * Only a session leader process can free a ctty. So if + * we've made it here we know we're a session leader and + * if we're not actively exiting it impossible for another + * thread in this process to be exiting. (Because that + * thread would have already stopped all other threads + * in the current process.) 
+ */ + ASSERT(at_exit || !sp->s_exit); + + /* + * If no one else has a hold on this session structure + * then we now have exclusive access to it, so break out + * of this loop and update the session structure. + */ + if (sp->s_cnt == 0) + break; + + if (!at_exit) { + /* need to hold the session so it can't be freed */ + sp->s_ref++; + } + + /* ain't locking order fun? */ + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + + if (at_exit) { + /* + * if we're exiting then we can't allow this operation + * to fail so we do a cw_wait() instead of a + * cv_wait_sig(). if there are threads with active + * holds on this ctty that are blocked, then + * they should only be blocked in a cv_wait_sig() + * and hopefully they were in the foreground process + * group and recieved the SIGHUP we sent above. of + * course it's possible that they weren't in the + * foreground process group and didn't get our + * signal (or they could be stopped by job control + * in which case our signal wouldn't matter until + * they are restarted). in this case we won't + * exit until someone else sends them a signal. 
+ */ + cv_wait(&sp->s_cnt_cv, &sp->s_lock); + mutex_exit(&sp->s_lock); + continue; + } + + if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) { + got_sig = B_TRUE; + } + + mutex_exit(&sp->s_lock); + sess_rele(sp, B_FALSE); + + if (got_sig) + return (EINTR); + } + ASSERT(sp->s_cnt == 0); + /* save some pointers for later */ + cred = sp->s_cred; + pgidp = stp->sd_pgidp; + sidp = stp->sd_sidp; + + /* clear the session ctty bindings */ + sess_ctty_clear(sp, stp); + + /* wake up anyone blocked in tty_hold() */ + if (at_exit) { + ASSERT(sp->s_exit); + sp->s_exit = B_FALSE; + cv_broadcast(&sp->s_exit_cv); + } + + /* we can drop these locks now */ + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); + + /* This is the only remaining thread with access to this vnode */ (void) VOP_CLOSE(vp, 0, 1, (offset_t)0, cred); VN_RELE(vp); - crfree(cred); + + /* release our holds on assorted structures and return */ + mutex_enter(&pidlock); + PID_RELE(pgidp); + PID_RELE(sidp); + mutex_exit(&pidlock); + + return (1); } /* @@ -169,23 +680,29 @@ vhangup(void) dev_t cttydev(proc_t *pp) { - sess_t *sp = pp->p_sessp; + sess_t *sp; + dev_t dev; + + mutex_enter(&pp->p_splock); /* protects p->p_sessp */ + sp = pp->p_sessp; + +#ifdef DEBUG + mutex_enter(&sp->s_lock); /* protects sp->* */ if (sp->s_vp == NULL) - return (NODEV); - return (sp->s_dev); + ASSERT(sp->s_dev == NODEV); + else + ASSERT(sp->s_dev != NODEV); + mutex_exit(&sp->s_lock); +#endif /* DEBUG */ + + dev = sp->s_dev; + mutex_exit(&pp->p_splock); + return (dev); } void -alloctty(proc_t *pp, vnode_t *vp) +ctty_clear_sighuped(void) { - sess_t *sp = pp->p_sessp; - cred_t *crp; - - sp->s_vp = vp; - sp->s_dev = vp->v_rdev; - - mutex_enter(&pp->p_crlock); - crhold(crp = pp->p_cred); - mutex_exit(&pp->p_crlock); - sp->s_cred = crp; + ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&curproc->p_splock)); + curproc->p_sessp->s_sighuped = B_FALSE; } diff --git a/usr/src/uts/common/os/streamio.c 
b/usr/src/uts/common/os/streamio.c index ffa676604f..e189a1627d 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -77,6 +77,19 @@ #include <sys/autoconf.h> #include <sys/policy.h> + +/* + * This define helps improve the readability of streams code while + * still maintaining a very old streams performance enhancement. The + * performance enhancement basically involved having all callers + * of straccess() perform the first check that straccess() will do + * locally before actually calling straccess(). (There by reducing + * the number of unnecessary calls to straccess().) + */ +#define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \ + (stp->sd_vnode->v_type == VFIFO) ? 0 : \ + straccess((x), (y))) + /* * what is mblk_pull_len? * @@ -1095,11 +1108,13 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp) ASSERT(vp->v_stream); stp = vp->v_stream; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } + if (stp->sd_flag & (STRDERR|STPLEX)) { error = strgeterr(stp, STRDERR|STPLEX, 0); if (error != 0) { @@ -1161,12 +1176,8 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp) } TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE, "strread awakes:%p, %p, %p", vp, uiop, crp); - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { - mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - goto oops1; - mutex_enter(&stp->sd_lock); + if ((error = i_straccess(stp, JCREAD)) != 0) { + goto oops; } first = 0; } @@ -2026,8 +2037,8 @@ strrput_nondata(queue_t *q, mblk_t *bp) cv_broadcast(&q->q_wait); /* the readers */ cv_broadcast(&_WR(q)->q_wait); /* the writers */ cv_broadcast(&stp->sd_monitor); /* the ioctllers */ - mutex_exit(&stp->sd_lock); strhup(stp); + mutex_exit(&stp->sd_lock); return (0); case M_UNHANGUP: @@ -2665,18 
+2676,23 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) ASSERT(vp->v_stream); stp = vp->v_stream; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if ((error = straccess(stp, JCWRITE)) != 0) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_TRUE, B_TRUE); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); + wqp = stp->sd_wrq; /* get these values from them cached in the stream head */ @@ -2778,11 +2794,11 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag) } TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE, "strwrite wake:q %p awakes", wqp); + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + goto out; + } mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - goto out; } waitflag |= NOINTR; TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID, @@ -3101,6 +3117,7 @@ job_control_type(int cmd) case JAGENT: /* Obsolete */ case JTRUN: /* Obsolete */ case JXTPROTO: /* Obsolete */ + case TIOCSETLD: return (JCSETP); } @@ -3162,10 +3179,12 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR) return (EINVAL); - if (access != -1 && stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, access)) - return (error); + mutex_enter(&stp->sd_lock); + if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) { + mutex_exit(&stp->sd_lock); + return (error); + } + mutex_exit(&stp->sd_lock); /* * Check for sgttyb-related ioctls first, and complain as @@ -3307,11 +3326,16 @@ strioctl(struct vnode *vp, int cmd, intptr_t 
arg, int flag, int copyflag, secpolicy_sti(crp) != 0) { return (EPERM); } - if (stp->sd_sidp != - ttoproc(curthread)->p_sessp->s_sidp && + mutex_enter(&stp->sd_lock); + mutex_enter(&curproc->p_splock); + if (stp->sd_sidp != curproc->p_sessp->s_sidp && secpolicy_sti(crp) != 0) { + mutex_exit(&curproc->p_splock); + mutex_exit(&stp->sd_lock); return (EACCES); } + mutex_exit(&curproc->p_splock); + mutex_exit(&stp->sd_lock); strioc.ic_len = sizeof (char); strioc.ic_dp = (char *)arg; @@ -3445,10 +3469,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, return (EINVAL); access = job_control_type(strioc.ic_cmd); - if (access != -1 && stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO && - (error = straccess(stp, access)) != 0) + mutex_enter(&stp->sd_lock); + if ((access != -1) && + ((error = i_straccess(stp, access)) != 0)) { + mutex_exit(&stp->sd_lock); return (error); + } + mutex_exit(&stp->sd_lock); /* * The I_STR facility provides a trap door for malicious @@ -3699,7 +3726,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, /* * try to allocate it as a controlling terminal */ - stralloctty(stp); + (void) strctty(stp); } } @@ -5053,15 +5080,11 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, releasef(STRUCT_FGET(strfdinsert, fildes)); return (error); } - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, access)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, access)) { - releasef( - STRUCT_FGET(strfdinsert, fildes)); - return (error); - } - mutex_enter(&stp->sd_lock); + releasef( + STRUCT_FGET(strfdinsert, fildes)); + return (error); } } mutex_exit(&stp->sd_lock); @@ -5144,12 +5167,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, mutex_exit(&stp->sd_lock); return (error); } - if (stp->sd_sidp != NULL && - stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, access)) != 0) { 
mutex_exit(&stp->sd_lock); - if (error = straccess(stp, access)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } } if (mp->b_datap->db_type != M_PASSFP) { @@ -5446,13 +5466,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, { pid_t sid; - mutex_enter(&pidlock); + mutex_enter(&stp->sd_lock); if (stp->sd_sidp == NULL) { - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (ENOTTY); } sid = stp->sd_sidp->pid_id; - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (strcopyout(&sid, (void *)arg, sizeof (pid_t), copyflag)); } @@ -5494,6 +5514,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, bg_pgid = stp->sd_pgidp->pid_id; CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid); PID_RELE(stp->sd_pgidp); + ctty_clear_sighuped(); stp->sd_pgidp = q->p_pgidp; PID_HOLD(stp->sd_pgidp); mutex_exit(&pidlock); @@ -5505,17 +5526,30 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag, { pid_t pgrp; - mutex_enter(&pidlock); + mutex_enter(&stp->sd_lock); if (stp->sd_sidp == NULL) { - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (ENOTTY); } pgrp = stp->sd_pgidp->pid_id; - mutex_exit(&pidlock); + mutex_exit(&stp->sd_lock); return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t), copyflag)); } + case TIOCSCTTY: + { + return (strctty(stp)); + } + + case TIOCNOTTY: + { + /* freectty() always assumes curproc. 
*/ + if (freectty(B_FALSE) != 0) + return (0); + return (ENOTTY); + } + case FIONBIO: case FIOASYNC: return (0); /* handled by the upper layer */ @@ -6233,18 +6267,21 @@ strgetmsg( stp = vp->v_stream; rvp->r_val1 = 0; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } - /* Fast check of flags before acquiring the lock */ if (stp->sd_flag & (STRDERR|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strgeterr(stp, STRDERR|STPLEX, 0); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); switch (*flagsp) { case MSG_HIPRI: @@ -6381,11 +6418,9 @@ strgetmsg( } TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE, "strgetmsg awakes:%p, %p", vp, uiop); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, JCREAD)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } first = 0; } @@ -6797,23 +6832,26 @@ kstrgetmsg( stp = vp->v_stream; rvp->r_val1 = 0; - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCREAD)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCREAD)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } flags = *flagsp; - /* Fast check of flags before acquiring the lock */ if (stp->sd_flag & (STRDERR|STPLEX)) { if ((stp->sd_flag & STPLEX) || (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) { - mutex_enter(&stp->sd_lock); error = strgeterr(stp, STRDERR|STPLEX, (flags & MSG_IPEEK)); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } } + mutex_exit(&stp->sd_lock); switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) { 
case MSG_HIPRI: @@ -6955,11 +6993,9 @@ retry: } TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE, "kstrgetmsg awakes:%p, %p", vp, uiop); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { + if ((error = i_straccess(stp, JCREAD)) != 0) { mutex_exit(&stp->sd_lock); - if (error = straccess(stp, JCREAD)) - return (error); - mutex_enter(&stp->sd_lock); + return (error); } first = 0; } @@ -7430,18 +7466,23 @@ strputmsg( audit_strputmsg(vp, mctl, mdata, pri, flag, fmode); #endif - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - return (error); + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_FALSE, xpg4); - mutex_exit(&stp->sd_lock); - if (error != 0) + if (error != 0) { + mutex_exit(&stp->sd_lock); return (error); + } } + mutex_exit(&stp->sd_lock); + /* * Check for legal flag value. 
*/ @@ -7561,10 +7602,11 @@ strputmsg( } TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE, "strputmsg wake:stp %p wakes", stp); + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + return (error); + } mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) - if (error = straccess(stp, JCWRITE)) - return (error); } out: /* @@ -7617,25 +7659,27 @@ kstrputmsg( if (mctl == NULL) return (EINVAL); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { - if (error = straccess(stp, JCWRITE)) { - freemsg(mctl); - return (error); - } + mutex_enter(&stp->sd_lock); + + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + freemsg(mctl); + return (error); } if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) { if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) { - mutex_enter(&stp->sd_lock); error = strwriteable(stp, B_FALSE, B_TRUE); - mutex_exit(&stp->sd_lock); if (error != 0) { + mutex_exit(&stp->sd_lock); freemsg(mctl); return (error); } } } + mutex_exit(&stp->sd_lock); + /* * Check for legal flag value. 
*/ @@ -7804,13 +7848,12 @@ kstrputmsg( } TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE, "kstrputmsg wake:stp %p wakes", stp); - mutex_exit(&stp->sd_lock); - if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) { - if (error = straccess(stp, JCWRITE)) { - freemsg(mctl); - return (error); - } + if ((error = i_straccess(stp, JCWRITE)) != 0) { + mutex_exit(&stp->sd_lock); + freemsg(mctl); + return (error); } + mutex_exit(&stp->sd_lock); } out: freemsg(mctl); diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c index 57a918a3f0..ae99e5198a 100644 --- a/usr/src/uts/common/os/strsubr.c +++ b/usr/src/uts/common/os/strsubr.c @@ -3107,13 +3107,18 @@ straccess(struct stdata *stp, enum jcaccess mode) proc_t *p = ttoproc(t); sess_t *sp; + ASSERT(mutex_owned(&stp->sd_lock)); + if (stp->sd_sidp == NULL || stp->sd_vnode->v_type == VFIFO) return (0); - mutex_enter(&p->p_lock); - sp = p->p_sessp; + mutex_enter(&p->p_lock); /* protects p_pgidp */ for (;;) { + mutex_enter(&p->p_splock); /* protects p->p_sessp */ + sp = p->p_sessp; + mutex_enter(&sp->s_lock); /* protects sp->* */ + /* * If this is not the calling process's controlling terminal * or if the calling process is already in the foreground @@ -3121,6 +3126,8 @@ straccess(struct stdata *stp, enum jcaccess mode) */ if (sp->s_dev != stp->sd_vnode->v_rdev || p->p_pgidp == stp->sd_pgidp) { + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&p->p_lock); return (0); } @@ -3131,10 +3138,15 @@ straccess(struct stdata *stp, enum jcaccess mode) if (sp->s_vp == NULL) { if (!cantsend(p, t, SIGHUP)) sigtoproc(p, t, SIGHUP); + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); mutex_exit(&p->p_lock); return (EIO); } + mutex_exit(&sp->s_lock); + mutex_exit(&p->p_splock); + if (mode == JCGETP) { mutex_exit(&p->p_lock); return (0); @@ -3146,7 +3158,9 @@ straccess(struct stdata *stp, enum jcaccess mode) return (EIO); } mutex_exit(&p->p_lock); + mutex_exit(&stp->sd_lock); pgsignal(p->p_pgidp, 
SIGTTIN); + mutex_enter(&stp->sd_lock); mutex_enter(&p->p_lock); } else { /* mode == JCWRITE or JCSETP */ if ((mode == JCWRITE && !(stp->sd_flag & STRTOSTOP)) || @@ -3159,7 +3173,9 @@ straccess(struct stdata *stp, enum jcaccess mode) return (EIO); } mutex_exit(&p->p_lock); + mutex_exit(&stp->sd_lock); pgsignal(p->p_pgidp, SIGTTOU); + mutex_enter(&stp->sd_lock); mutex_enter(&p->p_lock); } @@ -3174,10 +3190,15 @@ straccess(struct stdata *stp, enum jcaccess mode) * We can't get here if the signal is ignored or * if the current thread is blocking the signal. */ + mutex_exit(&stp->sd_lock); if (!cv_wait_sig_swap(&lbolt_cv, &p->p_lock)) { mutex_exit(&p->p_lock); + mutex_enter(&stp->sd_lock); return (EINTR); } + mutex_exit(&p->p_lock); + mutex_enter(&stp->sd_lock); + mutex_enter(&p->p_lock); } } @@ -4001,59 +4022,12 @@ strsignal(stdata_t *stp, int sig, int32_t band) void strhup(stdata_t *stp) { + ASSERT(mutex_owned(&stp->sd_lock)); pollwakeup(&stp->sd_pollist, POLLHUP); - mutex_enter(&stp->sd_lock); if (stp->sd_sigflags & S_HANGUP) strsendsig(stp->sd_siglist, S_HANGUP, 0, 0); - mutex_exit(&stp->sd_lock); -} - -void -stralloctty(stdata_t *stp) -{ - proc_t *p = curproc; - sess_t *sp = p->p_sessp; - - mutex_enter(&stp->sd_lock); - /* - * No need to hold the session lock or do a TTY_HOLD() because - * this is the only thread that can be the session leader and not - * have a controlling tty. 
- */ - if ((stp->sd_flag & - (STRHUP|STRDERR|STWRERR|STPLEX|STRISTTY)) == STRISTTY && - stp->sd_sidp == NULL && /* not allocated as ctty */ - sp->s_sidp == p->p_pidp && /* session leader */ - sp->s_flag != SESS_CLOSE && /* session is not closing */ - sp->s_vp == NULL) { /* without ctty */ - ASSERT(stp->sd_pgidp == NULL); - alloctty(p, makectty(stp->sd_vnode)); - - mutex_enter(&pidlock); - stp->sd_sidp = sp->s_sidp; - stp->sd_pgidp = sp->s_sidp; - PID_HOLD(stp->sd_pgidp); - PID_HOLD(stp->sd_sidp); - mutex_exit(&pidlock); - } - mutex_exit(&stp->sd_lock); } -void -strfreectty(stdata_t *stp) -{ - mutex_enter(&stp->sd_lock); - pgsignal(stp->sd_pgidp, SIGHUP); - mutex_enter(&pidlock); - PID_RELE(stp->sd_pgidp); - PID_RELE(stp->sd_sidp); - stp->sd_pgidp = NULL; - stp->sd_sidp = NULL; - mutex_exit(&pidlock); - mutex_exit(&stp->sd_lock); - if (!(stp->sd_flag & STRHUP)) - strhup(stp); -} /* * Backenable the first queue upstream from `q' with a service procedure. */ diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index 80761e102c..8211e23d01 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -51,6 +51,7 @@ int access(); int alarm(); int auditsys(); +int64_t brandsys(); int brk(); int chdir(); int chmod(); @@ -131,6 +132,8 @@ int unlink(); int utime(); int64_t utssys32(); int64_t utssys64(); +int uucopy(); +ssize_t uucopystr(); int64_t wait(); ssize_t write(); ssize_t readv(); @@ -473,7 +476,7 @@ struct sysent sysent[NSYSCALL] = SYSENT_NOSYS(), SYSENT_CI("fstatfs", fstatfs32, 4)), /* 39 */ SYSENT_CI("setpgrp", setpgrp, 3), - /* 40 */ SYSENT_LOADABLE(), /* (was cxenix) */ + /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3), /* 41 */ SYSENT_CI("dup", dup, 1), /* 42 */ SYSENT_LOADABLE(), /* (was pipe ) */ /* 43 */ SYSENT_CL("times", times, 1), @@ -658,7 +661,7 @@ struct sysent sysent[NSYSCALL] = SYSENT_NOSYS(), SYSENT_C("llseek", llseek32, 4)), /* 176 */ SYSENT_LOADABLE(), /* inst_sync */ - /* 177 */ SYSENT_LOADABLE(), /* 
(was srmlimitsys) */ + /* 177 */ SYSENT_CI("brandsys", brandsys, 6), /* 178 */ SYSENT_LOADABLE(), /* kaio */ /* 179 */ SYSENT_LOADABLE(), /* cpc */ /* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3), @@ -770,7 +773,7 @@ struct sysent sysent[NSYSCALL] = /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), /* 253 */ SYSENT_CI("cladm", cladm, 3), - /* 254 */ SYSENT_LOADABLE(), /* (was lwp_sigtimedwait) */ + /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ SYSENT_CI("umount2", umount2, 2) /* ONC_PLUS EXTRACT START */ }; @@ -876,7 +879,7 @@ struct sysent sysent32[NSYSCALL] = /* 37 */ SYSENT_CI("kill", kill, 2), /* 38 */ SYSENT_CI("fstatfs", fstatfs32, 4), /* 39 */ SYSENT_CI("setpgrp", setpgrp, 3), - /* 40 */ SYSENT_LOADABLE32(), /* (was cxenix) */ + /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3), /* 41 */ SYSENT_CI("dup", dup, 1), /* 42 */ SYSENT_LOADABLE32(), /* (was pipe ) */ /* 43 */ SYSENT_CI("times", times32, 1), @@ -1036,7 +1039,7 @@ struct sysent sysent32[NSYSCALL] = /* 174 */ SYSENT_CI("pwrite", pwrite32, 4), /* 175 */ SYSENT_C("llseek", llseek32, 4), /* 176 */ SYSENT_LOADABLE32(), /* inst_sync */ - /* 177 */ SYSENT_LOADABLE32(), /* srmlimitsys */ + /* 177 */ SYSENT_CI("brandsys", brandsys, 6), /* 178 */ SYSENT_LOADABLE32(), /* kaio */ /* 179 */ SYSENT_LOADABLE32(), /* cpc */ /* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3), @@ -1116,7 +1119,7 @@ struct sysent sysent32[NSYSCALL] = /* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1), /* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2), /* 253 */ SYSENT_CI("cladm", cladm, 3), - /* 254 */ SYSENT_LOADABLE32(), /* (was lwp_sigtimedwait) */ + /* 254 */ SYSENT_CI("uucopy", uucopy, 3), /* 255 */ SYSENT_CI("umount2", umount2, 2) /* ONC_PLUS EXTRACT START */ }; diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 6a5c9243b3..9fd6b423bd 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -228,6 
+228,7 @@ #include <sys/nvpair.h> #include <sys/rctl.h> #include <sys/fss.h> +#include <sys/brand.h> #include <sys/zone.h> #include <sys/tsol/label.h> @@ -330,7 +331,6 @@ static kmutex_t mount_lock; const char * const zone_default_initname = "/sbin/init"; static char * const zone_prefix = "/zone/"; - static int zone_shutdown(zoneid_t zoneid); /* @@ -1223,6 +1223,8 @@ zone_init(void) zone0.zone_nlwps = p0.p_lwpcnt; zone0.zone_ntasks = 1; mutex_exit(&p0.p_lock); + zone0.zone_restart_init = B_TRUE; + zone0.zone_brand = &native_brand; rctl_prealloc_destroy(gp); /* * pool_default hasn't been initialized yet, so we let pool_init() take @@ -2330,33 +2332,40 @@ void zone_start_init(void) { proc_t *p = ttoproc(curthread); + zone_t *z = p->p_zone; ASSERT(!INGLOBALZONE(curproc)); /* + * For all purposes (ZONE_ATTR_INITPID and restart_init), + * storing just the pid of init is sufficient. + */ + z->zone_proc_initpid = p->p_pid; + + /* * We maintain zone_boot_err so that we can return the cause of the * failure back to the caller of the zone_boot syscall. */ p->p_zone->zone_boot_err = start_init_common(); mutex_enter(&zone_status_lock); - if (p->p_zone->zone_boot_err != 0) { + if (z->zone_boot_err != 0) { /* * Make sure we are still in the booting state-- we could have * raced and already be shutting down, or even further along. 
*/ - if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING) - zone_status_set(p->p_zone, ZONE_IS_SHUTTING_DOWN); + if (zone_status_get(z) == ZONE_IS_BOOTING) + zone_status_set(z, ZONE_IS_SHUTTING_DOWN); mutex_exit(&zone_status_lock); /* It's gone bad, dispose of the process */ - if (proc_exit(CLD_EXITED, p->p_zone->zone_boot_err) != 0) { + if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) { mutex_enter(&p->p_lock); ASSERT(p->p_flag & SEXITLWPS); lwp_exit(); } } else { - if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING) - zone_status_set(p->p_zone, ZONE_IS_RUNNING); + if (zone_status_get(z) == ZONE_IS_BOOTING) + zone_status_set(z, ZONE_IS_RUNNING); mutex_exit(&zone_status_lock); /* cause the process to return to userland. */ lwp_rtt(); @@ -2939,6 +2948,9 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_psetid = ZONE_PS_INVAL; zone->zone_ncpus = 0; zone->zone_ncpus_online = 0; + zone->zone_restart_init = B_TRUE; + zone->zone_brand = &native_brand; + zone->zone_initname = NULL; mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL); @@ -3464,6 +3476,9 @@ zone_shutdown(zoneid_t zoneid) zone_rele(zone); return (set_errno(EINTR)); } + + brand_unregister_zone(zone->zone_brand); + zone_rele(zone); return (0); } @@ -3771,6 +3786,18 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) copyout(&initpid, buf, bufsize) != 0) error = EFAULT; break; + case ZONE_ATTR_BRAND: + size = strlen(zone->zone_brand->b_name) + 1; + + if (bufsize > size) + bufsize = size; + if (buf != NULL) { + err = copyoutstr(zone->zone_brand->b_name, buf, + bufsize, NULL); + if (err != 0 && err != ENAMETOOLONG) + error = EFAULT; + } + break; case ZONE_ATTR_INITNAME: size = strlen(zone->zone_initname) + 1; if (bufsize > size) @@ -3797,7 +3824,12 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) } break; default: - error = EINVAL; + if 
((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) { + size = bufsize; + error = ZBROP(zone)->b_getattr(zone, attr, buf, &size); + } else { + error = EINVAL; + } } zone_rele(zone); @@ -3815,6 +3847,7 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) { zone_t *zone; zone_status_t zone_status; + struct brand_attr *attrp; int err; if (secpolicy_zone_config(CRED()) != 0) @@ -3847,8 +3880,33 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) case ZONE_ATTR_BOOTARGS: err = zone_set_bootargs(zone, (const char *)buf); break; + case ZONE_ATTR_BRAND: + ASSERT(!ZONE_IS_BRANDED(zone)); + err = 0; + attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP); + if ((buf == NULL) || + (copyin(buf, attrp, sizeof (struct brand_attr)) != 0)) { + kmem_free(attrp, sizeof (struct brand_attr)); + err = EFAULT; + break; + } + + if (is_system_labeled() && strncmp(attrp->ba_brandname, + NATIVE_BRAND_NAME, MAXNAMELEN) != 0) { + err = EPERM; + break; + } + + zone->zone_brand = brand_register_zone(attrp); + kmem_free(attrp, sizeof (struct brand_attr)); + if (zone->zone_brand == NULL) + err = EINVAL; + break; default: - err = EINVAL; + if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) + err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize); + else + err = EINVAL; } done: @@ -4145,10 +4203,10 @@ zone_enter(zoneid_t zoneid) */ mutex_enter(&pidlock); sp = zone->zone_zsched->p_sessp; - SESS_HOLD(sp); + sess_hold(zone->zone_zsched); mutex_enter(&pp->p_lock); pgexit(pp); - SESS_RELE(pp->p_sessp); + sess_rele(pp->p_sessp, B_TRUE); pp->p_sessp = sp; pgjoin(pp, zone->zone_zsched->p_pidp); mutex_exit(&pp->p_lock); diff --git a/usr/src/uts/common/rpc/clnt_gen.c b/usr/src/uts/common/rpc/clnt_gen.c index 0093210bd5..4c557b563f 100644 --- a/usr/src/uts/common/rpc/clnt_gen.c +++ b/usr/src/uts/common/rpc/clnt_gen.c @@ -346,6 +346,11 @@ bindresvport_again: } if (!error && bound_addr) { + if (bound_addr->maxlen < ret->addr.len) { + 
kmem_free(bound_addr->buf, bound_addr->maxlen); + bound_addr->buf = kmem_zalloc(ret->addr.len, KM_SLEEP); + bound_addr->maxlen = ret->addr.len; + } bcopy(ret->addr.buf, bound_addr->buf, ret->addr.len); bound_addr->len = ret->addr.len; } diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index f6bcef9c5c..2754405b01 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -102,6 +102,7 @@ CHKHDRS= \ bofi_impl.h \ bpp_io.h \ bootstat.h \ + brand.h \ buf.h \ bufmod.h \ bustypes.h \ diff --git a/usr/src/uts/common/sys/audioio.h b/usr/src/uts/common/sys/audioio.h index 5b8152cfc5..2814eb7040 100644 --- a/usr/src/uts/common/sys/audioio.h +++ b/usr/src/uts/common/sys/audioio.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,8 +19,8 @@ * CDDL HEADER END */ /* - * Copyright (c) 1995-2001 by Sun Microsystems, Inc. - * All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. */ #ifndef _SYS_AUDIOIO_H @@ -209,10 +208,10 @@ typedef struct audio_info audio_info_t; * a signed int. 
*/ #define AUDIO_INITINFO(i) { \ - uint_t *__x__; \ - for (__x__ = (uint_t *)(i); \ + uint_t *__x__; \ + for (__x__ = (uint_t *)(i); \ (char *)__x__ < (((char *)(i)) + sizeof (audio_info_t)); \ - *__x__++ = ~0); \ + *__x__++ = (uint_t)~0); \ } diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h index 025d7a18e9..b9cf07f269 100644 --- a/usr/src/uts/common/sys/auxv.h +++ b/usr/src/uts/common/sys/auxv.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -171,6 +170,15 @@ extern uint_t getisax(uint32_t *, uint_t); #define AT_SUN_AUXFLAGS 2017 /* AF_SUN_ flags passed from the kernel */ /* + * Used to indicate to the runtime linker the name of the emulation binary, + * if one is being used. For brands, this is the name of the brand library. + */ +#define AT_SUN_EMULATOR 2018 + +#define AT_SUN_BRANDNAME 2019 +#define AT_SUN_BRAND_PHDR 2020 /* Brand executable's phdr */ + +/* * The kernel is in a better position to determine whether a process needs to * ignore dangerous LD environment variables. If set, this flags tells * ld.so.1 to run "secure" and ignore the the environment. 
@@ -183,7 +191,6 @@ extern uint_t getisax(uint32_t *, uint_t); */ #define AF_SUN_HWCAPVERIFY 0x00000002 - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/bitmap.h b/usr/src/uts/common/sys/bitmap.h index 8476ba9563..d0dd12b683 100644 --- a/usr/src/uts/common/sys/bitmap.h +++ b/usr/src/uts/common/sys/bitmap.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -124,6 +124,14 @@ extern "C" { #endif /* _LP64 */ +/* + * BIT_ONLYONESET is a private macro not designed for bitmaps of + * arbitrary size. u must be an unsigned integer/long. It returns + * true if one and only one bit is set in u. + */ +#define BIT_ONLYONESET(u) \ + ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0)) + #if defined(_KERNEL) && !defined(_ASM) #include <sys/atomic.h> diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h new file mode 100644 index 0000000000..c4595e9641 --- /dev/null +++ b/usr/src/uts/common/sys/brand.h @@ -0,0 +1,134 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#ifndef _SYS_BRAND_H +#define _SYS_BRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/proc.h> +#include <sys/exec.h> + +/* + * All Brands supported by this kernel must use BRAND_VER_1. + */ +#define BRAND_VER_1 1 + +/* + * sub-commands to brandsys. + * 1 - 128 are for common commands + * 128+ are available for brand-specific commands. + */ +#define B_REGISTER 1 +#define B_TTYMODES 2 +#define B_ELFDATA 3 +#define B_EXEC_NATIVE 4 +#define B_EXEC_BRAND 5 + +/* + * Structure used by zoneadmd to communicate the name of a brand and the + * supporting brand module into the kernel. + */ +struct brand_attr { + char ba_brandname[MAXNAMELEN]; + char ba_modname[MAXPATHLEN]; +}; + +/* What we call the native brand. */ +#define NATIVE_BRAND_NAME "native" + +#ifdef _KERNEL + +/* Root for branded zone's native binaries */ +#define NATIVE_ROOT "/native/" + +struct proc; +struct uarg; +struct brand_mach_ops; +struct intpdata; +struct execa; + +struct brand_ops { + int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, + uintptr_t, uintptr_t, uintptr_t); + void (*b_setbrand)(struct proc *); + int (*b_getattr)(zone_t *, int, void *, size_t *); + int (*b_setattr)(zone_t *, int, void *, size_t); + void (*b_copy_procdata)(struct proc *, struct proc *); + void (*b_proc_exit)(struct proc *, klwp_t *); + void (*b_exec)(); + void (*b_lwp_setrval)(klwp_t *, int, int); + int (*b_initlwp)(klwp_t *); + void (*b_forklwp)(klwp_t *, klwp_t *); + void (*b_freelwp)(klwp_t *); + void (*b_lwpexit)(klwp_t *); + int (*b_elfexec)(struct vnode *vp, struct execa *uap, + struct uarg *args, struct intpdata *idata, int level, + long *execsz, int setid, caddr_t exec_file, + struct cred *cred, int brand_action); +}; + +/* + * The b_version field must always be the first entry in this struct. 
+ */ +typedef struct brand { + int b_version; + char *b_name; + struct brand_ops *b_ops; + struct brand_mach_ops *b_machops; +} brand_t; + +extern brand_t native_brand; + +/* + * Convenience macros + */ +#define lwptolwpbrand(l) ((l)->lwp_brand) +#define ttolwpbrand(t) (lwptolwpbrand(ttolwp(t))) +#define PROC_IS_BRANDED(p) ((p)->p_brand != &native_brand) +#define ZONE_IS_BRANDED(z) ((z)->zone_brand != &native_brand) +#define BROP(p) ((p)->p_brand->b_ops) +#define ZBROP(z) ((z)->zone_brand->b_ops) +#define BRMOP(p) ((p)->p_brand->b_machops) + +extern void brand_init(); +extern int brand_register(brand_t *); +extern int brand_unregister(brand_t *); +extern brand_t *brand_register_zone(struct brand_attr *); +extern brand_t *brand_find_name(char *); +extern void brand_unregister_zone(brand_t *); +extern int brand_zone_count(brand_t *); +extern void brand_setbrand(proc_t *); +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_BRAND_H */ diff --git a/usr/src/uts/common/sys/class.h b/usr/src/uts/common/sys/class.h index fbfbcc6080..9988ca3190 100644 --- a/usr/src/uts/common/sys/class.h +++ b/usr/src/uts/common/sys/class.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -38,6 +37,7 @@ #include <sys/thread.h> #include <sys/priocntl.h> #include <sys/mutex.h> +#include <sys/uio.h> #ifdef __cplusplus extern "C" { @@ -128,15 +128,15 @@ extern pri_t minclsyspri; extern id_t syscid; /* system scheduling class ID */ extern id_t defaultcid; /* "default" class id; see dispadmin(1M) */ -extern int alloc_cid(char *, id_t *); -extern int scheduler_load(char *, sclass_t *); -extern int getcid(char *, id_t *); -extern int getcidbyname(char *, id_t *); -extern int parmsin(pcparms_t *, pc_vaparms_t *); -extern int parmsout(pcparms_t *, pc_vaparms_t *); -extern int parmsset(pcparms_t *, kthread_id_t); -extern void parmsget(kthread_id_t, pcparms_t *); -extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *); +extern int alloc_cid(char *, id_t *); +extern int scheduler_load(char *, sclass_t *); +extern int getcid(char *, id_t *); +extern int getcidbyname(char *, id_t *); +extern int parmsin(pcparms_t *, pc_vaparms_t *); +extern int parmsout(pcparms_t *, pc_vaparms_t *); +extern int parmsset(pcparms_t *, kthread_id_t); +extern void parmsget(kthread_id_t, pcparms_t *); +extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t); #endif diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h index e9a34eacfe..a5eaf18edd 100644 --- a/usr/src/uts/common/sys/exec.h +++ b/usr/src/uts/common/sys/exec.h @@ -105,9 +105,19 @@ typedef struct uarg { uint_t brkpageszc; uintptr_t entry; uintptr_t thrptr; + char *emulator; + char *brandname; + auxv32_t *brand_auxp; /* starting user addr of brand auxvs on stack */ } uarg_t; /* + * Possible brand actions for exec. + */ +#define EBA_NONE 0 +#define EBA_NATIVE 1 +#define EBA_BRAND 2 + +/* * The following macro is a machine dependent encapsulation of * postfix processing to hide the stack direction from elf.c * thereby making the elf.c code machine independent. 
@@ -166,7 +176,7 @@ struct execsw { int (*exec_func)(struct vnode *vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, long *execsz, int setid, caddr_t exec_file, - struct cred *cred); + struct cred *cred, int brand_action); int (*exec_core)(struct vnode *vp, struct proc *p, struct cred *cred, rlim64_t rlimit, int sig, core_content_t content); @@ -198,10 +208,10 @@ extern int exec_args(execa_t *, uarg_t *, intpdata_t *, void **); extern int exec(const char *fname, const char **argp); extern int exece(const char *fname, const char **argp, const char **envp); extern int exec_common(const char *fname, const char **argp, - const char **envp); + const char **envp, int brand_action); extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, long *execsz, caddr_t exec_file, - struct cred *cred); + struct cred *cred, int brand_action); extern struct execsw *allocate_execsw(char *name, char *magic, size_t magic_size); extern struct execsw *findexecsw(char *magic); diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h index ade26b4f82..7dea5b4941 100644 --- a/usr/src/uts/common/sys/klwp.h +++ b/usr/src/uts/common/sys/klwp.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -211,6 +210,8 @@ typedef struct _klwp { */ struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */ struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */ + + void *lwp_brand; /* per-lwp brand data */ } klwp_t; /* lwp states */ diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h index 1093eddef6..5e9450dde5 100644 --- a/usr/src/uts/common/sys/modctl.h +++ b/usr/src/uts/common/sys/modctl.h @@ -61,6 +61,7 @@ struct mod_ops { * The defined set of mod_ops structures for each loadable module type * Defined in modctl.c */ +extern struct mod_ops mod_brandops; #if defined(__i386) || defined(__amd64) extern struct mod_ops mod_cpuops; #endif @@ -175,6 +176,13 @@ struct modlpcbe { struct __pcbe_ops *pcbe_ops; }; +/* For Brand modules */ +struct modlbrand { + struct mod_ops *brand_modops; + char *brand_linkinfo; + struct brand *brand_branddef; +}; + /* for devname fs */ struct modldev { struct mod_ops *dev_modops; diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index fadcbf4a6d..13a3605e66 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -115,6 +115,7 @@ typedef struct lwpdir { struct pool; struct task; struct zone; +struct brand; struct corectl_path; struct corectl_content; @@ -336,6 +337,11 @@ typedef struct proc { uintptr_t p_portcnt; /* event ports counter */ struct zone *p_zone; /* zone in which process lives */ struct vnode *p_execdir; /* directory that p_exec came from */ + struct brand *p_brand; /* process's brand */ + void *p_brand_data; /* per-process brand state */ + + /* additional lock to protect p_sessp (but not its contents) */ + kmutex_t p_splock; } proc_t; #define PROC_T /* headers relying on proc_t are OK */ @@ -408,6 +414,10 @@ struct plock { extern proc_t p0; /* process 0 */ extern struct plock p0lock; /* p0's plock */ extern struct pid pid0; /* p0's pid */ + +/* pid_allocate() flags */ +#define PID_ALLOC_PROC 0x0001 /* assign a /proc 
slot as well */ + #endif /* _KERNEL */ /* stat codes */ @@ -588,7 +598,8 @@ extern int sigcheck(proc_t *, kthread_t *); extern void sigdefault(proc_t *); extern void pid_setmin(void); -extern pid_t pid_assign(proc_t *); +extern pid_t pid_allocate(proc_t *, int); +extern struct pid *pid_find(pid_t); extern int pid_rele(struct pid *); extern void pid_exit(proc_t *); extern void proc_entry_free(struct pid *); @@ -724,6 +735,7 @@ extern void lwp_rtt(void); extern void lwp_rtt_initial(void); extern int lwp_setprivate(klwp_t *, int, uintptr_t); extern void lwp_stat_update(lwp_stat_id_t, long); +extern void lwp_attach_brand_hdlrs(klwp_t *); /* * Signal queue function prototypes. Must be here due to header ordering diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h index 9aa6493956..120503539b 100644 --- a/usr/src/uts/common/sys/ptms.h +++ b/usr/src/uts/common/sys/ptms.h @@ -35,6 +35,8 @@ extern "C" { #endif +#ifdef _KERNEL + /* * Structures and definitions supporting the pseudo terminal * drivers. This structure is private and should not be used by any @@ -63,8 +65,6 @@ struct pt_ttys { #define PTSOPEN 0x04 /* slave side is open */ #define PTSTTY 0x08 /* slave side is tty */ -#ifdef _KERNEL - /* * Multi-threading primitives. * Values of pt_refcnt: -1 if a writer is accessing the struct @@ -129,18 +129,29 @@ extern void ptms_logp(char *, uintptr_t); #define DDBGP(a, b) #endif +typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t; +typedef struct ptmptsopencb { + boolean_t (*ppocb_func)(ptmptsopencb_arg_t); + ptmptsopencb_arg_t ppocb_arg; +} ptmptsopencb_t; + #endif /* _KERNEL */ +typedef struct pt_own { + uid_t pto_ruid; + gid_t pto_rgid; +} pt_own_t; + /* * ioctl commands * - * ISPTM: Determines whether the file descriptor is that of an open master - * device. Return code of zero indicates that the file descriptor - * represents master device. + * ISPTM: Determines whether the file descriptor is that of an open master + * device. 
Return code of zero indicates that the file descriptor + * represents master device. * - * UNLKPT: Unlocks the master and slave devices. It returns 0 on success. On - * failure, the errno is set to EINVAL indicating that the master - * device is not open. + * UNLKPT: Unlocks the master and slave devices. It returns 0 on success. On + * failure, the errno is set to EINVAL indicating that the master + * device is not open. * * ZONEPT: Sets the zoneid of the pair of master and slave devices. It * returns 0 upon success. Used to force a pty 'into' a zone upon @@ -149,16 +160,24 @@ extern void ptms_logp(char *, uintptr_t); * PT_OWNER: Sets uid and gid for slave device. It returns 0 on success. * */ -#define ISPTM (('P'<<8)|1) /* query for master */ -#define UNLKPT (('P'<<8)|2) /* unlock master/slave pair */ -#define PTSSTTY (('P'<<8)|3) /* set tty flag */ -#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */ -#define PT_OWNER (('P'<<8)|5) /* set owner and group for slave device */ +#define ISPTM (('P'<<8)|1) /* query for master */ +#define UNLKPT (('P'<<8)|2) /* unlock master/slave pair */ +#define PTSSTTY (('P'<<8)|3) /* set tty flag */ +#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */ +#define PT_OWNER (('P'<<8)|5) /* set owner/group for slave device */ -typedef struct pt_own { - uid_t pto_ruid; - gid_t pto_rgid; -} pt_own_t; +#ifdef _KERNEL +/* + * kernel ioctl commands + * + * PTMPTSOPENCB: Returns a callback function pointer and opaque argument. + * The return value of the callback function when it's invoked + * with the opaque argument passed to it will indicate if the + * pts slave device is currently open. 
+ */ +#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */ + +#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/session.h b/usr/src/uts/common/sys/session.h index 639d6bf69d..8db8a8a5bb 100644 --- a/usr/src/uts/common/sys/session.h +++ b/usr/src/uts/common/sys/session.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -37,54 +36,96 @@ extern "C" { #endif +/* + * Session structure overview. + * + * Currently, the only structure in the kernel which has a pointer to a + * session structure is the proc_t via the p_sessp pointer. To + * access a session proc_t->p_sessp pointer a caller must hold either + * pidlock or p_splock. These locks only protect the p_sessp pointer + * itself and do not protect any of the contents of the session structure. + * To prevent the contents of the session structure from changing the + * caller must grab s_lock. + * + * No callers should ever update the contents of the session structure + * directly. Only the session management code should ever modify the + * contents of the session structure. When the session code attempts + * to modify the contents of a session structure it must hold multiple + * locks.
The locking order for all the locks that may need to be + * acquired is: + * sd_lock -> pidlock -> p_splock -> s_lock + * + * If a caller requires access to a session structure for long + * periods of time or across operations that may block it should + * use the tty_hold() and sess_hold() interfaces. + * + * sess_hold() returns a pointer to a session structure associated + * with the proc_t that was passed in. It also increments the reference + * count associated with that session structure to ensure that it + * can't be freed until after the caller is done with it and calls + * sess_rele(). This hold doesn't actually protect any of the + * contents of the session structure. + * + * tty_hold() returns a pointer to a session structure associated + * with the curproc. It also "locks" the contents of the session + * structure. This hold should be used when the caller will be + * doing operations on a controlling tty associated with the session. + * This operation does an implicit sess_hold() so that the session + * structure can't be free'd until after the caller is done with it + * and invokes tty_rele(). + * + * NOTE: Neither of these functions (sess_hold() or tty_hold()) + * prevent a process from changing its session. Once these functions + * return a session pointer, that session pointer may no longer be + * associated with the current process. If a caller wants to prevent + * a process from changing its session then it must hold pidlock or + * p_splock.
+ */ + typedef struct sess { - uint_t s_ref; /* reference count */ - dev_t s_dev; /* tty's device number */ - struct vnode *s_vp; /* tty's vnode */ - struct pid *s_sidp; /* session ID info */ - struct cred *s_cred; /* allocation credentials */ - kmutex_t s_lock; /* sync s_vp use with freectty */ - kcondvar_t s_wait_cv; /* Condvar for sleeping */ - int s_cnt; /* # of active users of this session */ - int s_flag; /* session state flag see below */ -} sess_t; + struct pid *s_sidp; /* session ID info, never changes */ -#define SESS_CLOSE 1 /* session about to close */ -#define s_sid s_sidp->pid_id + kmutex_t s_lock; /* protects everything below */ + uint_t s_ref; /* reference count */ + boolean_t s_sighuped; /* ctty had sighup sent to it */ -#if defined(_KERNEL) + boolean_t s_exit; /* session leader is exiting */ + kcondvar_t s_exit_cv; /* Condvar for s_exit */ -extern sess_t session0; + int s_cnt; /* active users of this ctty */ + kcondvar_t s_cnt_cv; /* Condvar for s_cnt */ -#define SESS_HOLD(sp) (++(sp)->s_ref) -#define SESS_RELE(sp) sess_rele(sp) + /* + * The following fields can only be updated while s_lock is held + * and s_cnt is 0. (ie, no one has a tty_hold() on this session.)
+ */ + dev_t s_dev; /* tty's device number */ + struct vnode *s_vp; /* tty's vnode */ + struct cred *s_cred; /* allocation credentials */ +} sess_t; -/* - * Used to synchronize session vnode users with freectty() - */ +#define s_sid s_sidp->pid_id -#define TTY_HOLD(sp) { \ - mutex_enter(&(sp)->s_lock); \ - (++(sp)->s_cnt); \ - mutex_exit(&(sp)->s_lock); \ -} +#if defined(_KERNEL) -#define TTY_RELE(sp) { \ - mutex_enter(&(sp)->s_lock); \ - if ((--(sp)->s_cnt) == 0) \ - cv_signal(&(sp)->s_wait_cv); \ - mutex_exit(&(sp)->s_lock); \ -} +extern sess_t session0; /* forward referenced structure tags */ struct vnode; struct proc; +struct stdata; + +extern void sess_hold(proc_t *p); +extern void sess_rele(sess_t *, boolean_t); +extern sess_t *tty_hold(void); +extern void tty_rele(sess_t *sp); + -extern void sess_rele(sess_t *); extern void sess_create(void); -extern void freectty(sess_t *); -extern void alloctty(struct proc *, struct vnode *); +extern int strctty(struct stdata *); +extern int freectty(boolean_t); extern dev_t cttydev(struct proc *); +extern void ctty_clear_sighuped(void); #endif /* defined(_KERNEL) */ diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h index d00220f2a9..39112e6c97 100644 --- a/usr/src/uts/common/sys/socketvar.h +++ b/usr/src/uts/common/sys/socketvar.h @@ -544,11 +544,21 @@ struct sonodeops { (((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1)) /* - * Used in parsing msg_control + * Macros that operate on struct cmsghdr. + * Used in parsing msg_control. + * The CMSG_VALID macro does not assume that the last option buffer is padded. 
*/ #define CMSG_NEXT(cmsg) \ (struct cmsghdr *)((uintptr_t)(cmsg) + \ ROUNDUP_cmsglen((cmsg)->cmsg_len)) +#define CMSG_CONTENT(cmsg) (&((cmsg)[1])) +#define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) +#define CMSG_VALID(cmsg, start, end) \ + (ISALIGNED_cmsghdr(cmsg) && \ + ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ + ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ + ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ + ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) /* * Maximum size of any argument that is copied in (addresses, options, diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h index 27403d72cc..4f424e96e1 100644 --- a/usr/src/uts/common/sys/strsubr.h +++ b/usr/src/uts/common/sys/strsubr.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -1113,8 +1112,6 @@ extern mblk_t *strrput_misc(vnode_t *, mblk_t *, extern int getiocseqno(void); extern int strwaitbuf(size_t, int); extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *); -extern void stralloctty(struct stdata *); -extern void strfreectty(struct stdata *); extern struct stdata *shalloc(queue_t *); extern void shfree(struct stdata *s); extern queue_t *allocq(void); diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index 43dee30f0b..1a6412b70b 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -99,7 +99,7 @@ extern "C" { * getpgid(pid) :: syscall(39,4,pid) * setpgid(pid,pgid) :: syscall(39,5,pid,pgid) */ -#define SYS_reserved_40 40 /* 40 not used, was xenix */ +#define SYS_uucopystr 40 #define SYS_dup 41 #define SYS_pipe 42 #define SYS_times 43 @@ -355,7 +355,7 @@ extern "C" { #define SYS_pwrite 174 #define SYS_llseek 175 #define SYS_inst_sync 176 -#define SYS_reserved_177 177 /* 177 reserved */ +#define SYS_brand 177 #define SYS_kaio 178 /* * subcodes: @@ -464,6 +464,8 @@ extern "C" { * zone_list(...) :: zone(ZONE_LIST, ...) * zone_shutdown(...) :: zone(ZONE_SHUTDOWN, ...) * zone_lookup(...) :: zone(ZONE_LOOKUP, ...) + * zone_setattr(...) :: zone(ZONE_SETATTR, ...) + * zone_getattr(...) :: zone(ZONE_GETATTR, ...) 
*/ #define SYS_autofssys 228 #define SYS_getcwd 229 @@ -494,7 +496,7 @@ extern "C" { #define SYS_lwp_mutex_trylock 251 #define SYS_lwp_mutex_init 252 #define SYS_cladm 253 -#define SYS_reserved_254 254 /* 254 reserved */ +#define SYS_uucopy 254 #define SYS_umount2 255 diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h index c96ea5b4ac..ac465ad49f 100644 --- a/usr/src/uts/common/sys/systm.h +++ b/usr/src/uts/common/sys/systm.h @@ -246,6 +246,7 @@ int copyoutstr_noerr(const char *, char *, size_t, size_t *); int copystr(const char *, char *, size_t, size_t *); void bcopy(const void *, void *, size_t); void ucopy(const void *, void *, size_t); +void ucopystr(const char *, char *, size_t, size_t *); void pgcopy(const void *, void *, size_t); void ovbcopy(const void *, void *, size_t); void bzero(void *, size_t); diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h index 8bd020e5c1..2d99f70bc2 100644 --- a/usr/src/uts/common/sys/termios.h +++ b/usr/src/uts/common/sys/termios.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,7 +23,7 @@ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -382,6 +381,24 @@ extern pid_t tcgetsid(); #define TCSETSF (_TIOC|16) /* + * linux terminal ioctls we need to be aware of + */ +#define TIOCSETLD (_TIOC|123) /* set line discipline parms */ +#define TIOCGETLD (_TIOC|124) /* get line discipline parms */ + +/* + * The VMIN and VTIME and solaris overlap with VEOF and VEOL - This is + * perfectly legal except, linux expects them to be separate. So we keep + * them separately. + */ +struct lx_cc { + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +/* * NTP PPS ioctls */ #define TIOCGPPS (_TIOC|125) @@ -457,6 +474,7 @@ struct ppsclockev32 { #define TIOCGLTC (tIOC|116) /* get local special chars */ #define TIOCOUTQ (tIOC|115) /* driver output queue size */ #define TIOCNOTTY (tIOC|113) /* void tty association */ +#define TIOCSCTTY (tIOC|132) /* get a ctty */ #define TIOCSTOP (tIOC|111) /* stop output, like ^S */ #define TIOCSTART (tIOC|110) /* start output, like ^Q */ #define TIOCSILOOP (tIOC|109) /* private to Sun; do not use */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 56c23d00ad..636b8acc0f 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -87,6 +87,10 @@ extern "C" { #define ZONE_ATTR_SLBL 8 #define ZONE_ATTR_INITNAME 9 #define ZONE_ATTR_BOOTARGS 10 +#define ZONE_ATTR_BRAND 11 + +/* Start of the brand-specific attribute namespace */ +#define ZONE_ATTR_BRAND_ATTRS 32768 #define ZONE_EVENT_CHANNEL "com.sun:zones:status" #define ZONE_EVENT_STATUS_CLASS "status" @@ -103,6 +107,49 @@ extern "C" { #define ZONE_CB_TIMESTAMP "when" #define ZONE_CB_ZONEID "zoneid" +/* + * Exit values that may be returned by scripts or programs invoked by various + * zone commands. + * + * These are defined as: + * + * ZONE_SUBPROC_OK + * =============== + * The subprocess completed successfully. 
+ * + * ZONE_SUBPROC_USAGE + * ================== + * The subprocess failed with a usage message, or a usage message should + * be output in its behalf. + * + * ZONE_SUBPROC_NOTCOMPLETE + * ======================== + * The subprocess did not complete, but the actions performed by the + * subprocess require no recovery actions by the user. + * + * For example, if the subprocess were called by "zoneadm install," the + * installation of the zone did not succeed but the user need not perform + * a "zoneadm uninstall" before attempting another install. + * + * ZONE_SUBPROC_FATAL + * ================== + * The subprocess failed in a fatal manner, usually one that will require + * some type of recovery action by the user. + * + * For example, if the subprocess were called by "zoneadm install," the + * installation of the zone did not succeed and the user will need to + * perform a "zoneadm uninstall" before another install attempt is + * possible. + * + * The non-success exit values are large to avoid accidental collision + * with values used internally by some commands (e.g. "Z_ERR" and + * "Z_USAGE" as used by zoneadm.) + */ +#define ZONE_SUBPROC_OK 0 +#define ZONE_SUBPROC_USAGE 253 +#define ZONE_SUBPROC_NOTCOMPLETE 254 +#define ZONE_SUBPROC_FATAL 255 + #ifdef _SYSCALL32 typedef struct { caddr32_t zone_name; @@ -159,8 +206,8 @@ typedef enum { * communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT. */ typedef enum zone_cmd { - Z_READY, Z_BOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING, - Z_MOUNT, Z_UNMOUNT + Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING, + Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT } zone_cmd_t; /* @@ -223,6 +270,7 @@ typedef struct zone_cmd_rval { #define ZF_IS_SCRATCH 0x4 /* scratch zone */ struct pool; +struct brand; /* * Structure to record list of ZFS datasets exported to a zone. 
@@ -318,6 +366,8 @@ typedef struct zone { int zone_match; /* require label match for packets */ tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */ + boolean_t zone_restart_init; /* Restart init if it dies? */ + struct brand *zone_brand; /* zone's brand */ } zone_t; /* @@ -330,8 +380,6 @@ extern zone_t *global_zone; extern uint_t maxzones; extern rctl_hndl_t rc_zone_nlwps; -extern const char * const zone_initname; - extern long zone(int, void *, void *, void *, void *); extern void zone_zsd_init(void); extern void zone_init(void); diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c new file mode 100644 index 0000000000..9b4bd38baa --- /dev/null +++ b/usr/src/uts/common/syscall/brandsys.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/brand.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/zone.h> + +/* + * brand(2) system call. 
+ */ +int64_t +brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, + uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) +{ + struct proc *p = curthread->t_procp; + int64_t rval = 0; + int err; + + /* + * The brandsys system call can only be executed from inside a + * branded zone. + */ + if (INGLOBALZONE(p) || !ZONE_IS_BRANDED(p->p_zone)) + return (set_errno(ENOSYS)); + + if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3, + arg4, arg5, arg6)) != 0) + return (set_errno(err)); + + return (rval); +} diff --git a/usr/src/uts/common/syscall/pgrpsys.c b/usr/src/uts/common/syscall/pgrpsys.c index e8be876537..8f60747663 100644 --- a/usr/src/uts/common/syscall/pgrpsys.c +++ b/usr/src/uts/common/syscall/pgrpsys.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,11 +18,16 @@ * * CDDL HEADER END */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.78 */ +#pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.78 */ #include <sys/param.h> #include <sys/types.h> @@ -39,8 +43,9 @@ int setpgrp(int flag, int pid, int pgid) { - register proc_t *p = ttoproc(curthread); - register int retval = 0; + proc_t *p = curproc; + int retval = 0; + int sid; switch (flag) { @@ -51,7 +56,10 @@ setpgrp(int flag, int pid, int pgid) sess_create(); } else mutex_exit(&pidlock); - return (p->p_sessp->s_sid); + mutex_enter(&p->p_splock); + sid = p->p_sessp->s_sid; + mutex_exit(&p->p_splock); + return (sid); case 3: /* setsid() */ mutex_enter(&pidlock); @@ -61,7 +69,10 @@ setpgrp(int flag, int pid, int pgid) } mutex_exit(&pidlock); sess_create(); - return (p->p_sessp->s_sid); + mutex_enter(&p->p_splock); + sid = p->p_sessp->s_sid; + mutex_exit(&p->p_splock); + return (sid); case 5: /* setpgid() */ { diff --git a/usr/src/uts/common/syscall/uucopy.c b/usr/src/uts/common/syscall/uucopy.c new file mode 100644 index 0000000000..c301599e2f --- /dev/null +++ b/usr/src/uts/common/syscall/uucopy.c @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/systm.h> + +int +uucopy(const void *from, void *to, size_t size) +{ + label_t ljb; + + if (on_fault(&ljb)) + return (set_errno(EFAULT)); + + ucopy(from, to, size); + + no_fault(); + + return (0); +} + +ssize_t +uucopystr(const char *from, char *to, size_t size) +{ + label_t ljb; + size_t len; + + if (on_fault(&ljb)) + return (set_errno(EFAULT)); + + ucopystr(from, to, size, &len); + + no_fault(); + + return ((ssize_t)len); +} diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.h b/usr/src/uts/i86pc/io/pcplusmp/apic.h index 944f3e9523..3f3063d589 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic.h +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.h @@ -333,7 +333,7 @@ struct apic_io_intr { /* special or reserve vectors */ #define APIC_CHECK_RESERVE_VECTORS(v) \ ((v == T_FASTTRAP) || (v == APIC_SPUR_INTR) || (v == T_SYSCALLINT) ||\ - (v == T_DTRACE_RET)) + (v == T_DTRACE_RET) || (v == T_INT80)) /* cmos shutdown code for BIOS */ #define BIOS_SHUTDOWN 0x0a diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in index d3a221d0c8..2ed5b03b74 100644 --- a/usr/src/uts/i86pc/ml/offsets.in +++ b/usr/src/uts/i86pc/ml/offsets.in @@ -59,6 +59,7 @@ #include <sys/ontrap.h> #include <sys/lgrp.h> #include <sys/dtrace.h> +#include <sys/brand.h> proc PROCSIZE p_link @@ -76,6 +77,8 @@ proc PROCSIZE p_model p_pctx p_agenttp + p_brand + p_brand_data _kthread THREAD_SIZE t_pcb T_LABEL @@ -141,6 +144,7 @@ _label_t _klwp lwp_thread lwp_procp + lwp_brand lwp_eosys lwp_regs lwp_arg @@ -376,3 +380,6 @@ copyops cp_suword32 cp_suword64 cp_physio + +brand + b_machops diff --git 
a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s index 9465c864a1..bc9618fcb9 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm.s +++ b/usr/src/uts/i86pc/ml/syscall_asm.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +37,7 @@ #include <sys/regset.h> #include <sys/psw.h> #include <sys/x86_archext.h> +#include <sys/machbrand.h> #if defined(__lint) @@ -114,6 +114,52 @@ orl %ecx, %edi; \ ORL_SYSCALLTRACE(%edi) +/* + * When the brand's callback is invoked, the stack will look like this: + * -------------------------------------- + * | 'scratch space' | + * | user's %ebx | + * | user's %gs selector | + * | kernel's %gs selector | + * | | lwp brand data | + * | | proc brand data | + * v | user return address | + * | callback wrapper return addr | + * -------------------------------------- + * + * The lx brand (at least) uses each of these fields. + * If the brand code returns, we assume that we are meant to execute the + * normal system call path. 
+ */ +#define BRAND_CALLBACK(callback_id) \ + subl $4, %esp /* save some scratch space */ ;\ + pushl %ebx /* save %ebx to use for scratch */ ;\ + pushl %gs /* save the user %gs */ ;\ + movl $KGS_SEL, %ebx ;\ + pushl %ebx /* push kernel's %gs */ ;\ + movw %ebx, %gs /* switch to the kernel's %gs */ ;\ + movl %gs:CPU_THREAD, %ebx /* load the thread pointer */ ;\ + movl T_LWP(%ebx), %ebx /* load the lwp pointer */ ;\ + pushl LWP_BRAND(%ebx) /* push the lwp's brand data */ ;\ + movl LWP_PROCP(%ebx), %ebx /* load the proc pointer */ ;\ + pushl P_BRAND_DATA(%ebx) /* push the proc's brand data */ ;\ + movl P_BRAND(%ebx), %ebx /* load the brand pointer */ ;\ + movl B_MACHOPS(%ebx), %ebx /* load the machops pointer */ ;\ + movl _CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx ;\ + cmpl $0, %ebx ;\ + je 1f ;\ + movl %ebx, 20(%esp) /* save callback to scratch */ ;\ + movl 12(%esp), %ebx /* grab the user %gs */ ;\ + movw %ebx, %gs /* restore the user %gs */ ;\ + movl 16(%esp), %ebx /* restore %ebx */ ;\ + pushl 24(%esp) /* push the return address */ ;\ + call *24(%esp) /* call callback */ ;\ + addl $4, %esp /* get rid of ret addr */ ;\ +1: movl 12(%esp), %ebx /* grab the user %gs */ ;\ + movw %ebx, %gs /* restore the user %gs */ ;\ + movl 16(%esp), %ebx /* restore user's %ebx */ ;\ + addl $24, %esp /* restore stack ptr */ + #define MSTATE_TRANSITION(from, to) \ pushl $to; \ pushl $from; \ @@ -314,9 +360,12 @@ size_t _allsyscalls_size; #else /* __lint */ - ENTRY_NP2(sys_call, _allsyscalls) + ENTRY_NP2(brand_sys_call, _allsyscalls) + BRAND_CALLBACK(BRAND_CB_SYSCALL) + ALTENTRY(sys_call) / on entry eax = system call number + / set up the stack to look as in reg.h subl $8, %esp / pad the stack with ERRCODE and TRAPNO @@ -401,6 +450,7 @@ _syscall_fault: xorl %edx, %edx jmp _syslcall_done SET_SIZE(sys_call) + SET_SIZE(brand_sys_call) #endif /* __lint */ @@ -460,6 +510,25 @@ _syscall_fault: * * Note that we are unable to return both "rvals" to userland with this * call, as %edx
is used by the sysexit instruction. + * + * One final complication in this routine is its interaction with + * single-stepping in a debugger. For most of the system call mechanisms, + * the CPU automatically clears the single-step flag before we enter the + * kernel. The sysenter mechanism does not clear the flag, so a user + * single-stepping through a libc routine may suddenly find him/herself + * single-stepping through the kernel. To detect this, kmdb compares the + * trap %pc to the [brand_]sys_enter addresses on each single-step trap. + * If it finds that we have single-stepped to a sysenter entry point, it + * explicitly clears the flag and executes the sys_sysenter routine. + * + * One final complication in this final complication is the fact that we + * have two different entry points for sysenter: brand_sys_sysenter and + * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping + * through the kernel with kmdb, we will eventually hit the instruction at + * sys_sysenter. kmdb cannot distinguish between that valid single-step + * and the undesirable one mentioned above. To avoid this situation, we + * simply add a jump over the instruction at sys_sysenter to make it + * impossible to single-step to it. */ #if defined(__lint) @@ -469,7 +538,19 @@ sys_sysenter() #else /* __lint */ - ENTRY_NP(sys_sysenter) + ENTRY_NP(brand_sys_sysenter) + pushl %edx + BRAND_CALLBACK(BRAND_CB_SYSENTER) + popl %edx + /* + * Jump over sys_sysenter to allow single-stepping as described + * above. + */ + ja 1f + + ALTENTRY(sys_sysenter) + nop +1: / / do what the call gate would've done to the stack .. / @@ -544,6 +625,38 @@ _sysenter_done: sti sysexit SET_SIZE(sys_sysenter) + SET_SIZE(brand_sys_sysenter) + +#endif /* __lint */ + +#if defined(__lint) +/* + * System call via an int80. This entry point is only used by the Linux + * application environment. Unlike the sysenter path, there is no default + * action to take if no callback is registered for this process. 
+ */ +void +sys_int80() +{} + +#else /* __lint */ + + ENTRY_NP(brand_sys_int80) + BRAND_CALLBACK(BRAND_CB_INT80) + + ALTENTRY(sys_int80) + /* + * We hit an int80, but this process isn't of a brand with an int80 + * handler. Bad process! Make it look as if the INT failed. + * Modify %eip to point before the INT, push the expected error + * code and fake a GP fault. + * + */ + subl $2, (%esp) /* int insn 2-bytes */ + pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) + jmp gptrap / GP fault + SET_SIZE(sys_int80) + SET_SIZE(brand_sys_int80) /* * Declare a uintptr_t which covers the entire pc range of syscall diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s index b5d1b98f9a..ce2f08f807 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s +++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,6 +29,7 @@ #include <sys/asm_misc.h> #include <sys/regset.h> #include <sys/psw.h> +#include <sys/machbrand.h> #if defined(__lint) @@ -117,6 +117,54 @@ #define ORL_SYSCALLTRACE(r32) #endif +/* + * In the 32-bit kernel, we do absolutely nothing before getting into the + * brand callback checks. In 64-bit land, we do swapgs and then come here. + * We assume that the %rsp- and %r15-stashing fields in the CPU structure + * are still unused. 
+ * + * When the callback is invoked, we will be on the user's %gs and + * the stack will look like this: + * + * stack: -------------------------------------- + * | callback pointer | + * | | user stack pointer | + * | | lwp brand data | + * | | proc brand data | + * v | userland return address | + * | callback wrapper return addr | + * -------------------------------------- + * + */ +#define BRAND_CALLBACK(callback_id) \ + movq %rsp, %gs:CPU_RTMP_RSP /* save the stack pointer */ ;\ + movq %r15, %gs:CPU_RTMP_R15 /* save %r15 */ ;\ + movq %gs:CPU_THREAD, %r15 /* load the thread pointer */ ;\ + movq T_STACK(%r15), %rsp /* switch to the kernel stack */ ;\ + subq $16, %rsp /* save space for two pointers */ ;\ + pushq %r14 /* save %r14 */ ;\ + movq %gs:CPU_RTMP_RSP, %r14 ;\ + movq %r14, 8(%rsp) /* stash the user stack pointer */ ;\ + popq %r14 /* restore %r14 */ ;\ + movq T_LWP(%r15), %r15 /* load the lwp pointer */ ;\ + pushq LWP_BRAND(%r15) /* push the lwp's brand data */ ;\ + movq LWP_PROCP(%r15), %r15 /* load the proc pointer */ ;\ + pushq P_BRAND_DATA(%r15) /* push the proc's brand data */ ;\ + movq P_BRAND(%r15), %r15 /* load the brand pointer */ ;\ + movq B_MACHOPS(%r15), %r15 /* load the machops pointer */ ;\ + movq _CONST(_MUL(callback_id, CPTRSIZE))(%r15), %r15 ;\ + cmpq $0, %r15 ;\ + je 1f ;\ + movq %r15, 24(%rsp) /* save the callback pointer */ ;\ + movq %gs:CPU_RTMP_RSP, %r15 /* grab the user stack pointer */ ;\ + pushq (%r15) /* push the return address */ ;\ + movq %gs:CPU_RTMP_R15, %r15 /* restore %r15 */ ;\ + swapgs ;\ + call *32(%rsp) /* call callback */ ;\ + swapgs ;\ +1: movq %gs:CPU_RTMP_R15, %r15 /* restore %r15 */ ;\ + movq %gs:CPU_RTMP_RSP, %rsp /* restore the stack pointer */ + #define MSTATE_TRANSITION(from, to) \ movl $from, %edi; \ movl $to, %esi; \ @@ -192,13 +240,13 @@ #if !defined(__lint) __lwptoregs_msg: - .string "%M%:%d lwptoregs(%p) [%p] != rp [%p]" + .string "syscall_asm_amd64.s:%d lwptoregs(%p) [%p] != rp [%p]" __codesel_msg: - 
.string "%M%:%d rp->r_cs [%ld] != %ld" + .string "syscall_asm_amd64.s:%d rp->r_cs [%ld] != %ld" __no_rupdate_msg: - .string "%M%:%d lwp %p, pcb_flags & RUPDATE_PENDING != 0" + .string "syscall_asm_amd64.s:%d lwp %p, pcb_flags & RUPDATE_PENDING != 0" #endif /* !__lint */ @@ -305,8 +353,12 @@ size_t _allsyscalls_size; #else /* __lint */ - ENTRY_NP2(sys_syscall,_allsyscalls) - + ENTRY_NP2(brand_sys_syscall,_allsyscalls) + swapgs + BRAND_CALLBACK(BRAND_CB_SYSCALL) + swapgs + + ALTENTRY(sys_syscall) swapgs movq %rsp, %gs:CPU_RTMP_RSP movq %r15, %gs:CPU_RTMP_R15 @@ -506,6 +558,7 @@ _syscall_post_call: MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER) jmp sys_rtt_syscall SET_SIZE(sys_syscall) + SET_SIZE(brand_sys_syscall) #endif /* __lint */ @@ -518,7 +571,12 @@ sys_syscall32() #else /* __lint */ - ENTRY_NP(sys_syscall32) + ENTRY_NP(brand_sys_syscall32) + swapgs + BRAND_CALLBACK(BRAND_CB_SYSCALL32) + swapgs + + ALTENTRY(sys_syscall32) swapgs movl %esp, %r10d movq %gs:CPU_THREAD, %r15 @@ -693,6 +751,7 @@ _full_syscall_postsys32: MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER) jmp sys_rtt_syscall32 SET_SIZE(sys_syscall32) + SET_SIZE(brand_sys_syscall32) #endif /* __lint */ @@ -717,6 +776,25 @@ _full_syscall_postsys32: * * Note that we are unable to return both "rvals" to userland with * this call, as %edx is used by the sysexit instruction. + * + * One final complication in this routine is its interaction with + * single-stepping in a debugger. For most of the system call mechanisms, + * the CPU automatically clears the single-step flag before we enter the + * kernel. The sysenter mechanism does not clear the flag, so a user + * single-stepping through a libc routine may suddenly find him/herself + * single-stepping through the kernel. To detect this, kmdb compares the + * trap %pc to the [brand_]sys_enter addresses on each single-step trap. + * If it finds that we have single-stepped to a sysenter entry point, it + * explicitly clears the flag and executes the sys_sysenter routine. 
+ * + * One final complication in this final complication is the fact that we + * have two different entry points for sysenter: brand_sys_sysenter and + * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping + * through the kernel with kmdb, we will eventually hit the instruction at + * sys_sysenter. kmdb cannot distinguish between that valid single-step + * and the undesirable one mentioned above. To avoid this situation, we + * simply add a jump over the instruction at sys_sysenter to make it + * impossible to single-step to it. */ #if defined(__lint) @@ -726,8 +804,20 @@ sys_sysenter() #else /* __lint */ - ENTRY_NP(sys_sysenter) + ENTRY_NP(brand_sys_sysenter) + swapgs + + ALTENTRY(_brand_sys_sysenter_post_swapgs) + BRAND_CALLBACK(BRAND_CB_SYSENTER) + /* + * Jump over sys_sysenter to allow single-stepping as described + * above. + */ + jmp _sys_sysenter_post_swapgs + + ALTENTRY(sys_sysenter) swapgs + ALTENTRY(_sys_sysenter_post_swapgs) movq %gs:CPU_THREAD, %r15 @@ -909,7 +999,41 @@ sys_sysenter() sysexit SET_SIZE(sys_sysenter) SET_SIZE(_sys_sysenter_post_swapgs) + SET_SIZE(brand_sys_sysenter) + +#endif /* __lint */ + +#if defined(__lint) +/* + * System call via an int80. This entry point is only used by the Linux + * application environment. Unlike the other entry points, there is no + * default action to take if no callback is registered for this process. + */ +void +sys_int80() +{} + +#else /* __lint */ + ENTRY_NP(brand_sys_int80) + swapgs + BRAND_CALLBACK(BRAND_CB_INT80) + swapgs + + ENTRY_NP(sys_int80) + /* + * We hit an int80, but this process isn't of a brand with an int80 + * handler. Bad process! Make it look as if the INT failed. + * Modify %eip to point before the INT, push the expected error + * code and fake a GP fault. 
+ * + */ + swapgs + subq $2, (%rsp) /* int insn 2-bytes */ + pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2) + jmp gptrap / GP fault + SET_SIZE(sys_int80) + SET_SIZE(brand_sys_int80) #endif /* __lint */ @@ -927,7 +1051,12 @@ sys_syscall_int() #else /* __lint */ - ENTRY_NP(sys_syscall_int) + ENTRY_NP(brand_sys_syscall_int) + swapgs + BRAND_CALLBACK(BRAND_CB_INT91) + swapgs + + ALTENTRY(sys_syscall_int) swapgs movq %gs:CPU_THREAD, %r15 movq T_STACK(%r15), %rsp @@ -940,6 +1069,7 @@ sys_syscall_int() movb $1, T_POST_SYS(%r15) jmp _syscall32_save SET_SIZE(sys_syscall_int) + SET_SIZE(brand_sys_syscall_int) #endif /* __lint */ diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 209ad07bca..f050d93b2a 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -114,6 +114,7 @@ extern void progressbar_init(void); extern void progressbar_start(void); +extern void brand_init(void); /* * XXX make declaration below "static" when drivers no longer use this @@ -1332,6 +1333,11 @@ startup_modules(void) param_init(); /* + * Initialize the default brands + */ + brand_init(); + + /* * maxmem is the amount of physical memory we're playing with. */ maxmem = physmem; diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c index 9a38a811a5..10034cc67f 100644 --- a/usr/src/uts/i86pc/os/trap.c +++ b/usr/src/uts/i86pc/os/trap.c @@ -386,6 +386,7 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) int watchcode; int watchpage; caddr_t vaddr; + int singlestep_twiddle; size_t sz; int ta; #ifdef __amd64 @@ -975,19 +976,33 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) * * The same thing happens for sysenter, too. 
*/ + singlestep_twiddle = 0; + if (rp->r_pc == (uintptr_t)sys_sysenter || + rp->r_pc == (uintptr_t)brand_sys_sysenter) { + singlestep_twiddle = 1; #if defined(__amd64) - if (rp->r_pc == (uintptr_t)sys_sysenter) { /* - * Adjust the pc so that we don't execute the - * swapgs instruction at the head of the - * handler and completely confuse things. + * Since we are already on the kernel's + * %gs, on 64-bit systems the sysenter case + * needs to adjust the pc to avoid + * executing the swapgs instruction at the + * top of the handler. */ - rp->r_pc = (uintptr_t) - _sys_sysenter_post_swapgs; -#elif defined(__i386) - if (rp->r_pc == (uintptr_t)sys_call || - rp->r_pc == (uintptr_t)sys_sysenter) { + if (rp->r_pc == (uintptr_t)sys_sysenter) + rp->r_pc = (uintptr_t) + _sys_sysenter_post_swapgs; + else + rp->r_pc = (uintptr_t) + _brand_sys_sysenter_post_swapgs; +#endif + } +#if defined(__i386) + else if (rp->r_pc == (uintptr_t)sys_call || + rp->r_pc == (uintptr_t)brand_sys_call) { + singlestep_twiddle = 1; + } #endif + if (singlestep_twiddle) { rp->r_ps &= ~PS_T; /* turn off trace */ lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING; cur_thread->t_post_sys = 1; @@ -1161,20 +1176,17 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) rp->r_ps &= ~PS_T; /* * If both NORMAL_STEP and WATCH_STEP are in effect, - * give precedence to NORMAL_STEP. If neither is set, + * give precedence to WATCH_STEP. If neither is set, * user must have set the PS_T bit in %efl; treat this * as NORMAL_STEP. 
*/ - if ((pcb->pcb_flags & NORMAL_STEP) || - !(pcb->pcb_flags & WATCH_STEP)) { + if ((fault = undo_watch_step(&siginfo)) == 0 && + ((pcb->pcb_flags & NORMAL_STEP) || + !(pcb->pcb_flags & WATCH_STEP))) { siginfo.si_signo = SIGTRAP; siginfo.si_code = TRAP_TRACE; siginfo.si_addr = (caddr_t)rp->r_pc; fault = FLTTRACE; - if (pcb->pcb_flags & WATCH_STEP) - (void) undo_watch_step(NULL); - } else { - fault = undo_watch_step(&siginfo); } pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP); } else { diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile index 5ddfbe5117..adaa4d6158 100644 --- a/usr/src/uts/intel/Makefile +++ b/usr/src/uts/intel/Makefile @@ -85,13 +85,13 @@ install_h := TARGET= install_h .PARALLEL: $(KMODS) $(CLOSED_KMODS) $(SVVS) $(XMODS) $(CLOSED_XMODS) \ config $(LINT_DEPS) -def all install clean clobber modlist: $(KMODS) $(CLOSED_KMODS) \ - $(SVVS) $(XMODS) $(CLOSED_XMODS) config +def all install clean clobber modlist: genassym $(KMODS) $(CLOSED_KMODS) \ + $(SVVS) $(XMODS) $(CLOSED_XMODS) config modlintlib clean.lint: $(LINT_KMODS) $(CLOSED_LINT_KMODS) $(SVVS) \ $(XMODS) $(CLOSED_XMODS) -$(KMODS) $(SUBDIRS) config: FRC +genassym $(KMODS) $(SUBDIRS) config: FRC @cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET) $(CLOSED_KMODS): FRC diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 45484afa4c..d2ab9a3290 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -18,6 +18,7 @@ # # CDDL HEADER END # + # # uts/intel/Makefile.files # @@ -107,6 +108,14 @@ XMEMFS_OBJS += \ xmem_vfsops.o \ xmem_vnops.o +LX_PROC_OBJS += \ + lx_prsubr.o \ + lx_prvfsops.o \ + lx_prvnops.o + +LX_AUTOFS_OBJS += \ + lx_autofs.o + # # Driver modules # @@ -178,6 +187,29 @@ AAC_OBJS = aac.o aac_ioctl.o AMR_OBJS = amr.o # +# Brand modules +# +SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o + +LX_BRAND_OBJS = \ + lx_brand.o \ + lx_brand_asm.o \ + lx_brk.o \ + lx_clone.o \ + lx_futex.o \ + lx_getpid.o \ + lx_id.o \ + lx_kill.o \ + 
lx_misc.o \ + lx_modify_ldt.o \ + lx_pid.o \ + lx_sched.o \ + lx_signum.o \ + lx_syscall.o \ + lx_sysinfo.o \ + lx_thread_area.o + +# # special files # MODSTUB_OBJ += \ diff --git a/usr/src/uts/intel/Makefile.intel.shared b/usr/src/uts/intel/Makefile.intel.shared index 7636718bd9..2367e343c3 100644 --- a/usr/src/uts/intel/Makefile.intel.shared +++ b/usr/src/uts/intel/Makefile.intel.shared @@ -25,9 +25,11 @@ # # ident "%Z%%M% %I% %E% SMI" # + # # This makefile contains the common definitions for all intel # implementation architecture independent modules. +# # # Machine type (implementation architecture): @@ -44,6 +46,7 @@ PLATFORM = i86pc # UNIX_DIR = $(UTSBASE)/i86pc/unix GENLIB_DIR = $(UTSBASE)/intel/genunix +GENASSYM_DIR = $(UTSBASE)/intel/genassym IPDRV_DIR = $(UTSBASE)/intel/ip MODSTUBS_DIR = $(UNIX_DIR) DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym @@ -135,6 +138,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS) ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS)) ASFLAGS += $(ASFLAGS_XARCH) +AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR) # # Define the base directory for installation. 
@@ -403,6 +407,13 @@ MISC_KMODS += md_stripe md_hotspares md_mirror md_raid md_trans md_notify MISC_KMODS += md_sp # +# Brand modules +# +MISC_KMODS += sn1_brand lx_brand +DRV_KMODS += lx_systrace lx_ptm lx_audio +STRMOD_KMODS += ldlinux + +# # Exec Class Modules (/kernel/exec): # EXEC_KMODS += elfexec intpexec javaexec @@ -416,7 +427,7 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL # File System Modules (/kernel/fs): # FS_KMODS += autofs cachefs ctfs dev devfs fdfs fifofs hsfs lofs -FS_KMODS += mntfs namefs nfs objfs zfs +FS_KMODS += lx_afs lx_proc mntfs namefs nfs objfs zfs FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs xmemfs # diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules index 467289ca7f..50de973acd 100644 --- a/usr/src/uts/intel/Makefile.rules +++ b/usr/src/uts/intel/Makefile.rules @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. 
@@ -47,6 +46,9 @@ $(OBJS_DIR)/%.o: $(SRC)/common/fs/%.c $(OBJS_DIR)/%.o: $(SRC)/common/util/i386/%.s $(COMPILE.s) -o $@ $< +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/sn1/%.s + $(COMPILE.s) -o $@ $< + $(OBJS_DIR)/%.o: $(UTSBASE)/intel/dtrace/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -150,6 +152,9 @@ $(LINTS_DIR)/%.ln: $(SRC)/common/fs/%.c $(LINTS_DIR)/%.ln: $(SRC)/common/util/i386/%.s @($(LHEAD) $(LINT.s) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/sn1/%.s + @($(LHEAD) $(LINT.s) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/dtrace/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/brand/lx/lx_brand_asm.s b/usr/src/uts/intel/brand/lx/lx_brand_asm.s new file mode 100644 index 0000000000..7e51a6e487 --- /dev/null +++ b/usr/src/uts/intel/brand/lx/lx_brand_asm.s @@ -0,0 +1,205 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(__lint) + +#include <sys/systm.h> + +#else /* __lint */ + +#include <sys/asm_linkage.h> +#include <sys/privregs.h> +#include <sys/segments.h> +#include "genassym.h" + +#endif /* __lint */ + +#ifdef __lint + +void +lx_brand_int80_callback(void) +{ +} + +#else /* __lint */ + +#if defined(__amd64) + /* + * lx brand callback for the int $0x80 trap handler. + * + * We're running on the user's %gs. + * + * We return directly to userland, bypassing the _update_sregs logic, so + * this routine must NOT do anything that could cause a context switch. + * + * %rax - syscall number + * + * When called, all general registers and %gs are as they were when + * the user process made the system call. The stack looks like + * this: + * -------------------------------------- + * 32 | saved stack pointer | + * | 24 | lwp brand data | + * | 16 | proc brand data | + * v 8 | user return address (*) | + * 0 | caller's return addr (sys_int80) | + * ------------------------------------- + */ + ENTRY(lx_brand_int80_callback) + movq 24(%rsp), %r15 /* grab the lwp brand data */ + movl %gs, BR_UGS(%r15) /* save user %gs */ + + movq 16(%rsp), %r15 /* grab the proc brand data */ + +.lx_brand_int80_patch_point: + jmp .lx_brand_int80_notrace + +.lx_brand_int80_notrace: + movq L_HANDLER(%r15), %r15 /* load the base address */ + +0: + /* + * Rather than returning to the instruction after the int 80, we + * transfer control into the brand library's handler table at + * table_addr + (16 * syscall_num) thus encoding the system + * call number in the instruction pointer. The original return address + * is passed in %eax. + */ + shlq $4, %rax + addq %r15, %rax + movq 32(%rsp), %rsp /* restore user stack pointer */ + xchgq (%rsp), %rax /* swap %rax and return addr */ + iretq + +.lx_brand_int80_trace: + /* + * If tracing is active, we vector to an alternate trace-enabling + * handler table instead. 
+ */ + movq L_TRACEHANDLER(%r15), %r15 /* load trace handler address */ + jmp 0b + SET_SIZE(lx_brand_int80_callback) + + +#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1) +#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace) + + ENTRY(lx_brand_int80_enable) + movl $1, lx_systrace_brand_enabled(%rip) + movq $PATCH_POINT, %r8 + movb $PATCH_VAL, (%r8) + ret + SET_SIZE(lx_brand_int80_enable) + + ENTRY(lx_brand_int80_disable) + movq $PATCH_POINT, %r8 + movb $0, (%r8) + movl $0, lx_systrace_brand_enabled(%rip) + ret + SET_SIZE(lx_brand_int80_disable) + + +#else + /* + * %eax - syscall number + * + * When called, all general registers and %gs are as they were when + * the user process made the system call. The stack looks like + * this: + * + * -------------------------------------- + * | 48 | user's %ss | + * | 44 | user's %esp | + * | 40 | EFLAGS register | + * | 36 | user's %cs | + * | 32 | user's %eip | + * | 28 | 'scratch space' | + * | 24 | user's %ebx | + * | 20 | user's %gs selector | + * | 16 | kernel's %gs selector | + * | 12 | lwp brand data | + * | 8 | proc brand data | + * v 4 | user return address | + * 0 | callback wrapper return addr | + * ------------------------------------- + */ + ENTRY(lx_brand_int80_callback) + pushl %ebx /* save for use as scratch */ + movl 16(%esp), %ebx /* grab the lwp brand data */ + movw %gs, BR_UGS(%ebx) /* save user %gs */ + + movl 12(%esp), %ebx /* grab the proc brand data */ + +.lx_brand_int80_patch_point: + jmp .lx_brand_int80_notrace + +.lx_brand_int80_notrace: + movl L_HANDLER(%ebx), %ebx /* load the base address */ + +0: + /* + * See the corresponding comment in the amd64 version above. 
+ */ + shll $4, %eax + addl %ebx, %eax + popl %ebx /* restore %ebx */ + addl $32, %esp + xchgl (%esp), %eax /* swap %eax and return addr */ + iret + +.lx_brand_int80_trace: + movl L_TRACEHANDLER(%ebx), %ebx /* load trace handler address */ + jmp 0b + SET_SIZE(lx_brand_int80_callback) + + +#define PATCH_POINT _CONST(.lx_brand_int80_patch_point + 1) +#define PATCH_VAL _CONST(.lx_brand_int80_trace - .lx_brand_int80_notrace) + + ENTRY(lx_brand_int80_enable) + pushl %ebx + pushl %eax + movl $1, lx_systrace_brand_enabled + movl $PATCH_POINT, %ebx + movl $PATCH_VAL, %eax + movb %al, (%ebx) + popl %eax + popl %ebx + ret + SET_SIZE(lx_brand_int80_enable) + + ENTRY(lx_brand_int80_disable) + pushl %ebx + movl $PATCH_POINT, %ebx + movb $0, (%ebx) + movl $0, lx_systrace_brand_enabled + popl %ebx + ret + SET_SIZE(lx_brand_int80_disable) + +#endif /* __amd64 */ +#endif /* __lint */ diff --git a/usr/src/uts/intel/brand/sn1/sn1_brand_asm.s b/usr/src/uts/intel/brand/sn1/sn1_brand_asm.s new file mode 100644 index 0000000000..fd793b2098 --- /dev/null +++ b/usr/src/uts/intel/brand/sn1/sn1_brand_asm.s @@ -0,0 +1,340 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(lint) + +#include <sys/systm.h> + +#else /* lint */ + +#include <sys/asm_linkage.h> +#include <sys/privregs.h> +#include <sys/segments.h> + +#endif /* lint */ + +#ifdef lint + +void +sn1_brand_sysenter_callback(void) +{ +} + +void +sn1_brand_syscall_callback(void) +{ +} + +#if defined(__amd64) +void +sn1_brand_syscall32_callback(void) +{ +} +#endif /* amd64 */ + +void +sn1_brand_int91_callback(void) +{ +} + +#else /* lint */ + +/* + * Each of the following macros returns to the standard syscall codepath if + * it detects that this process is not able, or intended, to emulate this + * system call. They all assume that the routine provides a 'bail-out' + * label of '9'. + */ + +/* + * See if this process has a user-space handler registered for it. For the + * sn1 brand, the per-process brand data holds the address of the handler. + * As shown in the stack diagrams below, the callback code leaves that data + * at these offsets. + */ +#if defined(__amd64) +#define CHECK_FOR_HANDLER \ + cmpq $0, 24(%rsp) ;\ + je 9f +#else +#define CHECK_FOR_HANDLER \ + cmpl $0, 12(%esp) ;\ + je 9f +#endif /* __amd64 */ + +/* + * If the system call number is >= 1024, then it is coming from the + * emulation support library. As such we should handle it natively instead + * of sending it back to the emulation library. + */ +#define CHECK_FOR_NATIVE(reg) \ + cmp $1024, reg ;\ + jl 1f ;\ + sub $1024, reg ;\ + jmp 9f ;\ +1: + +/* + * Check to see if we want to interpose on this system call. If not, we + * jump back into the normal syscall path and pretend nothing happened. 
+ */ +#define CHECK_FOR_INTERPOSITION(sysr, scr, scr_low) \ + lea sn1_emulation_table, scr ;\ + mov (scr), scr ;\ + add sysr, scr ;\ + movb (scr), scr_low ;\ + cmpb $0, scr_low ;\ + je 9f ;\ + +#define CALLBACK_PROLOGUE(call, scr, scr_low) ;\ + push scr /* Save scratch register */ ;\ + CHECK_FOR_HANDLER ;\ + CHECK_FOR_NATIVE(call) ;\ + CHECK_FOR_INTERPOSITION(call, scr, scr_low) + +/* + * The callback routines: + */ + +#if defined(__amd64) + /* + * When we get into any of these callback routines, the stack + * looks like this: + * -------------------------------------- + * 32 | saved stack pointer | + * | 24 | lwp brand data | + * | 16 | proc brand data | + * v 8 | user return address (*) | + * 0 | BRAND_CALLBACK()'s return addr | + * -------------------------------------- + * (*) This is actually just the bottom value from the user's + * stack. syscall puts this in %rcx instead of the stack, + * so it's just garbage for that entry point. + */ + + /* + * syscall handler for 32-bit user processes: + * + * %ecx contains the address of the instruction after the syscall + */ + ENTRY(sn1_brand_syscall32_callback) + + CALLBACK_PROLOGUE(%rax, %r15, %r15b) + + movq %rsp, %r15 /* save our stack pointer */ + + /* + * Adjust the user's stack so that the 'ret' from our userspace + * handler takes us to the post-syscall instruction instead of to + * the routine that called the system call. + */ + movq 40(%rsp), %rsp /* restore user's stack pointer */ + subq $4, %rsp /* save room for the post-syscall addr */ + movl %ecx, (%rsp) /* Save post-syscall addr on stack */ + + /* + * To 'return' to our user-space handler, we just need to copy + * its address into %ecx. 
+ */ + movq 24(%r15), %rcx /* user-space handler == proc_data for sn1 */ + movq (%r15), %r15 /* Restore scratch register */ + sysret +9: + popq %r15 + retq + SET_SIZE(sn1_brand_syscall32_callback) + + /* + * syscall handler for 64-bit user processes: + * %rax - syscall number + * %rcx - user space %rip + */ + ENTRY(sn1_brand_syscall_callback) + + CALLBACK_PROLOGUE(%rax, %r15, %r15b) + + movq %rsp, %r15 /* save our stack pointer */ + + movq 40(%rsp), %rsp /* restore user's stack pointer */ + subq $8, %rsp /* save room for the post-syscall addr */ + movq %rcx, (%rsp) /* Save post-syscall addr on stack */ + + /* + * To 'return' to our user-space handler, we just need to copy + * its address into %rcx. + */ + movq 24(%r15), %rcx /* user-space handler == proc_data for sn1 */ + movq (%r15), %r15 /* Restore scratch register */ + sysretq +9: + popq %r15 + retq + + SET_SIZE(sn1_brand_syscall_callback) + + /* + * %rax - syscall number + * %rcx - user space %esp + * %rdx - user space return address + * + * XXX: not tested yet. Need a Nocona machine first. 
+ */ + ENTRY(sn1_brand_sysenter_callback) + + CALLBACK_PROLOGUE(%rax, %r15, %r15b) + + subq $4, %rcx /* Save room for user ret addr */ + movq %rdx, (%rcx) /* Save current return addr */ + movq 24(%rsp), %rdx /* user-space handler == proc_data for sn1 */ + popq %r15 + sysexit +9: + popq %r15 + ret + SET_SIZE(sn1_brand_sysenter_callback) + + /* + * The saved stack pointer points at the state saved when we took + * the interrupt: + * -------------------------------------- + * | 32 | user's %ss | + * | 24 | user's %esp | + * | 16 | EFLAGS register | + * v 8 | user's %cs | + * 0 | user's %eip | + * -------------------------------------- + */ + ENTRY(sn1_brand_int91_callback) + + CALLBACK_PROLOGUE(%rax, %r15, %r15b) + + movq 24(%rsp), %r15 /* user-space handler == proc_data for sn1 */ + pushq %rax /* Save scratch register */ + movq 48(%rsp), %rax /* Get saved %esp */ + movq %r15, (%rax) /* replace iret target address with hdlr */ + + /* + * Adjust the caller's stack so we return to the instruction after + * the syscall on the next 'ret' in userspace - not to the parent + * routine. 
+ */ + movq 24(%rax), %r15 /* Get user's %esp */ + subq $4, %r15 /* Make room for new ret addr */ + movq %r15, 24(%rax) /* Replace current with updated %esp */ + movl 24(%rsp), %eax /* Get post-syscall address */ + movl %eax, (%r15) /* Put it on the user's stack */ + + popq %rax /* Restore scratch register */ + popq %r15 /* Restore scratch register */ + movq 32(%rsp), %rsp /* Remove all callback stuff from stack */ + iretq +9: + popq %r15 + retq + SET_SIZE(sn1_brand_int91_callback) + +#else /* __amd64 */ + + /* + * When we get into any of these callback routines, the stack + * looks like this: + * -------------------------------------- + * | 28 | 'scratch space' | + * | 24 | user's %ebx | + * | 20 | user's %gs selector | + * | 16 | kernel's %gs selector | + * | 12 | lwp brand data | + * | 8 | proc brand data | + * v 4 | user return address | + * 0 | callback wrapper return addr | + * -------------------------------------- + */ + + /* + * lcall handler for 32-bit OS + * %eax - syscall number + * + * Above the stack contents common to all callbacks is the + * int/lcall-specific state: + * -------------------------------------- + * | 48 | user's %ss | + * | 44 | user's %esp | + * | 40 | EFLAGS register | + * v 36 | user's %cs | + * 32 | user's %eip | + * -------------------------------------- + */ + ENTRY(sn1_brand_syscall_callback) + + CALLBACK_PROLOGUE(%eax, %ebx, %bl) + + movl 12(%esp), %ebx /* user-space handler == proc_data for sn1 */ + movl %ebx, 36(%esp) /* replace iret target address with hdlr */ + + /* + * Adjust the caller's stack so we return to the instruction after + * the syscall on the next 'ret' in userspace - not to the parent + * routine. 
+ */ + pushl %eax /* Save scratch register */ + movl 52(%esp), %eax /* Get current %esp */ + subl $4, %eax /* Make room for new ret addr */ + movl %eax, 52(%esp) /* Replace current with updated %esp */ + movl 12(%esp), %ebx /* Get post-syscall address */ + movl %ebx, (%eax) /* Put it on the user's stack */ + popl %eax /* Restore scratch register */ + + popl %ebx /* Restore scratch register */ + addl $32, %esp /* Remove all callback stuff from stack */ + iret +9: + popl %ebx + ret + SET_SIZE(sn1_brand_syscall_callback) + + /* + * %eax - syscall number + * %ecx - user space %esp + * %edx - user space return address + */ + ENTRY(sn1_brand_sysenter_callback) + + CALLBACK_PROLOGUE(%eax, %ebx, %bl) + + subl $4, %ecx /* Save room for user ret addr */ + movl %edx, (%ecx) /* Save current return addr */ + movl 12(%esp), %edx /* Return to user-space handler */ + popl %ebx /* Restore scratch register */ + sysexit +9: + popl %ebx + ret + SET_SIZE(sn1_brand_sysenter_callback) + +#endif /* __amd64 */ +#endif /* lint */ + diff --git a/usr/src/uts/intel/dtrace/fasttrap_isa.c b/usr/src/uts/intel/dtrace/fasttrap_isa.c index b64be9243f..5b0c0a9383 100644 --- a/usr/src/uts/intel/dtrace/fasttrap_isa.c +++ b/usr/src/uts/intel/dtrace/fasttrap_isa.c @@ -36,6 +36,7 @@ #include <sys/segments.h> #include <sys/sysmacros.h> #include <sys/trap.h> +#include <sys/archsystm.h> /* * Lossless User-Land Tracing on x86 @@ -230,7 +231,7 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex; - uint8_t rex = 0; + uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's @@ -269,23 +270,49 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, if (tp->ftt_size > len) return (-1); + tp->ftt_segment = FASTTRAP_SEG_NONE; + /* * Find the start of the instruction's opcode by processing any * legacy prefixes. 
*/ for (;;) { + seg = 0; switch (instr[start]) { + case FASTTRAP_PREFIX_SS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_GS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_FS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_ES: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_DS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_CS: + seg++; + /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: - case FASTTRAP_PREFIX_CS: - case FASTTRAP_PREFIX_DS: - case FASTTRAP_PREFIX_ES: - case FASTTRAP_PREFIX_FS: - case FASTTRAP_PREFIX_GS: - case FASTTRAP_PREFIX_SS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: + if (seg != 0) { + /* + * It's illegal for an instruction to specify + * two segment prefixes -- give up on this + * illegal instruction. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE) + return (-1); + + tp->ftt_segment = seg; + } start++; continue; } @@ -483,6 +510,19 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); + + case FASTTRAP_INT: + /* + * Interrupts seem like they could be traced with + * no negative implications, but it's possible that + * a thread could be redirected by the trap handling + * code which would eventually return to the + * instruction after the interrupt. If the interrupt + * were in our scratch space, the subsequent + * instruction might be overwritten before we return. + * Accordingly we refuse to instrument any interrupt. 
+ */ + return (-1); } } @@ -696,6 +736,119 @@ fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc, } } +static int +fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr) +{ + proc_t *p = curproc; + user_desc_t *desc; + uint16_t sel, ndx, type; + uintptr_t limit; + + switch (tp->ftt_segment) { + case FASTTRAP_SEG_CS: + sel = rp->r_cs; + break; + case FASTTRAP_SEG_DS: + sel = rp->r_ds; + break; + case FASTTRAP_SEG_ES: + sel = rp->r_es; + break; + case FASTTRAP_SEG_FS: + sel = rp->r_fs; + break; + case FASTTRAP_SEG_GS: + sel = rp->r_gs; + break; + case FASTTRAP_SEG_SS: + sel = rp->r_ss; + break; + } + + /* + * Make sure the given segment register specifies a user priority + * selector rather than a kernel selector. + */ + if (!SELISUPL(sel)) + return (-1); + + ndx = SELTOIDX(sel); + + /* + * Check the bounds and grab the descriptor out of the specified + * descriptor table. + */ + if (SELISLDT(sel)) { + if (ndx > p->p_ldtlimit) + return (-1); + + desc = p->p_ldt + ndx; + + } else { + if (ndx >= NGDT) + return (-1); + + desc = cpu_get_gdt() + ndx; + } + + /* + * The descriptor must have user privilege level and it must be + * present in memory. + */ + if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1) + return (-1); + + type = desc->usd_type; + + /* + * If the S bit in the type field is not set, this descriptor can + * only be used in system context. + */ + if ((type & 0x10) != 0x10) + return (-1); + + limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1); + + if (tp->ftt_segment == FASTTRAP_SEG_CS) { + /* + * The code/data bit and readable bit must both be set. + */ + if ((type & 0xa) != 0xa) + return (-1); + + if (*addr > limit) + return (-1); + } else { + /* + * The code/data bit must be clear. + */ + if ((type & 0x8) != 0) + return (-1); + + /* + * If the expand-down bit is clear, we just check the limit as + * it would naturally be applied. 
Otherwise, we need to check + * that the address is the range [limit + 1 .. 0xffff] or + * [limit + 1 ... 0xffffffff] depending on if the default + * operand size bit is set. + */ + if ((type & 0x4) == 0) { + if (*addr > limit) + return (-1); + } else if (desc->usd_def32) { + if (*addr < limit + 1 || 0xffff < *addr) + return (-1); + } else { + if (*addr < limit + 1 || 0xffffffff < *addr) + return (-1); + } + } + + *addr += USEGD_GETBASE(desc); + + return (0); +} + int fasttrap_pid_probe(struct regs *rp) { @@ -1105,7 +1258,7 @@ fasttrap_pid_probe(struct regs *rp) if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } else { - uintptr_t addr = tp->ftt_dest; + uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); @@ -1114,10 +1267,22 @@ fasttrap_pid_probe(struct regs *rp) tp->ftt_scale; if (tp->ftt_code == 1) { + /* + * If there's a segment prefix for this + * instruction, we'll need to check permissions + * and bounds on the given selector, and adjust + * the address accordingly. 
+ */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE && + fasttrap_do_seg(tp, rp, &addr) != 0) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif - uintptr_t value; if (fasttrap_fulword((void *)addr, &value) == -1) { fasttrap_sigsegv(p, curthread, @@ -1128,15 +1293,16 @@ fasttrap_pid_probe(struct regs *rp) new_pc = value; #ifdef __amd64 } else { - uint32_t value; + uint32_t value32; + addr = (uintptr_t)(uint32_t)addr; if (fasttrap_fuword32((void *)addr, - &value) == -1) { + &value32) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } - new_pc = value; + new_pc = value32; } #endif } else { diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile new file mode 100644 index 0000000000..ce01dc8610 --- /dev/null +++ b/usr/src/uts/intel/genassym/Makefile @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of genassym.h through +# compile time initialized data. 
+# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h +OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(GENASSYM_H) + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Overrides +# +CLEANFILES = Nothing_to_remove +CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +clean.lint: + +install: def + +# +# Create genassym.h +# +$(GENASSYM_H): $(OFFSETS_SRC) + $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@ + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in new file mode 100644 index 0000000000..21dfcd01aa --- /dev/null +++ b/usr/src/uts/intel/genassym/offsets.in @@ -0,0 +1,45 @@ +\ +\ CDDL HEADER START +\ +\ The contents of this file are subject to the terms of the +\ Common Development and Distribution License (the "License"). +\ You may not use this file except in compliance with the License. +\ +\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +\ or http://www.opensolaris.org/os/licensing. +\ See the License for the specific language governing permissions +\ and limitations under the License. +\ +\ When distributing Covered Code, include this CDDL HEADER in each +\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +\ If applicable, add the following below this CDDL HEADER, with the +\ fields enclosed by brackets "[]" replaced with your own identifying +\ information: Portions Copyright [yyyy] [name of copyright owner] +\ +\ CDDL HEADER END +\ +\ +\ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +\ Use is subject to license terms. 
+\ + +#pragma ident "%Z%%M% %I% %E% SMI" + +\ +\ offsets.in: input file to produce the architecture-dependent genassym.h +\ using the ctfstabs program +\ + +#ifndef _GENASSYM +#define _GENASSYM +#endif + +#include <sys/lx_brand.h> + +lx_lwp_data + br_ugs + +lx_proc_data + l_handler + l_tracehandler + l_traceflag diff --git a/usr/src/uts/intel/ia32/ml/copy.s b/usr/src/uts/intel/ia32/ml/copy.s index 98951b8164..f30b864a46 100644 --- a/usr/src/uts/intel/ia32/ml/copy.s +++ b/usr/src/uts/intel/ia32/ml/copy.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -2044,6 +2043,15 @@ void ucopy(const void *ufrom, void *uto, size_t ulength) {} +/* + * copy a string in user space + */ + +/* ARGSUSED */ +void +ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied) +{} + #else /* __lint */ #if defined(__amd64) @@ -2089,15 +2097,24 @@ ucopy(const void *ufrom, void *uto, size_t ulength) ENTRY(ucopy) movq kernelbase(%rip), %rax cmpq %rax, %rdi - jb 1f - movq %rax, %rdi -1: + cmovaeq %rax, %rdi /* force fault at kernelbase */ cmpq %rax, %rsi - jb do_copy - movq %rax, %rsi + cmovaeq %rax, %rsi /* force fault at kernelbase */ jmp do_copy SET_SIZE(ucopy) + ENTRY(ucopystr) + movq kernelbase(%rip), %rax + cmpq %rax, %rdi + cmovaeq %rax, %rdi /* force fault at kernelbase */ + cmpq %rax, %rsi + cmovaeq %rax, %rsi /* force fault at kernelbase */ + /* do_copystr expects lofault address in %r8 */ + movq %gs:CPU_THREAD, %r8 + movq T_LOFAULT(%r8), %r8 + jmp do_copystr + SET_SIZE(ucopystr) + #elif defined(__i386) ENTRY(copyin_noerr) @@ -2150,6 +2167,22 @@ ucopy(const void *ufrom, void *uto, size_t ulength) jmp do_copy SET_SIZE(ucopy) + ENTRY(ucopystr) + movl kernelbase, %eax + cmpl %eax, 4(%esp) + jb 1f + movl %eax, 4(%esp) /* force fault at kernelbase */ +1: + cmpl %eax, 8(%esp) + jb 2f + movl %eax, 8(%esp) /* force fault at kernelbase */ +2: + /* do_copystr expects the lofault address in %eax */ + movl %gs:CPU_THREAD, %eax + movl T_LOFAULT(%eax), %eax + jmp do_copystr + SET_SIZE(ucopystr) + #endif /* __i386 */ #ifdef DEBUG diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 5a94720495..c3f0688965 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -65,6 +64,8 @@ #include <sys/auxv.h> #include <sys/auxv_386.h> #include <sys/dtrace.h> +#include <sys/brand.h> +#include <sys/machbrand.h> extern const struct fnsave_state x87_initial; extern const struct fxsave_state sse_initial; @@ -604,6 +605,8 @@ getuserpc() static greg_t fix_segreg(greg_t sr, model_t datamodel) { + kthread_t *t = curthread; + switch (sr &= 0xffff) { #if defined(__amd64) /* @@ -633,6 +636,13 @@ fix_segreg(greg_t sr, model_t datamodel) } /* + * Allow this process's brand to do any necessary segment register + * manipulation. + */ + if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) + return (BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel)); + + /* * Force it into the LDT in ring 3 for 32-bit processes, which by * default do not have an LDT, so that any attempt to use an invalid * selector will reference the (non-existant) LDT, and cause a #gp fault diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c index f53563eae3..ed31db4a2d 100644 --- a/usr/src/uts/intel/ia32/os/desctbls.c +++ b/usr/src/uts/intel/ia32/os/desctbls.c @@ -57,6 +57,7 @@ #include <sys/reboot.h> #include <sys/kdi.h> #include <sys/systm.h> +#include <sys/controlregs.h> extern void syscall_int(void); @@ -104,6 +105,22 @@ void (*(fasttable[]))(void) = { }; /* + * Structure containing pre-computed descriptors to allow us to temporarily + * interpose on a standard handler. + */ +struct interposing_handler { + int ih_inum; + gate_desc_t ih_interp_desc; + gate_desc_t ih_default_desc; +}; + +/* + * The brand infrastructure interposes on two handlers, and we use one as a + * NULL signpost. 
+ */ +static struct interposing_handler brand_tbl[3]; + +/* * software prototypes for default local descriptor table */ @@ -279,6 +296,7 @@ init_gdt(void) desctbr_t r_bgdt, r_gdt; user_desc_t *bgdt; size_t alen = 0xfffff; /* entire 32-bit address space */ + int i; /* * Copy in from boot's gdt to our gdt entries 1 - 4. @@ -358,6 +376,14 @@ init_gdt(void) SEL_UPL, SDP_PAGES, SDP_OP32); /* + * Initialize the descriptors set aside for brand usage. + * Only attributes and limits are initialized. + */ + for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) + set_usegd(&gdt0[i], SDP_SHORT, NULL, alen, SDT_MEMRWA, + SEL_UPL, SDP_PAGES, SDP_OP32); + + /* * Install our new GDT */ r_gdt.dtr_limit = sizeof (gdt0) - 1; @@ -382,6 +408,7 @@ init_gdt(void) { desctbr_t r_bgdt, r_gdt; user_desc_t *bgdt; + int i; /* * Copy in from boot's gdt to our gdt entries 1 - 4. @@ -455,6 +482,14 @@ init_gdt(void) SDP_PAGES, SDP_OP32); /* + * Initialize the descriptors set aside for brand usage. + * Only attributes and limits are initialized. + */ + for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) + set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL, + SDP_PAGES, SDP_OP32); + + /* * Install our new GDT */ r_gdt.dtr_limit = sizeof (gdt0) - 1; @@ -580,6 +615,12 @@ init_idt(void) } /* + * install "int80" handler at, well, 0x80. + */ + set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT, + SEL_UPL); + + /* * install fast trap handler at 210. */ set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0, @@ -608,6 +649,23 @@ init_idt(void) idt0_default_r.dtr_limit = sizeof (idt0) - 1; idt0_default_r.dtr_base = (uintptr_t)idt0; wr_idtr(&idt0_default_r); + + /* + * Prepare interposing descriptors for the branded "int80" + * and syscall handlers and cache copies of the default + * descriptors. 
+ */ + brand_tbl[0].ih_inum = T_INT80; + brand_tbl[0].ih_default_desc = idt0[T_INT80]; + set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL, + 0, SDT_SYSIGT, SEL_UPL); + + brand_tbl[1].ih_inum = T_SYSCALLINT; + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT]; + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int, + KCS_SEL, 0, SDT_SYSIGT, SEL_UPL); + + brand_tbl[2].ih_inum = 0; } #elif defined(__i386) @@ -705,6 +763,12 @@ init_idt(void) } /* + * install "int80" handler at, well, 0x80. + */ + set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, 0, SDT_SYSIGT, + SEL_UPL); + + /* * install fast trap handler at 210. */ set_gatesegd(&idt0[T_FASTTRAP], &fasttrap, KCS_SEL, 0, @@ -734,6 +798,23 @@ init_idt(void) idt0_default_r.dtr_limit = sizeof (idt0) - 1; idt0_default_r.dtr_base = (uintptr_t)idt0; wr_idtr(&idt0_default_r); + + /* + * Prepare interposing descriptors for the branded "int80" + * and syscall handlers and cache copies of the default + * descriptors. + */ + brand_tbl[0].ih_inum = T_INT80; + brand_tbl[0].ih_default_desc = idt0[T_INT80]; + set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL, + 0, SDT_SYSIGT, SEL_UPL); + + brand_tbl[1].ih_inum = T_SYSCALLINT; + brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT]; + set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call, + KCS_SEL, 0, SDT_SYSIGT, SEL_UPL); + + brand_tbl[2].ih_inum = 0; } #endif /* __i386 */ @@ -835,3 +916,49 @@ init_tables(void) init_idt(); init_ldt(); } + +/* + * Enable interpositioning on the system call path by rewriting the + * sys{call|enter} MSRs and the syscall-related entries in the IDT to use + * the branded entry points. 
+ */ +void +brand_interpositioning_enable(void) +{ + int i; + + for (i = 0; brand_tbl[i].ih_inum; i++) + CPU->cpu_idt[brand_tbl[i].ih_inum] = + brand_tbl[i].ih_interp_desc; + +#if defined(__amd64) + wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall); + wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32); +#endif + + if (x86_feature & X86_SEP) + wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter); +} + +/* + * Disable interpositioning on the system call path by rewriting the + * sys{call|enter} MSRs and the syscall-related entries in the IDT to use + * the standard entry points, which bypass the interpositioning hooks. + */ +void +brand_interpositioning_disable(void) +{ + int i; + + for (i = 0; brand_tbl[i].ih_inum; i++) + CPU->cpu_idt[brand_tbl[i].ih_inum] = + brand_tbl[i].ih_default_desc; + +#if defined(__amd64) + wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall); + wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32); +#endif + + if (x86_feature & X86_SEP) + wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter); +} diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c index b0372798ee..1fe1e7e72d 100644 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ b/usr/src/uts/intel/ia32/os/sundep.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -81,6 +80,7 @@ #include <sys/tuneable.h> #include <c2/audit.h> #include <sys/bootconf.h> +#include <sys/brand.h> #include <sys/dumphdr.h> #include <sys/promif.h> #include <sys/systeminfo.h> @@ -345,9 +345,20 @@ void lwp_forkregs(klwp_t *lwp, klwp_t *clwp) { #if defined(__amd64) - clwp->lwp_pcb.pcb_flags |= RUPDATE_PENDING; - lwptot(clwp)->t_post_sys = 1; + struct pcb *pcb = &clwp->lwp_pcb; + struct regs *rp = lwptoregs(lwp); + + if ((pcb->pcb_flags & RUPDATE_PENDING) == 0) { + pcb->pcb_ds = rp->r_ds; + pcb->pcb_es = rp->r_es; + pcb->pcb_fs = rp->r_fs; + pcb->pcb_gs = rp->r_gs; + pcb->pcb_flags |= RUPDATE_PENDING; + lwptot(clwp)->t_post_sys = 1; + } + ASSERT(lwptot(clwp)->t_post_sys); #endif + bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct regs)); } @@ -484,6 +495,32 @@ lwp_segregs_restore32(klwp_t *lwp) #endif /* _SYSCALL32_IMPL */ /* + * If this is a process in a branded zone, then we want it to use the brand + * syscall entry points instead of the standard Solaris entry points. This + * routine must be called when a new lwp is created within a branded zone + * or when an existing lwp moves into a branded zone via a zone_enter() + * operation. + */ +void +lwp_attach_brand_hdlrs(klwp_t *lwp) +{ + kthread_t *t = lwptot(lwp); + + ASSERT(PROC_IS_BRANDED(lwptoproc(lwp))); + ASSERT(removectx(t, NULL, brand_interpositioning_disable, + brand_interpositioning_enable, NULL, NULL, NULL, NULL) == 0); + + installctx(t, NULL, brand_interpositioning_disable, + brand_interpositioning_enable, NULL, NULL, NULL, NULL); + + if (t == curthread) { + kpreempt_disable(); + brand_interpositioning_enable(); + kpreempt_enable(); + } +} + +/* * Add any lwp-associated context handlers to the lwp at the beginning * of the lwp's useful life. 
* @@ -571,6 +608,9 @@ lwp_installctx(klwp_t *lwp) kpreempt_enable(); } } + + if (PROC_IS_BRANDED(ttoproc(t))) + lwp_attach_brand_hdlrs(lwp); } /* @@ -701,6 +741,13 @@ setregs(uarg_t *args) lwp_installctx(lwp); } +user_desc_t * +cpu_get_gdt(void) +{ + return (CPU->cpu_gdt); +} + + #if !defined(lwp_getdatamodel) /* diff --git a/usr/src/uts/intel/ia32/os/syscall.c b/usr/src/uts/intel/ia32/os/syscall.c index 00fc0b6ba9..965fc07c1d 100644 --- a/usr/src/uts/intel/ia32/os/syscall.c +++ b/usr/src/uts/intel/ia32/os/syscall.c @@ -850,20 +850,17 @@ deferred_singlestep_trap(caddr_t pc) /* * If both NORMAL_STEP and WATCH_STEP are in - * effect, give precedence to NORMAL_STEP. + * effect, give precedence to WATCH_STEP. * If neither is set, user must have set the * PS_T bit in %efl; treat this as NORMAL_STEP. */ - if ((pcb->pcb_flags & NORMAL_STEP) || - !(pcb->pcb_flags & WATCH_STEP)) { + if ((fault = undo_watch_step(&siginfo)) == 0 && + ((pcb->pcb_flags & NORMAL_STEP) || + !(pcb->pcb_flags & WATCH_STEP))) { siginfo.si_signo = SIGTRAP; siginfo.si_code = TRAP_TRACE; siginfo.si_addr = pc; fault = FLTTRACE; - if (pcb->pcb_flags & WATCH_STEP) - (void) undo_watch_step(NULL); - } else { - fault = undo_watch_step(&siginfo); } pcb->pcb_flags &= ~(DEBUG_PENDING|NORMAL_STEP|WATCH_STEP); diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c index 08b48234f7..8b56b01002 100644 --- a/usr/src/uts/intel/ia32/os/sysi86.c +++ b/usr/src/uts/intel/ia32/os/sysi86.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -63,7 +62,6 @@ #include <sys/fp.h> #include <sys/cmn_err.h> -static int setdscr(caddr_t ap); static void setup_ldt(proc_t *pp); static void *ldt_map(proc_t *pp, uint_t seli); static void ldt_free(proc_t *pp); @@ -80,6 +78,7 @@ extern void sgmtl(long); int sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) { + struct ssd ssd; int error = 0; int c; proc_t *pp = curproc; @@ -121,7 +120,19 @@ sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) error = EINTR; break; } - error = setdscr((caddr_t)arg1); + + if (get_udatamodel() == DATAMODEL_LP64) { + error = EINVAL; + break; + } + + if (copyin((caddr_t)arg1, &ssd, sizeof (ssd)) < 0) { + error = EFAULT; + break; + } + + error = setdscr(&ssd); + mutex_enter(&pp->p_lock); if (curthread != pp->p_agenttp) continuelwps(pp); @@ -440,30 +451,23 @@ ldt_installctx(proc_t *p, proc_t *cp) mutex_exit(&targ->p_lock); } -static int -setdscr(caddr_t ap) +int +setdscr(struct ssd *ssd) { - struct ssd ssd; /* request structure buffer */ ushort_t seli; /* selector index */ user_desc_t *dscrp; /* descriptor pointer */ proc_t *pp = ttoproc(curthread); - if (get_udatamodel() == DATAMODEL_LP64) - return (EINVAL); - - if (copyin(ap, &ssd, sizeof (ssd)) < 0) - return (EFAULT); - /* * LDT segments: executable and data at DPL 3 only. */ - if (!SELISLDT(ssd.sel) || !SELISUPL(ssd.sel)) + if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel)) return (EINVAL); /* * check the selector index. */ - seli = SELTOIDX(ssd.sel); + seli = SELTOIDX(ssd->sel); if (seli >= MAXNLDT || seli < LDT_UDBASE) return (EINVAL); @@ -541,7 +545,7 @@ setdscr(caddr_t ap) * a lot better failure mode than SIGKILL and a core file * from kern_gpfault() too.) 
*/ - if (SI86SSD_PRES(&ssd) == 0) { + if (SI86SSD_PRES(ssd) == 0) { kthread_t *t; int bad = 0; @@ -563,27 +567,27 @@ setdscr(caddr_t ap) pcb_t *pcb = &lwp->lwp_pcb; #endif - if (ssd.sel == rp->r_cs || ssd.sel == rp->r_ss) { + if (ssd->sel == rp->r_cs || ssd->sel == rp->r_ss) { bad = 1; break; } #if defined(__amd64) if (pcb->pcb_flags & RUPDATE_PENDING) { - if (ssd.sel == pcb->pcb_ds || - ssd.sel == pcb->pcb_es || - ssd.sel == pcb->pcb_fs || - ssd.sel == pcb->pcb_gs) { + if (ssd->sel == pcb->pcb_ds || + ssd->sel == pcb->pcb_es || + ssd->sel == pcb->pcb_fs || + ssd->sel == pcb->pcb_gs) { bad = 1; break; } } else #endif { - if (ssd.sel == rp->r_ds || - ssd.sel == rp->r_es || - ssd.sel == rp->r_fs || - ssd.sel == rp->r_gs) { + if (ssd->sel == rp->r_ds || + ssd->sel == rp->r_es || + ssd->sel == rp->r_fs || + ssd->sel == rp->r_gs) { bad = 1; break; } @@ -601,7 +605,7 @@ setdscr(caddr_t ap) /* * If acc1 is zero, clear the descriptor (including the 'present' bit) */ - if (ssd.acc1 == 0) { + if (ssd->acc1 == 0) { bzero(dscrp, sizeof (*dscrp)); mutex_exit(&pp->p_ldtlock); return (0); @@ -611,17 +615,18 @@ setdscr(caddr_t ap) * Check segment type, allow segment not present and * only user DPL (3). */ - if (SI86SSD_DPL(&ssd) != SEL_UPL) { + if (SI86SSD_DPL(ssd) != SEL_UPL) { mutex_exit(&pp->p_ldtlock); return (EINVAL); } #if defined(__amd64) /* - * Do not allow 32-bit applications to create 64-bit mode code segments. + * Do not allow 32-bit applications to create 64-bit mode code + * segments. */ - if (SI86SSD_ISUSEG(&ssd) && ((SI86SSD_TYPE(&ssd) >> 3) & 1) == 1 && - SI86SSD_ISLONG(&ssd)) { + if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 && + SI86SSD_ISLONG(ssd)) { mutex_exit(&pp->p_ldtlock); return (EINVAL); } @@ -630,8 +635,8 @@ setdscr(caddr_t ap) /* * Set up a code or data user segment descriptor. 
*/ - if (SI86SSD_ISUSEG(&ssd)) { - ssd_to_usd(&ssd, dscrp); + if (SI86SSD_ISUSEG(ssd)) { + ssd_to_usd(ssd, dscrp); mutex_exit(&pp->p_ldtlock); return (0); } @@ -639,8 +644,8 @@ setdscr(caddr_t ap) /* * Allow a call gate only if the destination is in the LDT. */ - if (SI86SSD_TYPE(&ssd) == SDT_SYSCGT && SELISLDT(ssd.ls)) { - ssd_to_sgd(&ssd, (gate_desc_t *)dscrp); + if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) { + ssd_to_sgd(ssd, (gate_desc_t *)dscrp); mutex_exit(&pp->p_ldtlock); return (0); } @@ -653,7 +658,7 @@ setdscr(caddr_t ap) * Allocate a private LDT for this process and initialize it with the * default entries. */ -void +static void setup_ldt(proc_t *pp) { user_desc_t *ldtp; /* descriptor pointer */ diff --git a/usr/src/uts/intel/ia32/sys/trap.h b/usr/src/uts/intel/ia32/sys/trap.h index b03f947fe8..fcd8739775 100644 --- a/usr/src/uts/intel/ia32/sys/trap.h +++ b/usr/src/uts/intel/ia32/sys/trap.h @@ -65,6 +65,7 @@ extern "C" { #define T_FASTTRAP 0xd2 /* fast system call */ #define T_SYSCALLINT 0x91 /* general system call */ #define T_DTRACE_RET 0x7f /* DTrace pid return */ +#define T_INT80 0x80 /* int80 handler for linux emulation */ #define T_SOFTINT 0x50fd /* pseudo softint trap type */ /* diff --git a/usr/src/uts/intel/ldlinux/Makefile b/usr/src/uts/intel/ldlinux/Makefile new file mode 100644 index 0000000000..6a842e41f4 --- /dev/null +++ b/usr/src/uts/intel/ldlinux/Makefile @@ -0,0 +1,98 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/ldlinux/Makefile +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the ldlinux streams kernel +# module. +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = ldlinux +OBJECTS = $(LDLINUX_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LDLINUX_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_STRMOD_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_afs/Makefile b/usr/src/uts/intel/lx_afs/Makefile new file mode 100644 index 0000000000..3656a6dc2c --- /dev/null +++ b/usr/src/uts/intel/lx_afs/Makefile @@ -0,0 +1,103 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# This makefile drives the production of the lx_afs file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +# Note that the name of the actual filesystem is lx_afs and +# not lx_autofs. This is because filesystem names are stupidly +# limited to 8 characters. 
+# +MODULE = lx_afs +OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) +LDFLAGS += -dy + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_afs/Makefile.rules diff --git a/usr/src/uts/intel/lx_afs/Makefile.rules b/usr/src/uts/intel/lx_afs/Makefile.rules new file mode 100644 index 0000000000..2793fedaa4 --- /dev/null +++ b/usr/src/uts/intel/lx_afs/Makefile.rules @@ -0,0 +1,40 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "%Z%%M% %I% %E% SMI" +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_audio/Makefile b/usr/src/uts/intel/lx_audio/Makefile new file mode 100644 index 0000000000..89b828bd3e --- /dev/null +++ b/usr/src/uts/intel/lx_audio/Makefile @@ -0,0 +1,93 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_audio/Makefile +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#pragma ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the lx_audio driver +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. 
+# +MODULE = lx_audio +OBJECTS = $(LX_AUDIO_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_AUDIO_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile new file mode 100644 index 0000000000..0ad69fefce --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile @@ -0,0 +1,100 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the kernel component of +# the lx brand +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. +# +MODULE = lx_brand +OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_BRAND_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +LDFLAGS += -dy -Nexec/elfexec + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_brand/Makefile.rules diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules new file mode 100644 index 0000000000..0862baef84 --- /dev/null +++ b/usr/src/uts/intel/lx_brand/Makefile.rules @@ -0,0 +1,85 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# + +# +# Section 1a: C object build rules +# +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -D_ELF32_COMPAT -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -D_ELF32_COMPAT -o $@ $< + +$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -D_ELF32_COMPAT -o $@ $< + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(LX_CMN)/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s + $(COMPILE.s) -o $@ $< + +# +# Section 1b: Lint `object' build rules. 
+# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s + @($(LHEAD) $(LINT.s) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile new file mode 100644 index 0000000000..91c032f9fe --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile @@ -0,0 +1,107 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_proc/Makefile +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the lxproc file system +# kernel module. +# +# i86 architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Path to where brand common sources live +# +LX_CMN = $(SRC)/common/brand/lx + +# +# Define the module and object file sets. 
+# +MODULE = lx_proc +OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +CFLAGS += $(CCVERBOSE) + +# +# Depends on procfs and lx_brand +# +LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +# +# Include brand-specific rules +# + +include $(UTSBASE)/intel/lx_proc/Makefile.rules diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules new file mode 100644 index 0000000000..b8592d2fdd --- /dev/null +++ b/usr/src/uts/intel/lx_proc/Makefile.rules @@ -0,0 +1,38 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +# +# Section 1a: C object build rules +# +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +# +# Section 1b: Lint `object' build rules. +# +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile new file mode 100644 index 0000000000..9a7d26597e --- /dev/null +++ b/usr/src/uts/intel/lx_ptm/Makefile @@ -0,0 +1,93 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/intel/lx_ptm/Makefile +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#pragma ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the lx_ptm driver +# +# intel architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = lx_ptm +OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile new file mode 100644 index 0000000000..25872ff3dd --- /dev/null +++ b/usr/src/uts/intel/lx_systrace/Makefile @@ -0,0 +1,74 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" + +UTSBASE = ../.. + +MODULE = lx_systrace +OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) +ROOTLINK = $(ROOT_DTRACE_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE) + +CPPFLAGS += -I$(UTSBASE)/common/brand/lx + +LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +$(ROOTLINK): $(ROOT_DTRACE_DIR) $(ROOTMODULE) + -$(RM) $@; ln $(ROOTMODULE) $@ + +include $(UTSBASE)/intel/Makefile.targ + +$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + +$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/intel/os/minor_perm b/usr/src/uts/intel/os/minor_perm index 2bff059569..3929c6f960 100644 --- a/usr/src/uts/intel/os/minor_perm +++ b/usr/src/uts/intel/os/minor_perm @@ -121,3 +121,5 @@ zfs:* 0600 root sys zfs:zfs 0666 root sys scsi_vhci:* 0666 root sys kssl:* 0666 root sys +lx_ptm:lx_ptmajor 0666 root sys +lx_systrace:* 0644 root sys diff --git 
a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major index 1e4d010253..551c165a18 100644 --- a/usr/src/uts/intel/os/name_to_major +++ b/usr/src/uts/intel/os/name_to_major @@ -122,3 +122,6 @@ kssl 185 mc-amd 186 tzmon 187 did 239 +lx_ptm 240 +lx_systrace 241 +lx_audio 242 diff --git a/usr/src/uts/intel/os/name_to_sysnum b/usr/src/uts/intel/os/name_to_sysnum index 61fcfca712..aebf29f2f2 100644 --- a/usr/src/uts/intel/os/name_to_sysnum +++ b/usr/src/uts/intel/os/name_to_sysnum @@ -38,6 +38,7 @@ syssync 36 kill 37 fstatfs 38 setpgrp 39 +uucopystr 40 dup 41 pipe 42 times 43 @@ -157,6 +158,7 @@ pread 173 pwrite 174 llseek 175 inst_sync 176 +brandsys 177 kaio 178 cpc 179 meminfosys 180 @@ -232,4 +234,5 @@ lwp_mutex_unlock 250 lwp_mutex_trylock 251 lwp_mutex_init 252 cladm 253 +uucopy 254 umount2 255 diff --git a/usr/src/uts/intel/sn1_brand/Makefile b/usr/src/uts/intel/sn1_brand/Makefile new file mode 100644 index 0000000000..403819001b --- /dev/null +++ b/usr/src/uts/intel/sn1_brand/Makefile @@ -0,0 +1,82 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the kernel component of +# the N-1 Solaris brand +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = sn1_brand +OBJECTS = $(SN1_BRAND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(SN1_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_BRAND_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/sn1 + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 5ed70f0cb2..07adccd582 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -69,11 +69,17 @@ extern void sys_syscall(); extern void sys_syscall32(); extern void sys_lcall32(); extern void sys_syscall_int(); +extern void brand_sys_syscall(); +extern void brand_sys_syscall32(); +extern void brand_sys_syscall_int(); #elif defined(__i386) extern void sys_call(); +extern void brand_sys_call(); #endif extern void sys_sysenter(); extern void _sys_sysenter_post_swapgs(); +extern void brand_sys_sysenter(); +extern void _brand_sys_sysenter_post_swapgs(); extern void dosyscall(void); @@ -112,6 +118,9 @@ extern int fpu_pentium_fdivbug; extern void sep_save(void *); extern void sep_restore(void *); +extern void brand_interpositioning_enable(void); +extern void brand_interpositioning_disable(void); + struct regs; extern int instr_size(struct regs *, caddr_t *, enum 
seg_rw); @@ -136,6 +145,8 @@ extern void setup_mca(void); extern void setup_mtrr(void); extern void patch_tsc(void); +extern user_desc_t *cpu_get_gdt(void); + /* * Warning: these routines do -not- use normal calling conventions! */ diff --git a/usr/src/uts/intel/sys/fasttrap_isa.h b/usr/src/uts/intel/sys/fasttrap_isa.h index 89e2cf30ed..2406791710 100644 --- a/usr/src/uts/intel/sys/fasttrap_isa.h +++ b/usr/src/uts/intel/sys/fasttrap_isa.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -55,6 +54,7 @@ typedef struct fasttrap_machtp { uint8_t ftmt_base; /* branch base */ uint8_t ftmt_index; /* branch index */ uint8_t ftmt_scale; /* branch scale */ + uint8_t ftmt_segment; /* segment for memory accesses */ uintptr_t ftmt_dest; /* destination of control flow */ } fasttrap_machtp_t; @@ -69,6 +69,7 @@ typedef struct fasttrap_machtp { #define ftt_base ftt_mtp.ftmt_base #define ftt_index ftt_mtp.ftmt_index #define ftt_scale ftt_mtp.ftmt_scale +#define ftt_segment ftt_mtp.ftmt_segment #define ftt_dest ftt_mtp.ftmt_dest #define FASTTRAP_T_COMMON 0x00 /* common case -- no emulation */ @@ -89,6 +90,17 @@ typedef struct fasttrap_machtp { #define FASTTRAP_RIP_2 0x2 #define FASTTRAP_RIP_X 0x4 +/* + * Segment values. 
+ */ +#define FASTTRAP_SEG_NONE 0 +#define FASTTRAP_SEG_CS 1 +#define FASTTRAP_SEG_DS 2 +#define FASTTRAP_SEG_ES 3 +#define FASTTRAP_SEG_FS 4 +#define FASTTRAP_SEG_GS 5 +#define FASTTRAP_SEG_SS 6 + #define FASTTRAP_AFRAMES 3 #define FASTTRAP_RETURN_AFRAMES 4 #define FASTTRAP_ENTRY_AFRAMES 3 diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h new file mode 100644 index 0000000000..e4b90473a6 --- /dev/null +++ b/usr/src/uts/intel/sys/machbrand.h @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_MACHBRAND_H +#define _SYS_MACHBRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ASM + +#include <sys/model.h> + +struct brand_mach_ops { + void (*b_sysenter)(void); + void (*b_int80)(void); + void (*b_int91)(void); + void (*b_syscall)(void); + void (*b_syscall32)(void); + greg_t (*b_fixsegreg)(greg_t, model_t); +}; + +#endif /* _ASM */ + +#define BRAND_CB_SYSENTER 0 +#define BRAND_CB_INT80 1 +#define BRAND_CB_INT91 2 +#define BRAND_CB_SYSCALL 3 +#define BRAND_CB_SYSCALL32 4 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MACHBRAND_H */ diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h index b17b300cd6..fe65db8575 100644 --- a/usr/src/uts/intel/sys/segments.h +++ b/usr/src/uts/intel/sys/segments.h @@ -453,7 +453,9 @@ void set_syssegd(system_desc_t *, void *, size_t, uint_t, uint_t); #define GDT_GS GDT_NULL /* kernel %gs segment selector */ #define GDT_LWPFS 55 /* lwp private %fs segment selector */ #define GDT_LWPGS 56 /* lwp private %gs segment selector */ -#define NGDT 58 /* number of entries in GDT */ +#define GDT_BRANDMIN 57 /* first entry in GDT for brand usage */ +#define GDT_BRANDMAX 61 /* last entry in GDT for brand usage */ +#define NGDT 62 /* number of entries in GDT */ /* * This selector is only used in the temporary GDT used to bring additional @@ -479,6 +481,8 @@ void set_syssegd(system_desc_t *, void *, size_t, uint_t, uint_t); #define GDT_GS 54 /* kernel %gs segment selector */ #define GDT_LWPFS 55 /* lwp private %fs segment selector */ #define GDT_LWPGS 56 /* lwp private %gs segment selector */ +#define GDT_BRANDMIN 57 /* first entry in GDT for brand usage */ +#define GDT_BRANDMAX 61 /* last entry in GDT for brand usage */ #define NGDT 90 /* number of entries in GDT */ #endif /* __i386 */ @@ -501,6 +505,8 @@ void set_syssegd(system_desc_t *, void *, size_t, uint_t, uint_t); #define KGS_SEL SEL_GDT(GDT_GS, SEL_KPL) #define LWPFS_SEL 
SEL_GDT(GDT_LWPFS, SEL_UPL) #define LWPGS_SEL SEL_GDT(GDT_LWPGS, SEL_UPL) +#define BRANDMIN_SEL SEL_GDT(GDT_BRANDMIN, SEL_UPL) +#define BRANDMAX_SEL SEL_GDT(GDT_BRANDMAX, SEL_UPL) #if defined(__amd64) #define B64CODE_SEL SEL_GDT(GDT_B64CODE, SEL_KPL) #else @@ -551,6 +557,8 @@ extern void _start(), cmnint(); extern void achktrap(), mcetrap(); extern void xmtrap(); extern void fasttrap(); +extern void sys_int80(); +extern void brand_sys_int80(); extern void dtrace_ret(); #if !defined(__amd64) diff --git a/usr/src/uts/intel/sys/sysi86.h b/usr/src/uts/intel/sys/sysi86.h index 4c3c4182ff..54e9f905a0 100644 --- a/usr/src/uts/intel/sys/sysi86.h +++ b/usr/src/uts/intel/sys/sysi86.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -144,6 +143,7 @@ struct ssd { #ifdef _KERNEL extern void usd_to_ssd(user_desc_t *, struct ssd *, selector_t); +extern int setdscr(struct ssd *); #endif /* _KERNEL */ /* diff --git a/usr/src/uts/sparc/os/name_to_sysnum b/usr/src/uts/sparc/os/name_to_sysnum index c06f3c63a7..13cf3f5e16 100644 --- a/usr/src/uts/sparc/os/name_to_sysnum +++ b/usr/src/uts/sparc/os/name_to_sysnum @@ -38,6 +38,7 @@ syssync 36 kill 37 fstatfs 38 setpgrp 39 +uucopystr 40 dup 41 pipe 42 times 43 @@ -156,6 +157,7 @@ pread 173 pwrite 174 llseek 175 inst_sync 176 +brandsys 177 kaio 178 cpc 179 meminfosys 180 @@ -232,4 +234,5 @@ lwp_mutex_unlock 250 lwp_mutex_trylock 251 lwp_mutex_init 252 cladm 253 +uucopy 254 umount2 255 diff --git a/usr/src/uts/sun4/ml/copy.s b/usr/src/uts/sun4/ml/copy.s index f8666afd2e..2b82cfbb8d 100644 --- a/usr/src/uts/sun4/ml/copy.s +++ b/usr/src/uts/sun4/ml/copy.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -387,7 +386,8 @@ copyoutstr_noerr(const char *kaddr, char *uaddr, size_t maxlength, /* - * Copy a block of storage - must not overlap (from + len <= to). + * Copy a block of storage. If the source and target regions overlap, + * one or both of the regions will be silently corrupted. 
* No fault handler installed (to be called under on_fault()) */ @@ -418,3 +418,41 @@ ucopy(const void *ufrom, void *uto, size_t ulength) SET_SIZE(ucopy) #endif /* lint */ + +/* + * Copy a user-land string. If the source and target regions overlap, + * one or both of the regions will be silently corrupted. + * No fault handler installed (to be called under on_fault()) + */ + +#if defined(lint) + +/* ARGSUSED */ +void +ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *ulencopied) +{} + +#else /* lint */ + + ENTRY(ucopystr) + save %sp, -SA(MINFRAME), %sp ! get another window + + brz %i2, 5f + clr %i5 + + lduba [%i0 + %i5]ASI_USER, %i4 +4: stba %i4, [%i1 + %i5]ASI_USER + brz,pn %i4, 5f + inc %i5 + deccc %i2 + bnz,a,pt %ncc, 4b + lduba [%i0 + %i5]ASI_USER, %i4 +5: + brnz,a,pt %i3, 6f + stn %i5, [%i3] +6: + ret + restore %g0, 0, %o0 ! return (0) + + SET_SIZE(ucopystr) +#endif /* lint */ diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in index 807cb1c845..eeac1d417f 100644 --- a/usr/src/uts/sun4/ml/offsets.in +++ b/usr/src/uts/sun4/ml/offsets.in @@ -90,6 +90,7 @@ #include <sys/ontrap.h> #include <sys/lgrp.h> #include <sys/ddifm_impl.h> +#include <sys/brand.h> greg_t GREGSIZE @@ -109,6 +110,8 @@ proc PROCSIZE p_pidp p_fixalignment p_utraps + p_brand + p_brand_data \#define P_UTRAP4 (UT_ILLTRAP_INSTRUCTION * CPTRSIZE) \#define P_UTRAP7 (UT_FP_DISABLED * CPTRSIZE) @@ -544,3 +547,5 @@ copyops cp_suword64 cp_physio +brand BRAND_SIZE + b_machops diff --git a/usr/src/uts/sun4/os/machdep.c b/usr/src/uts/sun4/os/machdep.c index f9421c9f39..9661c8b83c 100644 --- a/usr/src/uts/sun4/os/machdep.c +++ b/usr/src/uts/sun4/os/machdep.c @@ -260,6 +260,14 @@ lwp_freeregs(klwp_t *lwp, int isexec) } /* + * This function is currently unused on sparc. 
+ */ +/*ARGSUSED*/ +void +lwp_attach_brand_hdlrs(klwp_t *lwp) +{} + +/* * fill in the extra register state area specified with the * specified lwp's platform-dependent non-floating-point extra * register state information diff --git a/usr/src/uts/sun4/os/trap.c b/usr/src/uts/sun4/os/trap.c index 1b2da4e79f..fbefb1c06d 100644 --- a/usr/src/uts/sun4/os/trap.c +++ b/usr/src/uts/sun4/os/trap.c @@ -18,6 +18,7 @@ * * CDDL HEADER END */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -721,19 +722,16 @@ trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t mmu_fsr) } /* * If both NORMAL_STEP and WATCH_STEP are in - * effect, give precedence to NORMAL_STEP. + * effect, give precedence to WATCH_STEP. * One or the other must be set at this point. */ ASSERT(pcb->pcb_flags & (NORMAL_STEP|WATCH_STEP)); - if (pcb->pcb_flags & NORMAL_STEP) { + if ((fault = undo_watch_step(&siginfo)) == 0 && + (pcb->pcb_flags & NORMAL_STEP)) { siginfo.si_signo = SIGTRAP; siginfo.si_code = TRAP_TRACE; siginfo.si_addr = (caddr_t)rp->r_pc; fault = FLTTRACE; - if (pcb->pcb_flags & WATCH_STEP) - (void) undo_watch_step(NULL); - } else { - fault = undo_watch_step(&siginfo); } pcb->pcb_flags &= ~(NORMAL_STEP|WATCH_STEP); } diff --git a/usr/src/uts/sun4/sys/machbrand.h b/usr/src/uts/sun4/sys/machbrand.h new file mode 100644 index 0000000000..2f1c528011 --- /dev/null +++ b/usr/src/uts/sun4/sys/machbrand.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_MACHBRAND_H +#define _SYS_MACHBRAND_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ASM + +struct brand_mach_ops { + void (*b_syscall)(void); + void (*b_syscall32)(void); +}; + +#endif /* _ASM */ + +#define BRAND_CB_SYSCALL 0 +#define BRAND_CB_SYSCALL32 1 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MACHBRAND_H */ diff --git a/usr/src/uts/sun4u/Makefile.files b/usr/src/uts/sun4u/Makefile.files index e568b400a7..96cadbd9c8 100644 --- a/usr/src/uts/sun4u/Makefile.files +++ b/usr/src/uts/sun4u/Makefile.files @@ -200,6 +200,11 @@ CPR_IMPL_OBJS = cpr_impl.o SBD_OBJS += sbd.o sbd_cpu.o sbd_mem.o sbd_io.o # +# Brand modules +# +SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o + +# # Performance Counter BackEnd (PCBE) Modules # US_PCBE_OBJS = us234_pcbe.o diff --git a/usr/src/uts/sun4u/Makefile.rules b/usr/src/uts/sun4u/Makefile.rules index 07b5f7aa9d..6779f9e734 100644 --- a/usr/src/uts/sun4u/Makefile.rules +++ b/usr/src/uts/sun4u/Makefile.rules @@ -113,6 +113,9 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/sun4u/pcbe/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/sun4u/brand/sn1/%.s + $(COMPILE.s) -o $@ $< + $(OBJS_DIR)/%.o: $(UTSBASE)/sun4u/vm/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -239,6 +242,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/sun4u/os/%.c $(LINTS_DIR)/%.ln: $(UTSBASE)/sun4u/pcbe/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/sun4u/brand/sn1/%.s + @($(LHEAD) $(LINT.s) $< $(LTAIL)) + 
$(LINTS_DIR)/%.ln: $(UTSBASE)/sun4u/vm/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/sun4u/Makefile.sun4u.shared b/usr/src/uts/sun4u/Makefile.sun4u.shared index fc63b31e8c..3a2f922f5c 100644 --- a/usr/src/uts/sun4u/Makefile.sun4u.shared +++ b/usr/src/uts/sun4u/Makefile.sun4u.shared @@ -442,6 +442,11 @@ MISC_KMODS += opl_cfg MISC_KMODS += kmech_krb5 MISC_KMODS += zuluvm +# +# Brand modules +# +MISC_KMODS += sn1_brand + $(CLOSED_BUILD)CLOSED_MISC_KMODS += gptwo_cpu gptwocfg # diff --git a/usr/src/uts/sun4u/brand/sn1/sn1_brand_asm.s b/usr/src/uts/sun4u/brand/sn1/sn1_brand_asm.s new file mode 100644 index 0000000000..d699c8dda8 --- /dev/null +++ b/usr/src/uts/sun4u/brand/sn1/sn1_brand_asm.s @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#if defined(lint) + +#include <sys/systm.h> + +#else /* lint */ + +#include <sys/asm_linkage.h> +#include <sys/machthread.h> +#include <sys/privregs.h> +#include "assym.h" + +#endif /* lint */ + +#ifdef lint + +void +sn1_brand_syscall_callback(void) +{ +} + +#else /* lint */ + + ENTRY(sn1_brand_syscall_callback) + + /* + * save some locals in the CPU tmp area to give us a little + * room to work. + */ + CPU_ADDR(%g2, %g3) ! load CPU struct addr to %g2 using %g3 + stn %l0, [%g2 + CPU_TMP1] + stn %l1, [%g2 + CPU_TMP2] + + /* + * Switch from the alternate to user globals to grab the syscall + * number, then switch back to the alternate globals. + * + * If the system call number is >= 1024, then it is coming from the + * emulation support library and should not be emulated. + */ + rdpr %pstate, %l0 + wrpr %l0, PSTATE_AG, %pstate ! switch to normal globals + cmp %g1, 1024 ! is this call from the library? + bl,a 1f + mov %g1, %l1 ! delay slot - grab syscall number + sub %g1, 1024, %g1 ! convert magic num to real syscall + ba 2f ! jump back into syscall path +1: + wrpr %l0, %g0, %pstate ! switch back to alternate globals + + /* + * Check to see if we want to interpose on this system call. If + * not, we jump back into the normal syscall path and pretend + * nothing happened. + */ + set sn1_emulation_table, %g3 + ldn [%g3], %g3 + add %g3, %l1, %g3 + ldub [%g3], %g3 + brz %g3, 2f + nop + + /* + * Find the address of the userspace handler. + * cpu->cpu_thread->t_procp->p_brand_data. + */ + ldn [%g2 + CPU_THREAD], %g3 ! load thread pointer + ldn [%g3 + T_PROCP], %g3 ! get proc pointer + ldn [%g3 + P_BRAND_DATA], %g3 ! get brand handler + brz %g3, 2f ! has it been set? + nop + + /* + * Now the magic happens. Grab the trap return address and then + * reset it to point to the user space handler. When we execute + * the 'done' instruction, we will jump into our handler instead of + * the user's code. 
We also stick the old return address in %g6, + * so we can return to the proper instruction in the user's code. + * Note: we also pass back the base address of the syscall + * emulation table. This is a performance hack to avoid having to + * look it up on every call. + */ + rdpr %tnpc, %l1 ! save old tnpc + wrpr %g0, %g3, %tnpc ! setup tnpc + rdpr %pstate, %l0 + wrpr %l0, PSTATE_AG, %pstate ! switch to normal globals + mov %l1, %g6 ! pass tnpc to user code in %g6 + wrpr %l0, %g0, %pstate ! switch back to alternate globals + set fast_trap_done_chk_intr, %g1 + +2: + ! Note that %g2 still contains CPU struct addr + ldn [%g2 + CPU_TMP2], %l1 ! restore locals + ldn [%g2 + CPU_TMP1], %l0 + jmp %g1 + nop + SET_SIZE(sn1_brand_syscall_callback) +#endif /* lint */ + diff --git a/usr/src/uts/sun4u/ml/trap_table.s b/usr/src/uts/sun4u/ml/trap_table.s index d25ab51179..d92ceed8a8 100644 --- a/usr/src/uts/sun4u/ml/trap_table.s +++ b/usr/src/uts/sun4u/ml/trap_table.s @@ -35,6 +35,7 @@ #include <sys/cheetahregs.h> #include <sys/machtrap.h> #include <sys/machthread.h> +#include <sys/machbrand.h> #include <sys/pcb.h> #include <sys/pte.h> #include <sys/mmu.h> @@ -1440,7 +1441,11 @@ trap_table0: BAD; /* 105 range check ?? 
*/ GOTO(.fix_alignment); /* 106 do unaligned references */ BAD; /* 107 unused */ - SYSCALL(syscall_trap32); /* 108 ILP32 system call on LP64 */ +#ifdef DEBUG + GOTO(syscall_wrapper32) /* 108 ILP32 system call on LP64 */ +#else + SYSCALL(syscall_trap32) /* 108 ILP32 system call on LP64 */ +#endif GOTO(set_trap0_addr); /* 109 set trap0 address */ BAD; BAD; BAD4; /* 10A - 10F unused */ TRP4; TRP4; TRP4; TRP4; /* 110 - 11F V9 user trap handlers */ @@ -1460,7 +1465,11 @@ trap_table0: BAD; /* 139 unused */ DTRACE_RETURN; /* 13A dtrace pid return probe */ BAD; BAD4; /* 13B - 13F unused */ +#ifdef DEBUG + GOTO(syscall_wrapper) /* 140 LP64 system call */ +#else SYSCALL(syscall_trap) /* 140 LP64 system call */ +#endif SYSCALL(nosys); /* 141 unused system call trap */ #ifdef DEBUG_USER_TRAPTRACECTL GOTO(.traptrace_freeze); /* 142 freeze traptrace */ @@ -2967,6 +2976,7 @@ fast_trap_done: ldxa [%g0]ASI_INTR_RECEIVE_STATUS, %g5 done + ALTENTRY(fast_trap_done_check_interrupts) fast_trap_done_chk_intr: ldxa [%g0]ASI_INTR_RECEIVE_STATUS, %g5 @@ -2999,4 +3009,46 @@ fast_trap_dummy_call: retl nop +#ifdef DEBUG +/* + * Currently we only support syscall interposition for branded zones on + * DEBUG kernels. The only brand that makes use of this functionality is + * the fake Solaris 10 brand. Since this brand is only used for exercising + * the framework, we don't want this overhead incurred on production + * systems. + */ +#define BRAND_CALLBACK(callback_id) \ + CPU_ADDR(%g1, %g2) /* load CPU struct addr to %g1 */ ;\ + ldn [%g1 + CPU_THREAD], %g2 /* load thread pointer */ ;\ + ldn [%g2 + T_PROCP], %g2 /* get proc pointer */ ;\ + ldn [%g2 + P_BRAND], %g2 /* get brand pointer */ ;\ + brz %g2, 1f /* No brand? No callback. */ ;\ + nop ;\ + ldn [%g2 + B_MACHOPS], %g2 /* get machops list */ ;\ + ldn [%g2 + (callback_id << 3)], %g2 ;\ + brz %g2, 1f ;\ + /* \ + * This isn't pretty. We want a low-latency way for the callback \ + * routine to decline to do anything. 
We just pass in an address \ + * the routine can directly jmp back to, pretending that nothing \ + * has happened. \ + */ \ + mov %pc, %g1 ;\ + add %g1, 16, %g1 ;\ + jmp %g2 ;\ + nop ;\ +1: + + ENTRY_NP(syscall_wrapper32) + BRAND_CALLBACK(BRAND_CB_SYSCALL32) + SYSCALL(syscall_trap32) + SET_SIZE(syscall_wrapper32) + + ENTRY_NP(syscall_wrapper) + BRAND_CALLBACK(BRAND_CB_SYSCALL) + SYSCALL(syscall_trap) + SET_SIZE(syscall_wrapper) + +#endif /* DEBUG */ + #endif /* lint */ diff --git a/usr/src/uts/sun4u/sn1_brand/Makefile b/usr/src/uts/sun4u/sn1_brand/Makefile new file mode 100644 index 0000000000..188dec81ff --- /dev/null +++ b/usr/src/uts/sun4u/sn1_brand/Makefile @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +#ident "%Z%%M% %I% %E% SMI" +# +# This makefile drives the production of the kernel component of +# the N-1 Solaris brand +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. 
+# +MODULE = sn1_brand +OBJECTS = $(SN1_BRAND_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(SN1_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_BRAND_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/sun4u/Makefile.sun4u + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/common/brand/sn1 + + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/sun4u/Makefile.targ |