summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/clone.c 58
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/ioctl.c 3
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/lx_brand.c 99
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/misc.c 22
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/signal.c 6
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/socket.c 209
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/sysctl.c 4
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/common/wait.c 82
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s 6
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h 4
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h 4
-rw-r--r-- usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h 58
-rw-r--r-- usr/src/lib/brand/lx/lx_support/lx_support.c 34
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_brand.c 74
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_syscall.c 47
-rw-r--r-- usr/src/uts/common/brand/lx/procfs/lx_prvnops.c 119
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_brand.h 20
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_futex.h 6
-rw-r--r-- usr/src/uts/common/brand/sn1/sn1_brand.c 16
-rw-r--r-- usr/src/uts/common/os/zone.c 7
-rw-r--r-- usr/src/uts/common/sys/brand.h 2
-rw-r--r-- usr/src/uts/common/sys/zone.h 1
22 files changed, 780 insertions, 101 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
index 726017809a..b6ec17e6ad 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/clone.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -349,6 +349,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
volatile int clone_res;
int sig;
int rval;
+ int pid;
lx_regs_t *rp;
sigset_t sigmask;
@@ -367,15 +368,38 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
return (-EINVAL);
/*
- * CLONE_THREAD require CLONE_SIGHAND. CLONE_THREAD and
- * CLONE_DETACHED must both be either set or cleared.
+ * CLONE_THREAD requires CLONE_SIGHAND.
+ *
+ * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
+ * in kernel 2.4 and prior.
+ * In kernel 2.6 CLONE_DETACHED was dropped completely, so we no
+ * longer have this requirement.
*/
- if ((flags & CLONE_TD) &&
- (!(flags & LX_CLONE_SIGHAND) || ((flags & CLONE_TD) != CLONE_TD)))
- return (-EINVAL);
+
+ if (flags & CLONE_TD) {
+ if (!(flags & LX_CLONE_SIGHAND))
+ return (-EINVAL);
+ if ((lx_get_kern_version() <= LX_KERN_2_4) &&
+ (flags & CLONE_TD) != CLONE_TD)
+ return (-EINVAL);
+ }
rp = lx_syscall_regs();
+ /* test whether the pointers passed by the user are writable */
+ if (flags & LX_CLONE_PARENT_SETTID) {
+ if (uucopy(ptidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ptidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+ if (flags & LX_CLONE_CHILD_SETTID) {
+ if (uucopy(ctidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ctidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+
/* See if this is a fork() operation or a thr_create(). */
if (IS_FORK(flags) || IS_VFORK(flags)) {
if (flags & LX_CLONE_PARENT) {
@@ -399,8 +423,26 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
(void) sleep(lx_rpm_delay);
}
- if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID))
- *((int *)ptidp) = rval;
+ /*
+ * Since we've already forked, we can't do much if uucopy fails,
+ * so we just ignore failure. Failure is unlikely since we've
+ * tested the memory before we did the fork.
+ */
+ if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
+ (void) uucopy(&rval, ptidp, sizeof (int));
+ }
+
+ if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
+ /*
+ * lx_getpid should not fail, and if it does, there's
+ * not much we can do about it since we've already
+ * forked, so on failure, we just don't copy the
+ * memory.
+ */
+ pid = lx_getpid();
+ if (pid >= 0)
+ (void) uucopy(&pid, ctidp, sizeof (int));
+ }
/* Parent just returns */
if (rval != 0)
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c
index 2be27b4237..2f2b022ed5 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c
@@ -536,7 +536,7 @@ lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
int cmd = (int)p2;
intptr_t arg = (uintptr_t)p3;
struct stat stat;
- ioc_cmd_translator_t *ict;
+ ioc_cmd_translator_t *ict = NULL;
ioc_errno_translator_t *iet = NULL;
major_t fd_major;
int i, ret;
@@ -561,7 +561,6 @@ lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3)
switch (stat.st_mode & S_IFMT) {
default:
- ict = NULL;
break;
case S_IFREG:
/* Use file translators. */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
index 9368ba0116..993f9faf73 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -165,7 +165,9 @@ struct lx_sysent {
char sy_narg;
};
-static struct lx_sysent sysents[LINUX_MAX_SYSCALL + 1];
+static struct lx_sysent sysents[LX_NSYSCALLS + 1];
+/* Differs for kernel versions, set during lx_init */
+static int lx_max_syscall;
static uintptr_t stack_bottom;
@@ -399,7 +401,7 @@ lx_emulate(lx_regs_t *rp)
syscall_num = rp->lxr_eax;
- if (syscall_num < 0 || syscall_num > LINUX_MAX_SYSCALL)
+ if (syscall_num < 0 || syscall_num > lx_max_syscall)
s = &sysents[0];
else
s = &sysents[syscall_num];
@@ -638,6 +640,23 @@ lx_init(int argc, char *argv[], char *envp[])
char locale_translated_name[MAXLOCALENAMELEN];
static lx_tsd_t lx_tsd;
+ /* Look up the PID that serves as init for this zone */
+ if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
+ lx_err_fatal(gettext(
+ "Unable to find PID for zone init process: %s"),
+ strerror(err));
+
+ /*
+ * Ubuntu init will fail if its TERM environment variable is not set
+ * so if we are running init, and TERM is not set, we set term and
+ * reexec so that the new environment variable is propagated to the
+ * linux application stack.
+ */
+ if ((getpid() == zoneinit_pid) && (getenv("TERM") == NULL)) {
+ if (setenv("TERM", "vt100", 1) < 0 || execv(argv[0], argv) < 0)
+ lx_err_fatal(gettext("failed to set TERM"));
+ }
+
if ((set_l10n_alternate_root("/native") == 0) &&
(setlocale(LC_ALL, lx_translate_locale(locale_translated_name,
sizeof (locale_translated_name))) != NULL) &&
@@ -659,11 +678,22 @@ lx_init(int argc, char *argv[], char *envp[])
lx_debug_init();
- r = getenv("LX_RELEASE");
- if (r == NULL)
- (void) strlcpy(lx_release, LX_UNAME_RELEASE, 128);
+ if (lx_get_kern_version() <= LX_KERN_2_4)
+ lx_max_syscall = LX_NSYSCALLS_2_4;
else
+ lx_max_syscall = LX_NSYSCALLS_2_6;
+
+ r = getenv("LX_RELEASE");
+ if (r == NULL) {
+ if (lx_get_kern_version() == LX_KERN_2_6)
+ (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_6,
+ sizeof (lx_release));
+ else
+ (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_4,
+ sizeof (lx_release));
+ } else {
(void) strlcpy(lx_release, r, 128);
+ }
lx_debug("lx_release: %s\n", lx_release);
@@ -803,12 +833,6 @@ lx_init(int argc, char *argv[], char *envp[])
"Unable to initialize thread-specific data: %s"),
strerror(err));
- /* Look up the PID that serves as init for this zone */
- if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
- lx_err_fatal(gettext(
- "Unable to find PID for zone init process: %s"),
- strerror(err));
-
/*
* Save the current context of this thread.
* We'll restore this context when this thread attempts to exit.
@@ -1227,5 +1251,54 @@ static struct lx_sysent sysents[] = {
{"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */
{"statfs64", lx_statfs64, 0, 2}, /* 268 */
{"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */
- {"tgkill", lx_tgkill, 0, 3} /* 270 */
+ {"tgkill", lx_tgkill, 0, 3}, /* 270 */
+
+ /* The following system calls only exist in kernel 2.6 and greater */
+ {"utimes", utimes, SYS_PASSTHRU, 2}, /* 271 */
+ {"fadvise64_64", NULL, NOSYS_NULL, 0}, /* 272 */
+ {"vserver", NULL, NOSYS_NULL, 0}, /* 273 */
+ {"mbind", NULL, NOSYS_NULL, 0}, /* 274 */
+ {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */
+ {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */
+ {"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */
+ {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */
+ {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */
+ {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */
+ {"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */
+ {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */
+ {"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */
+ {"waitid", lx_waitid, 0, 4}, /* 284 */
+ {"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */
+ {"add_key", NULL, NOSYS_NULL, 0}, /* 286 */
+ {"request_key", NULL, NOSYS_NULL, 0}, /* 287 */
+ {"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */
+ {"ioprio_set", NULL, NOSYS_NULL, 0}, /* 289 */
+ {"ioprio_get", NULL, NOSYS_NULL, 0}, /* 290 */
+ {"inotify_init", NULL, NOSYS_NULL, 0}, /* 291 */
+ {"inotify_add_watch", NULL, NOSYS_NULL, 0}, /* 292 */
+ {"inotify_rm_watch", NULL, NOSYS_NULL, 0}, /* 293 */
+ {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */
+ {"openat", NULL, NOSYS_NULL, 0}, /* 295 */
+ {"mkdirat", NULL, NOSYS_NULL, 0}, /* 296 */
+ {"mknodat", NULL, NOSYS_NULL, 0}, /* 297 */
+ {"fchownat", NULL, NOSYS_NULL, 0}, /* 298 */
+ {"futimesat", NULL, NOSYS_NULL, 0}, /* 299 */
+ {"fstatat64", NULL, NOSYS_NULL, 0}, /* 300 */
+ {"unlinkat", NULL, NOSYS_NULL, 0}, /* 301 */
+ {"renameat", NULL, NOSYS_NULL, 0}, /* 302 */
+ {"linkat", NULL, NOSYS_NULL, 0}, /* 303 */
+ {"symlinkat", NULL, NOSYS_NULL, 0}, /* 304 */
+ {"readlinkat", NULL, NOSYS_NULL, 0}, /* 305 */
+ {"fchmodat", NULL, NOSYS_NULL, 0}, /* 306 */
+ {"faccessat", NULL, NOSYS_NULL, 0}, /* 307 */
+ {"pselect6", NULL, NOSYS_NULL, 0}, /* 308 */
+ {"ppoll", NULL, NOSYS_NULL, 0}, /* 309 */
+ {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */
+ {"set_robust_list", NULL, NOSYS_NULL, 0}, /* 311 */
+ {"get_robust_list", NULL, NOSYS_NULL, 0}, /* 312 */
+ {"splice", NULL, NOSYS_NULL, 0}, /* 313 */
+ {"sync_file_range", NULL, NOSYS_NULL, 0}, /* 314 */
+ {"tee", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */
+ {"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */
};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c
index 77cf94d194..c6cceb1577 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/misc.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c
@@ -46,6 +46,7 @@
#include <sys/lx_thunk_server.h>
#include <unistd.h>
#include <libintl.h>
+#include <zone.h>
extern int sethostname(char *, int);
@@ -184,6 +185,27 @@ lx_getcwd(uintptr_t p1, uintptr_t p2)
}
int
+lx_get_kern_version(void)
+{
+ /*
+ * Since this function is called quite often, and zone_getattr is slow,
+ * we cache the kernel version in kvers_cache. -1 signifies that no
+ * value has yet been cached.
+ */
+ static int kvers_cache = -1;
+ /* dummy variable for use in zone_getattr */
+ int kvers;
+
+ if (kvers_cache != -1)
+ return (kvers_cache);
+ if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, &kvers, sizeof (int))
+ != sizeof (int))
+ return (kvers_cache = LX_KERN_2_4);
+ else
+ return (kvers_cache = kvers);
+}
+
+int
lx_uname(uintptr_t p1)
{
struct lx_utsname *un = (struct lx_utsname *)p1;
diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c
index 119961c0d8..c6ac519e19 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/signal.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c
@@ -390,7 +390,7 @@ stol_sigcode(int si_code)
}
}
-static int
+int
stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
{
lx_siginfo_t lx_siginfo;
@@ -535,7 +535,7 @@ lx_sigaltstack(uintptr_t nsp, uintptr_t osp)
if (nsp) {
if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0)
- return (-errno);
+ return (-errno);
if ((ls.ss_flags & LX_SS_DISABLE) == 0 &&
ls.ss_size < LX_MINSIGSTKSZ)
@@ -1677,7 +1677,7 @@ lx_siginit(void)
if (sa.sa_handler == SIG_IGN) {
lx_debug("marking signal %d (lx %d) as SIG_IGN",
- sig, lx_sig);
+ sig, lx_sig);
lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
}
}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c
index 009018d8fe..41b84b6f4b 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/socket.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c
@@ -31,6 +31,7 @@
#include <errno.h>
#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <libintl.h>
#include <strings.h>
#include <alloca.h>
@@ -52,6 +53,14 @@
#include <sys/lx_brand.h>
#include <sys/lx_misc.h>
+/*
+ * This string is used to prefix all abstract namespace unix sockets, ie all
+ * abstract namespace sockets are converted to regular sockets in the /tmp
+ * directory with .ABSK_ prefixed to their names.
+ */
+#define ABST_PRFX "/tmp/.ABSK_"
+#define ABST_PRFX_LEN 11
+
static int lx_socket(ulong_t *);
static int lx_bind(ulong_t *);
static int lx_connect(ulong_t *);
@@ -283,6 +292,11 @@ convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller)
return (err);
}
+/*
+ * If inaddr is an abstract namespace unix socket, this function expects addr
+ * to have enough memory to hold the expanded socket name, ie it must be of
+ * size *len + ABST_PRFX_LEN.
+ */
static int
convert_sockaddr(struct sockaddr *addr, socklen_t *len,
struct sockaddr *inaddr, socklen_t inlen)
@@ -290,6 +304,7 @@ convert_sockaddr(struct sockaddr *addr, socklen_t *len,
sa_family_t family;
int lx_in6_len;
int size;
+ int i, orig_len;
/*
* Note that if the buffer at inaddr is ever smaller than inlen bytes,
@@ -341,6 +356,66 @@ convert_sockaddr(struct sockaddr *addr, socklen_t *len,
return (-EINVAL);
*len = inlen;
+
+ /*
+ * Linux supports abstract unix sockets, which are
+ * simply sockets that do not exist on the file system.
+ * These sockets are denoted by beginning the path with
+ * a NULL character. To support these, we strip out the
+ * leading NULL character and change the path to point
+ * to a real place in /tmp directory, by prepending
+ * ABST_PRFX and replacing all illegal characters with
+ * '_'.
+ */
+ if (addr->sa_data[0] == '\0') {
+
+ /*
+ * inlen is the entire size of the sockaddr_un
+ * data structure, including the sun_family, so
+ * we need to subtract this out. We subtract
+ * 1 since we want to overwrite the leading NULL
+ * character, and thus do not include it in the
+ * length.
+ */
+ orig_len = inlen - sizeof (addr->sa_family) - 1;
+
+ /*
+ * Since abstract paths can contain illegal
+ * filename characters, we simply replace these
+ * with '_'
+ */
+ for (i = 1; i < orig_len + 1; i++) {
+ if (addr->sa_data[i] == '\0' ||
+ addr->sa_data[i] == '/')
+ addr->sa_data[i] = '_';
+ }
+
+ /*
+ * prepend ABST_PRFX to file name, minus the
+ * leading NULL character. This places the
+ * socket as a hidden file in the /tmp
+ * directory.
+ */
+ (void) memmove(addr->sa_data + ABST_PRFX_LEN,
+ addr->sa_data + 1, orig_len);
+ bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN);
+
+ /*
+ * Since abstract socket paths may not be NULL
+ * terminated, we must explicitly NULL terminate
+ * our string.
+ */
+ addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0';
+
+ /*
+ * Make len reflect the new len of our string.
+ * Although we removed the NULL character at the
+ * beginning of the string, we added a NULL
+ * character to the end, so the net gain in
+ * length is simply ABST_PRFX_LEN.
+ */
+ *len = inlen + ABST_PRFX_LEN;
+ }
break;
default:
@@ -453,6 +528,22 @@ lx_socket(ulong_t *args)
/* Right now IPv6 sockets don't work */
if (domain == AF_INET6)
return (-EAFNOSUPPORT);
+
+ /*
+ * Clients of the auditing subsystem used by CentOS 4 and 5 expect to
+ * be able to create AF_ROUTE SOCK_RAW sockets to communicate with the
+ * auditing daemons. Failure to create these sockets will cause login,
+ * ssh and useradd, among other programs, to fail. To trick these
+ * programs into working, we convert the socket domain and type to
+ * something that we do support. Then when sendto is called on these
+ * sockets, we return an error code. See lx_sendto.
+ */
+ if (domain == AF_ROUTE && type == SOCK_RAW) {
+ domain = AF_INET;
+ type = SOCK_STREAM;
+ protocol = 0;
+ }
+
fd = socket(domain, type, protocol);
if (fd >= 0)
return (fd);
@@ -468,16 +559,71 @@ lx_bind(ulong_t *args)
{
int sockfd = (int)args[0];
struct stat64 statbuf;
- struct sockaddr *name;
+ struct sockaddr *name, oldname;
socklen_t len;
- int r;
+ int r, r2, ret, tmperrno;
+ int abst_sock;
+ struct stat sb;
+
+ if (uucopy((struct sockaddr *)args[1], &oldname,
+ sizeof (struct sockaddr)) != 0)
+ return (-errno);
+
+ /*
+ * Handle Linux abstract sockets, which are UNIX sockets whose path
+ * begins with a NULL character.
+ */
+ abst_sock = (oldname.sa_family == AF_UNIX) &&
+ (oldname.sa_data[0] == '\0');
- if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL)
+ /*
+ * convert_sockaddr will expand the socket path if it is abstract, so
+ * we need to allocate extra memory for it now.
+ */
+ if ((name = SAFE_ALLOCA((socklen_t)args[2] +
+ abst_sock * ABST_PRFX_LEN)) == NULL)
return (-EINVAL);
+
if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1],
(socklen_t)args[2])) < 0)
return (r);
+ /*
+ * Linux abstract namespace unix sockets are simply sockets that do not
+ * exist on the filesystem. We emulate them by changing their paths
+ * in convert_sockaddr so that they point to real file names on the
+ * filesystem. Because in Linux they do not exist on the filesystem
+ * applications do not have to worry about deleting files, however in
+ * our filesystem based emulation we do. To solve this problem, we first
+ * check to see if the socket already exists before we create one. If it
+ * does we attempt to connect to it to see if it is in use, or just
+ * left over from a previous lx_bind call. If we are unable to connect,
+ * we assume it is not in use and remove the file, then continue on
+ * as if the file never existed.
+ */
+ if (abst_sock && stat(name->sa_data, &sb) == 0 &&
+ S_ISSOCK(sb.st_mode)) {
+ if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return (-ENOSR);
+ ret = connect(r2, name, len);
+ tmperrno = errno;
+ if (close(r2) < 0)
+ return (-EINVAL);
+
+ /*
+ * if we can't connect to the socket, assume no one is using it
+ * and remove it, otherwise assume it is in use and return
+ * EADDRINUSE.
+ */
+ if ((ret < 0) && (tmperrno == ECONNREFUSED)) {
+ if (unlink(name->sa_data) < 0) {
+ return (-EADDRINUSE);
+ }
+ } else {
+ return (-EADDRINUSE);
+ }
+ }
+
lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len);
if (name->sa_family == AF_UNIX)
@@ -501,11 +647,26 @@ static int
lx_connect(ulong_t *args)
{
int sockfd = (int)args[0];
- struct sockaddr *name;
+ struct sockaddr *name, oldname;
socklen_t len;
int r;
+ int abst_sock;
+
+ if (uucopy((struct sockaddr *)args[1], &oldname,
+ sizeof (struct sockaddr)) != 0)
+ return (-errno);
+
- if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL)
+ /* Handle Linux abstract sockets */
+ abst_sock = (oldname.sa_family == AF_UNIX) &&
+ (oldname.sa_data[0] == '\0');
+
+ /*
+ * convert_sockaddr will expand the socket path, if it is abstract, so
+ * we need to allocate extra memory for it now.
+ */
+ if ((name = SAFE_ALLOCA((socklen_t)args[2] +
+ abst_sock * ABST_PRFX_LEN)) == NULL)
return (-EINVAL);
if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1],
@@ -805,25 +966,44 @@ lx_sendto(ulong_t *args)
void *buf = (void *)args[1];
size_t len = (size_t)args[2];
int flags = (int)args[3];
- struct sockaddr *to;
+ struct sockaddr *to, oldto;
socklen_t tolen;
ssize_t r;
+ int abst_sock;
int nosigpipe = flags & LX_MSG_NOSIGNAL;
struct sigaction newact, oact;
- if ((to = SAFE_ALLOCA((socklen_t)args[5])) == NULL)
+ if (uucopy((struct sockaddr *)args[4], &oldto,
+ sizeof (struct sockaddr)) != 0)
+ return (-errno);
+
+ /* Handle Linux abstract sockets */
+ abst_sock = (oldto.sa_family == AF_UNIX) &&
+ (oldto.sa_data[0] == '\0');
+
+ /*
+ * convert_sockaddr will expand the socket path, if it is abstract, so
+ * we need to allocate extra memory for it now.
+ */
+ if ((to = SAFE_ALLOCA(args[5] + abst_sock * ABST_PRFX_LEN)) == NULL)
return (-EINVAL);
if ((r = convert_sockaddr(to, &tolen, (struct sockaddr *)args[4],
(socklen_t)args[5])) < 0)
return (r);
+
lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len,
flags, to, tolen);
flags = convert_sockflags(flags);
+ /* return this error to make auditing subsystem happy */
+ if (to->sa_family == AF_ROUTE) {
+ return (-ECONNREFUSED);
+ }
+
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
@@ -959,6 +1139,14 @@ lx_setsockopt(ulong_t *args)
optname <= 0 || optname >= (ltos_proto_opts[level].maxentries))
return (-ENOPROTOOPT);
+ /*
+ * Linux programs set this option to enable receiving credentials over a
+ * socket. Currently we just ignore it to make Linux programs happy.
+ */
+ if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED))
+ return (0);
+
+
if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
/*
* TCP_CORK is a Linux-only option that instructs the TCP
@@ -1024,8 +1212,13 @@ lx_getsockopt(ulong_t *args)
optname <= 0 || optname >= (ltos_proto_opts[level].maxentries))
return (-ENOPROTOOPT);
- if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
+ if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED) ||
+ (level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) {
/*
+ * Linux programs set LX_SO_PASSCRED to enable receiving
+ * credentials over a socket. Since we do not support it, it is
+ * never set and we return 0.
+ *
* We don't support TCP_CORK but some apps rely on it. So,
* rather than return an error we just return 0. This
* isn't exactly a lie, since this option really isn't set,
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
index 03fcce4ef0..1cf4ca3ac1 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -118,7 +118,7 @@ lx_sysctl(uintptr_t raw)
(void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen);
break;
case LX_KERN_OSRELEASE:
- (void) strlcpy(namebuf, LX_UNAME_RELEASE, oldlen);
+ (void) strlcpy(namebuf, lx_release, oldlen);
break;
case LX_KERN_VERSION:
(void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen);
diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c
index 0895e76bc0..33b3d49923 100644
--- a/usr/src/lib/brand/lx/lx_brand/common/wait.c
+++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -91,6 +91,10 @@
#define LX_WALL 0x40000000
#define LX_WCLONE 0x80000000
+#define LX_P_ALL 0x0
+#define LX_P_PID 0x1
+#define LX_P_GID 0x2
+
static int
ltos_options(uintptr_t options)
{
@@ -143,6 +147,36 @@ lx_wstat(int code, int status)
return (stat);
}
+/* wrapper to make solaris waitid work properly with ptrace */
+static int
+lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options)
+{
+ do {
+ /*
+ * It's possible that we return EINVAL here if the idtype is
+ * P_PID or P_PGID and id is out of bounds for a valid pid or
+ * pgid, but Linux expects to see ECHILD. No good way occurs to
+ * handle this so we'll punt for now.
+ */
+ if (waitid(idtype, id, info, options) < 0)
+ return (-errno);
+
+ /*
+ * If the WNOHANG flag was specified and no child was found
+ * return 0.
+ */
+ if ((options & WNOHANG) && info->si_pid == 0)
+ return (0);
+
+ /*
+ * It's possible that we may have a spurious return for one of
+ * the child processes created by the ptrace subsystem. If
+ * that's the case, we simply try again.
+ */
+ } while (lx_ptrace_wait(info) == -1);
+ return (0);
+}
+
int
lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
{
@@ -195,30 +229,14 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
options |= WEXITED | WTRAPPED;
-again:
- /*
- * It's possible that we return EINVAL here if the idtype is P_PID or
- * P_PGID and id is out of bounds for a valid pid or pgid, but Linux
- * expects to see ECHILD. No good way occurs to handle this so we'll
- * punt for now.
- */
- if (waitid(idtype, id, &info, options) < 0)
- return (-errno);
-
+ if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0)
+ return (rval);
/*
* If the WNOHANG flag was specified and no child was found return 0.
*/
if ((options & WNOHANG) && info.si_pid == 0)
return (0);
- /*
- * It's possible that we may have a spurious return for one of the
- * child processes created by the ptrace subsystem. In that's the case,
- * we simply try again.
- */
- if (lx_ptrace_wait(&info) == -1)
- goto again;
-
status = lx_wstat(info.si_code, info.si_status);
/*
@@ -242,3 +260,29 @@ lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3)
{
return (lx_wait4(p1, p2, p3, NULL));
}
+
+int
+lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
+{
+ int rval, options;
+ siginfo_t s_infop = {0};
+ if ((options = ltos_options(opt)) == -1)
+ return (-1);
+ switch (idtype) {
+ case LX_P_ALL:
+ idtype = P_ALL;
+ break;
+ case LX_P_PID:
+ idtype = P_PID;
+ break;
+ case LX_P_GID:
+ idtype = P_GID;
+ break;
+ default:
+ return (-EINVAL);
+ }
+ if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_infop, options)) < 0)
+ return (rval);
+
+ return (stol_siginfo(&s_infop, (lx_siginfo_t *)infop));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
index b55eec5855..413ef9852d 100644
--- a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -126,13 +126,13 @@ lx_sigreturn_tolibc(uintptr_t sp)
.align 16
ENTRY_NP(lx_handler_trace_table)
TJMP256
- TJMP16
+ TJMP64
SET_SIZE(lx_handler_trace_table)
.align 16
ENTRY_NP(lx_handler_table)
JMP256
- JMP16
+ JMP64
SET_SIZE(lx_handler_table)
ENTRY_NP(lx_handler_trace)
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
index 68ea113600..2954275c75 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -127,6 +127,8 @@ extern int lx_lpid_to_spid(pid_t, pid_t *);
extern int lx_ptrace_wait(siginfo_t *);
extern void lx_ptrace_fork(void);
+extern int lx_get_kern_version(void);
+
extern int lx_check_alloca(size_t);
#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL)
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
index 981c7d1ad2..b7609962ae 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -288,6 +288,8 @@ extern void lx_sigreturn_tolibc(uintptr_t);
extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(),
void (*)(), uintptr_t);
+extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop);
+
#endif /* !defined(_ASM) */
#ifdef __cplusplus
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
index 241faf7c5b..5c52b1acec 100644
--- a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -32,6 +32,7 @@
#if !defined(_ASM)
#include <sys/types.h>
+#include <sys/procset.h>
#ifdef __cplusplus
extern "C" {
@@ -82,6 +83,7 @@ extern int lx_setgroups(uintptr_t, uintptr_t);
extern int lx_waitpid(uintptr_t, uintptr_t, uintptr_t);
+extern int lx_waitid(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int lx_getuid16(void);
@@ -193,6 +195,9 @@ extern int lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int lx_sync(void);
+extern int lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
extern int lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
uintptr_t);
extern int lx_tgkill(uintptr_t, uintptr_t, uintptr_t);
@@ -222,12 +227,13 @@ extern int lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t);
extern int lx_sched_get_priority_min(uintptr_t);
extern int lx_sched_get_priority_max(uintptr_t);
+extern int lx_keyctl(void);
+
extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#endif /* !defined(_ASM) */
#define EBP_HAS_ARG6 0x01
-#define LINUX_MAX_SYSCALL 270
/*
* Linux syscall numbers
@@ -481,6 +487,54 @@ extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
#define LX_SYS_clock_getres 266
#define LX_SYS_clock_nanosleep 267
#define LX_SYS_tgkill 270
+/* the following syscalls are for 2.6 and later kernels */
+#define LX_SYS_utimes 271
+#define LX_SYS_fadvise64_64 272
+#define LX_SYS_vserver 273
+#define LX_SYS_mbind 274
+#define LX_SYS_get_mempolicyd 275
+#define LX_SYS_set_mempolicy 276
+#define LX_SYS_mq_open 277
+#define LX_SYS_mq_unlink 278
+#define LX_SYS_mq_timedsend 279
+#define LX_SYS_mq_timedreceive 280
+#define LX_SYS_mq_notify 281
+#define LX_SYS_mq_getsetattr 282
+#define LX_SYS_kexec_load 283
+#define LX_SYS_waitid 284
+#define LX_SYS_setaltroot 285
+#define LX_SYS_add_key 286
+#define LX_SYS_request_key 287
+#define LX_SYS_keyctl 288
+#define LX_SYS_ioprio_set 289
+#define LX_SYS_ioprio_get 290
+#define LX_SYS_inotify_init 291
+#define LX_SYS_inotify_add_watch 292
+#define LX_SYS_inotify_rm_watch 293
+#define LX_SYS_migrate_pages 294
+#define LX_SYS_openat 295
+#define LX_SYS_mkdirat 296
+#define LX_SYS_mknodat 297
+#define LX_SYS_fchownat 298
+#define LX_SYS_futimesat 299
+#define LX_SYS_fstatat64 300
+#define LX_SYS_unlinkat 301
+#define LX_SYS_renameat 302
+#define LX_SYS_linkat 303
+#define LX_SYS_symlinkat 304
+#define LX_SYS_readlinkat 305
+#define LX_SYS_fchmodat 306
+#define LX_SYS_faccessat 307
+#define LX_SYS_pselect6 308
+#define LX_SYS_ppoll 309
+#define LX_SYS_unshare 310
+#define LX_SYS_set_robust_list 311
+#define LX_SYS_get_robust_list 312
+#define LX_SYS_splice 313
+#define LX_SYS_sync_file_range 314
+#define LX_SYS_tee 315
+#define LX_SYS_vmsplice 316
+#define LX_SYS_move_pages 317
#ifdef __cplusplus
}
diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c
index 714fc38e50..20e6a1bf71 100644
--- a/usr/src/lib/brand/lx/lx_support/lx_support.c
+++ b/usr/src/lib/brand/lx/lx_support/lx_support.c
@@ -177,7 +177,7 @@ lxs_remove_autofsck()
*/
static void
lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio,
- char **idev, char **odev)
+ char **idev, char **odev, char **kvers)
{
struct zone_attrtab attrtab;
int err;
@@ -190,13 +190,15 @@ lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio,
*idev = (char *)malloc(INTSTRLEN);
*odev = (char *)malloc(INTSTRLEN);
- if (*idev == NULL || *odev == NULL)
+ *kvers = (char *)malloc(INTSTRLEN);
+ if (*idev == NULL || *odev == NULL || *kvers == NULL)
lxs_err(gettext("out of memory"));
*audio = B_FALSE;
*restart = B_FALSE;
bzero(*idev, INTSTRLEN);
bzero(*odev, INTSTRLEN);
+ bzero(*kvers, INTSTRLEN);
while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) {
if ((strcmp(attrtab.zone_attr_name, "init-restart") == 0) &&
(zonecfg_get_attr_boolean(&attrtab, restart) != Z_OK))
@@ -216,6 +218,11 @@ lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio,
INTSTRLEN) != Z_OK))
lxs_err(gettext("invalid type for zone attribute: %s"),
attrtab.zone_attr_name);
+ if ((strcmp(attrtab.zone_attr_name, "kernel-version") == 0) &&
+ (zonecfg_get_attr_string(&attrtab, *kvers,
+ INTSTRLEN) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
}
/* some kind of error while looking up attributes */
@@ -336,7 +343,8 @@ lxs_boot()
zoneid_t zoneid;
zone_dochandle_t zdh;
boolean_t audio, restart;
- char *idev, *odev;
+ char *idev, *odev, *kvers;
+ int kversnum;
lxs_make_initctl();
lxs_remove_autofsck();
@@ -350,7 +358,7 @@ lxs_boot()
}
/* Extract any relevant attributes from the config file. */
- lxs_getattrs(zdh, &restart, &audio, &idev, &odev);
+ lxs_getattrs(zdh, &restart, &audio, &idev, &odev, &kvers);
zonecfg_fini_handle(zdh);
/* Configure the zone's audio support (if any). */
@@ -367,6 +375,15 @@ lxs_boot()
sizeof (boolean_t)) == -1)
lxs_err(gettext("error setting zone's restart_init property"));
+ if ((kvers != NULL) && (strcmp(kvers, "2.6") == 0))
+ kversnum = LX_KERN_2_6;
+ else
+ kversnum = LX_KERN_2_4;
+
+ if (zone_setattr(zoneid, LX_KERN_VERSION_NUM, &kversnum,
+ sizeof (int)) < 0)
+ lxs_err(gettext("unable to set kernel version"));
+
return (0);
}
@@ -419,7 +436,7 @@ lxs_verify(char *xmlfile)
struct zone_dstab dstab;
struct zone_devtab devtab;
boolean_t audio, restart;
- char *idev, *odev;
+ char *idev, *odev, *kvers;
zone_iptype_t iptype;
if ((handle = zonecfg_init_handle()) == NULL)
@@ -485,7 +502,7 @@ lxs_verify(char *xmlfile)
}
/* Extract any relevant attributes from the config file. */
- lxs_getattrs(handle, &restart, &audio, &idev, &odev);
+ lxs_getattrs(handle, &restart, &audio, &idev, &odev, &kvers);
zonecfg_fini_handle(handle);
if (audio) {
@@ -498,6 +515,11 @@ lxs_verify(char *xmlfile)
lxs_err(gettext("invalid value for zone attribute: %s"),
"audio-outputdev");
}
+	/*
+	 * lxs_getattrs() hands back a zero-filled buffer when the zone has no
+	 * "kernel-version" attribute, so only validate a non-empty string;
+	 * an absent attribute must not be reported as an invalid value.
+	 */
+	if (kvers != NULL && kvers[0] != '\0') {
+		if (strcmp(kvers, "2.4") != 0 && strcmp(kvers, "2.6") != 0)
+			lxs_err(gettext("invalid value for zone attribute: %s"),
+			    "kernel-version");
+	}
return (0);
}
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index 3180028bd5..e9d417e123 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -57,11 +57,15 @@
int lx_debug = 0;
+void lx_init_brand_data(zone_t *);
+void lx_free_brand_data(zone_t *);
void lx_setbrand(proc_t *);
int lx_getattr(zone_t *, int, void *, size_t *);
int lx_setattr(zone_t *, int, void *, size_t);
int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t, uintptr_t);
+int lx_get_kern_version(void);
+void lx_set_kern_version(zone_t *, int);
void lx_copy_procdata(proc_t *, proc_t *);
extern void lx_setrval(klwp_t *, int, int);
@@ -87,6 +91,8 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
/* lx brand */
struct brand_ops lx_brops = {
+ lx_init_brand_data,
+ lx_free_brand_data,
lx_brandsys,
lx_setbrand,
lx_getattr,
@@ -180,6 +186,7 @@ int
lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
{
boolean_t val;
+ int num;
if (attr == LX_ATTR_RESTART_INIT) {
if (bufsize > sizeof (boolean_t))
@@ -190,6 +197,13 @@ lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
return (EINVAL);
zone->zone_restart_init = val;
return (0);
+	} else if (attr == LX_KERN_VERSION_NUM) {
+		/*
+		 * Require exactly one int: copyin() below always reads
+		 * sizeof (num) bytes, so a shorter buffer must be refused
+		 * as well as a longer one.
+		 */
+		if (bufsize != sizeof (int))
+			return (ERANGE);
+		if (copyin(buf, &num, sizeof (num)) != 0)
+			return (EFAULT);
+		/* only the kernel versions the brand defines are accepted */
+		if (num != LX_KERN_2_4 && num != LX_KERN_2_6)
+			return (EINVAL);
+		lx_set_kern_version(zone, num);
+		return (0);
}
return (EINVAL);
}
@@ -198,6 +212,7 @@ lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
int
lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
{
+ int num;
if (attr == LX_ATTR_RESTART_INIT) {
if (*bufsize < sizeof (boolean_t))
return (ERANGE);
@@ -206,6 +221,14 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
return (EFAULT);
*bufsize = sizeof (boolean_t);
return (0);
+ } else if (attr == LX_KERN_VERSION_NUM) {
+ if (*bufsize < sizeof (int))
+ return (ERANGE);
+ num = lx_get_kern_version();
+ if (copyout(&num, buf, sizeof (int)) != 0)
+ return (EFAULT);
+ *bufsize = sizeof (int);
+ return (0);
}
return (-EINVAL);
}
@@ -329,6 +352,27 @@ lx_brand_systrace_disable(void)
lx_systrace_enabled = 0;
}
+/*
+ * Allocate the lx-specific data for a zone and attach it to the zone's
+ * zone_brand_data pointer.  The zone must already carry the lx brand and
+ * must not have any brand data attached yet.
+ */
+void
+lx_init_brand_data(zone_t *zone)
+{
+	lx_zone_data_t *zdata;
+
+	ASSERT(zone->zone_brand == &lx_brand);
+	ASSERT(zone->zone_brand_data == NULL);
+
+	zdata = kmem_zalloc(sizeof (*zdata), KM_SLEEP);
+
+	/*
+	 * Start out emulating a 2.4 kernel; the setattr() call made during
+	 * zone boot can change this.
+	 */
+	zdata->kernel_version = LX_KERN_2_4;
+	zone->zone_brand_data = zdata;
+}
+
+/*
+ * Release the per-zone lx data that lx_init_brand_data() allocated.
+ */
+void
+lx_free_brand_data(zone_t *zone)
+{
+	kmem_free(zone->zone_brand_data, sizeof (lx_zone_data_t));
+	/* do not leave a dangling pointer behind in the zone structure */
+	zone->zone_brand_data = NULL;
+}
+
/*
* Get the addresses of the user-space system call handler and attach it to
* the proc structure. Returning 0 indicates success; the value returned
@@ -548,6 +592,12 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
default:
linux_call = cmd - B_EMULATE_SYSCALL;
+	/*
+	 * Only check against the highest syscall number of any supported
+	 * kernel version here.  The check for the zone's specific kernel
+	 * version is done in userland before this call is made, so
+	 * repeating that logic here would be redundant.
+	 */
if (linux_call >= 0 && linux_call < LX_NSYSCALLS) {
*rval = lx_emulate_syscall(linux_call, arg1, arg2,
arg3, arg4, arg5, arg6);
@@ -558,6 +608,24 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
return (EINVAL);
}
+/*
+ * Return the emulated kernel version stored in the given zone's lx data.
+ */
+int
+lx_get_zone_kern_version(zone_t *zone)
+{
+	lx_zone_data_t *zdata = zone->zone_brand_data;
+
+	return (zdata->kernel_version);
+}
+
+/*
+ * Return the emulated kernel version of the current zone (curzone).
+ */
+int
+lx_get_kern_version(void)
+{
+	return (lx_get_zone_kern_version(curzone));
+}
+
+/*
+ * Record vers as the emulated kernel version in the given zone's lx data.
+ */
+void
+lx_set_kern_version(zone_t *zone, int vers)
+{
+	lx_zone_data_t *zdata = zone->zone_brand_data;
+
+	zdata->kernel_version = vers;
+}
+
/*
* Copy the per-process brand data from a parent proc to a child.
*/
@@ -763,8 +831,10 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
* Linux 2.6 programs such as ps will print an error message if the
* following aux entry is missing
*/
- phdr_auxv32[1].a_type = AT_CLKTCK;
- phdr_auxv32[1].a_un.a_val = hz;
+ if (lx_get_kern_version() >= LX_KERN_2_6) {
+ phdr_auxv32[1].a_type = AT_CLKTCK;
+ phdr_auxv32[1].a_un.a_val = hz;
+ }
if (copyout(&phdr_auxv32, args->auxp_brand,
sizeof (phdr_auxv32)) == -1)
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
index 686afea458..4ccefe1f2d 100644
--- a/usr/src/uts/common/brand/lx/os/lx_syscall.c
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -342,9 +342,54 @@ lx_sysent_t lx_sysent[] =
LX_NOSYS("statfs64"),
LX_NOSYS("fstatfs64"),
LX_NOSYS("tgkill"), /* 270 */
+ /* The following are Linux 2.6 system calls */
LX_NOSYS("utimes"),
LX_NOSYS("fadvise64_64"),
LX_NOSYS("vserver"),
+ LX_NOSYS("mbind"),
+ LX_NOSYS("get_mempolicy"),
+ LX_NOSYS("set_mempolicy"),
+ LX_NOSYS("mq_open"),
+ LX_NOSYS("mq_unlink"),
+ LX_NOSYS("mq_timedsend"),
+ LX_NOSYS("mq_timedreceive"), /* 280 */
+ LX_NOSYS("mq_notify"),
+ LX_NOSYS("mq_getsetattr"),
+ LX_NOSYS("kexec_load"),
+ LX_NOSYS("waitid"),
+ LX_NOSYS("sys_setaltroot"),
+ LX_NOSYS("add_key"),
+ LX_NOSYS("request_key"),
+ LX_NOSYS("keyctl"),
+ LX_NOSYS("ioprio_set"),
+ LX_NOSYS("ioprio_get"), /* 290 */
+ LX_NOSYS("inotify_init"),
+ LX_NOSYS("inotify_add_watch"),
+ LX_NOSYS("inotify_rm_watch"),
+ LX_NOSYS("migrate_pages"),
+ LX_NOSYS("openat"),
+ LX_NOSYS("mkdirat"),
+ LX_NOSYS("mknodat"),
+ LX_NOSYS("fchownat"),
+ LX_NOSYS("futimesat"),
+ LX_NOSYS("fstatat64"), /* 300 */
+ LX_NOSYS("unlinkat"),
+ LX_NOSYS("renameat"),
+ LX_NOSYS("linkat"),
+	LX_NOSYS("symlinkat"),
+ LX_NOSYS("readlinkat"),
+ LX_NOSYS("fchmodat"),
+ LX_NOSYS("faccessat"),
+ LX_NOSYS("pselect6"),
+ LX_NOSYS("ppoll"),
+ LX_NOSYS("unshare"), /* 310 */
+ LX_NOSYS("set_robust_list"),
+ LX_NOSYS("get_robust_list"),
+ LX_NOSYS("splice"),
+ LX_NOSYS("sync_file_range"),
+ LX_NOSYS("tee"),
+ LX_NOSYS("vmsplice"),
+ LX_NOSYS("move_pages"),
NULL /* NULL-termination is required for lx_systrace */
};
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
index 14fa5f9122..d08b37c5eb 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -1539,10 +1539,16 @@ lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
static void
lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
+ char *vers;
+ if (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4)
+ vers = LX_UNAME_RELEASE_2_4;
+ else
+ vers = LX_UNAME_RELEASE_2_6;
+
lxpr_uiobuf_printf(uiobuf,
"%s version %s (%s version %d.%d.%d) "
"#%s SMP %s\n",
- LX_UNAME_SYSNAME, LX_UNAME_RELEASE,
+ LX_UNAME_SYSNAME, vers,
#if defined(__GNUC__)
"gcc",
__GNUC__,
@@ -1564,6 +1570,7 @@ lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
*
*/
/* ARGSUSED */
+
static void
lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
@@ -1572,6 +1579,10 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
ulong_t idle_cum = 0;
ulong_t sys_cum = 0;
ulong_t user_cum = 0;
+ ulong_t irq_cum = 0;
+	/* ulong_t so the width matches the %lu conversions that print these */
+	ulong_t cpu_nrunnable_cum = 0;
+	ulong_t w_io_cum = 0;
+
ulong_t pgpgin_cum = 0;
ulong_t pgpgout_cum = 0;
ulong_t pgswapout_cum = 0;
@@ -1580,6 +1591,9 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
ulong_t pswitch_cum = 0;
ulong_t forks_cum = 0;
hrtime_t msnsecs[NCMSTATES];
+ int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp));
+ /* temporary variable since scalehrtime modifies data in place */
+ hrtime_t tmptime;
ASSERT(lxpnp->lxpr_type == LXPR_STAT);
@@ -1610,6 +1624,16 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+ if (lx_kern_version >= LX_KERN_2_6) {
+ cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
+ w_io_cum += CPU_STATS(cp, sys.iowait);
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_cum += NSEC_TO_TICK(tmptime);
+ }
+ }
+
for (i = 0; i < PIL_MAX; i++)
intr_cum += CPU_STATS(cp, sys.intr[i]);
@@ -1623,15 +1647,24 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
cp = cp->cpu_next;
} while (cp != cpstart);
- lxpr_uiobuf_printf(uiobuf,
- "cpu %ld %ld %ld %ld\n",
- user_cum, 0, sys_cum, idle_cum);
+ if (lx_kern_version >= LX_KERN_2_6) {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %ld %ld %ld %ld %ld %ld %ld\n",
+ user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %ld %ld %ld %ld\n",
+ user_cum, 0, sys_cum, idle_cum);
+ }
/* Do per processor stats */
do {
+ int i;
+
ulong_t idle_ticks;
ulong_t sys_ticks;
ulong_t user_ticks;
+ ulong_t irq_ticks = 0;
/*
* Don't count CPUs that aren't even in the system
@@ -1647,10 +1680,23 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
- lxpr_uiobuf_printf(uiobuf,
- "cpu%d %ld %ld %ld %ld\n",
- cp->cpu_id,
- user_ticks, 0, sys_ticks, idle_ticks);
+ if (lx_kern_version >= LX_KERN_2_6) {
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_ticks += NSEC_TO_TICK(tmptime);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %ld %ld %ld %ld %ld %ld %ld\n",
+ cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks,
+ 0, irq_ticks, 0);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %ld %ld %ld %ld\n",
+ cp->cpu_id,
+ user_ticks, 0, sys_ticks, idle_ticks);
+ }
if (pools_enabled)
cp = cp->cpu_next_part;
@@ -1660,19 +1706,39 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
mutex_exit(&cpu_lock);
- lxpr_uiobuf_printf(uiobuf,
- "page %lu %lu\n"
- "swap %lu %lu\n"
- "intr %lu\n"
- "ctxt %lu\n"
- "btime %lu\n"
- "processes %lu\n",
- pgpgin_cum, pgpgout_cum,
- pgswapin_cum, pgswapout_cum,
- intr_cum,
- pswitch_cum,
- boot_time,
- forks_cum);
+ if (lx_kern_version >= LX_KERN_2_6) {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+ "procs_blocked %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum,
+ cpu_nrunnable_cum,
+ w_io_cum);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum);
+ }
}
@@ -2099,6 +2165,10 @@ lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr)
return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr));
}
+	/*
+	 * If the caller passes the secpolicy_proc_access() check (e.g. root),
+	 * allow access regardless of the permission bits.
+	 */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
/*
* Access check is based on only
* one of owner, group, public.
@@ -2852,11 +2922,8 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr)
if (vp->v_type != VLNK)
return (EINVAL);
- /*
- * Try to produce a symlink name for anything that's really a regular
- * file or directory (but not for anything else)
- */
- if (rvp != NULL && (rvp->v_type == VDIR || rvp->v_type == VREG)) {
+ /* Try to produce a symlink name for anything that has a realvp */
+ if (rvp != NULL) {
if ((error = lxpr_access(vp, VREAD, 0, CRED())) != 0)
return (error);
if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index 03ac9d09e6..b1d0281a32 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -31,6 +31,7 @@
#ifndef _ASM
#include <sys/types.h>
#include <sys/cpuvar.h>
+#include <sys/zone.h>
#endif
#ifdef __cplusplus
@@ -43,7 +44,8 @@ extern "C" {
* Brand uname info
*/
#define LX_UNAME_SYSNAME "Linux"
-#define LX_UNAME_RELEASE "2.4.21"
+#define LX_UNAME_RELEASE_2_6 "2.6.18"
+#define LX_UNAME_RELEASE_2_4 "2.4.21"
#define LX_UNAME_VERSION "BrandZ fake linux"
#define LX_UNAME_MACHINE "i686"
@@ -54,7 +56,12 @@ extern "C" {
#define LX_LIB "lx_brand.so.1"
#define LX_LIB_PATH LIB_PATH LX_LIB
-#define LX_NSYSCALLS 270
+#define LX_NSYSCALLS_2_4 270
+#define LX_NSYSCALLS_2_6 317
+#define LX_NSYSCALLS LX_NSYSCALLS_2_6
+
+#define LX_KERN_2_4 0
+#define LX_KERN_2_6 1
/*
* brand(2) subcommands
@@ -76,6 +83,7 @@ extern "C" {
#define LX_VERSION LX_VERSION_1
#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS
+#define LX_KERN_VERSION_NUM (ZONE_ATTR_BRAND_ATTRS + 1)
/* Aux vector containing phdr of linux executable, used by lx_librtld_db */
#define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1
@@ -193,6 +201,11 @@ typedef struct lx_lwp_data {
uint_t br_ptrace; /* ptrace is active for this LWP */
} lx_lwp_data_t;
+/*
+ * Brand specific data.  lx_init_brand_data() allocates one of these and
+ * stores it in the zone's zone_brand_data pointer.
+ */
+typedef struct lx_zone_data {
+ int kernel_version; /* an LX_KERN_* value; initialized to LX_KERN_2_4 */
+} lx_zone_data_t;
+
#define BR_CPU_BOUND 0x0001
#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
@@ -203,6 +216,9 @@ void lx_brand_int80_callback(void);
int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t);
+extern int lx_get_zone_kern_version(zone_t *);
+extern int lx_get_kern_version(void);
+
extern int lx_debug;
#define lx_print if (lx_debug) printf
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
index ac963b015b..b5c5334bff 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_futex.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,10 +39,12 @@ extern "C" {
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE
+#ifdef _KERNEL
extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
- uintptr_t addr2, int val2);
+ uintptr_t addr2, int val2);
extern void lx_futex_init(void);
extern int lx_futex_fini(void);
+#endif /* _KERNEL */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
index 25041d1b18..4c233ca407 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.c
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -43,6 +43,8 @@
char *sn1_emulation_table = NULL;
+void sn1_init_brand_data(zone_t *);
+void sn1_free_brand_data(zone_t *);
void sn1_setbrand(proc_t *);
int sn1_getattr(zone_t *, int, void *, size_t *);
int sn1_setattr(zone_t *, int, void *, size_t);
@@ -60,6 +62,8 @@ int sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
/* sn1 brand */
struct brand_ops sn1_brops = {
+ sn1_init_brand_data,
+ sn1_free_brand_data,
sn1_brandsys,
sn1_setbrand,
sn1_getattr,
@@ -202,6 +206,18 @@ sn1_initlwp(klwp_t *l)
/*ARGSUSED*/
void
+sn1_init_brand_data(zone_t *zone)
+{
+ /* empty stub: nothing is initialized for the zone here */
+}
+
+/*ARGSUSED*/
+void
+sn1_free_brand_data(zone_t *zone)
+{
+ /* empty stub: nothing is freed for the zone here */
+}
+
+/*ARGSUSED*/
+void
sn1_forklwp(klwp_t *p, klwp_t *c)
{
}
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 2468192505..c89945e756 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -1728,7 +1728,10 @@ zone_set_brand(zone_t *zone, const char *brand)
return (EPERM);
}
+ /* set up the brand specific data */
zone->zone_brand = bp;
+ ZBROP(zone)->b_init_brand_data(zone);
+
mutex_exit(&zone_status_lock);
return (0);
}
@@ -3999,6 +4002,10 @@ zone_destroy(zoneid_t zoneid)
/* Get rid of the zone's kstats */
zone_kstat_delete(zone);
+ /* free brand specific data */
+ if (ZONE_IS_BRANDED(zone))
+ ZBROP(zone)->b_free_brand_data(zone);
+
/* Say goodbye to brand framework. */
brand_unregister_zone(zone->zone_brand);
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index 99314a95f0..bf7d6d4359 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -75,6 +75,8 @@ struct intpdata;
struct execa;
struct brand_ops {
+ void (*b_init_brand_data)(zone_t *);
+ void (*b_free_brand_data)(zone_t *);
int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
uintptr_t, uintptr_t, uintptr_t);
void (*b_setbrand)(struct proc *);
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index eba224333f..7e7dd9e88a 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -411,6 +411,7 @@ typedef struct zone {
boolean_t zone_restart_init; /* Restart init if it dies? */
struct brand *zone_brand; /* zone's brand */
+ void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */
kstat_t *zone_swapresv_kstat;
kstat_t *zone_lockedmem_kstat;