diff options
Diffstat (limited to 'usr/src')
22 files changed, 780 insertions, 101 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c index 726017809a..b6ec17e6ad 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/clone.c +++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -349,6 +349,7 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, volatile int clone_res; int sig; int rval; + int pid; lx_regs_t *rp; sigset_t sigmask; @@ -367,15 +368,38 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, return (-EINVAL); /* - * CLONE_THREAD require CLONE_SIGHAND. CLONE_THREAD and - * CLONE_DETACHED must both be either set or cleared. + * CLONE_THREAD requires CLONE_SIGHAND. + * + * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared + * in kernel 2.4 and prior. + * In kernel 2.6 CLONE_DETACHED was dropped completely, so we no + * longer have this requirement. */ - if ((flags & CLONE_TD) && - (!(flags & LX_CLONE_SIGHAND) || ((flags & CLONE_TD) != CLONE_TD))) - return (-EINVAL); + + if (flags & CLONE_TD) { + if (!(flags & LX_CLONE_SIGHAND)) + return (-EINVAL); + if ((lx_get_kern_version() <= LX_KERN_2_4) && + (flags & CLONE_TD) != CLONE_TD) + return (-EINVAL); + } rp = lx_syscall_regs(); + /* test if pointer passed by user are writable */ + if (flags & LX_CLONE_PARENT_SETTID) { + if (uucopy(ptidp, &pid, sizeof (int)) != 0) + return (-EFAULT); + if (uucopy(&pid, ptidp, sizeof (int)) != 0) + return (-EFAULT); + } + if (flags & LX_CLONE_CHILD_SETTID) { + if (uucopy(ctidp, &pid, sizeof (int)) != 0) + return (-EFAULT); + if (uucopy(&pid, ctidp, sizeof (int)) != 0) + return (-EFAULT); + } + /* See if this is a fork() operation or a thr_create(). */ if (IS_FORK(flags) || IS_VFORK(flags)) { if (flags & LX_CLONE_PARENT) { @@ -399,8 +423,26 @@ lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, (void) sleep(lx_rpm_delay); } - if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) - *((int *)ptidp) = rval; + /* + * Since we've already forked, we can't do much if uucopy fails, + * so we just ignore failure. Failure is unlikely since we've + * tested the memory before we did the fork. + */ + if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) { + (void) uucopy(&rval, ptidp, sizeof (int)); + } + + if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) { + /* + * lx_getpid should not fail, and if it does, there's + * not much we can do about it since we've already + * forked, so on failure, we just don't copy the + * memory. + */ + pid = lx_getpid(); + if (pid >= 0) + (void) uucopy(&pid, ctidp, sizeof (int)); + } /* Parent just returns */ if (rval != 0) diff --git a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c index 2be27b4237..2f2b022ed5 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/ioctl.c +++ b/usr/src/lib/brand/lx/lx_brand/common/ioctl.c @@ -536,7 +536,7 @@ lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3) int cmd = (int)p2; intptr_t arg = (uintptr_t)p3; struct stat stat; - ioc_cmd_translator_t *ict; + ioc_cmd_translator_t *ict = NULL; ioc_errno_translator_t *iet = NULL; major_t fd_major; int i, ret; @@ -561,7 +561,6 @@ lx_ioctl(uintptr_t p1, uintptr_t p2, uintptr_t p3) switch (stat.st_mode & S_IFMT) { default: - ict = NULL; break; case S_IFREG: /* Use file translators. */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index 9368ba0116..993f9faf73 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -165,7 +165,9 @@ struct lx_sysent { char sy_narg; }; -static struct lx_sysent sysents[LINUX_MAX_SYSCALL + 1]; +static struct lx_sysent sysents[LX_NSYSCALLS + 1]; +/* Differs for kernel versions, set during lx_init */ +static int lx_max_syscall; static uintptr_t stack_bottom; @@ -399,7 +401,7 @@ lx_emulate(lx_regs_t *rp) syscall_num = rp->lxr_eax; - if (syscall_num < 0 || syscall_num > LINUX_MAX_SYSCALL) + if (syscall_num < 0 || syscall_num > lx_max_syscall) s = &sysents[0]; else s = &sysents[syscall_num]; @@ -638,6 +640,23 @@ lx_init(int argc, char *argv[], char *envp[]) char locale_translated_name[MAXLOCALENAMELEN]; static lx_tsd_t lx_tsd; + /* Look up the PID that serves as init for this zone */ + if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0) + lx_err_fatal(gettext( + "Unable to find PID for zone init process: %s"), + strerror(err)); + + /* + * Ubuntu init will fail if its TERM environment variable is not set + * so if we are running init, and TERM is not set, we set term and + * reexec so that the new environment variable is propagated to the + * linux application stack. + */ + if ((getpid() == zoneinit_pid) && (getenv("TERM") == NULL)) { + if (setenv("TERM", "vt100", 1) < 0 || execv(argv[0], argv) < 0) + lx_err_fatal(gettext("failed to set TERM")); + } + if ((set_l10n_alternate_root("/native") == 0) && (setlocale(LC_ALL, lx_translate_locale(locale_translated_name, sizeof (locale_translated_name))) != NULL) && @@ -659,11 +678,22 @@ lx_init(int argc, char *argv[], char *envp[]) lx_debug_init(); - r = getenv("LX_RELEASE"); - if (r == NULL) - (void) strlcpy(lx_release, LX_UNAME_RELEASE, 128); + if (lx_get_kern_version() <= LX_KERN_2_4) + lx_max_syscall = LX_NSYSCALLS_2_4; else + lx_max_syscall = LX_NSYSCALLS_2_6; + + r = getenv("LX_RELEASE"); + if (r == NULL) { + if (lx_get_kern_version() == LX_KERN_2_6) + (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_6, + sizeof (lx_release)); + else + (void) strlcpy(lx_release, LX_UNAME_RELEASE_2_4, + sizeof (lx_release)); + } else { (void) strlcpy(lx_release, r, 128); + } lx_debug("lx_release: %s\n", lx_release); @@ -803,12 +833,6 @@ lx_init(int argc, char *argv[], char *envp[]) "Unable to initialize thread-specific data: %s"), strerror(err)); - /* Look up the PID that serves as init for this zone */ - if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0) - lx_err_fatal(gettext( - "Unable to find PID for zone init process: %s"), - strerror(err)); - /* * Save the current context of this thread. * We'll restore this context when this thread attempts to exit. @@ -1227,5 +1251,54 @@ static struct lx_sysent sysents[] = { {"clock_nanosleep", lx_clock_nanosleep, 0, 4}, /* 267 */ {"statfs64", lx_statfs64, 0, 2}, /* 268 */ {"fstatfs64", lx_fstatfs64, 0, 2}, /* 269 */ - {"tgkill", lx_tgkill, 0, 3} /* 270 */ + {"tgkill", lx_tgkill, 0, 3}, /* 270 */ + + /* The following system calls only exist in kernel 2.6 and greater */ + {"utimes", utimes, SYS_PASSTHRU, 2}, /* 271 */ + {"fadvise64_64", NULL, NOSYS_NULL, 0}, /* 272 */ + {"vserver", NULL, NOSYS_NULL, 0}, /* 273 */ + {"mbind", NULL, NOSYS_NULL, 0}, /* 274 */ + {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */ + {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */ + {"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */ + {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */ + {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */ + {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */ + {"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */ + {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */ + {"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */ + {"waitid", lx_waitid, 0, 4}, /* 284 */ + {"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */ + {"add_key", NULL, NOSYS_NULL, 0}, /* 286 */ + {"request_key", NULL, NOSYS_NULL, 0}, /* 287 */ + {"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */ + {"ioprio_set", NULL, NOSYS_NULL, 0}, /* 289 */ + {"ioprio_get", NULL, NOSYS_NULL, 0}, /* 290 */ + {"inotify_init", NULL, NOSYS_NULL, 0}, /* 291 */ + {"inotify_add_watch", NULL, NOSYS_NULL, 0}, /* 292 */ + {"inotify_rm_watch", NULL, NOSYS_NULL, 0}, /* 293 */ + {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */ + {"openat", NULL, NOSYS_NULL, 0}, /* 295 */ + {"mkdirat", NULL, NOSYS_NULL, 0}, /* 296 */ + {"mknodat", NULL, NOSYS_NULL, 0}, /* 297 */ + {"fchownat", NULL, NOSYS_NULL, 0}, /* 298 */ + {"futimesat", NULL, NOSYS_NULL, 0}, /* 299 */ + {"fstatat64", NULL, NOSYS_NULL, 0}, /* 300 */ + {"unlinkat", NULL, NOSYS_NULL, 0}, /* 301 */ + {"renameat", NULL, NOSYS_NULL, 0}, /* 302 */ + {"linkat", NULL, NOSYS_NULL, 0}, /* 303 */ + {"symlinkat", NULL, NOSYS_NULL, 0}, /* 304 */ + {"readlinkat", NULL, NOSYS_NULL, 0}, /* 305 */ + {"fchmodat", NULL, NOSYS_NULL, 0}, /* 306 */ + {"faccessat", NULL, NOSYS_NULL, 0}, /* 307 */ + {"pselect6", NULL, NOSYS_NULL, 0}, /* 308 */ + {"ppoll", NULL, NOSYS_NULL, 0}, /* 309 */ + {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */ + {"set_robust_list", NULL, NOSYS_NULL, 0}, /* 311 */ + {"get_robust_list", NULL, NOSYS_NULL, 0}, /* 312 */ + {"splice", NULL, NOSYS_NULL, 0}, /* 313 */ + {"sync_file_range", NULL, NOSYS_NULL, 0}, /* 314 */ + {"tee", NULL, NOSYS_NULL, 0}, /* 315 */ + {"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */ + {"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */ }; diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c index 77cf94d194..c6cceb1577 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/misc.c +++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c @@ -46,6 +46,7 @@ #include <sys/lx_thunk_server.h> #include <unistd.h> #include <libintl.h> +#include <zone.h> extern int sethostname(char *, int); @@ -184,6 +185,27 @@ lx_getcwd(uintptr_t p1, uintptr_t p2) } int +lx_get_kern_version(void) +{ + /* + * Since this function is called quite often, and zone_getattr is slow, + * we cache the kernel version in kvers_cache. -1 signifies that no + * value has yet been cached. + */ + static int kvers_cache = -1; + /* dummy variable for use in zone_getattr */ + int kvers; + + if (kvers_cache != -1) + return (kvers_cache); + if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, &kvers, sizeof (int)) + != sizeof (int)) + return (kvers_cache = LX_KERN_2_4); + else + return (kvers_cache = kvers); +} + +int lx_uname(uintptr_t p1) { struct lx_utsname *un = (struct lx_utsname *)p1; diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c index 119961c0d8..c6ac519e19 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/signal.c +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -390,7 +390,7 @@ stol_sigcode(int si_code) } } -static int +int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) { lx_siginfo_t lx_siginfo; @@ -535,7 +535,7 @@ lx_sigaltstack(uintptr_t nsp, uintptr_t osp) if (nsp) { if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0) - return (-errno); + return (-errno); if ((ls.ss_flags & LX_SS_DISABLE) == 0 && ls.ss_size < LX_MINSIGSTKSZ) @@ -1677,7 +1677,7 @@ lx_siginit(void) if (sa.sa_handler == SIG_IGN) { lx_debug("marking signal %d (lx %d) as SIG_IGN", - sig, lx_sig); + sig, lx_sig); lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN; } } diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c index 009018d8fe..41b84b6f4b 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/socket.c +++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c @@ -31,6 +31,7 @@ #include <errno.h> #include <signal.h> #include <stdio.h> +#include <stdlib.h> #include <libintl.h> #include <strings.h> #include <alloca.h> @@ -52,6 +53,14 @@ #include <sys/lx_brand.h> #include <sys/lx_misc.h> +/* + * This string is used to prefix all abstract namespace unix sockets, ie all + * abstract namespace sockets are converted to regular sockets in the /tmp + * directory with .ABSK_ prefixed to their names. + */ +#define ABST_PRFX "/tmp/.ABSK_" +#define ABST_PRFX_LEN 11 + static int lx_socket(ulong_t *); static int lx_bind(ulong_t *); static int lx_connect(ulong_t *); @@ -283,6 +292,11 @@ convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller) return (err); } +/* + * If inaddr is an abstract namespace unix socket, this function expects addr + * to have enough memory to hold the expanded socket name, ie it must be of + * size *len + ABST_PRFX_LEN. + */ static int convert_sockaddr(struct sockaddr *addr, socklen_t *len, struct sockaddr *inaddr, socklen_t inlen) @@ -290,6 +304,7 @@ convert_sockaddr(struct sockaddr *addr, socklen_t *len, sa_family_t family; int lx_in6_len; int size; + int i, orig_len; /* * Note that if the buffer at inaddr is ever smaller than inlen bytes, @@ -341,6 +356,66 @@ convert_sockaddr(struct sockaddr *addr, socklen_t *len, return (-EINVAL); *len = inlen; + + /* + * Linux supports abstract unix sockets, which are + * simply sockets that do not exist on the file system. + * These sockets are denoted by beginning the path with + * a NULL character. To support these, we strip out the + * leading NULL character and change the path to point + * to a real place in /tmp directory, by prepending + * ABST_PRFX and replacing all illegal characters with + * '_'. + */ + if (addr->sa_data[0] == '\0') { + + /* + * inlen is the entire size of the sockaddr_un + * data structure, including the sun_family, so + * we need to subtract this out. We subtract + * 1 since we want to overwrite the leadin NULL + * character, and thus do not include it in the + * length. + */ + orig_len = inlen - sizeof (addr->sa_family) - 1; + + /* + * Since abstract paths can contain illegal + * filename characters, we simply replace these + * with '_' + */ + for (i = 1; i < orig_len + 1; i++) { + if (addr->sa_data[i] == '\0' || + addr->sa_data[i] == '/') + addr->sa_data[i] = '_'; + } + + /* + * prepend ABST_PRFX to file name, minus the + * leading NULL character. This places the + * socket as a hidden file in the /tmp + * directory. + */ + (void) memmove(addr->sa_data + ABST_PRFX_LEN, + addr->sa_data + 1, orig_len); + bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN); + + /* + * Since abstract socket paths may not be NULL + * terminated, we must explicitly NULL terminate + * our string. + */ + addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0'; + + /* + * Make len reflect the new len of our string. + * Although we removed the NULL character at the + * beginning of the string, we added a NULL + * character to the end, so the net gain in + * length is simply ABST_PRFX_LEN. + */ + *len = inlen + ABST_PRFX_LEN; + } break; default: @@ -453,6 +528,22 @@ lx_socket(ulong_t *args) /* Right now IPv6 sockets don't work */ if (domain == AF_INET6) return (-EAFNOSUPPORT); + + /* + * Clients of the auditing subsystem used by CentOS 4 and 5 expect to + * be able to create AF_ROUTE SOCK_RAW sockets to communicate with the + * auditing daemons. Failure to create these sockets will cause login, + * ssh and useradd, amoung other programs to fail. To trick these + * programs into working, we convert the socket domain and type to + * something that we do support. Then when sendto is called on these + * sockets, we return an error code. See lx_sendto. + */ + if (domain == AF_ROUTE && type == SOCK_RAW) { + domain = AF_INET; + type = SOCK_STREAM; + protocol = 0; + } + fd = socket(domain, type, protocol); if (fd >= 0) return (fd); @@ -468,16 +559,71 @@ lx_bind(ulong_t *args) { int sockfd = (int)args[0]; struct stat64 statbuf; - struct sockaddr *name; + struct sockaddr *name, oldname; socklen_t len; - int r; + int r, r2, ret, tmperrno; + int abst_sock; + struct stat sb; + + if (uucopy((struct sockaddr *)args[1], &oldname, + sizeof (struct sockaddr)) != 0) + return (-errno); + + /* + * Handle Linux abstract sockets, which are UNIX sockets whose path + * begins with a NULL character. + */ + abst_sock = (oldname.sa_family == AF_UNIX) && + (oldname.sa_data[0] == '\0'); - if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL) + /* + * convert_sockaddr will expand the socket path if it is abstract, so + * we need to allocate extra memory for it now. + */ + if ((name = SAFE_ALLOCA((socklen_t)args[2] + + abst_sock * ABST_PRFX_LEN)) == NULL) return (-EINVAL); + if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], (socklen_t)args[2])) < 0) return (r); + /* + * Linux abstract namespace unix sockets are simply socket that do not + * exist on the filesystem. We emulate them by changing their paths + * in covert_sockaddr so that they point real files names on the + * filesystem. Because in Linux they do not exist on the filesystem + * applications do not have to worry about deleting files, however in + * our filesystem based emulation we do. To solve this problem, we first + * check to see if the socket already exists before we create one. If it + * does we attempt to connect to it to see if it is in use, or just + * left over from a previous lx_bind call. If we are unable to connect, + * we assume it is not in use and remove the file, then continue on + * as if the file never existed. + */ + if (abst_sock && stat(name->sa_data, &sb) == 0 && + S_ISSOCK(sb.st_mode)) { + if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) + return (-ENOSR); + ret = connect(r2, name, len); + tmperrno = errno; + if (close(r2) < 0) + return (-EINVAL); + + /* + * if we can't connect to the socket, assume no one is using it + * and remove it, otherwise assume it is in use and return + * EADDRINUSE. + */ + if ((ret < 0) && (tmperrno == ECONNREFUSED)) { + if (unlink(name->sa_data) < 0) { + return (-EADDRINUSE); + } + } else { + return (-EADDRINUSE); + } + } + lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len); if (name->sa_family == AF_UNIX) @@ -501,11 +647,26 @@ static int lx_connect(ulong_t *args) { int sockfd = (int)args[0]; - struct sockaddr *name; + struct sockaddr *name, oldname; socklen_t len; int r; + int abst_sock; + + if (uucopy((struct sockaddr *)args[1], &oldname, + sizeof (struct sockaddr)) != 0) + return (-errno); + - if ((name = SAFE_ALLOCA((socklen_t)args[2])) == NULL) + /* Handle Linux abstract sockets */ + abst_sock = (oldname.sa_family == AF_UNIX) && + (oldname.sa_data[0] == '\0'); + + /* + * convert_sockaddr will expand the socket path, if it is abstract, so + * we need to allocate extra memory for it now. + */ + if ((name = SAFE_ALLOCA((socklen_t)args[2] + + abst_sock * ABST_PRFX_LEN)) == NULL) return (-EINVAL); if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], @@ -805,25 +966,44 @@ lx_sendto(ulong_t *args) void *buf = (void *)args[1]; size_t len = (size_t)args[2]; int flags = (int)args[3]; - struct sockaddr *to; + struct sockaddr *to, oldto; socklen_t tolen; ssize_t r; + int abst_sock; int nosigpipe = flags & LX_MSG_NOSIGNAL; struct sigaction newact, oact; - if ((to = SAFE_ALLOCA((socklen_t)args[5])) == NULL) + if (uucopy((struct sockaddr *)args[4], &oldto, + sizeof (struct sockaddr)) != 0) + return (-errno); + + /* Handle Linux abstract sockets */ + abst_sock = (oldto.sa_family == AF_UNIX) && + (oldto.sa_data[0] == '\0'); + + /* + * convert_sockaddr will expand the socket path, if it is abstract, so + * we need to allocate extra memory for it now. + */ + if ((to = SAFE_ALLOCA(args[5] + abst_sock * ABST_PRFX_LEN)) == NULL) return (-EINVAL); if ((r = convert_sockaddr(to, &tolen, (struct sockaddr *)args[4], (socklen_t)args[5])) < 0) return (r); + lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len, flags, to, tolen); flags = convert_sockflags(flags); + /* return this error to make auditing subsystem happy */ + if (to->sa_family == AF_ROUTE) { + return (-ECONNREFUSED); + } + /* * If nosigpipe is set, we want to emulate the Linux action of * not sending a SIGPIPE to the caller if the remote socket has @@ -959,6 +1139,14 @@ lx_setsockopt(ulong_t *args) optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) return (-ENOPROTOOPT); + /* + * Linux sets this option when it wants to send credentials over a + * socket. Currently we just ignore it to make Linux programs happy. + */ + if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) + return (0); + + if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) { /* * TCP_CORK is a Linux-only option that instructs the TCP @@ -1024,8 +1212,13 @@ lx_getsockopt(ulong_t *args) optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) return (-ENOPROTOOPT); - if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) { + if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED) || + (level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) { /* + * Linux sets LX_SO_PASSCRED when it wants to send credentials + * over a socket. Since we do not support it, it is never set + * and we return 0. + * * We don't support TCP_CORK but some apps rely on it. So, * rather than return an error we just return 0. This * isn't exactly a lie, since this option really isn't set, diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c index 03fcce4ef0..1cf4ca3ac1 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c +++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -118,7 +118,7 @@ lx_sysctl(uintptr_t raw) (void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen); break; case LX_KERN_OSRELEASE: - (void) strlcpy(namebuf, LX_UNAME_RELEASE, oldlen); + (void) strlcpy(namebuf, lx_release, oldlen); break; case LX_KERN_VERSION: (void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen); diff --git a/usr/src/lib/brand/lx/lx_brand/common/wait.c b/usr/src/lib/brand/lx/lx_brand/common/wait.c index 0895e76bc0..33b3d49923 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/wait.c +++ b/usr/src/lib/brand/lx/lx_brand/common/wait.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -91,6 +91,10 @@ #define LX_WALL 0x40000000 #define LX_WCLONE 0x80000000 +#define LX_P_ALL 0x0 +#define LX_P_PID 0x1 +#define LX_P_GID 0x2 + static int ltos_options(uintptr_t options) { @@ -143,6 +147,36 @@ lx_wstat(int code, int status) return (stat); } +/* wrapper to make solaris waitid work properly with ptrace */ +static int +lx_waitid_helper(idtype_t idtype, id_t id, siginfo_t *info, int options) +{ + do { + /* + * It's possible that we return EINVAL here if the idtype is + * P_PID or P_PGID and id is out of bounds for a valid pid or + * pgid, but Linux expects to see ECHILD. No good way occurs to + * handle this so we'll punt for now. + */ + if (waitid(idtype, id, info, options) < 0) + return (-errno); + + /* + * If the WNOHANG flag was specified and no child was found + * return 0. + */ + if ((options & WNOHANG) && info->si_pid == 0) + return (0); + + /* + * It's possible that we may have a spurious return for one of + * the child processes created by the ptrace subsystem. If + * that's the case, we simply try again. + */ + } while (lx_ptrace_wait(info) == -1); + return (0); +} + int lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) { @@ -195,30 +229,14 @@ lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) options |= WEXITED | WTRAPPED; -again: - /* - * It's possible that we return EINVAL here if the idtype is P_PID or - * P_PGID and id is out of bounds for a valid pid or pgid, but Linux - * expects to see ECHILD. No good way occurs to handle this so we'll - * punt for now. - */ - if (waitid(idtype, id, &info, options) < 0) - return (-errno); - + if ((rval = lx_waitid_helper(idtype, id, &info, options)) < 0) + return (rval); /* * If the WNOHANG flag was specified and no child was found return 0. */ if ((options & WNOHANG) && info.si_pid == 0) return (0); - /* - * It's possible that we may have a spurious return for one of the - * child processes created by the ptrace subsystem. In that's the case, - * we simply try again. - */ - if (lx_ptrace_wait(&info) == -1) - goto again; - status = lx_wstat(info.si_code, info.si_status); /* @@ -242,3 +260,29 @@ lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3) { return (lx_wait4(p1, p2, p3, NULL)); } + +int +lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt) +{ + int rval, options; + siginfo_t s_infop = {0}; + if ((options = ltos_options(opt)) == -1) + return (-1); + switch (idtype) { + case LX_P_ALL: + idtype = P_ALL; + break; + case LX_P_PID: + idtype = P_PID; + break; + case LX_P_GID: + idtype = P_GID; + break; + default: + return (-EINVAL); + } + if ((rval = lx_waitid_helper(idtype, (id_t)id, &s_infop, options)) < 0) + return (rval); + + return (stol_siginfo(&s_infop, (lx_siginfo_t *)infop)); +} diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s index b55eec5855..413ef9852d 100644 --- a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s +++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -126,13 +126,13 @@ lx_sigreturn_tolibc(uintptr_t sp) .align 16 ENTRY_NP(lx_handler_trace_table) TJMP256 - TJMP16 + TJMP64 SET_SIZE(lx_handler_trace_table) .align 16 ENTRY_NP(lx_handler_table) JMP256 - JMP16 + JMP64 SET_SIZE(lx_handler_table) ENTRY_NP(lx_handler_trace) diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h index 68ea113600..2954275c75 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -127,6 +127,8 @@ extern int lx_lpid_to_spid(pid_t, pid_t *); extern int lx_ptrace_wait(siginfo_t *); extern void lx_ptrace_fork(void); +extern int lx_get_kern_version(void); + extern int lx_check_alloca(size_t); #define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL) diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h index 981c7d1ad2..b7609962ae 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -288,6 +288,8 @@ extern void lx_sigreturn_tolibc(uintptr_t); extern void lx_sigdeliver(int, siginfo_t *, void *, size_t, void (*)(), void (*)(), uintptr_t); +extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop); + #endif /* !defined(_ASM) */ #ifdef __cplusplus diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h index 241faf7c5b..5c52b1acec 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,6 +32,7 @@ #if !defined(_ASM) #include <sys/types.h> +#include <sys/procset.h> #ifdef __cplusplus extern "C" { @@ -82,6 +83,7 @@ extern int lx_setgroups(uintptr_t, uintptr_t); extern int lx_waitpid(uintptr_t, uintptr_t, uintptr_t); +extern int lx_waitid(uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int lx_wait4(uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int lx_getuid16(void); @@ -193,6 +195,9 @@ extern int lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int lx_sync(void); +extern int lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, + uintptr_t); + extern int lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int lx_tgkill(uintptr_t, uintptr_t, uintptr_t); @@ -222,12 +227,13 @@ extern int lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t); extern int lx_sched_get_priority_min(uintptr_t); extern int lx_sched_get_priority_max(uintptr_t); +extern int lx_keyctl(void); + extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); #endif /* !defined(_ASM) */ #define EBP_HAS_ARG6 0x01 -#define LINUX_MAX_SYSCALL 270 /* * Linux syscall numbers @@ -481,6 +487,54 @@ extern int lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); #define LX_SYS_clock_getres 266 #define LX_SYS_clock_nanosleep 267 #define LX_SYS_tgkill 270 +/* the following syscalls are for 2.6 and later kernels */ +#define LX_SYS_utimes 271 +#define LX_SYS_fadvise64_64 272 +#define LX_SYS_vserver 273 +#define LX_SYS_mbind 274 +#define LX_SYS_get_mempolicyd 275 +#define LX_SYS_set_mempolicy 276 +#define LX_SYS_mq_open 277 +#define LX_SYS_mq_unlink 278 +#define LX_SYS_mq_timedsend 279 +#define LX_SYS_mq_timedreceive 280 +#define LX_SYS_mq_notify 281 +#define LX_SYS_mq_getsetattr 282 +#define LX_SYS_kexec_load 283 +#define LX_SYS_waitid 284 +#define LX_SYS_setaltroot 285 +#define LX_SYS_add_key 286 +#define LX_SYS_request_key 287 +#define LX_SYS_keyctl 288 +#define LX_SYS_ioprio_set 289 +#define LX_SYS_ioprio_get 290 +#define LX_SYS_inotify_init 291 +#define LX_SYS_inotify_add_watch 292 +#define LX_SYS_inotify_rm_watch 293 +#define LX_SYS_migrate_pages 294 +#define LX_SYS_openat 295 +#define LX_SYS_mkdirat 296 +#define LX_SYS_mknodat 297 +#define LX_SYS_fchownat 298 +#define LX_SYS_futimesat 299 +#define LX_SYS_fstatat64 300 +#define LX_SYS_unlinkat 301 +#define LX_SYS_renameat 302 +#define LX_SYS_linkat 303 +#define LX_SYS_symlinkat 304 +#define LX_SYS_readlinkat 305 +#define LX_SYS_fchmodat 306 +#define LX_SYS_faccessat 307 +#define LX_SYS_pselect6 308 +#define LX_SYS_ppoll 309 +#define LX_SYS_unshare 310 +#define LX_SYS_set_robust_list 311 +#define LX_SYS_get_robust_list 312 +#define LX_SYS_splice 313 +#define LX_SYS_sync_file_range 314 +#define LX_SYS_tee 315 +#define LX_SYS_vmsplice 316 +#define LX_SYS_move_pages 317 #ifdef __cplusplus } diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c index 714fc38e50..20e6a1bf71 100644 --- a/usr/src/lib/brand/lx/lx_support/lx_support.c +++ b/usr/src/lib/brand/lx/lx_support/lx_support.c @@ -177,7 +177,7 @@ lxs_remove_autofsck() */ static void lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio, - char **idev, char **odev) + char **idev, char **odev, char **kvers) { struct zone_attrtab attrtab; int err; @@ -190,13 +190,15 @@ lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio, *idev = (char *)malloc(INTSTRLEN); *odev = (char *)malloc(INTSTRLEN); - if (*idev == NULL || *odev == NULL) + *kvers = (char *)malloc(INTSTRLEN); + if (*idev == NULL || *odev == NULL || *kvers == NULL) lxs_err(gettext("out of memory")); *audio = B_FALSE; *restart = B_FALSE; bzero(*idev, INTSTRLEN); bzero(*odev, INTSTRLEN); + bzero(*kvers, INTSTRLEN); while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) { if ((strcmp(attrtab.zone_attr_name, "init-restart") == 0) && (zonecfg_get_attr_boolean(&attrtab, restart) != Z_OK)) @@ -216,6 +218,11 @@ lxs_getattrs(zone_dochandle_t zdh, boolean_t *restart, boolean_t *audio, INTSTRLEN) != Z_OK)) lxs_err(gettext("invalid type for zone attribute: %s"), attrtab.zone_attr_name); + if ((strcmp(attrtab.zone_attr_name, "kernel-version") == 0) && + (zonecfg_get_attr_string(&attrtab, *kvers, + INTSTRLEN) != Z_OK)) + lxs_err(gettext("invalid type for zone attribute: %s"), + attrtab.zone_attr_name); } /* some kind of error while looking up attributes */ @@ -336,7 +343,8 @@ lxs_boot() zoneid_t zoneid; zone_dochandle_t zdh; boolean_t audio, restart; - char *idev, *odev; + char *idev, *odev, *kvers; + int kversnum; lxs_make_initctl(); lxs_remove_autofsck(); @@ -350,7 +358,7 @@ lxs_boot() } /* Extract any relevant attributes from the config file. */ - lxs_getattrs(zdh, &restart, &audio, &idev, &odev); + lxs_getattrs(zdh, &restart, &audio, &idev, &odev, &kvers); zonecfg_fini_handle(zdh); /* Configure the zone's audio support (if any). */ @@ -367,6 +375,15 @@ lxs_boot() sizeof (boolean_t)) == -1) lxs_err(gettext("error setting zone's restart_init property")); + if ((kvers != NULL) && (strcmp(kvers, "2.6") == 0)) + kversnum = LX_KERN_2_6; + else + kversnum = LX_KERN_2_4; + + if (zone_setattr(zoneid, LX_KERN_VERSION_NUM, &kversnum, + sizeof (int)) < 0) + lxs_err(gettext("unable to set kernel version")); + return (0); } @@ -419,7 +436,7 @@ lxs_verify(char *xmlfile) struct zone_dstab dstab; struct zone_devtab devtab; boolean_t audio, restart; - char *idev, *odev; + char *idev, *odev, *kvers; zone_iptype_t iptype; if ((handle = zonecfg_init_handle()) == NULL) @@ -485,7 +502,7 @@ lxs_verify(char *xmlfile) } /* Extract any relevant attributes from the config file. */ - lxs_getattrs(handle, &restart, &audio, &idev, &odev); + lxs_getattrs(handle, &restart, &audio, &idev, &odev, &kvers); zonecfg_fini_handle(handle); if (audio) { @@ -498,6 +515,11 @@ lxs_verify(char *xmlfile) lxs_err(gettext("invalid value for zone attribute: %s"), "audio-outputdev"); } + if (kvers) { + if ((strcmp(kvers, "2.4")) != 0 && (strcmp(kvers, "2.6") != 0)) + lxs_err(gettext("invalid value for zone attribute: %s"), + "kernel-version"); + } return (0); } diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index 3180028bd5..e9d417e123 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -57,11 +57,15 @@ int lx_debug = 0; +void lx_init_brand_data(zone_t *); +void lx_free_brand_data(zone_t *); void lx_setbrand(proc_t *); int lx_getattr(zone_t *, int, void *, size_t *); int lx_setattr(zone_t *, int, void *, size_t); int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +int lx_get_kern_version(void); +void lx_set_kern_version(zone_t *, int); void lx_copy_procdata(proc_t *, proc_t *); extern void lx_setrval(klwp_t *, int, int); @@ -87,6 +91,8 @@ static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, /* lx brand */ struct brand_ops lx_brops = { + lx_init_brand_data, + lx_free_brand_data, lx_brandsys, lx_setbrand, lx_getattr, @@ -180,6 +186,7 @@ int lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) { boolean_t val; + int num; if (attr == LX_ATTR_RESTART_INIT) { if (bufsize > sizeof (boolean_t)) @@ -190,6 +197,13 @@ lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) return (EINVAL); zone->zone_restart_init = val; return (0); + } else if (attr == LX_KERN_VERSION_NUM) { + if (bufsize > sizeof (int)) + return (ERANGE); + if (copyin(buf, &num, sizeof (num)) != 0) + return (EFAULT); + lx_set_kern_version(zone, num); + return (0); } return (EINVAL); } @@ -198,6 +212,7 @@ lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) int lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) { + int num; if (attr == LX_ATTR_RESTART_INIT) { if (*bufsize < sizeof (boolean_t)) return (ERANGE); @@ -206,6 +221,14 @@ lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) return (EFAULT); *bufsize = sizeof (boolean_t); return (0); + } else if (attr == LX_KERN_VERSION_NUM) { + if (*bufsize < sizeof (int)) + return (ERANGE); + num = lx_get_kern_version(); + if (copyout(&num, buf, sizeof (int)) != 0) + return (EFAULT); + *bufsize = sizeof (int); + return (0); } return (-EINVAL); } @@ -329,6 +352,27 @@ lx_brand_systrace_disable(void) lx_systrace_enabled = 0; } +void +lx_init_brand_data(zone_t *zone) +{ + lx_zone_data_t *data; + ASSERT(zone->zone_brand == &lx_brand); + ASSERT(zone->zone_brand_data == NULL); + data = (lx_zone_data_t *)kmem_zalloc(sizeof (lx_zone_data_t), KM_SLEEP); + /* + * Default kernel_version to LX_KERN_2_4, this can be changed by a call + * to setattr() which is made during zone boot + */ + data->kernel_version = LX_KERN_2_4; + zone->zone_brand_data = data; +} + +void +lx_free_brand_data(zone_t *zone) +{ + kmem_free(zone->zone_brand_data, sizeof (lx_zone_data_t)); +} + /* * Get the addresses of the user-space system call handler and attach it to * the proc structure. Returning 0 indicates success; the value returned @@ -548,6 +592,12 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, default: linux_call = cmd - B_EMULATE_SYSCALL; + /* + * Only checking against highest syscall number for all kernel + * versions, since check for specific kernel version is done + * in userland prior to this call, and duplicating logic would + * be redundant. + */ if (linux_call >= 0 && linux_call < LX_NSYSCALLS) { *rval = lx_emulate_syscall(linux_call, arg1, arg2, arg3, arg4, arg5, arg6); @@ -558,6 +608,24 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (EINVAL); } +int +lx_get_zone_kern_version(zone_t *zone) +{ + return (((lx_zone_data_t *)zone->zone_brand_data)->kernel_version); +} + +int +lx_get_kern_version() +{ + return (lx_get_zone_kern_version(curzone)); +} + +void +lx_set_kern_version(zone_t *zone, int vers) +{ + ((lx_zone_data_t *)zone->zone_brand_data)->kernel_version = vers; +} + /* * Copy the per-process brand data from a parent proc to a child. */ @@ -763,8 +831,10 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, * Linux 2.6 programs such as ps will print an error message if the * following aux entry is missing */ - phdr_auxv32[1].a_type = AT_CLKTCK; - phdr_auxv32[1].a_un.a_val = hz; + if (lx_get_kern_version() >= LX_KERN_2_6) { + phdr_auxv32[1].a_type = AT_CLKTCK; + phdr_auxv32[1].a_un.a_val = hz; + } if (copyout(&phdr_auxv32, args->auxp_brand, sizeof (phdr_auxv32)) == -1) diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c index 686afea458..4ccefe1f2d 100644 --- a/usr/src/uts/common/brand/lx/os/lx_syscall.c +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -342,9 +342,54 @@ lx_sysent_t lx_sysent[] = LX_NOSYS("statfs64"), LX_NOSYS("fstatfs64"), LX_NOSYS("tgkill"), /* 270 */ + /* The following are Linux 2.6 system calls */ LX_NOSYS("utimes"), LX_NOSYS("fadvise64_64"), LX_NOSYS("vserver"), + LX_NOSYS("mbind"), + LX_NOSYS("get_mempolicy"), + LX_NOSYS("set_mempolicy"), + LX_NOSYS("mq_open"), + LX_NOSYS("mq_unlink"), + LX_NOSYS("mq_timedsend"), + LX_NOSYS("mq_timedreceive"), /* 280 */ + LX_NOSYS("mq_notify"), + LX_NOSYS("mq_getsetattr"), + LX_NOSYS("kexec_load"), + LX_NOSYS("waitid"), + LX_NOSYS("sys_setaltroot"), + LX_NOSYS("add_key"), + LX_NOSYS("request_key"), + LX_NOSYS("keyctl"), + LX_NOSYS("ioprio_set"), + LX_NOSYS("ioprio_get"), /* 290 */ + LX_NOSYS("inotify_init"), + LX_NOSYS("inotify_add_watch"), + LX_NOSYS("inotify_rm_watch"), + LX_NOSYS("migrate_pages"), + LX_NOSYS("openat"), + LX_NOSYS("mkdirat"), + LX_NOSYS("mknodat"), + LX_NOSYS("fchownat"), + LX_NOSYS("futimesat"), + LX_NOSYS("fstatat64"), /* 300 */ + LX_NOSYS("unlinkat"), + LX_NOSYS("renameat"), + LX_NOSYS("linkat"), + LX_NOSYS("syslinkat"), + LX_NOSYS("readlinkat"), + LX_NOSYS("fchmodat"), + LX_NOSYS("faccessat"), + LX_NOSYS("pselect6"), + LX_NOSYS("ppoll"), + LX_NOSYS("unshare"), /* 310 */ + LX_NOSYS("set_robust_list"), + LX_NOSYS("get_robust_list"), + LX_NOSYS("splice"), + LX_NOSYS("sync_file_range"), + LX_NOSYS("tee"), + LX_NOSYS("vmsplice"), + LX_NOSYS("move_pages"), NULL /* NULL-termination is required for lx_systrace */ }; diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index 14fa5f9122..d08b37c5eb 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -1539,10 +1539,16 @@ lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) static void lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { + char *vers; + if (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4) + vers = LX_UNAME_RELEASE_2_4; + else + vers = LX_UNAME_RELEASE_2_6; + lxpr_uiobuf_printf(uiobuf, "%s version %s (%s version %d.%d.%d) " "#%s SMP %s\n", - LX_UNAME_SYSNAME, LX_UNAME_RELEASE, + LX_UNAME_SYSNAME, vers, #if defined(__GNUC__) "gcc", __GNUC__, @@ -1564,6 +1570,7 @@ lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) * */ /* ARGSUSED */ + static void lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { @@ -1572,6 +1579,10 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) ulong_t idle_cum = 0; ulong_t sys_cum = 0; ulong_t user_cum = 0; + ulong_t irq_cum = 0; + uint_t cpu_nrunnable_cum = 0; + uint_t w_io_cum = 0; + ulong_t pgpgin_cum = 0; ulong_t pgpgout_cum = 0; ulong_t pgswapout_cum = 0; @@ -1580,6 +1591,9 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) ulong_t pswitch_cum = 0; ulong_t forks_cum = 0; hrtime_t msnsecs[NCMSTATES]; + int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp)); + /* temporary variable since scalehrtime modifies data in place */ + hrtime_t tmptime; ASSERT(lxpnp->lxpr_type == LXPR_STAT); @@ -1610,6 +1624,16 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) pgswapin_cum += CPU_STATS(cp, vm.pgswapin); pgswapout_cum += CPU_STATS(cp, vm.pgswapout); + if (lx_kern_version >= LX_KERN_2_6) { + cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable; + w_io_cum += CPU_STATS(cp, sys.iowait); + for (i = 0; i < NCMSTATES; i++) { + tmptime = cp->cpu_intracct[i]; + scalehrtime(&tmptime); + irq_cum += NSEC_TO_TICK(tmptime); + } + } + for (i = 0; i < PIL_MAX; i++) intr_cum += CPU_STATS(cp, sys.intr[i]); @@ -1623,15 +1647,24 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) cp = cp->cpu_next; } while (cp != cpstart); - lxpr_uiobuf_printf(uiobuf, - "cpu %ld %ld %ld %ld\n", - user_cum, 0, sys_cum, idle_cum); + if (lx_kern_version >= LX_KERN_2_6) { + lxpr_uiobuf_printf(uiobuf, + "cpu %ld %ld %ld %ld %ld %ld %ld\n", + user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0); + } else { + lxpr_uiobuf_printf(uiobuf, + "cpu %ld %ld %ld %ld\n", + user_cum, 0, sys_cum, idle_cum); + } /* Do per processor stats */ do { + int i; + ulong_t idle_ticks; ulong_t sys_ticks; ulong_t user_ticks; + ulong_t irq_ticks = 0; /* * Don't count CPUs that aren't even in the system @@ -1647,10 +1680,23 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]); - lxpr_uiobuf_printf(uiobuf, - "cpu%d %ld %ld %ld %ld\n", - cp->cpu_id, - user_ticks, 0, sys_ticks, idle_ticks); + if (lx_kern_version >= LX_KERN_2_6) { + for (i = 0; i < NCMSTATES; i++) { + tmptime = cp->cpu_intracct[i]; + scalehrtime(&tmptime); + irq_ticks += NSEC_TO_TICK(tmptime); + } + + lxpr_uiobuf_printf(uiobuf, + "cpu%d %ld %ld %ld %ld %ld %ld %ld\n", + cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks, + 0, irq_ticks, 0); + } else { + lxpr_uiobuf_printf(uiobuf, + "cpu%d %ld %ld %ld %ld\n", + cp->cpu_id, + user_ticks, 0, sys_ticks, idle_ticks); + } if (pools_enabled) cp = cp->cpu_next_part; @@ -1660,19 +1706,39 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) mutex_exit(&cpu_lock); - lxpr_uiobuf_printf(uiobuf, - "page %lu %lu\n" - "swap %lu %lu\n" - "intr %lu\n" - "ctxt %lu\n" - "btime %lu\n" - "processes %lu\n", - pgpgin_cum, pgpgout_cum, - pgswapin_cum, pgswapout_cum, - intr_cum, - pswitch_cum, - boot_time, - forks_cum); + if (lx_kern_version >= LX_KERN_2_6) { + lxpr_uiobuf_printf(uiobuf, + "page %lu %lu\n" + "swap %lu %lu\n" + "intr %lu\n" + "ctxt %lu\n" + "btime %lu\n" + "processes %lu\n" + "procs_running %lu\n" + "procs_blocked %lu\n", + pgpgin_cum, pgpgout_cum, + pgswapin_cum, pgswapout_cum, + intr_cum, + pswitch_cum, + boot_time, + forks_cum, + cpu_nrunnable_cum, + w_io_cum); + } else { + lxpr_uiobuf_printf(uiobuf, + "page %lu %lu\n" + "swap %lu %lu\n" + "intr %lu\n" + "ctxt %lu\n" + "btime %lu\n" + "processes %lu\n", + pgpgin_cum, pgpgout_cum, + pgswapin_cum, pgswapout_cum, + intr_cum, + pswitch_cum, + boot_time, + forks_cum); + } } @@ -2099,6 +2165,10 @@ lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr) return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr)); } + /* If user is root allow access regardless of permission bits */ + if (secpolicy_proc_access(cr) == 0) + return (0); + /* * Access check is based on only * one of owner, group, public. @@ -2852,11 +2922,8 @@ lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr) if (vp->v_type != VLNK) return (EINVAL); - /* - * Try to produce a symlink name for anything that's really a regular - * file or directory (but not for anything else) - */ - if (rvp != NULL && (rvp->v_type == VDIR || rvp->v_type == VREG)) { + /* Try to produce a symlink name for anything that has a realvp */ + if (rvp != NULL) { if ((error = lxpr_access(vp, VREAD, 0, CRED())) != 0) return (error); if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index 03ac9d09e6..b1d0281a32 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -31,6 +31,7 @@ #ifndef _ASM #include <sys/types.h> #include <sys/cpuvar.h> +#include <sys/zone.h> #endif #ifdef __cplusplus @@ -43,7 +44,8 @@ extern "C" { * Brand uname info */ #define LX_UNAME_SYSNAME "Linux" -#define LX_UNAME_RELEASE "2.4.21" +#define LX_UNAME_RELEASE_2_6 "2.6.18" +#define LX_UNAME_RELEASE_2_4 "2.4.21" #define LX_UNAME_VERSION "BrandZ fake linux" #define LX_UNAME_MACHINE "i686" @@ -54,7 +56,12 @@ extern "C" { #define LX_LIB "lx_brand.so.1" #define LX_LIB_PATH LIB_PATH LX_LIB -#define LX_NSYSCALLS 270 +#define LX_NSYSCALLS_2_4 270 +#define LX_NSYSCALLS_2_6 317 +#define LX_NSYSCALLS LX_NSYSCALLS_2_6 + +#define LX_KERN_2_4 0 +#define LX_KERN_2_6 1 /* * brand(2) subcommands @@ -76,6 +83,7 @@ extern "C" { #define LX_VERSION LX_VERSION_1 #define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS +#define LX_KERN_VERSION_NUM (ZONE_ATTR_BRAND_ATTRS + 1) /* Aux vector containing phdr of linux executable, used by lx_librtld_db */ #define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1 @@ -193,6 +201,11 @@ typedef struct lx_lwp_data { uint_t br_ptrace; /* ptrace is active for this LWP */ } lx_lwp_data_t; +/* brand specific data */ +typedef struct lx_zone_data { + int kernel_version; +} lx_zone_data_t; + #define BR_CPU_BOUND 0x0001 #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) @@ -203,6 +216,9 @@ void lx_brand_int80_callback(void); int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); +extern int lx_get_zone_kern_version(zone_t *); +extern int lx_get_kern_version(void); + extern int lx_debug; #define lx_print if (lx_debug) printf diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h index ac963b015b..b5c5334bff 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_futex.h +++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,10 +39,12 @@ extern "C" { #define FUTEX_CMP_REQUEUE 4 #define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE +#ifdef _KERNEL extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout, - uintptr_t addr2, int val2); + uintptr_t addr2, int val2); extern void lx_futex_init(void); extern int lx_futex_fini(void); +#endif /* _KERNEL */ #ifdef __cplusplus } diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c index 25041d1b18..4c233ca407 100644 --- a/usr/src/uts/common/brand/sn1/sn1_brand.c +++ b/usr/src/uts/common/brand/sn1/sn1_brand.c @@ -43,6 +43,8 @@ char *sn1_emulation_table = NULL; +void sn1_init_brand_data(zone_t *); +void sn1_free_brand_data(zone_t *); void sn1_setbrand(proc_t *); int sn1_getattr(zone_t *, int, void *, size_t *); int sn1_setattr(zone_t *, int, void *, size_t); @@ -60,6 +62,8 @@ int sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, /* sn1 brand */ struct brand_ops sn1_brops = { + sn1_init_brand_data, + sn1_free_brand_data, sn1_brandsys, sn1_setbrand, sn1_getattr, @@ -202,6 +206,18 @@ sn1_initlwp(klwp_t *l) /*ARGSUSED*/ void +sn1_init_brand_data(zone_t *zone) +{ +} + +/*ARGSUSED*/ +void +sn1_free_brand_data(zone_t *zone) +{ +} + +/*ARGSUSED*/ +void sn1_forklwp(klwp_t *p, klwp_t *c) { } diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 2468192505..c89945e756 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -1728,7 +1728,10 @@ zone_set_brand(zone_t *zone, const char *brand) return (EPERM); } + /* set up the brand specific data */ zone->zone_brand = bp; + ZBROP(zone)->b_init_brand_data(zone); + mutex_exit(&zone_status_lock); return (0); } @@ -3999,6 +4002,10 @@ zone_destroy(zoneid_t zoneid) /* Get rid of the zone's kstats */ zone_kstat_delete(zone); + /* free brand specific data */ + if (ZONE_IS_BRANDED(zone)) + ZBROP(zone)->b_free_brand_data(zone); + /* Say goodbye to brand framework. */ brand_unregister_zone(zone->zone_brand); diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index 99314a95f0..bf7d6d4359 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -75,6 +75,8 @@ struct intpdata; struct execa; struct brand_ops { + void (*b_init_brand_data)(zone_t *); + void (*b_free_brand_data)(zone_t *); int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); void (*b_setbrand)(struct proc *); diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index eba224333f..7e7dd9e88a 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -411,6 +411,7 @@ typedef struct zone { boolean_t zone_restart_init; /* Restart init if it dies? */ struct brand *zone_brand; /* zone's brand */ + void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ kstat_t *zone_swapresv_kstat; kstat_t *zone_lockedmem_kstat; |