diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2016-04-22 21:58:46 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2016-05-20 22:40:28 +0000 |
commit | e6e2cd9acea32fbe5d71f93e2ca4b306cafcd46e (patch) | |
tree | e53591de4c4b090ada255be2228933aa9e3cc455 | |
parent | d1b35f547df99c68d7ada287497766d9fb0431c5 (diff) | |
download | illumos-joyent-e6e2cd9acea32fbe5d71f93e2ca4b306cafcd46e.tar.gz |
OS-5322 lxbrand use comm page for vdso
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Joshua M. Clulow <jmc@joyent.com>
-rw-r--r-- | manifest | 1 | ||||
-rw-r--r-- | usr/src/common/brand/lx/lx_auxv.c | 11 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/lx_brand.c | 39 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/Makefile | 8 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/Makefile.com | 25 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/amd64/Makefile | 7 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s | 132 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s | 68 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c | 160 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/i386/Makefile | 43 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s | 92 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_brand.c | 100 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/procfs/lx_prvnops.c | 16 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_brand.h | 12 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_syscalls.h | 20 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/comm_page_util.c | 4 | ||||
-rw-r--r-- | usr/src/uts/i86pc/sys/comm_page.h | 1 |
17 files changed, 499 insertions, 240 deletions
@@ -5078,6 +5078,7 @@ f usr/lib/brand/lx/lx_boot_zone_ubuntu 0755 root root f usr/lib/brand/lx/lxinit 0755 root root f usr/lib/brand/lx/lx_librtld_db.so.1 0755 root root f usr/lib/brand/lx/lx_support 0755 root root +f usr/lib/brand/lx/lx_vdso.so.1 0755 root root f usr/lib/brand/lx/platform.xml 0444 root root d usr/lib/brand/shared 0755 root sys f usr/lib/brand/shared/common.ksh 0444 root bin diff --git a/usr/src/common/brand/lx/lx_auxv.c b/usr/src/common/brand/lx/lx_auxv.c index bd7b588ac8..8994ea31a5 100644 --- a/usr/src/common/brand/lx/lx_auxv.c +++ b/usr/src/common/brand/lx/lx_auxv.c @@ -37,14 +37,9 @@ lx_auxv_stol(const auxv_t *ap, auxv_t *oap, const lx_elf_data_t *edp) oap->a_un.a_val = edp->ed_phnum; break; case AT_SUN_BRAND_LX_SYSINFO_EHDR: - if (edp->ed_vdso != 0) { - oap->a_type = AT_SYSINFO_EHDR; - oap->a_un.a_val = edp->ed_vdso; - return (0); - } else { - /* No vDSO for i386 */ - return (1); - } + oap->a_type = AT_SYSINFO_EHDR; + oap->a_un.a_val = ap->a_un.a_val; + return (0); case AT_SUN_BRAND_LX_CLKTCK: oap->a_type = AT_CLKTCK; oap->a_un.a_val = ap->a_un.a_val; diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index bce525a78e..75fba80b07 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -422,29 +422,6 @@ lx_close_fh(FILE *file) extern int set_l10n_alternate_root(char *path); -#if defined(_LP64) -static void * -map_vdso() -{ - int fd; - mmapobj_result_t mpp[10]; /* we know the size of our lib */ - mmapobj_result_t *smpp = mpp; - uint_t mapnum = 10; - - if ((fd = open("/native/usr/lib/brand/lx/amd64/lx_vdso.so.1", - O_RDONLY)) == -1) - lx_err_fatal("couldn't open lx_vdso.so.1"); - - if (mmapobj(fd, MMOBJ_INTERPRET, smpp, &mapnum, NULL) == -1) - lx_err_fatal("couldn't mmapobj lx_vdso.so.1"); - - (void) close(fd); - - /* assume first segment is the base of the mapping */ - return (smpp->mr_addr); -} -#endif - /* * Initialize the 
thread specific data for this thread. */ @@ -592,12 +569,8 @@ lx_init(int argc, char *argv[], char *envp[]) lx_elf_data_t edp; lx_brand_registration_t reg; lx_tsd_t *lxtsd; -#if defined(_LP64) - void *vdso_hdr; -#endif bzero(&reg, sizeof (reg)); - stack_size = 2 * sysconf(_SC_PAGESIZE); /* @@ -686,18 +659,6 @@ lx_init(int argc, char *argv[], char *envp[]) if (lx_statfs_init() != 0) lx_err_fatal("failed to setup the statfs translator"); -#if defined(_LP64) - vdso_hdr = map_vdso(); - edp.ed_vdso = (uintptr_t)vdso_hdr; - /* - * Notify the kernel of this mapping location to keep its - * representation of the auxv consistent with reality. - */ - (void) syscall(SYS_brand, B_NOTIFY_VDSO_LOC, (void *)vdso_hdr); -#else - edp.ed_vdso = 0; -#endif - /* * Find the aux vector on the stack. */ diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile b/usr/src/lib/brand/lx/lx_vdso/Makefile index b71a8a1aeb..56dd0a7a3c 100644 --- a/usr/src/lib/brand/lx/lx_vdso/Makefile +++ b/usr/src/lib/brand/lx/lx_vdso/Makefile @@ -10,17 +10,14 @@ # # -# Copyright 2015 Joyent, Inc. +# Copyright 2016 Joyent, Inc. # include ../../../Makefile.lib -SUBDIRS = tools +SUBDIRS = tools $(MACH) $(BUILD64)SUBDIRS += $(MACH64) -LINT_SUBDIRS = -$(BUILD64)LINT_SUBDIRS += $(MACH64) - all := TARGET= all clean := TARGET= clean clobber := TARGET= clobber @@ -33,6 +30,7 @@ all install clean clobber: $(SUBDIRS) lint: $(LINT_SUBDIRS) +$(MACH): tools $(MACH64): tools $(SUBDIRS): FRC diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile.com b/usr/src/lib/brand/lx/lx_vdso/Makefile.com index 3839260345..0cdb1aaf70 100644 --- a/usr/src/lib/brand/lx/lx_vdso/Makefile.com +++ b/usr/src/lib/brand/lx/lx_vdso/Makefile.com @@ -10,14 +10,16 @@ # # -# Copyright 2015 Joyent, Inc. +# Copyright 2016 Joyent, Inc. 
# LIBRARY = lx_vdso.a VERS = .1 -COBJS = lx_vdso.o -OBJECTS = $(COBJS) +include $(SRC)/lib/commpage/Makefile.shared.com + +COBJS = vdso_main.o vdso_subr.o +OBJECTS = $(COBJS) $(COMMPAGE_OBJS) include ../../../../Makefile.lib include ../../Makefile.lx @@ -31,14 +33,15 @@ LIBNAME = lx_vdso MAPFILES = ../common/mapfile-vers MAPOPTS = $(MAPFILES:%=-M%) -ASOBJS = lx_vdso.o -OBJECTS = $(ASOBJS) - -ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s) -SRCS = $(ASSRCS) +ASOBJS = vdso_subr.o +COBJS = vdso_main.o +OBJECTS = $(ASOBJS) $(COBJS) $(COMMPAGE_OBJS) SRCDIR = ../common -UTSBASE = ../../../../../uts + +ASSRCS = $(ASOBJS:%.o=$(ISASRCDIR)/%.s) +CSRCS = $(COBJS:%.o=$(SRCDIR)/%.c) +SRCS = $(ASSRCS) $(CSRCS) LIBS = $(DYNLIB) DYNFLAGS += $(DYNFLAGS_$(CLASS)) @@ -72,7 +75,11 @@ all: $(LIBS) lint: $(LINTLIB) lintcheck include ../../../../Makefile.targ +include $(SRC)/lib/commpage/Makefile.shared.targ pics/%.o: $(ISASRCDIR)/%.s $(COMPILE.s) -o $@ $< $(POST_PROCESS_O) + +pics/vdso_main.o := CPPFLAGS += $(COMMPAGE_CPPFLAGS) +pics/vdso_subr.o := ASFLAGS += -I$(SRC)/uts/common/brand/lx diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile index cdf6eaa62d..f1c17dcd91 100644 --- a/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile +++ b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile @@ -10,14 +10,19 @@ # # -# Copyright 2014 Joyent, Inc. All rights reserved. +# Copyright 2016 Joyent, Inc. # ISASRCDIR=. +TARGET_ARCH=$(MACH64) include ../Makefile.com include $(SRC)/lib/Makefile.lib.64 +ASFLAGS += -D__$(MACH64) + +SONAME = linux-vdso.so.1 + # # You might ask, why aren't we overriding BUILD.SO in Makefile.com. # That's a sad story. 
The answer is that Makefile.lib.64 includes diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s b/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s deleted file mode 100644 index 039f9b95b3..0000000000 --- a/usr/src/lib/brand/lx/lx_vdso/amd64/lx_vdso.s +++ /dev/null @@ -1,132 +0,0 @@ -/* - * - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - */ - -/* - * Copyright 2015 Joyent, Inc. - */ - -#include <sys/asm_linkage.h> -#include <sys/trap.h> - -/* - * lx vDSO emulation library - * - * This so needs to look like the correct Linux vDSO elf library. We cannot - * use any native symbols or link with any native libraries, particularly libc. - */ - -#define LX_SYS_gettimeofday 96 -#define LX_SYS_time 201 -#define LX_SYS_clock_gettime 228 -#define LX_SYS_getcpu 309 - -#if defined(lint) -int -__vdso_gettimeofday(void *tp, void *tz) -{} - -time_t -__vdso_time(void *tp) -{} - -time_t -__vdso_clock_gettime(uintptr_t id, void *tp) -{} - -int -__vdso_getcpu(void *cpu, void *np, void *cp) -{} - -#else /* lint */ - - /* - * We know the arguments are already in the correct registers (e.g. arg0 - * already in %rdi, arg1 already in %rsi, etc.). %rax has result of - * call. - */ - - /* - * Uses fasttrap, based on lib/libc/amd64/sys/gettimeofday.s - */ - ENTRY_NP(__vdso_gettimeofday) - pushq %rdi /* pointer to timeval */ - movl $T_GETHRESTIME, %eax - int $T_FASTTRAP - /* - * gethrestime trap returns seconds in %rax, nsecs in %edx - * need to convert nsecs to usecs & store into area pointed - * to by struct timeval * argument. 
- */ - popq %rcx /* pointer to timeval */ - jrcxz 1f /* bail if we get a null pointer */ - movq %rax, (%rcx) /* store seconds into timeval ptr */ - movl $274877907, %eax /* divide by 1000 as impl. by gcc */ - imull %edx /* See Hacker's Delight pg 162 */ - sarl $6, %edx /* simplified by 0 <= nsec <= 1e9 */ - movq %rdx, 8(%rcx) /* store usecs into timeval ptr + 8. */ -1: - xorq %rax, %rax /* return 0 */ - ret - SET_SIZE(__vdso_gettimeofday) - - /* - * Uses fasttrap, based on lib/libc/amd64/sys/gettimeofday.s, but only - * returns seconds. This is based on what the kernel's gtime function - * will do. - */ - ENTRY_NP(__vdso_time) - pushq %rdi /* pointer to time_t */ - movl $T_GETHRESTIME, %eax - int $T_FASTTRAP - /* - * gethrestime trap returns seconds in %rax - * store secs into area pointed by time_t * argument. - */ - popq %rcx /* pointer to time_t */ - jrcxz 1f /* don't save if we get a null pointer */ - movq %rax, (%rcx) /* store seconds into time_t ptr */ -1: - ret /* return seconds in %rax */ - SET_SIZE(__vdso_time) - - /* - * Does not use fasttrap since there more work to emulate than we can - * do with a fasttrap. - */ - ENTRY_NP(__vdso_clock_gettime) - movq $LX_SYS_clock_gettime, %rax - syscall - ret - SET_SIZE(__vdso_clock_gettime) - - /* - * Uses fasttrap. - * getcpu takes 3 pointers but we only support saving the cpu ID into - * the first pointer. - */ - ENTRY_NP(__vdso_getcpu) - pushq %rdi /* pointer to int */ - movl $T_GETLGRP, %eax - int $T_FASTTRAP - /* - * getlgrp trap returns CPU ID in %eax - * store it into area pointed by int * argument. 
- */ - popq %rcx /* pointer to int */ - jrcxz 1f /* don't save if we get a null pointer */ - movl %eax, (%rcx) /* store CPU ID into int ptr */ -1: - xorq %rax, %rax /* return 0 */ - ret - SET_SIZE(__vdso_getcpu) -#endif diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s b/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s new file mode 100644 index 0000000000..bf066600aa --- /dev/null +++ b/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s @@ -0,0 +1,68 @@ +/* + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + */ + +/* + * Copyright 2016 Joyent, Inc. + */ + + +#include <sys/asm_linkage.h> +#include <sys/lx_syscalls.h> + + +#if defined(lint) + +comm_page_t * +__vdso_find_commpage() +{} + +long +__vdso_sys_clock_gettime(uint_t clock_id, timespec_t *tp) +{} + +int +__vdso_sys_gettimeofday(timespec_t *tp, struct lx_timezone *tz) +{} + +time_t +__vdso_sys_time(timespec_t *tp) +{} + +#else /* lint */ + + ENTRY_NP(__vdso_find_commpage) + leaq 0x0(%rip), %rax + andq $LX_VDSO_ADDR_MASK, %rax + addq $LX_VDSO_SIZE, %rax + ret + SET_SIZE(__vdso_find_commpage) + + ENTRY_NP(__vdso_sys_clock_gettime) + movl $LX_SYS_clock_gettime, %eax + syscall + ret + SET_SIZE(__vdso_sys_clock_gettime) + + ENTRY_NP(__vdso_sys_gettimeofday) + movl $LX_SYS_gettimeofday, %eax + syscall + ret + SET_SIZE(__vdso_sys_gettimeofday) + + ENTRY_NP(__vdso_sys_time) + movl $LX_SYS_time, %eax + syscall + ret + SET_SIZE(__vdso_sys_time) + +#endif /* lint */ diff --git a/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c b/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c new file mode 100644 index 0000000000..a0474bf26e --- /dev/null +++ 
b/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c @@ -0,0 +1,160 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2016 Joyent, Inc. + */ + +#include <cp_defs.h> + + +struct lx_timezone { + int tz_minuteswest; /* minutes W of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +extern comm_page_t *__vdso_find_commpage(); +extern int __vdso_sys_gettimeofday(timespec_t *, struct lx_timezone *); +extern time_t __vdso_sys_time(timespec_t *); +extern long __vdso_sys_clock_gettime(uint_t, timespec_t *); + +#define LX_CLOCK_REALTIME 0 /* CLOCK_REALTIME */ +#define LX_CLOCK_MONOTONIC 1 /* CLOCK_HIGHRES */ +#define LX_CLOCK_PROCESS_CPUTIME_ID 2 /* Emulated */ +#define LX_CLOCK_THREAD_CPUTIME_ID 3 /* Emulated */ +#define LX_CLOCK_MONOTONIC_RAW 4 /* CLOCK_HIGHRES */ +#define LX_CLOCK_REALTIME_COARSE 5 /* CLOCK_REALTIME */ +#define LX_CLOCK_MONOTONIC_COARSE 6 /* CLOCK_HIGHRES */ + + +void +__hrt2ts(hrtime_t hrt, timespec_t *tsp) +{ + uint32_t sec, nsec, tmp; + + tmp = (uint32_t)(hrt >> 30); + sec = tmp - (tmp >> 2); + sec = tmp - (sec >> 5); + sec = tmp + (sec >> 1); + sec = tmp - (sec >> 6) + 7; + sec = tmp - (sec >> 3); + sec = tmp + (sec >> 1); + sec = tmp + (sec >> 3); + sec = tmp + (sec >> 4); + tmp = (sec << 7) - sec - sec - sec; + tmp = (tmp << 7) - tmp - tmp - tmp; + tmp = (tmp << 7) - tmp - tmp - tmp; + nsec = (uint32_t)hrt - (tmp << 9); + while (nsec >= NANOSEC) { + nsec -= NANOSEC; + sec++; + } + tsp->tv_sec = (time_t)sec; + tsp->tv_nsec = nsec; +} + +int +__vdso_gettimeofday(timespec_t *tp, struct lx_timezone *tz) +{ + comm_page_t *cp = __vdso_find_commpage(); + + 
if (__cp_can_gettime(cp) != 0) { + return (__vdso_sys_gettimeofday(tp, tz)); + } + + if (tp != NULL) { + long usec, nsec; + + __cp_clock_gettime_realtime(cp, tp); + + nsec = tp->tv_nsec; + usec = nsec + (nsec >> 2); + usec = nsec + (usec >> 1); + usec = nsec + (usec >> 2); + usec = nsec + (usec >> 4); + usec = nsec - (usec >> 3); + usec = nsec + (usec >> 2); + usec = nsec + (usec >> 3); + usec = nsec + (usec >> 4); + usec = nsec + (usec >> 1); + usec = nsec + (usec >> 6); + usec = usec >> 10; + tp->tv_nsec = usec; + } + + if (tz != NULL) { + tz->tz_minuteswest = 0; + tz->tz_dsttime = 0; + } + + return (0); +} + +time_t +__vdso_time(timespec_t *tp) +{ + comm_page_t *cp = __vdso_find_commpage(); + timespec_t ts; + + if (__cp_can_gettime(cp) != 0) { + return (__vdso_sys_time(tp)); + } + + __cp_clock_gettime_realtime(cp, &ts); + if (tp != NULL) { + tp->tv_sec = ts.tv_sec; + tp->tv_nsec = 0; + } + return (ts.tv_sec); +} + +long +__vdso_clock_gettime(uint_t clock_id, timespec_t *tp) +{ + comm_page_t *cp = __vdso_find_commpage(); + + if (__cp_can_gettime(cp) != 0) { + return (__vdso_sys_clock_gettime(clock_id, tp)); + } + + switch (clock_id) { + case LX_CLOCK_REALTIME: + case LX_CLOCK_REALTIME_COARSE: + __cp_clock_gettime_realtime(cp, tp); + return (0); + + case LX_CLOCK_MONOTONIC: + case LX_CLOCK_MONOTONIC_RAW: + case LX_CLOCK_MONOTONIC_COARSE: + __hrt2ts(__cp_gethrtime(cp), tp); + return (0); + + case LX_CLOCK_PROCESS_CPUTIME_ID: + case LX_CLOCK_THREAD_CPUTIME_ID: + default: + break; + } + return (__vdso_sys_clock_gettime(clock_id, tp)); +} + +long +__vdso_getcpu(uint_t *cpu, uint_t *node, void *tcache) +{ + comm_page_t *cp = __vdso_find_commpage(); + + if (cpu != NULL) { + *cpu = __cp_getcpu(cp); + } + if (node != NULL) { + *node = 0; + } + return (0); +} diff --git a/usr/src/lib/brand/lx/lx_vdso/i386/Makefile b/usr/src/lib/brand/lx/lx_vdso/i386/Makefile new file mode 100644 index 0000000000..7f9a6a13e6 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_vdso/i386/Makefile @@ 
-0,0 +1,43 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2016 Joyent, Inc. +# + +ISASRCDIR=. +TARGET_ARCH=$(MACH) + +include ../Makefile.com + +ASFLAGS += -D__$(MACH) + +SONAME = linux-gate.so.1 + +# +# You might ask, why aren't we overriding BUILD.SO in Makefile.com. +# See the amd64 Makefile for more answers +# +BUILD.SO = $(LD) -o $@ $(GSHARED) $(DYNFLAGS) $(PICS) $(LDLIBS) + +ASSYMDEP_OBJS = lx_vdso.o + +CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB) + +# Set the object entry point for __vsyscall-ers +entryfix: $(DYNLIB) + $(ELFEDIT) -e "ehdr:e_entry \ + $$($(ELFEDIT) -re 'sym:st_value -osimple __vsyscall' $(DYNLIB))" \ + $(DYNLIB) + +all: entryfix + +install: all $(ROOTLIBS) diff --git a/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s b/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s new file mode 100644 index 0000000000..ed7be8bb23 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s @@ -0,0 +1,92 @@ +/* + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + */ + +/* + * Copyright 2016 Joyent, Inc. 
+ */ + + +#include <sys/asm_linkage.h> +#include <sys/lx_syscalls.h> + + +#if defined(lint) + +comm_page_t * +__vdso_find_commpage() +{} + +long +__vdso_sys_clock_gettime(uint_t clock_id, timespec_t *tp) +{} + +int +__vdso_sys_gettimeofday(timespec_t *tp, struct lx_timezone *tz) +{} + +time_t +__vdso_sys_time(timespec_t *tp) +{} + +#else /* lint */ + + ENTRY_NP(__vdso_find_commpage) + call 1f +1: popl %eax + andl $LX_VDSO_ADDR_MASK, %eax + addl $LX_VDSO_SIZE, %eax + ret + SET_SIZE(__vdso_find_commpage) + + ENTRY_NP(__vdso_sys_clock_gettime) + movl $LX_SYS_clock_gettime, %eax + movl 0x4(%esp), %ebx + movl 0x8(%esp), %ecx + int $0x80 + ret + SET_SIZE(__vdso_sys_clock_gettime) + + ENTRY_NP(__vdso_sys_gettimeofday) + movl $LX_SYS_gettimeofday, %eax + movl 0x4(%esp), %ebx + movl 0x8(%esp), %ecx + int $0x80 + ret + SET_SIZE(__vdso_sys_gettimeofday) + + ENTRY_NP(__vdso_sys_time) + movl $LX_SYS_time, %eax + movl 0x4(%esp), %ebx + int $0x80 + ret + SET_SIZE(__vdso_sys_time) + + ENTRY_NP(__vsyscall) + /* + * On 32-bit Linux, the VDSO entry point (specified by e_entry) + * provides a potentially accelerated means to vector into the kernel. + * Normally this means using 'sysenter' with a Linux-custom calling + * convention so programs expecting int80 behavior are not required to + * change how arguments are passed. + * + * The SunOS sysenter entry point does _not_ tolerate such a departure + * from convention, so if this function is updated to use sysenter, it + * must properly marshal arguments onto the stack from the int80 style. + * Such an enhancement can only occur once sysenter receives the same + * branding hooks as syscall and int80. 
+ */ + int $0x80 + ret + SET_SIZE(__vsyscall) + +#endif /* lint */ diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index 17ce72dbf3..6cff045a80 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -1358,7 +1358,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, led32.ed_entry = (int)pd->l_elf_data.ed_entry; led32.ed_base = (int)pd->l_elf_data.ed_base; led32.ed_ldentry = (int)pd->l_elf_data.ed_ldentry; - led32.ed_vdso = 0; mutex_exit(&p->p_lock); if (copyout(&led32, (void *)arg1, @@ -1792,30 +1791,6 @@ lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, return (0); } - case B_NOTIFY_VDSO_LOC: { -#if defined(_LP64) - if (get_udatamodel() == DATAMODEL_NATIVE) { - int i; - - mutex_enter(&p->p_lock); - pd = ptolxproc(p); - pd->l_elf_data.ed_vdso = arg1; - /* overwrite the auxv data too */ - for (i = 0; i < __KERN_NAUXV_IMPL; i++) { - if (p->p_user.u_auxv[i].a_type == - AT_SUN_BRAND_LX_SYSINFO_EHDR) { - p->p_user.u_auxv[i].a_un.a_val = arg1; - break; - } - } - mutex_exit(&p->p_lock); - return (0); - } -#endif /* defined(_LP64) */ - /* This is not valid for 32bit processes */ - return (EINVAL); - } - case B_GET_PERSONALITY: { unsigned int result; @@ -1967,6 +1942,63 @@ extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, extern int elf32exec(struct vnode *, execa_t *, uarg_t *, intpdata_t *, int, long *, int, caddr_t, cred_t *, int *); +static uintptr_t +lx_map_vdso(struct uarg *args, struct cred *cred) +{ + int err; + char *fpath = LX_VDSO_PATH; + vnode_t *vp; + vattr_t attr; + caddr_t addr; + +#if defined(_LP64) + if (args->to_model != DATAMODEL_NATIVE) { + fpath = LX_VDSO_PATH32; + } +#endif + + /* + * The comm page should have been mapped in already. + */ + if (args->commpage == NULL) { + return (NULL); + } + + /* + * Ensure the VDSO library is present and appropriately sized. 
+ * This lookup is started at the zone root to avoid complications for + * processes which have chrooted. For the specified lookup root to be + * used, the leading slash must be dropped from the path. + */ + ASSERT(fpath[0] == '/'); + fpath++; + if (lookupnameat(fpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, + curzone->zone_rootvp) != 0) { + return (NULL); + } + + /* + * The VDSO requires data exposed via the comm page in order to + * function properly. The VDSO is always mapped in at a fixed known + * offset from the comm page, providing an easy means to locate it. + */ + addr = (caddr_t)(args->commpage - LX_VDSO_SIZE); + attr.va_mask = AT_SIZE; + if (VOP_GETATTR(vp, &attr, 0, cred, NULL) != 0 || + attr.va_size > LX_VDSO_SIZE) { + VN_RELE(vp); + return (NULL); + } + + err = execmap(vp, addr, attr.va_size, 0, 0, + PROT_USER|PROT_READ|PROT_EXEC, 1, 0); + VN_RELE(vp); + if (err != 0) { + return (NULL); + } + return ((uintptr_t)addr); +} + /* * Exec routine called by elfexec() to load either 32-bit or 64-bit Linux * binaries. @@ -2159,6 +2191,12 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, * brand emulation library and its linker. */ + /* + * After execing the brand library (which should have implicitly mapped + * in the comm page), map the VDSO into the approprate place in the AS. + */ + lxpd->l_vdso = lx_map_vdso(args, cred); + bzero(&env, sizeof (env)); /* @@ -2347,11 +2385,7 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, phdr_auxv[0].a_un.a_val = edp.ed_phdr; phdr_auxv[1].a_un.a_val = ldaddr; phdr_auxv[2].a_un.a_val = hz; - /* - * The userspace brand library will map in the vDSO and notify - * the kernel of its location during lx_init. 
- */ - phdr_auxv[3].a_un.a_val = 1; + phdr_auxv[3].a_un.a_val = lxpd->l_vdso; if (copyout(&phdr_auxv, args->auxp_brand, sizeof (phdr_auxv)) == -1) @@ -2368,11 +2402,7 @@ lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args, phdr_auxv32[0].a_un.a_val = edp.ed_phdr; phdr_auxv32[1].a_un.a_val = ldaddr; phdr_auxv32[2].a_un.a_val = hz; - /* - * Unused on i386 due to lack of vDSO. - * It will be cleaned up during lx_init. - */ - phdr_auxv32[3].a_un.a_val = 0; + phdr_auxv32[3].a_un.a_val = lxpd->l_vdso; if (copyout(&phdr_auxv32, args->auxp_brand, sizeof (phdr_auxv32)) == -1) diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index b74df9d818..262339c31c 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -1568,6 +1568,7 @@ static void lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { proc_t *p; + lx_proc_data_t *lxpd; struct as *as; struct seg *seg; char *buf; @@ -1579,6 +1580,7 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) char prot[5]; uintptr_t offset; vnode_t *vp; + char *name_override; struct print_data *next; } *print_head = NULL; struct print_data **print_tail = &print_head; @@ -1593,6 +1595,7 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) } as = p->p_as; + lxpd = ptolxproc(p); if (as == &kas) { lxpr_unlock(p); @@ -1637,6 +1640,15 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr); + pbuf->name_override = NULL; + if (lxpd != NULL) { + if (pbuf->saddr == lxpd->l_vdso) { + pbuf->name_override = "[vdso]"; + } else if (pbuf->saddr == p->p_user.u_commpagep) { + pbuf->name_override = "[vvar]"; + } + } + pbuf->next = NULL; *print_tail = pbuf; print_tail = &pbuf->next; @@ -1658,7 +1670,9 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) ino_t inode = 0; *buf = '\0'; - if (pbuf->vp != NULL) { + if 
(pbuf->name_override != NULL) { + (void) strncpy(buf, pbuf->name_override, buflen); + } else if (pbuf->vp != NULL) { vattr.va_mask = AT_FSID | AT_NODEID; if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(), NULL) == 0) { diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index 034b87c758..4906e444f1 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -59,12 +59,17 @@ extern "C" { #define LX_LIB_PATH32 "/native/usr/lib/lx_brand.so.1" #define LX_LIB_PATH64 "/native/usr/lib/amd64/lx_brand.so.1" +#define LX_VDSO_PATH32 "/native/usr/lib/brand/lx/lx_vdso.so.1" +#define LX_VDSO_PATH64 "/native/usr/lib/brand/lx/amd64/lx_vdso.so.1" + #if defined(_LP64) #define LX_LIB_PATH LX_LIB_PATH64 #define LX_UNAME_MACHINE LX_UNAME_MACHINE64 +#define LX_VDSO_PATH LX_VDSO_PATH64 #else #define LX_LIB_PATH LX_LIB_PATH32 #define LX_UNAME_MACHINE LX_UNAME_MACHINE32 +#define LX_VDSO_PATH LX_VDSO_PATH32 #endif /* @@ -103,7 +108,7 @@ extern "C" { #define B_SET_NATIVE_STACK 147 #define B_SIGEV_THREAD_ID 148 #define B_OVERRIDE_KERN_VER 149 -#define B_NOTIFY_VDSO_LOC 150 +/* formerly B_NOTIFY_VDSO_LOC 150 */ #define B_GET_PERSONALITY 151 #ifndef _ASM @@ -229,7 +234,6 @@ typedef struct lx_elf_data64 { uintptr_t ed_entry; uintptr_t ed_base; uintptr_t ed_ldentry; - uintptr_t ed_vdso; } lx_elf_data64_t; typedef struct lx_elf_data32 { @@ -239,7 +243,6 @@ typedef struct lx_elf_data32 { uint32_t ed_entry; uint32_t ed_base; uint32_t ed_ldentry; - uint32_t ed_vdso; } lx_elf_data32_t; #if defined(_LP64) @@ -313,6 +316,9 @@ typedef struct lx_proc_data { /* Linux process personality */ unsigned int l_personality; + + /* VDSO location */ + uintptr_t l_vdso; } lx_proc_data_t; #endif /* _KERNEL */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h index 483f30545b..64084b77f1 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h +++ 
b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -184,12 +184,13 @@ extern long lx_writev(); #define LX_VSYSCALL_SIZE (uintptr_t)0x1000 #endif +#endif /* _KERNEL */ /* * System call numbers for revectoring: */ -#if defined(_LP64) +#if defined(__amd64) #define LX_SYS_close 3 #define LX_SYS_gettimeofday 96 #define LX_SYS_time 201 @@ -203,17 +204,26 @@ extern long lx_writev(); #define LX_SYS32_clock_gettime 265 #define LX_SYS32_io_setup 245 #define LX_SYS32_getcpu 318 -#else +#elif defined(__i386) #define LX_SYS_close 6 #define LX_SYS_gettimeofday 78 #define LX_SYS_time 13 #define LX_SYS_clock_gettime 265 #define LX_SYS_io_setup 245 #define LX_SYS_getcpu 318 -#endif - +#else +#error "Architecture not supported" +#endif /* defined(__amd64) */ -#endif /* _KERNEL */ +/* + * The current code in the VDSO operates under the expectation that it will be + * mapped at a fixed offset from the comm page. This simplifies the act of + * locating said page without any other reference. The VDSO must fit within + * this offset, matching the same value as COMM_PAGE_ALIGN. 
+ * See: uts/i86pc/sys/comm_page.h + */ +#define LX_VDSO_SIZE 0x4000 +#define LX_VDSO_ADDR_MASK ~(LX_VDSO_SIZE - 1) #ifdef __cplusplus } diff --git a/usr/src/uts/i86pc/os/comm_page_util.c b/usr/src/uts/i86pc/os/comm_page_util.c index 3c635fe79b..1c8c9f8afd 100644 --- a/usr/src/uts/i86pc/os/comm_page_util.c +++ b/usr/src/uts/i86pc/os/comm_page_util.c @@ -39,12 +39,12 @@ comm_page_mapin() { #if defined(__amd64) && !defined(__xpv) proc_t *p = curproc; - caddr_t addr = NULL; + caddr_t addr = (caddr_t)COMM_PAGE_ALIGN; size_t len = COMM_PAGE_SIZE; uint_t prot = PROT_USER | PROT_READ; segumap_crargs_t suarg; - map_addr(&addr, len, (offset_t)0, 1, 0); + map_addr(&addr, len, (offset_t)0, 1, MAP_ALIGN); if (addr == NULL || valid_usr_range(addr, len, prot, p->p_as, p->p_as->a_userlimit) != RANGE_OKAY) { return (NULL); diff --git a/usr/src/uts/i86pc/sys/comm_page.h b/usr/src/uts/i86pc/sys/comm_page.h index 9d94a27763..dbf00bc7a7 100644 --- a/usr/src/uts/i86pc/sys/comm_page.h +++ b/usr/src/uts/i86pc/sys/comm_page.h @@ -27,6 +27,7 @@ extern "C" { #endif #define COMM_PAGE_SIZE PAGESIZE +#define COMM_PAGE_ALIGN 0x4000 #ifndef _ASM |