diff options
Diffstat (limited to 'src/pkg/syscall/exec_plan9.go')
| -rw-r--r-- | src/pkg/syscall/exec_plan9.go | 521 | 
1 files changed, 521 insertions, 0 deletions
| diff --git a/src/pkg/syscall/exec_plan9.go b/src/pkg/syscall/exec_plan9.go new file mode 100644 index 000000000..962b39b78 --- /dev/null +++ b/src/pkg/syscall/exec_plan9.go @@ -0,0 +1,521 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fork, exec, wait, etc. + +package syscall + +import ( +	"sync" +	"unsafe" +) + +// Lock synchronizing creation of new file descriptors with fork. +// +// We want the child in a fork/exec sequence to inherit only the +// file descriptors we intend.  To do that, we mark all file +// descriptors close-on-exec and then, in the child, explicitly +// unmark the ones we want the exec'ed program to keep. +// Unix doesn't make this easy: there is, in general, no way to +// allocate a new file descriptor close-on-exec.  Instead you +// have to allocate the descriptor and then mark it close-on-exec. +// If a fork happens between those two events, the child's exec +// will inherit an unwanted file descriptor. +// +// This lock solves that race: the create new fd/mark close-on-exec +// operation is done holding ForkLock for reading, and the fork itself +// is done holding ForkLock for writing.  At least, that's the idea. +// There are some complications. +// +// Some system calls that create new file descriptors can block +// for arbitrarily long times: open on a hung NFS server or named +// pipe, accept on a socket, and so on.  We can't reasonably grab +// the lock across those operations. +// +// It is worse to inherit some file descriptors than others. +// If a non-malicious child accidentally inherits an open ordinary file, +// that's not a big deal.  On the other hand, if a long-lived child +// accidentally inherits the write end of a pipe, then the reader +// of that pipe will not see EOF until that child exits, potentially +// causing the parent program to hang.  This is a common problem +// in threaded C programs that use popen. +// +// Luckily, the file descriptors that are most important not to +// inherit are not the ones that can take an arbitrarily long time +// to create: pipe returns instantly, and the net package uses +// non-blocking I/O to accept on a listening socket. +// The rules for which file descriptor-creating operations use the +// ForkLock are as follows: +// +// 1) Pipe.    Does not block.  Use the ForkLock. +// 2) Socket.  Does not block.  Use the ForkLock. +// 3) Accept.  If using non-blocking mode, use the ForkLock. +//             Otherwise, live with the race. +// 4) Open.    Can block.  Use O_CLOEXEC if available (Linux). +//             Otherwise, live with the race. +// 5) Dup.     Does not block.  Use the ForkLock. +//             On Linux, could use fcntl F_DUPFD_CLOEXEC +//             instead of the ForkLock, but only for dup(fd, -1). + +var ForkLock sync.RWMutex + +// Convert array of string to array +// of NUL-terminated byte pointer. +func StringArrayPtr(ss []string) []*byte { +	bb := make([]*byte, len(ss)+1) +	for i := 0; i < len(ss); i++ { +		bb[i] = StringBytePtr(ss[i]) +	} +	bb[len(ss)] = nil +	return bb +} + +// gbit16 reads a 16-bit numeric value from a 9P protocol message strored in b, +// returning the value and the remaining slice of b. +func gbit16(b []byte) (uint16, []byte) { +	return uint16(b[0]) | uint16(b[1])<<8, b[2:] +} + +// gstring reads a string from a 9P protocol message strored in b, +// returning the value as a Go string and the remaining slice of b. +func gstring(b []byte) (string, []byte) { +	n, b := gbit16(b) +	return string(b[0:n]), b[n:] +} + +// readdirnames returns the names of files inside the directory represented by dirfd. +func readdirnames(dirfd int) (names []string, err Error) { +	result := make([]string, 0, 100) +	var buf [STATMAX]byte + +	for { +		n, e := Read(dirfd, buf[:]) +		if e != nil { +			return []string{}, e +		} +		if n == 0 { +			break +		} + +		for i := 0; i < n; { +			m, _ := gbit16(buf[i:]) +			m += 2 + +			if m < STATFIXLEN { +				return []string{}, NewError("malformed stat buffer") +			} + +			name, _ := gstring(buf[i+41:]) +			result = append(result, name) + +			i += int(m) +		} +	} +	return []string{}, nil +} + +// readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d. +// ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read. +func readdupdevice() (fds []int, err Error) { +	dupdevfd, err := Open("#d", O_RDONLY) + +	if err != nil { +		return +	} +	defer Close(dupdevfd) + +	fileNames, err := readdirnames(dupdevfd) +	if err != nil { +		return +	} + +	fds = make([]int, 0, len(fileNames)>>1) +	for _, fdstr := range fileNames { +		if l := len(fdstr); l > 2 && fdstr[l-3] == 'c' && fdstr[l-2] == 't' && fdstr[l-1] == 'l' { +			continue +		} + +		fd := int(atoi([]byte(fdstr))) + +		if fd == 0 || fd == 1 || fd == 2 || fd == dupdevfd { +			continue +		} + +		fds = append(fds, fd) +	} + +	return fds[0:len(fds)], nil +} + +var startupFds []int + +// Plan 9 does not allow clearing the OCEXEC flag +// from the underlying channel backing an open file descriptor, +// therefore we store a list of already opened file descriptors +// inside startupFds and skip them when manually closing descriptors +// not meant to be passed to a child exec. +func init() { +	startupFds, _ = readdupdevice() +} + +// forkAndExecInChild forks the process, calling dup onto 0..len(fd) +// and finally invoking exec(argv0, argvv, envv) in the child. +// If a dup or exec fails, it writes the error string to pipe. +// (The pipe write end is close-on-exec so if exec succeeds, it will be closed.) +// +// In the child, this function must not acquire any locks, because +// they might have been locked at the time of the fork.  This means +// no rescheduling, no malloc calls, and no new stack segments. +// The calls to RawSyscall are okay because they are assembly +// functions that do not grow the stack. +func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, chroot, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int) (pid int, err Error) { +	// Declare all variables at top in case any +	// declarations require heap allocation (e.g., errbuf). +	var ( +		r1       uintptr +		nextfd   int +		i        int +		clearenv int +		envfd    int +		errbuf   [ERRMAX]byte +	) + +	// guard against side effects of shuffling fds below. +	fd := append([]int(nil), attr.Files...) + +	if envv != nil { +		clearenv = RFCENVG +	} + +	// About to call fork. +	// No more allocation or calls of non-assembly functions. +	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv), 0, 0) + +	if r1 != 0 { +		if int(r1) == -1 { +			return 0, NewError(errstr()) +		} +		// parent; return PID +		return int(r1), nil +	} + +	// Fork succeeded, now in child. + +	// Close fds we don't need. +	for i = 0; i < len(fdsToClose); i++ { +		r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0) +		if int(r1) == -1 { +			goto childerror +		} +	} + +	if envv != nil { +		// Write new environment variables. +		for i = 0; i < len(envv); i++ { +			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666)) + +			if int(r1) == -1 { +				goto childerror +			} + +			envfd = int(r1) + +			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue), +				^uintptr(0), ^uintptr(0), 0) + +			if int(r1) == -1 || int(r1) != envv[i].nvalue { +				goto childerror +			} + +			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0) + +			if int(r1) == -1 { +				goto childerror +			} +		} +	} + +	// Chdir +	if dir != nil { +		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0) +		if int(r1) == -1 { +			goto childerror +		} +	} + +	// Pass 1: look for fd[i] < i and move those up above len(fd) +	// so that pass 2 won't stomp on an fd it needs later. +	nextfd = int(len(fd)) +	if pipe < nextfd { +		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0) +		if int(r1) == -1 { +			goto childerror +		} +		pipe = nextfd +		nextfd++ +	} +	for i = 0; i < len(fd); i++ { +		if fd[i] >= 0 && fd[i] < int(i) { +			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0) +			if int(r1) == -1 { +				goto childerror +			} + +			fd[i] = nextfd +			nextfd++ +			if nextfd == pipe { // don't stomp on pipe +				nextfd++ +			} +		} +	} + +	// Pass 2: dup fd[i] down onto i. +	for i = 0; i < len(fd); i++ { +		if fd[i] == -1 { +			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0) +			continue +		} +		if fd[i] == int(i) { +			continue +		} + +		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0) +		if int(r1) == -1 { +			goto childerror +		} +		RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0) +	} + +	// Time to exec. +	r1, _, _ = RawSyscall(SYS_EXEC, +		uintptr(unsafe.Pointer(argv0)), +		uintptr(unsafe.Pointer(&argv[0])), 0) + +childerror: +	// send error string on pipe +	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0) +	errbuf[len(errbuf)-1] = 0 +	i = 0 +	for i < len(errbuf) && errbuf[i] != 0 { +		i++ +	} + +	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i), +		^uintptr(0), ^uintptr(0), 0) + +	for { +		RawSyscall(SYS_EXITS, 0, 0, 0) +	} + +	// Calling panic is not actually safe, +	// but the for loop above won't break +	// and this shuts up the compiler. +	panic("unreached") +} + +func cexecPipe(p []int) Error { +	e := Pipe(p) +	if e != nil { +		return e +	} + +	fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC) +	if e != nil { +		Close(p[0]) +		Close(p[1]) +		return e +	} + +	Close(fd) +	return nil +} + +type envItem struct { +	name   *byte +	value  *byte +	nvalue int +} + +type ProcAttr struct { +	Dir    string   // Current working directory. +	Env    []string // Environment. +	Files  []int    // File descriptors. +	Chroot string   // Chroot. +} + +var zeroAttributes ProcAttr + + +func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err Error) { +	var ( +		p      [2]int +		n      int +		errbuf [ERRMAX]byte +		wmsg   Waitmsg +	) + +	if attr == nil { +		attr = &zeroAttributes +	} + +	p[0] = -1 +	p[1] = -1 + +	// Convert args to C form. +	argv0p := StringBytePtr(argv0) +	argvp := StringArrayPtr(argv) + +	var chroot *byte +	if attr.Chroot != "" { +		chroot = StringBytePtr(attr.Chroot) +	} +	var dir *byte +	if attr.Dir != "" { +		dir = StringBytePtr(attr.Dir) +	} +	var envvParsed []envItem +	if attr.Env != nil { +		envvParsed = make([]envItem, 0, len(attr.Env)) +		for _, v := range attr.Env { +			i := 0 +			for i < len(v) && v[i] != '=' { +				i++ +			} + +			envvParsed = append(envvParsed, envItem{StringBytePtr("/env/" + v[:i]), StringBytePtr(v[i+1:]), len(v) - i}) +		} +	} + +	// Acquire the fork lock to prevent other threads from creating new fds before we fork. +	ForkLock.Lock() + +	// get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child. +	// no new fds can be created while we hold the ForkLock for writing. +	openFds, e := readdupdevice() + +	if e != nil { +		ForkLock.Unlock() +		return 0, e +	} + +	fdsToClose := make([]int, 0, len(openFds)) +	// exclude fds opened from startup from the list of fds to be closed. +	for _, fd := range openFds { +		isReserved := false +		for _, reservedFd := range startupFds { +			if fd == reservedFd { +				isReserved = true +				break +			} +		} + +		if !isReserved { +			fdsToClose = append(fdsToClose, fd) +		} +	} + +	// exclude fds requested by the caller from the list of fds to be closed. +	for _, fd := range openFds { +		isReserved := false +		for _, reservedFd := range attr.Files { +			if fd == reservedFd { +				isReserved = true +				break +			} +		} + +		if !isReserved { +			fdsToClose = append(fdsToClose, fd) +		} +	} + +	// Allocate child status pipe close on exec.	 +	e = cexecPipe(p[:]) + +	if e != nil { +		return 0, e +	} +	fdsToClose = append(fdsToClose, p[0]) + +	// Kick off child. +	pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, chroot, dir, attr, fdsToClose, p[1]) + +	if err != nil { +		if p[0] >= 0 { +			Close(p[0]) +			Close(p[1]) +		} +		ForkLock.Unlock() +		return 0, err +	} +	ForkLock.Unlock() + +	// Read child error status from pipe. +	Close(p[1]) +	n, err = Read(p[0], errbuf[:]) +	Close(p[0]) + +	if err != nil || n != 0 { +		if n != 0 { +			err = NewError(string(errbuf[:])) +		} + +		// Child failed; wait for it to exit, to make sure +		// the zombies don't accumulate. +		for wmsg.Pid != pid { +			Await(&wmsg) +		} +		return 0, err +	} + +	// Read got EOF, so pipe closed on exec, so exec succeeded. +	return pid, nil +} + +// Combination of fork and exec, careful to be thread safe. +func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err Error) { +	return forkExec(argv0, argv, attr) +} + +// StartProcess wraps ForkExec for package os. +func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid, handle int, err Error) { +	pid, err = forkExec(argv0, argv, attr) +	return pid, 0, err +} + +// Ordinary exec. +func Exec(argv0 string, argv []string, envv []string) (err Error) { +	if envv != nil { +		r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0) +		if int(r1) == -1 { +			return NewError(errstr()) +		} + +		for _, v := range envv { +			i := 0 +			for i < len(v) && v[i] != '=' { +				i++ +			} + +			fd, e := Create("/env/"+v[:i], O_WRONLY, 0666) +			if e != nil { +				return e +			} + +			_, e = Write(fd, []byte(v[i+1:])) +			if e != nil { +				Close(fd) +				return e +			} +			Close(fd) +		} +	} + +	_, _, e := Syscall(SYS_EXEC, +		uintptr(unsafe.Pointer(StringBytePtr(argv0))), +		uintptr(unsafe.Pointer(&StringArrayPtr(argv)[0])), +		0) + +	return NewError(e) +} | 
