summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/brand/lx/syscall/lx_miscsys.c
blob: 25f06e134bc9241c5f701c94dc3cd664ad8692f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/systeminfo.h>
#include <sys/fcntl.h>
#include <sys/resource.h>
#include <sys/uadmin.h>
#include <sys/lx_misc.h>
#include <lx_syscall.h>

#define	LINUX_REBOOT_MAGIC1		0xfee1dead
#define	LINUX_REBOOT_MAGIC2		672274793
#define	LINUX_REBOOT_MAGIC2A		85072278
#define	LINUX_REBOOT_MAGIC2B		369367448
#define	LINUX_REBOOT_MAGIC2C		537993216

#define	LINUX_REBOOT_CMD_RESTART	0x1234567
#define	LINUX_REBOOT_CMD_HALT		0xcdef0123
#define	LINUX_REBOOT_CMD_CAD_ON		0x89abcdef
#define	LINUX_REBOOT_CMD_CAD_OFF	0
#define	LINUX_REBOOT_CMD_POWER_OFF	0x4321fedc
#define	LINUX_REBOOT_CMD_RESTART2	0xa1b2c3d4
#define	LINUX_REBOOT_CMD_SW_SUSPEND	0xD000FCE2
#define	LINUX_REBOOT_CMD_KEXEC		0x45584543

#define	LX_RUSAGE_SELF			0
#define	LX_RUSAGE_CHILDREN		(-1)
#define	LX_RUSAGE_BOTH			(-2)
#define	LX_RUSAGE_THREAD		1

#define	LX_SWAP_PRIOMASK		0x7fff
#define	LX_SWAP_PREFER			0x8000
#define	LX_SWAP_DISCARD			0x10000
#define	LX_SWAP_DISCARD_ONCE		0x20000
#define	LX_SWAP_DISCARD_PAGES		0x40000

#define	LX_SWAP_ALL			(LX_SWAP_DISCARD_PAGES | \
					LX_SWAP_DISCARD_ONCE | \
					LX_SWAP_DISCARD | \
					LX_SWAP_PREFER | LX_SWAP_PRIOMASK)

/*
 * Hand-written prototypes for native kernel routines used by the emulation
 * code in this file.  Each one is tagged with the source file it is stated to
 * come from; these declarations must be kept in step with the definitions
 * there.
 *
 * NOTE(review): in the code visible here, munmap() and vhangup() are declared
 * but never called (lx_vhangup() does not call the native routine) -- confirm
 * whether the declarations are still needed.
 */
/* From uts/common/fs/vfs.c */
extern void vfs_sync(int);
/* From uts/common/os/grow.c */
extern int mincore(caddr_t, size_t, char *);
extern int munmap(caddr_t, size_t);
/* From uts/common/os/session.c */
extern int vhangup();
/* From uts/common/syscall/alarm.c */
extern int alarm(int);
/* From uts/common/syscall/chdir.c */
extern int chdir(char *);
extern int chroot(char *);
extern int fchdir(int);
/* From uts/common/syscall/nice.c */
extern int nice(int);
/* From uts/common/syscall/open.c */
extern int open(char *, int, int);
/* From uts/common/syscall/pause.c */
extern int pause();
/* From uts/common/syscall/rusagesys.c */
extern int rusagesys(int, void *, void *, void *, void *);
/* From uts/common/syscall/systeminfo.c */
extern long systeminfo(int, char *, long);
/* From uts/common/syscall/timers.c */
extern int getitimer(uint_t, struct itimerval *);
/* From uts/common/syscall/time.c */
extern int stime(time_t);
/* From uts/common/syscall/uadmin.c */
extern int uadmin(int, int, uintptr_t);
/* From uts/common/syscall/chdir.c */
extern int chdir_proc(proc_t *, vnode_t *, boolean_t, boolean_t);
/* From uts/common/fs/lookup.c */
extern int lookupname(char *, enum uio_seg, int, vnode_t **, vnode_t **);
/* From uts/common/fs/fs_subr.c */
extern int fs_need_estale_retry(int);
/* From uts/common/os/acct.c */
extern int sysacct(char *);

/*
 * The callback arguments when handling a FS clone group.  One instance is
 * filled in by the caller of lx_clone_grp_walk() and handed to
 * lx_clone_fs_cb() for every process in the group.
 */
typedef struct {
	/* Target vnode; lx_clone_fs_cb() takes one extra hold per process. */
	vnode_t	*lcfa_vp;
	/*
	 * Passed as the third argument of chdir_proc().  Set from is_chroot
	 * by lx_clone_fs_do_group() and to B_FALSE by lx_fchdir().
	 */
	boolean_t lcfa_type;
	/*
	 * Passed as the fourth argument of chdir_proc().  B_TRUE for the
	 * path-based callers, B_FALSE for lx_fchdir().
	 */
	boolean_t lcfa_traverse;
} lx_clone_fs_arg_t;

/*
 * alarm(2) emulation: the request goes straight to the native alarm()
 * handler and its result is handed back unchanged.
 */
long
lx_alarm(int seconds)
{
	long rv;

	rv = alarm(seconds);
	return (rv);
}

/*
 * Per-process callback handed to lx_clone_grp_walk(): apply the vnode carried
 * in the lx_clone_fs_arg_t to process pp by way of chdir_proc().
 *
 * The caller already owns one hold on the vnode, either:
 * A) from the initial lookupname() in lx_clone_fs_do_group(), or
 * B) from the explicit VN_HOLD in lx_fchdir().
 * That hold only guarantees the vnode exists for the duration of the walk;
 * every process in the group needs a hold of its own, so another one is taken
 * here before each chdir_proc() call.
 *
 * Returns 0 on success, otherwise the error reported by chdir_proc().
 */
static int
lx_clone_fs_cb(proc_t *pp, void *arg)
{
	lx_clone_fs_arg_t *cfa = arg;

	VN_HOLD(cfa->lcfa_vp);

	/* If this fails, chdir_proc() has already done a rele on the vnode. */
	return (chdir_proc(pp, cfa->lcfa_vp, cfa->lcfa_type,
	    cfa->lcfa_traverse));
}

/*
 * Apply a chdir or chroot to every process of a CLONE_FS clone group.
 *
 * Returns B_FALSE, without touching *errp, when the calling process is not a
 * member of such a group (the normal case); the caller is then expected to
 * fall back to the native system call.  Otherwise the path is looked up, the
 * group is walked with lx_clone_fs_cb(), *errp receives 0 or an errno value,
 * and B_TRUE is returned.
 */
static boolean_t
lx_clone_fs_do_group(char *path, boolean_t is_chroot, int *errp)
{
	lx_proc_data_t *pd = ttolxproc(curthread);
	lx_clone_fs_arg_t cbarg;
	vnode_t *dvp;
	int attempts = 0;
	int error;

	if (!lx_clone_grp_member(pd, LX_CLONE_FS))
		return (B_FALSE);

	/*
	 * The rare case: we are in a CLONE_FS clone group.  Repeat the lookup
	 * for as long as it fails with ESTALE and another retry is permitted.
	 */
	do {
		error = lookupname(path, UIO_USERSPACE, FOLLOW, NULLVPP, &dvp);
	} while (error == ESTALE && fs_need_estale_retry(attempts++));

	if (error != 0) {
		*errp = error;
		return (B_TRUE);
	}

	cbarg.lcfa_vp = dvp;
	cbarg.lcfa_type = is_chroot;
	cbarg.lcfa_traverse = B_TRUE;

	/*
	 * The hold obtained by the lookup keeps the vnode in existence for the
	 * whole walk; it is dropped once the walk has finished.
	 */
	error = lx_clone_grp_walk(pd, LX_CLONE_FS, lx_clone_fs_cb, &cbarg);
	VN_RELE(dvp);

	*errp = error;
	return (B_TRUE);
}

long
lx_chdir(char *path)
{
	int err;

	/* Handle the rare case of being in a CLONE_FS clone group */
	if (lx_clone_fs_do_group(path, B_FALSE, &err))
		return ((err != 0) ? set_errno(err) : 0);

	return (chdir(path));
}

long
lx_chroot(char *path)
{
	int err;

	/* Handle the rare case of being in a CLONE_FS clone group */
	if (lx_clone_fs_do_group(path, B_TRUE, &err))
		return ((err != 0) ? set_errno(err) : 0);

	return (chroot(path));
}

/*
 * creat(2) emulation, expressed as a native open() of path with the
 * write-only, create and truncate flags and the supplied mode.
 */
long
lx_creat(char *path, mode_t mode)
{
	const int oflags = O_WRONLY | O_CREAT | O_TRUNC;

	return (open(path, oflags, mode));
}

long
lx_fchdir(int fd)
{
	lx_proc_data_t *lproc = ttolxproc(curthread);

	if (lx_clone_grp_member(lproc, LX_CLONE_FS)) {
		/* Handle the rare case of being in a CLONE_FS clone group */
		file_t *fp;
		vnode_t *vp;
		lx_clone_fs_arg_t arg;
		int err;

		if ((fp = getf(fd)) == NULL)
			return (set_errno(EBADF));
		vp = fp->f_vnode;
		VN_HOLD(vp);
		releasef(fd);

		arg.lcfa_vp = vp;
		arg.lcfa_type = B_FALSE;
		arg.lcfa_traverse = B_FALSE;

		/*
		 * We use the VN_HOLD above to guarantee vp exists for the
		 * entire walk.
		 */
		err = lx_clone_grp_walk(lproc, LX_CLONE_FS, lx_clone_fs_cb,
		    (void *)&arg);
		VN_RELE(vp);
		if (err)
			return (set_errno(err));
		return (0);
	}

	return (fchdir(fd));
}

/*
 * getitimer(2) emulation: both arguments are passed to the native
 * getitimer() untouched and its result is returned as is.
 */
long
lx_getitimer(int which, struct itimerval *value)
{
	long rv;

	rv = getitimer(which, value);
	return (rv);
}

/*
 * getrusage(2) emulation.  Linux and illumos have the same rusage structures,
 * so only the "who" selector needs translating into a rusagesys() code.  Any
 * selector other than SELF, CHILDREN or THREAD fails with EINVAL.
 */
long
lx_getrusage(int who, struct rusage *rup)
{
	int code;

	if (who == LX_RUSAGE_SELF)
		code = _RUSAGESYS_GETRUSAGE;
	else if (who == LX_RUSAGE_CHILDREN)
		code = _RUSAGESYS_GETRUSAGE_CHLD;
	else if (who == LX_RUSAGE_THREAD)
		code = _RUSAGESYS_GETRUSAGE_LWP;
	else
		return (set_errno(EINVAL));

	return (rusagesys(code, rup, NULL, NULL, NULL));
}

/*
 * mincore(2) emulation on top of the native mincore().
 *
 * The native result is returned as is, with one exception: when it equals
 * EINVAL, the zone's emulated kernel release compares newer than 2.6.11 and
 * len is negative when viewed as a long, ENOMEM is set instead.  LTP
 * mincore01 expects a huge len to fail with ENOMEM on a modern kernel, while
 * Linux 2.6.11 and earlier return EINVAL.
 *
 * NOTE(review): other handlers in this file report failure through
 * set_errno() rather than by returning the errno value.  If the native
 * mincore() does the same, its return value never equals EINVAL and the
 * special case below cannot trigger -- confirm against mincore().
 */
long
lx_mincore(caddr_t addr, size_t len, char *vec)
{
	int rv = mincore(addr, len, vec);

	if (rv != EINVAL)
		return (rv);

	if (lx_kern_release_cmp(curzone, "2.6.11") > 0 && (long)len < 0)
		return (set_errno(ENOMEM));

	return (rv);
}

/*
 * nice(2) emulation: the increment is handed to the native nice() and its
 * result is returned unchanged.
 */
long
lx_nice(int incr)
{
	long rv;

	rv = nice(incr);
	return (rv);
}

/*
 * pause(2) emulation: defers entirely to the native pause() and returns
 * whatever it returns.
 */
long
lx_pause(void)
{
	long rv;

	rv = pause();
	return (rv);
}

/*
 * reboot(2) emulation.
 *
 * Both magic numbers are validated first (EINVAL), then the caller must have
 * uid 0 (EPERM).  The command in "flag" is then handled as follows:
 *   CAD_ON, CAD_OFF      ignored, returns 0
 *   POWER_OFF, HALT      uadmin(A_SHUTDOWN, AD_HALT)
 *   RESTART, RESTART2    uadmin(A_SHUTDOWN, AD_BOOT)
 *   anything else        EINVAL
 * The fourth argument is not used.
 */
/*ARGSUSED*/
long
lx_reboot(int magic1, int magic2, uint_t flag, uintptr_t p4)
{
	int fcn;

	if (magic1 != LINUX_REBOOT_MAGIC1)
		return (set_errno(EINVAL));

	if (magic2 != LINUX_REBOOT_MAGIC2 &&
	    magic2 != LINUX_REBOOT_MAGIC2A &&
	    magic2 != LINUX_REBOOT_MAGIC2B &&
	    magic2 != LINUX_REBOOT_MAGIC2C)
		return (set_errno(EINVAL));

	/*
	 * Once we have better Linux capabilities(7) support we should check
	 * CAP_SYS_BOOT instead.
	 */
	if (crgetuid(CRED()) != 0)
		return (set_errno(EPERM));

	switch (flag) {
	case LINUX_REBOOT_CMD_CAD_ON:
	case LINUX_REBOOT_CMD_CAD_OFF:
		/* ignored */
		return (0);

	case LINUX_REBOOT_CMD_POWER_OFF:
	case LINUX_REBOOT_CMD_HALT:
		fcn = AD_HALT;
		break;

	case LINUX_REBOOT_CMD_RESTART:
	case LINUX_REBOOT_CMD_RESTART2:
		/* RESTART2 may need more work */
		fcn = AD_BOOT;
		break;

	default:
		return (set_errno(EINVAL));
	}

	return (uadmin(A_SHUTDOWN, fcn, (uintptr_t)NULL));
}

/*
 * setdomainname(2) emulation via systeminfo(SI_SET_SRPC_DOMAIN).
 *
 * A length that is negative or not below LX_SYS_UTS_LN fails with EINVAL.
 * Otherwise the lwp's errno is cleared, systeminfo() is called with its
 * return value ignored, and the lwp's errno afterwards (0 when nothing was
 * recorded) is what this function returns.
 */
long
lx_setdomainname(char *name, long len)
{
	long error;

	if (!(len >= 0 && len < LX_SYS_UTS_LN))
		return (set_errno(EINVAL));

	ttolwp(curthread)->lwp_errno = 0;
	(void) systeminfo(SI_SET_SRPC_DOMAIN, name, len);

	error = ttolwp(curthread)->lwp_errno;
	return (error);
}

/*
 * sethostname(2) emulation via systeminfo(SI_SET_HOSTNAME).
 *
 * The lwp's errno is cleared, systeminfo() is called with its return value
 * ignored, and the lwp's errno afterwards (0 when nothing was recorded) is
 * what this function returns.
 */
long
lx_sethostname(char *name, size_t len)
{
	long error;

	ttolwp(curthread)->lwp_errno = 0;
	(void) systeminfo(SI_SET_HOSTNAME, name, len);

	error = ttolwp(curthread)->lwp_errno;
	return (error);
}

/*
 * stime(2) emulation.  Linux passes a pointer to the new time, whereas the
 * native stime() takes the value itself, so the value is copied in from the
 * caller first.  A failed copyin() yields EFAULT.
 *
 * NOTE(review): sizeof (time_t) bytes are copied using the kernel's time_t;
 * confirm that this matches the caller's time_t width for 32-bit processes.
 */
long
lx_stime(time_t *tp)
{
	time_t newtime;

	if (copyin(tp, &newtime, sizeof (newtime)) == 0)
		return (stime(newtime));

	return (set_errno(EFAULT));
}

/*
 * sync(2) emulation: calls vfs_sync() with a flag argument of 0 and always
 * reports success.
 */
long
lx_sync(void)
{
	const long rv = 0;

	vfs_sync(0);
	return (rv);
}

/*
 * syslog(2) emulation.  There is no Linux kernel here and hence nothing to
 * log, so we behave as if the kernel buffer (LOG_BUF_LEN) were 0 bytes long
 * and merely diagnose bad input.
 *
 * Every action except 3 and 10 requires CAP_SYS_ADMIN or CAP_SYSLOG on Linux;
 * lacking full capabilities support, a uid check stands in for that for now.
 *
 * Checks, in order:
 *   - type outside 0..10                        EINVAL
 *   - type other than 3 or 10, caller not uid 0  EPERM
 *   - type 2..4 with a NULL buffer or len < 0    EINVAL
 *   - type 8 with len outside 1..8              EINVAL
 * Otherwise 0 is returned.
 */
long
lx_syslog(int type, char *bufp, int len)
{
	if (type < 0 || type > 10)
		return (set_errno(EINVAL));

	if (type != 3 && type != 10) {
		if (crgetuid(CRED()) != 0)
			return (set_errno(EPERM));
	}

	switch (type) {
	case 2:
	case 3:
	case 4:
		if (bufp == NULL || len < 0)
			return (set_errno(EINVAL));
		break;
	case 8:
		if (len < 1 || len > 8)
			return (set_errno(EINVAL));
		break;
	default:
		break;
	}

	return (0);
}

/*
 * vhangup(2) emulation: succeeds for uid 0 and fails with EPERM for anyone
 * else, without doing any further work.
 */
long
lx_vhangup(void)
{
	/*
	 * The native vhangup code does nothing except check for the sys_config
	 * privilege. Eventually we'll first want to check our emulation for the
	 * Linux CAP_SYS_TTY_CONFIG capability, but currently, once we know the
	 * process is root, we just succeed.
	 */
	if (crgetuid(CRED()) == 0)
		return (0);

	return (set_errno(EPERM));
}

/*
 * acct(2) emulation: the argument is handed to the native sysacct() and its
 * result is returned unchanged.
 */
long
lx_acct(char *p)
{
	long rv;

	rv = sysacct(p);
	return (rv);
}

/*
 * Support for Linux namespaces is not yet implemented. Normally we would
 * simply return ENOSYS for this. However, "systemd" uses mount namespaces to
 * provide the PrivateTmp feature for some services. Use of this feature is
 * becoming common and these services will fail to run without namespace
 * support. "systemd" has a fallback to allow these types of services to run if
 * it sees either EACCES or EPERM when it tries to setup the namespace. Until
 * we have namespace support, we return EPERM to workaround this issue.
 */
/*ARGSUSED*/
long
lx_unshare(int flags)
{
	return (set_errno(EPERM));
}

/*
 * The whole idea of "swap space" within a zone is a complete fabrication.
 * However, some apps expect to be able to see swap space data in the /proc
 * files, while other apps actually don't want there to be any swap space
 * configured. We use the swapon/off syscalls to allow this visibility to be
 * controlled from within the zone itself. Note that the "swapon" CLI tends to
 * do a lot of additional validation which will fail within a zone.
 *
 * Once we have better Linux capabilities(7) support we should check
 * CAP_SYS_ADMIN instead of uid == 0.
 *
 * lx_swapoff() marks swap as disabled for the current zone.  The path is only
 * copied in to validate it; its contents are not otherwise used.  Returns 0 on
 * success; fails with the copyinstr() error (EFAULT for a bad address,
 * ENAMETOOLONG for a path that does not fit in MAXPATHLEN) or with EPERM when
 * the caller is not uid 0.
 */
long
lx_swapoff(char *path)
{
	char buf[MAXPATHLEN];
	size_t len;
	lx_zone_data_t *lxzd;
	int err;

	/*
	 * Simple validation of the argument.  Report copyinstr()'s own error
	 * so that an overlong path is not mislabelled as a bad address.
	 */
	if ((err = copyinstr(path, buf, sizeof (buf), &len)) != 0)
		return (set_errno(err));
	if (crgetuid(CRED()) != 0)
		return (set_errno(EPERM));

	lxzd = ztolxzd(curzone);
	ASSERT(lxzd != NULL);

	lxzd->lxzd_swap_disabled = B_TRUE;
	return (0);
}

/*
 * swapon(2) emulation: marks swap as enabled (visible) for the current zone.
 * The path is only copied in to validate it; its contents are not otherwise
 * used, and the flags are checked but have no further effect.
 *
 * Returns 0 on success; fails with the copyinstr() error (EFAULT for a bad
 * address, ENAMETOOLONG for a path that does not fit in MAXPATHLEN), with
 * EINVAL when flags has a bit outside LX_SWAP_ALL, or with EPERM when the
 * caller is not uid 0.
 */
long
lx_swapon(char *path, int flags)
{
	char buf[MAXPATHLEN];
	size_t len;
	lx_zone_data_t *lxzd;
	int err;

	/*
	 * Simple validation of the arguments.  Report copyinstr()'s own error
	 * so that an overlong path is not mislabelled as a bad address.
	 */
	if ((err = copyinstr(path, buf, sizeof (buf), &len)) != 0)
		return (set_errno(err));
	if (flags & ~LX_SWAP_ALL)
		return (set_errno(EINVAL));
	if (crgetuid(CRED()) != 0)
		return (set_errno(EPERM));

	lxzd = ztolxzd(curzone);
	ASSERT(lxzd != NULL);

	lxzd->lxzd_swap_disabled = B_FALSE;
	return (0);
}