diff options
Diffstat (limited to 'usr/src/uts/common/brand/lx/os/lx_audit.c')
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_audit.c | 1604 |
1 files changed, 1604 insertions, 0 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_audit.c b/usr/src/uts/common/brand/lx/os/lx_audit.c new file mode 100644 index 0000000000..6e522e6d8d --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_audit.c @@ -0,0 +1,1604 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +/* + * The Linux auditing system provides a fairly complex rule-based syntax + * for configuring what actions are to be audited. The user-level details + * are generally described in the Linux audit.rules(7), auditctl(8), and + * auditd(8) man pages. The user/kernel netlink API does not seem to be + * documented. The Linux kernel source and the user-level auditd source must + * be used to understand the interface we have to emulate. The relevant Linux + * source files are: + * include/uapi/linux/audit.h + * include/linux/audit.h + * kernel/audit.c + * + * The lx_netlink module implements the API used for getting or changing the + * audit configuration. For rule-oriented operations (list, append, delete), + * an lx_audit_rule_t structure (or sequence when listing) is passed in/out of + * the kernel. The netlink code calls into the lx_audit_append_rule or + * lx_audit_delete_rule functions here to perform the relevant operation. + * Within the lx_audit_rule_t structure, each member has the following + * meaning: + * lxar_flag: corresponds to user-level list (e.g. "exit" for syscall return) + * lxar_action: user-level action (e.g. "always") + * lxar_fld_cnt: number of fields specified in lxar_fields, lxar_values, and + * lxar_flg_flag arrays + * lxar_mask: syscall number bitmask the rule applies to (bit position in + * the array corresponds to the syscall number) + * laxr_fields: array of fields in the rule (i.e. each -F on user-level rule). + * A numeric code (e.g. LX_RF_AUDIT_ARCH) is assigned to each + * possible field. + * lxar_values: array of numeric field values (e.g. the internal b64 value on + * the -F AUDIT_ARCH=b64 rule) + * lxar_fld_flag: array of field operators (e.g. the '=' operator on the + * -F AUDIT_ARCH=b64 rule) + * lxar_buflen: length of the buffer data immediately following + * lxar_buf: A variable amount of additional field string data. Non-numeric + * field values are passed here. For example, the string associated + * with the '-F key=...' or -F path=...' rules. For string values, + * the corresponding lxar_values entry is the length of the string. + * The strings in lxar_buf are not C strings because they are not + * NULL terminated. The character data is pulled out of lxar_buf + * in chunks specified by the value and the pointer into the buf + * is advanced accordingly. + * + * There are two primary kinds of actions which we are currently interested in + * auditing; + * 1) system call return + * this corresponds to user-level "exit" rule actions + * 2) file system related actions + * this corresponds to user-level file system watch rules (-w) + * + * Only system call return is currently implemented, and only a very limited + * subset of all of the possible rule selection behavior. + * + * The Linux audit rule syntax defines that all selection criteria within a + * rule is ANDed together before an audit record is created. However, multiple + * rules can be defined for a specific syscall. For example, this user-level + * syntax defines two different rules for the "open" syscall: + * -a always,exit -F arch=b64 -S open -F auid>=1000 -F key=user-open + * -a always,exit -F arch=b64 -S open -F auid=0 -F key=priv-open + * The first rule would cause an audit record to be created when an "open" + * syscall returns and the syscall was performed by a process with a + * loginuid >= 1000. The key added to that audit record would be "user-open". + * The second rule would create an audit record if the loginuid was 0 and the + * record's key would be "priv-open". + * + * When auditing is enabled for a syscall return, we have to look at multiple + * rules and create an audit record for each rule that matches the selection + * criteria. + * + * Although the current implementation is limited, the overall structure is + * designed to be enhanced as more auditing support is added over time. + * + * By default, auditing is not enabled for a zone and no internal audit data + * exists. When the first netlink audit msg is received, the zone's audit state + * (lx_audit_state_t) is allocated (via lx_audit_init) and attached to the + * zone's lx brand-specific data (lxzd_audit_state). Once allocated, the audit + * data will persist until the zone halts. + * + * Audit records are enqueued onto the lxast_ev_queue and a worker thread + * (lx_audit_worker) is responsible for dequeueing the audit records and + * sending them up to the user-level auditd. + * + * Audit rules are stored in the lxast_rules list. This is an internal list + * consisting of elements of type lx_audit_rule_ent_t. Each element contains + * the input rule (lxare_rule) along with some additional data parsed out of + * the rule when it is appended (currently only the arch and key). + * + * When auditing is enabled for a syscall, the appropriate entry in the + * lxast_sys64_rulep (or lxast_sys32_rulep) array will point to the first + * rule that is applicable to the syscall. When that syscall returns, rule + * matching proceeds from that rule to the end of the rule list. + * + * New rules are always appended at the end of the list and Linux expects that + * rules are matched in order. + * + * If the rule list ever gets large enough that a linear search, anchored off + * the syscall pointer, becomes a performance bottleneck, then we'll have to + * explore alternate implementations. However, use of auditing is not that + * common to begin with, and most syscalls are typically not audited, so as + * long as the number of rules is in the order of tens, then the current + * implementation should be fine. + * + * When a rule is deleted, all associated syscall entries (lxast_sys64_rulep or + * lxast_sys32_rulep) are cleared, then the rule list is searched to see if + * there are any remaining rules which are applicable to the syscall(s). If so, + * pointers are reestablished in the relevant lxast_sys64_rulep (or 32) array. + */ + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/ddi.h> +#include <sys/zone.h> +#include <sys/strsubr.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sunddi.h> +#include <sys/strsun.h> +#include <sys/tihdr.h> +#include <sys/sockio.h> +#include <sys/brand.h> +#include <sys/debug.h> +#include <sys/ucred.h> +#include <sys/session.h> +#include <sys/lx_types.h> +#include <sys/lx_audit.h> +#include <sys/lx_brand.h> +#include <sys/lx_misc.h> +#include <sys/lx_socket.h> +#include <sys/bitmap.h> +#include <sockcommon.h> + +#define LX_AUDIT_FEATURE_VERSION 1 + +/* + * Audit status mask values (lxas_mask in structure defined below) + * See Linux include/uapi/linux/audit.h + */ +#define LX_AUDIT_STATUS_ENABLED 0x001 +#define LX_AUDIT_STATUS_FAILURE 0x002 +#define LX_AUDIT_STATUS_PID 0x004 +#define LX_AUDIT_STATUS_RATE_LIMIT 0x008 +#define LX_AUDIT_STATUS_BACKLOG_LIMIT 0x010 +#define LX_AUDIT_STATUS_BACKLOG_WAIT_TIME 0x020 +#define LX_AUDIT_STATUS_LOST 0x040 + +/* + * Audit features + * See Linux include/uapi/linux/audit.h + */ +#define LX_AUDIT_F_BACKLOG_LIMIT 0x001 +#define LX_AUDIT_F_BACKLOG_WAIT_TIME 0x002 +#define LX_AUDIT_F_EXECUTABLE_PATH 0x004 +#define LX_AUDIT_F_EXCLUDE_EXTEND 0x008 +#define LX_AUDIT_F_SESSIONID_FILTER 0x010 +#define LX_AUDIT_F_LOST_RESET 0x020 +#define LX_AUDIT_F_FILTER_FS 0x040 + +#define LX_AUDIT_FEATURE_ALL (LX_AUDIT_F_BACKLOG_LIMIT | \ + LX_AUDIT_F_BACKLOG_WAIT_TIME | LX_AUDIT_F_EXECUTABLE_PATH | \ + LX_AUDIT_F_EXCLUDE_EXTEND | LX_AUDIT_F_SESSIONID_FILTER | \ + LX_AUDIT_F_LOST_RESET | LX_AUDIT_F_FILTER_FS) + + +/* Audit events */ +#define LX_AUDIT_SYSCALL 1300 /* syscall */ +#define LX_AUDIT_PATH 1302 /* file path */ +#define LX_AUDIT_CONFIG_CHANGE 1305 /* configuration change */ +#define LX_AUDIT_CWD 1307 /* current working directory */ +#define LX_AUDIT_EXECVE 1309 /* exec args */ +#define LX_AUDIT_EOE 1320 /* end of multi-record event */ + +#define LX_AUDIT_BITMASK_SIZE 64 +#define LX_AUDIT_MAX_KEY_LEN 256 + +/* Audit rule filter type */ +#define LX_AUDIT_FILTER_USER 0 /* user generated msgs */ +#define LX_AUDIT_FILTER_TASK 1 /* task creation */ +#define LX_AUDIT_FILTER_ENTRY 2 /* syscall entry - obsolete */ +#define LX_AUDIT_FILTER_WATCH 3 /* fs watch */ +#define LX_AUDIT_FILTER_EXIT 4 /* syscall return */ +#define LX_AUDIT_FILTER_TYPE 5 /* audit log start */ +#define LX_AUDIT_FILTER_FS 6 /* audit inode child */ + +/* Audit rule action type */ +#define LX_AUDIT_ACT_NEVER 0 +#define LX_AUDIT_ACT_POSSIBLE 1 +#define LX_AUDIT_ACT_ALWAYS 2 /* the common case */ + +#define LX_AUDIT_RULE_MAX_FIELDS 64 + +/* Linux defaults */ +#define LX_AUDIT_DEF_BACKLOG_LIMIT 64 +#define LX_AUDIT_DEF_WAIT_TIME (60 * HZ_TO_LX_USERHZ(hz)) + +/* + * Audit rule field types + * Linux defines a lot of Rule Field values in include/uapi/linux/audit.h. + * We currently only handle a few. + */ +#define LX_RF_AUDIT_LOGINUID 9 /* e.g. auid>=1000 */ +#define LX_RF_AUDIT_ARCH 11 /* e.g. -F arch=b64 */ +#define LX_RF_AUDIT_WATCH 105 /* user-level -w rule */ +#define LX_RF_AUDIT_PERM 106 /* user-level -p option */ +#define LX_RF_AUDIT_FILTERKEY 210 /* user-level -k key option */ + +/* + * Audit rule field operators + * Linux defines the operator values in include/uapi/linux/audit.h. + * These 4 bits are combined in various ways for additional operators. + */ +#define LX_OF_AUDIT_BM 0x08000000 /* bit mask (&) */ +#define LX_OF_AUDIT_LT 0x10000000 +#define LX_OF_AUDIT_GT 0x20000000 +#define LX_OF_AUDIT_EQ 0x40000000 +#define LX_OF_AUDIT_NE (LX_OF_AUDIT_LT | LX_OF_AUDIT_GT) +#define LX_OF_AUDIT_BT (LX_OF_AUDIT_BM | LX_OF_AUDIT_EQ) /* bit test (&=) */ +#define LX_OF_AUDIT_LE (LX_OF_AUDIT_LT | LX_OF_AUDIT_EQ) +#define LX_OF_AUDIT_GE (LX_OF_AUDIT_GT | LX_OF_AUDIT_EQ) +#define LX_OF_AUDIT_ALL (LX_OF_AUDIT_EQ | LX_OF_AUDIT_NE | LX_OF_AUDIT_BM) + +/* + * Audit rule arch specification + * See Linux EM_X86_64 and EM_386 defs. + * -F arch=b64 looks like: 0xc000003e + * -F arch=b32 looks like: 0x40000003 + * If no arch is specified (possible with '-S syslog', '-S all', or '-w <file>') + * the rule applies to both architectures and LX_RF_AUDIT_ARCH is not passed. + */ +#define LX_AUDIT_ARCH64 0xc000003e +#define LX_AUDIT_ARCH32 0x40000003 + +/* + * See Linux include/uapi/linux/audit.h, AUDIT_MESSAGE_TEXT_MAX is 8560. + * The auditd src has MAX_AUDIT_MESSAGE_LENGTH as 8970. + * Until necessary, we'll limit ourselves to a smaller length. + */ +#define LX_AUDIT_MESSAGE_TEXT_MAX 1024 + +typedef struct lx_audit_features { + uint32_t lxaf_version; + uint32_t lxaf_mask; + uint32_t lxaf_features; + uint32_t lxaf_lock; +} lx_audit_features_t; + +typedef struct lx_audit_status { + uint32_t lxas_mask; + uint32_t lxas_enabled; + uint32_t lxas_failure; + uint32_t lxas_pid; + uint32_t lxas_rate_limit; + uint32_t lxas_backlog_limit; + uint32_t lxas_lost; + uint32_t lxas_backlog; + /* LINTED: E_ANONYMOUS_UNION_DECL */ + union { + uint32_t lxas_version; + uint32_t lxas_feature_bitmap; + }; + uint32_t lxas_backlog_wait_time; +} lx_audit_status_t; + +typedef struct lx_audit_rule { + uint32_t lxar_flag; + uint32_t lxar_action; + uint32_t lxar_fld_cnt; + uint32_t lxar_mask[LX_AUDIT_BITMASK_SIZE]; + uint32_t lxar_fields[LX_AUDIT_RULE_MAX_FIELDS]; + uint32_t lxar_values[LX_AUDIT_RULE_MAX_FIELDS]; + uint32_t lxar_fld_flag[LX_AUDIT_RULE_MAX_FIELDS]; + uint32_t lxar_buflen; + /* LINTED: E_ZERO_OR_NEGATIVE_SUBSCRIPT */ + char lxar_buf[0]; +} lx_audit_rule_t; + +/* + * Internal structure for an audit rule. + * Each rule is on the zone's top-level list of all rules (lxast_rules). + * This structure also holds the parsed character string fields from the + * original input rule (lxar_buf) so that we don't need to re-parse that + * data on every match. + */ +typedef struct lx_audit_rule_ent { + list_node_t lxare_link; + lx_audit_rule_t lxare_rule; + char *lxare_buf; + boolean_t lxare_is32bit; + boolean_t lxare_is64bit; + char *lxare_key; +} lx_audit_rule_ent_t; + +typedef enum lx_audit_fail { + LXAE_SILENT, + LXAE_PRINT, /* default */ + LXAE_PANIC /* reboot the zone */ +} lx_audit_fail_t; + +typedef struct lx_audit_record { + list_node_t lxar_link; + uint32_t lxar_type; + char *lxar_msg; +} lx_audit_record_t; + +/* + * Per-zone audit state + * Lazy allocated when first needed. + * + * lxast_rate_limit + * Currently unused, but can be get/set. Linux default is 0. + * lxast_backlog_limit + * The maximum number of outstanding audit events allowed (the Linux kernel + * default is 64). If the limit is reached, lxast_failure determines what + * to do. + * lxast_backlog_wait_time + * Currently unused, but can be get/set. Linux default is 60HZ. + */ +typedef struct lx_audit_state { + lx_audit_fail_t lxast_failure; /* failure behavior */ + uint32_t lxast_rate_limit; + uint32_t lxast_backlog_limit; + uint32_t lxast_backlog_wait_time; + lx_audit_rule_ent_t *lxast_sys32_rulep[LX_NSYSCALLS]; + lx_audit_rule_ent_t *lxast_sys64_rulep[LX_NSYSCALLS]; + kcondvar_t lxast_worker_cv; + kmutex_t lxast_lock; /* protects members below */ + pid_t lxast_pid; /* auditd pid */ + uint64_t lxast_seq; /* event sequence num */ + uint32_t lxast_backlog; /* num of queued events */ + uint32_t lxast_lost; /* num of lost events */ + void *lxast_sock; /* auditd lx_netlink_sock_t */ + boolean_t lxast_exit; /* taskq worker should quit */ + boolean_t lxast_panicing; /* audit forcing reboot? */ + kthread_t *lxast_worker; + list_t lxast_ev_queue; /* audit record queue */ + list_t lxast_rules; /* the list of rules */ +} lx_audit_state_t; + +/* + * Function pointer to netlink function used by audit worker threads to send + * audit messages up to the user-level auditd. + */ +static int (*lx_audit_emit_msg)(void *, uint_t, const char *, uint_t); +static kmutex_t lx_audit_em_lock; /* protects emit_msg above */ + +/* From uts/common/brand/lx/syscall/lx_socket.c */ +extern long lx_socket(int, int, int); +/* From uts/common/syscall/close.c */ +extern int close(int); + +static int +lx_audit_emit_syscall_event(uint_t mtype, void *lxsock, const char *msg) +{ + int err; + + err = lx_audit_emit_msg(lxsock, mtype, msg, LX_AUDIT_MESSAGE_TEXT_MAX); + if (err != 0) + return (err); + err = lx_audit_emit_msg(lxsock, 0, NULL, 0); + return (err); +} + +/* + * Worker thread for audit record output up to user-level auditd. + */ +static void +lx_audit_worker(void *a) +{ + lx_audit_state_t *asp = (lx_audit_state_t *)a; + lx_audit_record_t *rp; + int err; + + VERIFY(asp != NULL); + + mutex_enter(&asp->lxast_lock); + + while (!asp->lxast_exit) { + + if (asp->lxast_backlog == 0 || asp->lxast_sock == NULL || + asp->lxast_pid == 0) { + cv_wait(&asp->lxast_worker_cv, &asp->lxast_lock); + continue; + } + + rp = list_remove_head(&asp->lxast_ev_queue); + asp->lxast_backlog--; + + err = lx_audit_emit_syscall_event(rp->lxar_type, + asp->lxast_sock, rp->lxar_msg); + if (err != ENOMEM && err != ENOSPC) { + kmem_free(rp->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX); + kmem_free(rp, sizeof (lx_audit_record_t)); + } else { + /* + * Put it back on the list, drop the mutex so that + * any other audit-related action could occur (such as + * socket deletion), then wait briefly before retry. + */ + list_insert_head(&asp->lxast_ev_queue, rp); + asp->lxast_backlog++; + mutex_exit(&asp->lxast_lock); + /* wait 1/10th second and try again */ + delay(drv_usectohz(100000)); + mutex_enter(&asp->lxast_lock); + } + } + + /* Leave state ready for new worker when auditing restarted */ + asp->lxast_exit = B_FALSE; + mutex_exit(&asp->lxast_lock); + + thread_exit(); +} + +static void +lx_audit_set_worker(uint32_t pid, void *lxsock, + void (*cb)(void *, boolean_t)) +{ + lx_audit_state_t *asp = ztolxzd(curzone)->lxzd_audit_state; + + ASSERT(asp != NULL); + ASSERT(MUTEX_HELD(&asp->lxast_lock)); + + /* First, stop any existing worker thread */ + while (asp->lxast_sock != NULL) { + mutex_exit(&asp->lxast_lock); + lx_audit_stop_worker(NULL, cb); + mutex_enter(&asp->lxast_lock); + /* unlikely we loop, but handle racing setters */ + } + + VERIFY(asp->lxast_pid == 0); + VERIFY(asp->lxast_sock == NULL); + VERIFY(asp->lxast_exit == B_FALSE); + VERIFY(asp->lxast_worker == NULL); + if (pid != 0) { + /* Start a worker with the new socket */ + asp->lxast_sock = lxsock; + cb(asp->lxast_sock, B_TRUE); + asp->lxast_pid = pid; + asp->lxast_worker = thread_create(NULL, 0, lx_audit_worker, + asp, 0, curzone->zone_zsched, TS_RUN, minclsyspri); + } +} + +static boolean_t +lx_audit_match_val(uint32_t op, uint32_t ruleval, uint32_t curval) +{ + switch (op) { + case LX_OF_AUDIT_LT: + return (curval < ruleval); + case LX_OF_AUDIT_GT: + return (curval > ruleval); + case LX_OF_AUDIT_EQ: + return (curval == ruleval); + case LX_OF_AUDIT_NE: + return (curval != ruleval); + case LX_OF_AUDIT_LE: + return (curval <= ruleval); + case LX_OF_AUDIT_GE: + return (curval >= ruleval); + case LX_OF_AUDIT_BM: /* bit mask - any bit is set? */ + return ((curval & ruleval) != 0); + case LX_OF_AUDIT_BT: /* bit test - all bits must be set */ + return ((curval & ruleval) == ruleval); + default: + break; + } + return (B_FALSE); +} + +/* + * Per the Linux audit.rules(7) man page, a rule with an auid of -1 means the + * process does not have a loginuid. We'll use the absence of a session on the + * process to mimic this behavior. + */ +static uint32_t +lx_audit_get_auid() +{ + sess_t *s; + uint32_t v; + + /* + * A process with no session has: + * s_dev == 0xffffffffffffffff + * s_vp == NULL + * s_cred == NULL + */ + s = curproc->p_sessp; + if (s != NULL && s->s_vp != NULL) { + v = crgetsuid(CRED()); + } else { + v = UINT32_MAX; /* emulate auid of -1 */ + } + + return (v); +} + +/* + * Determine if the rule matches. + * Currently, we're actually just checking LX_RF_AUDIT_LOGINUID (-F auid) + * fields, but as we add support for additional field matching, this function + * should be enhanced. + */ +static boolean_t +lx_audit_syscall_rule_match(lx_audit_rule_ent_t *erp) +{ + uint32_t i, v; + lx_audit_rule_t *rp = &erp->lxare_rule; + + for (i = 0; i < rp->lxar_fld_cnt; i++) { + uint32_t ftype, fval, fop; + + ftype = rp->lxar_fields[i]; + if (ftype != LX_RF_AUDIT_LOGINUID) + continue; + + fop = rp->lxar_fld_flag[i]; + fval = rp->lxar_values[i]; + v = lx_audit_get_auid(); + + if (!lx_audit_match_val(fop, fval, v)) + return (B_FALSE); + } + return (B_TRUE); +} + +static int +lx_audit_write(file_t *fp, const char *msg) +{ + int fflag; + ssize_t count; + size_t nwrite = 0; + struct uio auio; + struct iovec aiov; + + count = strlen(msg); + fflag = fp->f_flag; + + aiov.iov_base = (void *) msg; + aiov.iov_len = count; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_loffset = fp->f_offset; + auio.uio_resid = count; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_llimit = curproc->p_fsz_ctl; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_DEFAULT; + + return (lx_write_common(fp, &auio, &nwrite, B_FALSE)); +} + +/* + * We first try to send the msg out to the zone's logging service, then + * fallback to the zone's console, although in practice, that is unlikely to + * be useful to most users. + */ +static void +lx_audit_log_msg(const char *msg) +{ + int fd; + struct sockaddr_un addr; + struct sonode *so; + uint_t alen; + uint_t sizediff = (sizeof (addr) - sizeof (addr.sun_path)); + file_t *fp; + int err; + vnode_t *vp; + + ttolwp(curthread)->lwp_errno = 0; + fd = lx_socket(LX_AF_UNIX, LX_SOCK_DGRAM, 0); + if (ttolwp(curthread)->lwp_errno != 0) + goto trycons; + + bzero((char *)&addr, sizeof (addr)); + addr.sun_family = AF_UNIX; + (void) strncpy(addr.sun_path, "/dev/log", sizeof (addr.sun_path) - 1); + alen = strlen(addr.sun_path) + 1 + sizediff; + + /* + * We can't use lx_connect here since that expects to be called from + * user-land, so we do the (streamlined) connect ourselves. + */ + if ((so = getsonode(fd, &err, &fp)) == NULL) { + (void) close(fd); + goto trycons; + } + + err = socket_connect(so, (struct sockaddr *)&addr, alen, fp->f_flag, + _SOCONNECT_XPG4_2, CRED()); + + if (err == 0) + err = lx_audit_write(fp, msg); + + releasef(fd); /* release getsonode hold */ + (void) close(fd); + + if (err == 0) + return; + +trycons: + /* "open" the console device */ + if (lookupnameatcred("/dev/console", UIO_SYSSPACE, FOLLOW, NULLVPP, + &vp, NULL, CRED()) != 0) + return; + + if (falloc(vp, FWRITE, &fp, &fd) != 0) { + VN_RELE(vp); + return; + } + mutex_exit(&fp->f_tlock); + setf(fd, fp); + + /* nothing left to do if console write fails */ + (void) lx_audit_write(fp, msg); + close(fd); +} + +static void +lx_audit_fail(lx_audit_state_t *asp, const char *msg) +{ + ASSERT(MUTEX_HELD(&asp->lxast_lock)); + + if (asp->lxast_failure == LXAE_PRINT || + asp->lxast_failure == LXAE_PANIC) { + /* + * Linux can ratelimit the amount of log spam here, so we'll + * do something similar, especially since this could be called + * on many syscall returns if the audit daemon is down or + * not consuming audit records for some other reason. + */ + if (asp->lxast_lost % 100 == 0) + lx_audit_log_msg(msg); + if (asp->lxast_failure == LXAE_PANIC && + !asp->lxast_panicing) { + /* + * Reboot the zone so that no audit records are lost. + * We delay a second to give the zone's logger a chance + * to handle the log message. We have to drop the lock + * here in case the zone's logger itself is making + * syscalls which would be audited, although that + * wouldn't be the ideal configuration. + */ + asp->lxast_panicing = B_TRUE; + mutex_exit(&asp->lxast_lock); + lx_audit_log_msg("audit: panic"); + delay(drv_usectohz(1000000)); + zone_kadmin(A_SHUTDOWN, AD_BOOT, NULL, kcred); + mutex_enter(&asp->lxast_lock); + } + } + asp->lxast_lost++; +} + +/* + * This formats the input string into a format that matches Linux. The input + * strings are small right now (<= PSARGSZ) so for simpicity we're using + * a temporary buffer of adequate size. + */ +static void +lx_audit_fmt_str(char *dst, char *str, uint_t dlen) +{ + char *sp, tmp[100]; + + (void) strlcpy(tmp, str, sizeof (tmp)); + if ((sp = strchr(tmp, ' ')) != NULL) + *sp = '\0'; + + if ((sp = strchr(tmp, '"')) == NULL) { + (void) snprintf(dst, dlen, "\"%s\"", tmp); + } else { + char *p, *dp; + uint_t olen = 0; + + ASSERT(dlen > 2); + dlen -= 2; /* leave room for terminating nul */ + dp = dst; + for (p = str; *p != '\0' && olen < dlen; p++) { + (void) sprintf(dp, "%02x", *p); + dp += 2; + olen += 2; + } + *dp = '\0'; + } +} + +/* + * Format and enqueue a syscall audit record. + */ +static void +lx_audit_syscall_fmt_rcd(int sysnum, uint32_t arch, long ret, + lx_audit_state_t *asp, lx_audit_rule_ent_t *erp, uint64_t seq, + timestruc_t *tsp) +{ + klwp_t *lwp; + proc_t *p; + uint32_t items, sessid; + lx_lwp_data_t *lwpd; + lx_audit_record_t *rp; + cred_t *cr = CRED(); + minor_t minor; + char key[LX_AUDIT_MAX_KEY_LEN + 6]; /* for key="%s" formatting */ + char exe[PSARGSZ * 2 + 8], comm[MAXCOMLEN * 2 + 8]; + + ASSERT(MUTEX_HELD(&asp->lxast_lock)); + + if (asp->lxast_backlog >= asp->lxast_backlog_limit) { + lx_audit_fail(asp, "audit: backlog limit exceeded"); + return; + } + + if (arch == LX_AUDIT_ARCH32) { + items = MIN(4, lx_sysent32[sysnum].sy_narg); + } else { + ASSERT3U(arch, ==, LX_AUDIT_ARCH64); + items = MIN(4, lx_sysent64[sysnum].sy_narg); + } + + lwp = ttolwp(curthread); + lwpd = lwptolxlwp(lwp); + p = curproc; + + /* + * For the key, if no key has been set on the rule, Linux formats the + * string "(null)" (with no quotes - i.e. key=(null)). + */ + if (erp->lxare_key != NULL) { + (void) snprintf(key, sizeof (key), "key=\"%s\"", + erp->lxare_key); + } else { + (void) snprintf(key, sizeof (key), "key=(null)"); + } + + rp = kmem_alloc(sizeof (lx_audit_record_t), KM_NOSLEEP); + if (rp == NULL) { + lx_audit_fail(asp, "audit: no kernel memory"); + return; + } + rp->lxar_msg = kmem_zalloc(LX_AUDIT_MESSAGE_TEXT_MAX, KM_NOSLEEP); + if (rp->lxar_msg == NULL) { + kmem_free(rp, sizeof (lx_audit_record_t)); + lx_audit_fail(asp, "audit: no kernel memory"); + return; + } + rp->lxar_type = LX_AUDIT_SYSCALL; + + mutex_enter(&p->p_splock); + sessid = p->p_sessp->s_sid; + minor = getminor(p->p_sessp->s_dev); + mutex_exit(&p->p_splock); + + mutex_enter(&p->p_lock); + lx_audit_fmt_str(exe, p->p_user.u_psargs, sizeof (exe)); + lx_audit_fmt_str(comm, p->p_user.u_comm, sizeof (comm)); + mutex_exit(&p->p_lock); + + /* + * See Linux audit_log_exit() for how a syscall exit record is + * formatted. + * + * For "arch" value, see Linux AUDIT_ARCH_IA64, AUDIT_ARCH_I386, + * __AUDIT_ARCH_64BIT and __AUDIT_ARCH_LE definitions. + * + * For fsuid/fsgid, see lx_setfsuid/lx_setfsgid for how we handle that. + */ + (void) snprintf(rp->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX, + "audit(%lu.%03lu:%lu): arch=%x syscall=%u " + "success=%s exit=%ld a0=%lu a1=%lu a2=%lu a3=%lu items=%u " + "ppid=%u pid=%u auid=%u uid=%u gid=%u euid=%u suid=%u " + "fsuid=%u egid=%u sgid=%u fsgid=%u tty=pts%u ses=%u " + "comm=%s exe=%s %s", + (uint64_t)tsp->tv_sec, /* zone's timestamp */ + (uint64_t)tsp->tv_nsec / 1000000, + seq, /* serial number */ + arch, /* arch */ + sysnum, /* syscall */ + (lwp->lwp_errno == 0 ? "yes" : "no"), /* success */ + ret, /* exit */ + lwpd->br_syscall_args[0], /* a0 */ + lwpd->br_syscall_args[1], /* a1 */ + lwpd->br_syscall_args[2], /* a2 */ + lwpd->br_syscall_args[3], /* a3 */ + items, /* items */ + lx_lwp_ppid(lwp, NULL, NULL), /* ppid */ + (lwpd->br_pid == curzone->zone_proc_initpid ? 1 : lwpd->br_pid), + lx_audit_get_auid(), /* auid */ + crgetruid(cr), /* uid */ + crgetrgid(cr), /* gid */ + crgetuid(cr), /* euid */ + crgetsuid(cr), /* saved uid */ + crgetuid(cr), /* fsuid */ + crgetgid(cr), /* egid */ + crgetsgid(cr), /* saved gid */ + crgetgid(cr), /* fsgid */ + minor, /* tty */ + sessid, /* ses */ + comm, /* comm */ + exe, /* exe */ + key); /* key="VAL" */ + + list_insert_tail(&asp->lxast_ev_queue, rp); + if (asp->lxast_backlog == 0) + cv_signal(&asp->lxast_worker_cv); + asp->lxast_backlog++; +} + +/* + * Get the next rule in the list that is generally applicable to the given + * syscall. + */ +static lx_audit_rule_ent_t * +lx_audit_next_applicable_rule(int sysnum, uint32_t arch, lx_audit_state_t *asp, + lx_audit_rule_ent_t *erp) +{ + ASSERT(MUTEX_HELD(&asp->lxast_lock)); + + for (erp = list_next(&asp->lxast_rules, erp); + erp != NULL; + erp = list_next(&asp->lxast_rules, erp)) { + lx_audit_rule_t *r = &erp->lxare_rule; + + /* Determine if the rule in the list has the same ARCH. */ + if (arch == LX_AUDIT_ARCH32 && !erp->lxare_is32bit) + continue; + if (arch == LX_AUDIT_ARCH64 && !erp->lxare_is64bit) + continue; + + /* Determine if this rule applies to the relevant syscall. */ + if (BT_TEST32(r->lxar_mask, sysnum)) + return (erp); + } + + return (NULL); +} + +void +lx_audit_syscall_exit(int sysnum, long ret) +{ + lx_zone_data_t *lxzd = ztolxzd(curzone); + lx_audit_state_t *asp; + uint64_t seq; + lx_audit_rule_ent_t *erp; + timestruc_t ts; + uint32_t arch; + + if (lxzd->lxzd_audit_enabled == LXAE_DISABLED) + return; + + if (sysnum >= LX_NSYSCALLS) + return; + + asp = lxzd->lxzd_audit_state; + ASSERT(asp != NULL); + + if (get_udatamodel() == DATAMODEL_ILP32) { + arch = LX_AUDIT_ARCH32; + } else { + ASSERT(get_udatamodel() == DATAMODEL_LP64); + arch = LX_AUDIT_ARCH64; + } + + /* + * Fast top-level check to see if we're auditing this syscall. + * We don't take the mutex for this since there is no need. + */ + if (arch == LX_AUDIT_ARCH32) { + if (asp->lxast_sys32_rulep[sysnum] == NULL) + return; + } else { + if (asp->lxast_sys64_rulep[sysnum] == NULL) + return; + } + + mutex_enter(&asp->lxast_lock); + if (arch == LX_AUDIT_ARCH32) { + erp = asp->lxast_sys32_rulep[sysnum]; + } else { + erp = asp->lxast_sys64_rulep[sysnum]; + } + + if (erp == NULL) { + /* Hit a race and the syscall is no longer being audited */ + mutex_exit(&asp->lxast_lock); + return; + } + + /* + * All of the records in the set (i.e. same serial number) have + * the same timestamp. + */ + seq = asp->lxast_seq++; + gethrestime(&ts); + ts.tv_sec -= curzone->zone_boot_time; + + /* + * We have to determine if the first rule associated with the syscall, + * or any subsequent applicable rules, match. + * + * The first rule associated with the syscall may (or may not) match, + * but there can be additional rules which might also match. The first + * possible rule is always the one that enables the syscall auditing, + * but we also have to iterate to the end of the list to see if any + * other rules are applicable to this syscall. + */ + for (; erp != NULL; + erp = lx_audit_next_applicable_rule(sysnum, arch, asp, erp)) { + if (!lx_audit_syscall_rule_match(erp)) + continue; + + lx_audit_syscall_fmt_rcd(sysnum, arch, ret, asp, erp, seq, &ts); + } + + /* + * TODO: Currently we only output a single SYSCALL record. + * Real Linux emits a set of audit records for a syscall exit event + * (e.g. for an unlink syscall): + * type=SYSCALL + * type=CWD + * type=PATH - one for the parent dir + * type=PATH - one for the actual file unlinked + * type=PROCTITLE - (this one seems worthless) + * followed by an AUDIT_EOE message (which seems to be ignored). + * + * For syscalls that don't change files in the file system (e.g. ioctl) + * there are no PATH records. + */ + mutex_exit(&asp->lxast_lock); +} + +/* + * Determine which syscalls this rule applies to and setup a fast pointer for + * the syscall to enable it's rule match. + * + * We have to look at each bit and translate the external syscall bits into the + * internal syscall number. + */ +static void +lx_enable_syscall_rule(lx_audit_state_t *asp, lx_audit_rule_t *rulep, + lx_audit_rule_ent_t *rp) +{ + uint_t sysnum; + + ASSERT(MUTEX_HELD(&asp->lxast_lock)); + + for (sysnum = 0; sysnum < LX_NSYSCALLS; sysnum++) { + if (BT_TEST32(rulep->lxar_mask, sysnum)) { + if (rp->lxare_is32bit) { + if (asp->lxast_sys32_rulep[sysnum] == NULL) + asp->lxast_sys32_rulep[sysnum] = rp; + } + if (rp->lxare_is64bit) { + if (asp->lxast_sys64_rulep[sysnum] == NULL) + asp->lxast_sys64_rulep[sysnum] = rp; + } + } + } +} + +int +lx_audit_append_rule(void *r, uint_t datalen) +{ + lx_audit_rule_t *rulep = (lx_audit_rule_t *)r; + char *datap; + uint_t i; + lx_audit_rule_ent_t *rp; + lx_audit_state_t *asp; + boolean_t is_32bit = B_TRUE, is_64bit = B_TRUE, sys_found = B_FALSE; + char *tdp; + char key[LX_AUDIT_MAX_KEY_LEN + 1]; + uint32_t tlen; + + if (ztolxzd(curproc->p_zone)->lxzd_audit_enabled == LXAE_LOCKED) + return (EPERM); + + if (datalen < sizeof (lx_audit_rule_t)) + return (EINVAL); + datalen -= sizeof (lx_audit_rule_t); + + if (rulep->lxar_fld_cnt > LX_AUDIT_RULE_MAX_FIELDS) + return (EINVAL); + + if (rulep->lxar_buflen > datalen) + return (EINVAL); + + datap = rulep->lxar_buf; + + /* + * First check the rule to determine if we support the flag, actions, + * and all of the fields specified (since currently, our rule support + * is incomplete). + * + * NOTE: We currently only handle syscall exit rules. + */ + if (rulep->lxar_flag != LX_AUDIT_FILTER_EXIT || + rulep->lxar_action != LX_AUDIT_ACT_ALWAYS) + return (ENOTSUP); + if (rulep->lxar_fld_cnt > LX_AUDIT_RULE_MAX_FIELDS) + return (EINVAL); + tdp = datap; + tlen = rulep->lxar_buflen; + key[0] = '\0'; + for (i = 0; i < rulep->lxar_fld_cnt; i++) { + uint32_t ftype, fval, fop; + + fop = rulep->lxar_fld_flag[i]; + ftype = rulep->lxar_fields[i]; + fval = rulep->lxar_values[i]; + DTRACE_PROBE3(lx__audit__field, uint32_t, fop, + uint32_t, ftype, uint32_t, fval); + + if (ftype == LX_RF_AUDIT_ARCH) { + if (fop != LX_OF_AUDIT_EQ) + return (ENOTSUP); + if (!is_32bit || !is_64bit) + return (EINVAL); + if (fval == LX_AUDIT_ARCH64) { + is_32bit = B_FALSE; + } else if (fval == LX_AUDIT_ARCH32) { + is_64bit = B_FALSE; + } else { + return (ENOTSUP); + } + } else if (ftype == LX_RF_AUDIT_LOGINUID) { + if ((fop & LX_OF_AUDIT_ALL) == 0) + return (ENOTSUP); + } else if (ftype == LX_RF_AUDIT_FILTERKEY) { + if (fop != LX_OF_AUDIT_EQ) + return (ENOTSUP); + if (tlen < fval || fval > LX_AUDIT_MAX_KEY_LEN) + return (EINVAL); + if (key[0] != '\0') + return (EINVAL); + /* while we're here, save the parsed key */ + bcopy(tdp, key, fval); + key[fval] = '\0'; + tdp += fval; + tlen -= fval; + } else { + /* + * TODO: expand the support for additional Linux field + * options. + */ + return (ENOTSUP); + } + } + for (i = 0; i < LX_NSYSCALLS; i++) { + if (BT_TEST32(rulep->lxar_mask, i)) { + /* At least one syscall enabled in this mask entry */ + sys_found = B_TRUE; + break; + } + } + if (!sys_found) + return (ENOTSUP); + + asp = ztolxzd(curzone)->lxzd_audit_state; + ASSERT(asp != NULL); + + /* + * We have confirmed that we can handle the rule specified. + * Before taking the lock, allocate and setup the internal rule struct. + */ + rp = kmem_alloc(sizeof (lx_audit_rule_ent_t), KM_SLEEP); + bcopy(rulep, &rp->lxare_rule, sizeof (lx_audit_rule_t)); + rp->lxare_buf = kmem_alloc(rulep->lxar_buflen, KM_SLEEP); + bcopy(datap, rp->lxare_buf, rulep->lxar_buflen); + rp->lxare_is32bit = is_32bit; + rp->lxare_is64bit = is_64bit; + if (key[0] == '\0') { + rp->lxare_key = NULL; + } else { + int slen = strlen(key); + rp->lxare_key = kmem_alloc(slen + 1, KM_SLEEP); + (void) strlcpy(rp->lxare_key, key, slen + 1); + } + + mutex_enter(&asp->lxast_lock); + /* Save the rule on our top-level list. */ + list_insert_tail(&asp->lxast_rules, rp); + /* Enable tracing on the relevant syscalls. */ + lx_enable_syscall_rule(asp, rulep, rp); + mutex_exit(&asp->lxast_lock); + + return (0); +} + +int +lx_audit_delete_rule(void *r, uint_t datalen) +{ + lx_audit_rule_t *rulep = (lx_audit_rule_t *)r; + char *datap; + uint_t sysnum; + lx_audit_state_t *asp; + lx_audit_rule_ent_t *erp; + + if (ztolxzd(curproc->p_zone)->lxzd_audit_enabled == LXAE_LOCKED) + return (EPERM); + + if (datalen < sizeof (lx_audit_rule_t)) + return (EINVAL); + datalen -= sizeof (lx_audit_rule_t); + + if (rulep->lxar_fld_cnt > LX_AUDIT_RULE_MAX_FIELDS) + return (EINVAL); + + if (rulep->lxar_buflen > datalen) + return (EINVAL); + + datap = rulep->lxar_buf; + + asp = ztolxzd(curzone)->lxzd_audit_state; + ASSERT(asp != NULL); + + mutex_enter(&asp->lxast_lock); + + /* Find the matching rule from the rule list */ + for (erp = list_head(&asp->lxast_rules); + erp != NULL; + erp = list_next(&asp->lxast_rules, erp)) { + lx_audit_rule_t *r; + uint_t i; + boolean_t mtch; + + r = &erp->lxare_rule; + if (rulep->lxar_flag != r->lxar_flag) + continue; + if (rulep->lxar_action != r->lxar_action) + continue; + if (rulep->lxar_fld_cnt != r->lxar_fld_cnt) + continue; + for (i = 0, mtch = B_TRUE; i < LX_AUDIT_BITMASK_SIZE; i++) { + if (rulep->lxar_mask[i] != r->lxar_mask[i]) { + mtch = B_FALSE; + break; + } + } + if (!mtch) + continue; + + for (i = 0, mtch = B_TRUE; i < rulep->lxar_fld_cnt; i++) { + if (rulep->lxar_fields[i] != r->lxar_fields[i] || + rulep->lxar_values[i] != r->lxar_values[i] || + rulep->lxar_fld_flag[i] != r->lxar_fld_flag[i]) { + mtch = B_FALSE; + break; + } + } + if (!mtch) + continue; + if (rulep->lxar_buflen != r->lxar_buflen) + continue; + if (bcmp(datap, erp->lxare_buf, r->lxar_buflen) == 0) + break; + } + + /* There is no matching rule */ + if (erp == NULL) { + mutex_exit(&asp->lxast_lock); + return (ENOENT); + } + + /* + * Disable each relevant syscall enabling. + */ + for (sysnum = 0; sysnum < LX_NSYSCALLS; sysnum++) { + if (BT_TEST32(rulep->lxar_mask, sysnum)) { + /* + * If this was the first rule on the list for the + * given syscall (likely, since usually only one rule + * per syscall) then either disable tracing for that + * syscall, or point to the next applicable rule in the + * list. + */ + if (erp->lxare_is32bit) { + if (asp->lxast_sys32_rulep[sysnum] == erp) { + asp->lxast_sys32_rulep[sysnum] = + lx_audit_next_applicable_rule( + sysnum, LX_AUDIT_ARCH32, asp, erp); + } + } + if (erp->lxare_is64bit) { + if (asp->lxast_sys64_rulep[sysnum] == erp) { + asp->lxast_sys64_rulep[sysnum] = + lx_audit_next_applicable_rule( + sysnum, LX_AUDIT_ARCH64, asp, erp); + } + } + } + } + + /* Remove the rule from the top-level list */ + list_remove(&asp->lxast_rules, erp); + + kmem_free(erp->lxare_buf, erp->lxare_rule.lxar_buflen); + if (erp->lxare_key != NULL) + kmem_free(erp->lxare_key, strlen(erp->lxare_key) + 1); + kmem_free(erp, sizeof (lx_audit_rule_ent_t)); + + mutex_exit(&asp->lxast_lock); + return (0); +} + +void +lx_audit_emit_user_msg(uint_t mtype, uint_t len, char *datap) +{ + lx_zone_data_t *lxzd = ztolxzd(curzone); + lx_audit_state_t *asp; + lx_audit_record_t *rp; + timestruc_t ts; + uint_t sessid; + proc_t *p = curproc; + lx_lwp_data_t *lwpd = lwptolxlwp(ttolwp(curthread)); + uint_t prelen, alen; + char msg[LX_AUDIT_MESSAGE_TEXT_MAX]; + + /* + * For user messages, auditing may not actually be initialized. If not, + * just return. + */ + if (lxzd->lxzd_audit_enabled == LXAE_DISABLED || + lxzd->lxzd_audit_state == NULL) + return; + + if (len >= sizeof (msg)) + len = sizeof (msg) - 1; + + mutex_enter(&p->p_splock); + sessid = p->p_sessp->s_sid; + mutex_exit(&p->p_splock); + + asp = lxzd->lxzd_audit_state; + ASSERT(asp != NULL); + + mutex_enter(&asp->lxast_lock); + + if (asp->lxast_backlog >= asp->lxast_backlog_limit) { + lx_audit_fail(asp, "audit: backlog limit exceeded"); + mutex_exit(&asp->lxast_lock); + return; + } + + rp = kmem_alloc(sizeof (lx_audit_record_t), KM_NOSLEEP); + if (rp == NULL) { + lx_audit_fail(asp, "audit: no kernel memory"); + mutex_exit(&asp->lxast_lock); + return; + } + rp->lxar_msg = kmem_zalloc(LX_AUDIT_MESSAGE_TEXT_MAX, KM_NOSLEEP); + if (rp->lxar_msg == NULL) { + lx_audit_fail(asp, "audit: no kernel memory"); + mutex_exit(&asp->lxast_lock); + kmem_free(rp, sizeof (lx_audit_record_t)); + return; + } + rp->lxar_type = mtype; + bcopy(datap, msg, len); + msg[len] = '\0'; + + gethrestime(&ts); + ts.tv_sec -= curzone->zone_boot_time; + + (void) snprintf(rp->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX, + "audit(%lu.%03lu:%lu): pid=%u uid=%u auid=%u ses=%u msg=\'", + (uint64_t)ts.tv_sec, /* zone's timestamp */ + (uint64_t)ts.tv_nsec / 1000000, + asp->lxast_seq++, /* serial number */ + (lwpd->br_pid == curzone->zone_proc_initpid ? 1 : lwpd->br_pid), + crgetruid(CRED()), /* uid */ + lx_audit_get_auid(), /* auid */ + sessid); /* ses */ + + prelen = strlen(rp->lxar_msg); + alen = LX_AUDIT_MESSAGE_TEXT_MAX - prelen - 2; + (void) strlcat(rp->lxar_msg + prelen, msg, alen); + (void) strlcat(rp->lxar_msg, "\'", LX_AUDIT_MESSAGE_TEXT_MAX); + + list_insert_tail(&asp->lxast_ev_queue, rp); + if (asp->lxast_backlog == 0) + cv_signal(&asp->lxast_worker_cv); + asp->lxast_backlog++; + mutex_exit(&asp->lxast_lock); +} + +void +lx_audit_list_rules(void *reply, + void (*cb)(void *, void *, uint_t, void *, uint_t)) +{ + lx_audit_state_t *asp; + lx_audit_rule_ent_t *rp; + + asp = ztolxzd(curzone)->lxzd_audit_state; + ASSERT(asp != NULL); + + /* + * Output the rule list + */ + mutex_enter(&asp->lxast_lock); + for (rp = list_head(&asp->lxast_rules); rp != NULL; + rp = list_next(&asp->lxast_rules, rp)) { + cb(reply, &rp->lxare_rule, sizeof (lx_audit_rule_t), + rp->lxare_buf, rp->lxare_rule.lxar_buflen); + } + mutex_exit(&asp->lxast_lock); +} + +void +lx_audit_get_feature(void *reply, void (*cb)(void *, void *, uint_t)) +{ + lx_audit_features_t af; + + af.lxaf_version = LX_AUDIT_FEATURE_VERSION; + af.lxaf_mask = 0xffffffff; + af.lxaf_features = 0; + af.lxaf_lock = 0; + + cb(reply, &af, sizeof (af)); +} + +void +lx_audit_get(void *reply, void (*cb)(void *, void *, uint_t)) +{ + lx_audit_status_t status; + lx_zone_data_t *lxzd; + lx_audit_state_t *asp; + + lxzd = ztolxzd(curproc->p_zone); + asp = lxzd->lxzd_audit_state; + ASSERT(asp != NULL); + + bzero(&status, sizeof (status)); + + mutex_enter(&asp->lxast_lock); + status.lxas_enabled = lxzd->lxzd_audit_enabled; + status.lxas_failure = asp->lxast_failure; + status.lxas_pid = asp->lxast_pid; + status.lxas_rate_limit = asp->lxast_rate_limit; + status.lxas_backlog_limit = asp->lxast_backlog_limit; + status.lxas_lost = asp->lxast_lost; + status.lxas_backlog = asp->lxast_backlog; + status.lxas_backlog_wait_time = asp->lxast_backlog_wait_time; + status.lxas_feature_bitmap = LX_AUDIT_FEATURE_ALL; + mutex_exit(&asp->lxast_lock); + + cb(reply, &status, sizeof (status)); +} + +int +lx_audit_set(void *lxsock, void *s, uint_t datalen, + void (*cb)(void *, boolean_t)) +{ + lx_audit_status_t *statusp = (lx_audit_status_t *)s; + lx_zone_data_t *lxzd; + lx_audit_state_t *asp; + + /* + * Unfortunately, some user-level code does not send down a full + * lx_audit_status_t structure in the message (e.g. this occurs on + * CentOS7). Only the structure up to, but not including, the embedded + * union is being sent in. This appears to be a result of the user-level + * code being built for older versions of the kernel. To handle this, + * we have to subtract the last 8 bytes from the size in order to + * accomodate this code. We'll revalidate with the full size if + * LX_AUDIT_STATUS_BACKLOG_WAIT_TIME were to be set in the mask. + */ + if (datalen < sizeof (lx_audit_status_t) - 8) + return (EINVAL); + + lxzd = ztolxzd(curproc->p_zone); + asp = lxzd->lxzd_audit_state; + ASSERT(asp != NULL); + + /* Once the config is locked, we only allow changing the auditd pid */ + mutex_enter(&asp->lxast_lock); + if (lxzd->lxzd_audit_enabled == LXAE_LOCKED && + (statusp->lxas_mask & ~LX_AUDIT_STATUS_PID)) { + mutex_exit(&asp->lxast_lock); + return (EPERM); + } + + if (statusp->lxas_mask & LX_AUDIT_STATUS_FAILURE) { + switch (statusp->lxas_failure) { + case LXAE_SILENT: + case LXAE_PRINT: + case LXAE_PANIC: + asp->lxast_failure = statusp->lxas_failure; + break; + default: + mutex_exit(&asp->lxast_lock); + return (EINVAL); + } + } + if (statusp->lxas_mask & LX_AUDIT_STATUS_PID) { + /* + * The process that sets the pid is the daemon, so this is the + * socket we'll write audit records out to. + */ + lx_audit_set_worker(statusp->lxas_pid, lxsock, cb); + } + if (statusp->lxas_mask & LX_AUDIT_STATUS_RATE_LIMIT) { + asp->lxast_rate_limit = statusp->lxas_rate_limit; + } + if (statusp->lxas_mask & LX_AUDIT_STATUS_BACKLOG_LIMIT) { + asp->lxast_backlog_limit = statusp->lxas_backlog_limit; + } + if (statusp->lxas_mask & LX_AUDIT_STATUS_BACKLOG_WAIT_TIME) { + /* + * See the comment above. We have to revalidate the full struct + * size since we previously only validated for a shorter struct. + */ + if (datalen < sizeof (lx_audit_status_t)) { + mutex_exit(&asp->lxast_lock); + return (EINVAL); + } + asp->lxast_backlog_wait_time = statusp->lxas_backlog_wait_time; + } + if (statusp->lxas_mask & LX_AUDIT_STATUS_LOST) { + asp->lxast_lost = statusp->lxas_lost; + } + + if (statusp->lxas_mask & LX_AUDIT_STATUS_ENABLED) { + switch (statusp->lxas_enabled) { + case 0: + lxzd->lxzd_audit_enabled = LXAE_DISABLED; + break; + case 1: + lxzd->lxzd_audit_enabled = LXAE_ENABLED; + break; + case 2: + lxzd->lxzd_audit_enabled = LXAE_LOCKED; + break; + default: + mutex_exit(&asp->lxast_lock); + return (EINVAL); + } + } + mutex_exit(&asp->lxast_lock); + + return (0); +} + +void +lx_audit_stop_worker(void *s, void (*cb)(void *, boolean_t)) +{ + lx_audit_state_t *asp = ztolxzd(curzone)->lxzd_audit_state; + kt_did_t tid = 0; + + ASSERT(asp != NULL); + mutex_enter(&asp->lxast_lock); + if (s == NULL) { + s = asp->lxast_sock; + } else { + VERIFY(s == asp->lxast_sock); + } + asp->lxast_sock = NULL; + asp->lxast_pid = 0; + if (asp->lxast_worker != NULL) { + tid = asp->lxast_worker->t_did; + asp->lxast_worker = NULL; + asp->lxast_exit = B_TRUE; + cv_signal(&asp->lxast_worker_cv); + } + if (s != NULL) + cb(s, B_FALSE); + mutex_exit(&asp->lxast_lock); + + if (tid != 0) + thread_join(tid); +} + +/* + * Called when audit netlink message received, in order to perform lazy + * allocation of audit state for the zone. We also perform the one-time step to + * cache the netlink callback used by the audit worker thread to send messages + * up to the auditd. + */ +void +lx_audit_init(int (*cb)(void *, uint_t, const char *, uint_t)) +{ + lx_zone_data_t *lxzd = ztolxzd(curzone); + lx_audit_state_t *asp; + + mutex_enter(&lxzd->lxzd_lock); + + if (lxzd->lxzd_audit_state != NULL) { + mutex_exit(&lxzd->lxzd_lock); + return; + } + + asp = kmem_zalloc(sizeof (lx_audit_state_t), KM_SLEEP); + + mutex_init(&asp->lxast_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&asp->lxast_worker_cv, NULL, CV_DEFAULT, NULL); + list_create(&asp->lxast_ev_queue, sizeof (lx_audit_record_t), + offsetof(lx_audit_record_t, lxar_link)); + list_create(&asp->lxast_rules, sizeof (lx_audit_rule_ent_t), + offsetof(lx_audit_rule_ent_t, lxare_link)); + asp->lxast_failure = LXAE_PRINT; + asp->lxast_backlog_limit = LX_AUDIT_DEF_BACKLOG_LIMIT; + asp->lxast_backlog_wait_time = LX_AUDIT_DEF_WAIT_TIME; + + lxzd->lxzd_audit_state = asp; + + mutex_exit(&lxzd->lxzd_lock); + + mutex_enter(&lx_audit_em_lock); + if (lx_audit_emit_msg == NULL) + lx_audit_emit_msg = cb; + mutex_exit(&lx_audit_em_lock); +} + +/* + * Called when netlink module is unloading so that we can clear the cached + * netlink callback used by the audit worker thread to send messages up to the + * auditd. + */ +void +lx_audit_cleanup(void) +{ + mutex_enter(&lx_audit_em_lock); + lx_audit_emit_msg = NULL; + mutex_exit(&lx_audit_em_lock); +} + +/* + * Called when the zone is being destroyed, not when auditing is being disabled. + * Note that zsched has already exited and any lxast_worker thread has exited. + */ +void +lx_audit_fini(zone_t *zone) +{ + lx_zone_data_t *lxzd = ztolxzd(zone); + lx_audit_state_t *asp; + lx_audit_record_t *rp; + lx_audit_rule_ent_t *erp; + + ASSERT(MUTEX_HELD(&lxzd->lxzd_lock)); + + if ((asp = lxzd->lxzd_audit_state) == NULL) + return; + + mutex_enter(&asp->lxast_lock); + + VERIFY(asp->lxast_worker == NULL); + + rp = list_remove_head(&asp->lxast_ev_queue); + while (rp != NULL) { + kmem_free(rp->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX); + kmem_free(rp, sizeof (lx_audit_record_t)); + rp = list_remove_head(&asp->lxast_ev_queue); + } + + list_destroy(&asp->lxast_ev_queue); + asp->lxast_backlog = 0; + asp->lxast_pid = 0; + + erp = list_remove_head(&asp->lxast_rules); + while (erp != NULL) { + kmem_free(erp->lxare_buf, erp->lxare_rule.lxar_buflen); + if (erp->lxare_key != NULL) + kmem_free(erp->lxare_key, strlen(erp->lxare_key) + 1); + kmem_free(erp, sizeof (lx_audit_rule_ent_t)); + erp = list_remove_head(&asp->lxast_rules); + } + list_destroy(&asp->lxast_rules); + + mutex_exit(&asp->lxast_lock); + + cv_destroy(&asp->lxast_worker_cv); + mutex_destroy(&asp->lxast_lock); + lxzd->lxzd_audit_state = NULL; + kmem_free(asp, sizeof (lx_audit_state_t)); +} + +/* + * Audit initialization/cleanup when lx brand module is loaded and + * unloaded. + */ +void +lx_audit_ld() +{ + mutex_init(&lx_audit_em_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +lx_audit_unld() +{ + mutex_destroy(&lx_audit_em_lock); +} |