summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/brand/lx/os/lx_audit.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/brand/lx/os/lx_audit.c')
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_audit.c1604
1 files changed, 1604 insertions, 0 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_audit.c b/usr/src/uts/common/brand/lx/os/lx_audit.c
new file mode 100644
index 0000000000..6e522e6d8d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_audit.c
@@ -0,0 +1,1604 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+/*
+ * The Linux auditing system provides a fairly complex rule-based syntax
+ * for configuring what actions are to be audited. The user-level details
+ * are generally described in the Linux audit.rules(7), auditctl(8), and
+ * auditd(8) man pages. The user/kernel netlink API does not seem to be
+ * documented. The Linux kernel source and the user-level auditd source must
+ * be used to understand the interface we have to emulate. The relevant Linux
+ * source files are:
+ * include/uapi/linux/audit.h
+ * include/linux/audit.h
+ * kernel/audit.c
+ *
+ * The lx_netlink module implements the API used for getting or changing the
+ * audit configuration. For rule-oriented operations (list, append, delete),
+ * an lx_audit_rule_t structure (or sequence when listing) is passed in/out of
+ * the kernel. The netlink code calls into the lx_audit_append_rule or
+ * lx_audit_delete_rule functions here to perform the relevant operation.
+ * Within the lx_audit_rule_t structure, each member has the following
+ * meaning:
+ * lxar_flag: corresponds to user-level list (e.g. "exit" for syscall return)
+ * lxar_action: user-level action (e.g. "always")
+ * lxar_fld_cnt: number of fields specified in lxar_fields, lxar_values, and
+ * lxar_fld_flag arrays
+ * lxar_mask: syscall number bitmask the rule applies to (bit position in
+ * the array corresponds to the syscall number)
+ * lxar_fields: array of fields in the rule (i.e. each -F on user-level rule).
+ * A numeric code (e.g. LX_RF_AUDIT_ARCH) is assigned to each
+ * possible field.
+ * lxar_values: array of numeric field values (e.g. the internal b64 value on
+ * the -F AUDIT_ARCH=b64 rule)
+ * lxar_fld_flag: array of field operators (e.g. the '=' operator on the
+ * -F AUDIT_ARCH=b64 rule)
+ * lxar_buflen: length of the buffer data immediately following
+ * lxar_buf: A variable amount of additional field string data. Non-numeric
+ * field values are passed here. For example, the string associated
+ * with the '-F key=...' or '-F path=...' rules. For string values,
+ * the corresponding lxar_values entry is the length of the string.
+ * The strings in lxar_buf are not C strings because they are not
+ * NULL terminated. The character data is pulled out of lxar_buf
+ * in chunks specified by the value and the pointer into the buf
+ * is advanced accordingly.
+ *
+ * There are two primary kinds of actions which we are currently interested in
+ * auditing;
+ * 1) system call return
+ * this corresponds to user-level "exit" rule actions
+ * 2) file system related actions
+ * this corresponds to user-level file system watch rules (-w)
+ *
+ * Only system call return is currently implemented, and only a very limited
+ * subset of all of the possible rule selection behavior.
+ *
+ * The Linux audit rule syntax defines that all selection criteria within a
+ * rule is ANDed together before an audit record is created. However, multiple
+ * rules can be defined for a specific syscall. For example, this user-level
+ * syntax defines two different rules for the "open" syscall:
+ * -a always,exit -F arch=b64 -S open -F auid>=1000 -F key=user-open
+ * -a always,exit -F arch=b64 -S open -F auid=0 -F key=priv-open
+ * The first rule would cause an audit record to be created when an "open"
+ * syscall returns and the syscall was performed by a process with a
+ * loginuid >= 1000. The key added to that audit record would be "user-open".
+ * The second rule would create an audit record if the loginuid was 0 and the
+ * record's key would be "priv-open".
+ *
+ * When auditing is enabled for a syscall return, we have to look at multiple
+ * rules and create an audit record for each rule that matches the selection
+ * criteria.
+ *
+ * Although the current implementation is limited, the overall structure is
+ * designed to be enhanced as more auditing support is added over time.
+ *
+ * By default, auditing is not enabled for a zone and no internal audit data
+ * exists. When the first netlink audit msg is received, the zone's audit state
+ * (lx_audit_state_t) is allocated (via lx_audit_init) and attached to the
+ * zone's lx brand-specific data (lxzd_audit_state). Once allocated, the audit
+ * data will persist until the zone halts.
+ *
+ * Audit records are enqueued onto the lxast_ev_queue and a worker thread
+ * (lx_audit_worker) is responsible for dequeueing the audit records and
+ * sending them up to the user-level auditd.
+ *
+ * Audit rules are stored in the lxast_rules list. This is an internal list
+ * consisting of elements of type lx_audit_rule_ent_t. Each element contains
+ * the input rule (lxare_rule) along with some additional data parsed out of
+ * the rule when it is appended (currently only the arch and key).
+ *
+ * When auditing is enabled for a syscall, the appropriate entry in the
+ * lxast_sys64_rulep (or lxast_sys32_rulep) array will point to the first
+ * rule that is applicable to the syscall. When that syscall returns, rule
+ * matching proceeds from that rule to the end of the rule list.
+ *
+ * New rules are always appended at the end of the list and Linux expects that
+ * rules are matched in order.
+ *
+ * If the rule list ever gets large enough that a linear search, anchored off
+ * the syscall pointer, becomes a performance bottleneck, then we'll have to
+ * explore alternate implementations. However, use of auditing is not that
+ * common to begin with, and most syscalls are typically not audited, so as
+ * long as the number of rules is in the order of tens, then the current
+ * implementation should be fine.
+ *
+ * When a rule is deleted, all associated syscall entries (lxast_sys64_rulep or
+ * lxast_sys32_rulep) are cleared, then the rule list is searched to see if
+ * there are any remaining rules which are applicable to the syscall(s). If so,
+ * pointers are reestablished in the relevant lxast_sys64_rulep (or 32) array.
+ */
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/ddi.h>
+#include <sys/zone.h>
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sunddi.h>
+#include <sys/strsun.h>
+#include <sys/tihdr.h>
+#include <sys/sockio.h>
+#include <sys/brand.h>
+#include <sys/debug.h>
+#include <sys/ucred.h>
+#include <sys/session.h>
+#include <sys/lx_types.h>
+#include <sys/lx_audit.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_socket.h>
+#include <sys/bitmap.h>
+#include <sockcommon.h>
+
+/*
+ * NOTE(review): presumably the feature-set version reported to user-level;
+ * its consumer is not visible in this part of the file - confirm.
+ */
+#define LX_AUDIT_FEATURE_VERSION 1
+
+/*
+ * Audit status mask values (lxas_mask in structure defined below)
+ * See Linux include/uapi/linux/audit.h
+ */
+#define LX_AUDIT_STATUS_ENABLED 0x001
+#define LX_AUDIT_STATUS_FAILURE 0x002
+#define LX_AUDIT_STATUS_PID 0x004
+#define LX_AUDIT_STATUS_RATE_LIMIT 0x008
+#define LX_AUDIT_STATUS_BACKLOG_LIMIT 0x010
+#define LX_AUDIT_STATUS_BACKLOG_WAIT_TIME 0x020
+#define LX_AUDIT_STATUS_LOST 0x040
+
+/*
+ * Audit features
+ * See Linux include/uapi/linux/audit.h
+ */
+#define LX_AUDIT_F_BACKLOG_LIMIT 0x001
+#define LX_AUDIT_F_BACKLOG_WAIT_TIME 0x002
+#define LX_AUDIT_F_EXECUTABLE_PATH 0x004
+#define LX_AUDIT_F_EXCLUDE_EXTEND 0x008
+#define LX_AUDIT_F_SESSIONID_FILTER 0x010
+#define LX_AUDIT_F_LOST_RESET 0x020
+#define LX_AUDIT_F_FILTER_FS 0x040
+
+/* The union of every LX_AUDIT_F_* feature bit defined above. */
+#define LX_AUDIT_FEATURE_ALL (LX_AUDIT_F_BACKLOG_LIMIT | \
+ LX_AUDIT_F_BACKLOG_WAIT_TIME | LX_AUDIT_F_EXECUTABLE_PATH | \
+ LX_AUDIT_F_EXCLUDE_EXTEND | LX_AUDIT_F_SESSIONID_FILTER | \
+ LX_AUDIT_F_LOST_RESET | LX_AUDIT_F_FILTER_FS)
+
+
+/* Audit events */
+#define LX_AUDIT_SYSCALL 1300 /* syscall */
+#define LX_AUDIT_PATH 1302 /* file path */
+#define LX_AUDIT_CONFIG_CHANGE 1305 /* configuration change */
+#define LX_AUDIT_CWD 1307 /* current working directory */
+#define LX_AUDIT_EXECVE 1309 /* exec args */
+#define LX_AUDIT_EOE 1320 /* end of multi-record event */
+
+/* Number of 32-bit words in a rule's syscall bitmask (lxar_mask). */
+#define LX_AUDIT_BITMASK_SIZE 64
+/*
+ * Longest rule key accepted by lx_audit_append_rule(); a longer key is
+ * rejected with EINVAL.
+ */
+#define LX_AUDIT_MAX_KEY_LEN 256
+
+/* Audit rule filter type */
+#define LX_AUDIT_FILTER_USER 0 /* user generated msgs */
+#define LX_AUDIT_FILTER_TASK 1 /* task creation */
+#define LX_AUDIT_FILTER_ENTRY 2 /* syscall entry - obsolete */
+#define LX_AUDIT_FILTER_WATCH 3 /* fs watch */
+#define LX_AUDIT_FILTER_EXIT 4 /* syscall return */
+#define LX_AUDIT_FILTER_TYPE 5 /* audit log start */
+#define LX_AUDIT_FILTER_FS 6 /* audit inode child */
+
+/* Audit rule action type */
+#define LX_AUDIT_ACT_NEVER 0
+#define LX_AUDIT_ACT_POSSIBLE 1
+#define LX_AUDIT_ACT_ALWAYS 2 /* the common case */
+
+/* Capacity of the per-rule field, value and operator arrays. */
+#define LX_AUDIT_RULE_MAX_FIELDS 64
+
+/* Linux defaults */
+#define LX_AUDIT_DEF_BACKLOG_LIMIT 64
+#define LX_AUDIT_DEF_WAIT_TIME (60 * HZ_TO_LX_USERHZ(hz))
+
+/*
+ * Audit rule field types
+ * Linux defines a lot of Rule Field values in include/uapi/linux/audit.h.
+ * We currently only handle a few.
+ */
+#define LX_RF_AUDIT_LOGINUID 9 /* e.g. auid>=1000 */
+#define LX_RF_AUDIT_ARCH 11 /* e.g. -F arch=b64 */
+#define LX_RF_AUDIT_WATCH 105 /* user-level -w rule */
+#define LX_RF_AUDIT_PERM 106 /* user-level -p option */
+#define LX_RF_AUDIT_FILTERKEY 210 /* user-level -k key option */
+
+/*
+ * Audit rule field operators
+ * Linux defines the operator values in include/uapi/linux/audit.h.
+ * These 4 bits are combined in various ways for additional operators.
+ *
+ * lx_audit_match_val() switches on the complete operator value, so only the
+ * exact combinations named here are recognized; any other value never
+ * matches.
+ */
+#define LX_OF_AUDIT_BM 0x08000000 /* bit mask (&) */
+#define LX_OF_AUDIT_LT 0x10000000
+#define LX_OF_AUDIT_GT 0x20000000
+#define LX_OF_AUDIT_EQ 0x40000000
+#define LX_OF_AUDIT_NE (LX_OF_AUDIT_LT | LX_OF_AUDIT_GT)
+#define LX_OF_AUDIT_BT (LX_OF_AUDIT_BM | LX_OF_AUDIT_EQ) /* bit test (&=) */
+#define LX_OF_AUDIT_LE (LX_OF_AUDIT_LT | LX_OF_AUDIT_EQ)
+#define LX_OF_AUDIT_GE (LX_OF_AUDIT_GT | LX_OF_AUDIT_EQ)
+/* All four operator bits; used to reject a field carrying no operator. */
+#define LX_OF_AUDIT_ALL (LX_OF_AUDIT_EQ | LX_OF_AUDIT_NE | LX_OF_AUDIT_BM)
+
+/*
+ * Audit rule arch specification
+ * See Linux EM_X86_64 and EM_386 defs.
+ * -F arch=b64 looks like: 0xc000003e
+ * -F arch=b32 looks like: 0x40000003
+ * If no arch is specified (possible with '-S syslog', '-S all', or '-w <file>')
+ * the rule applies to both architectures and LX_RF_AUDIT_ARCH is not passed.
+ */
+#define LX_AUDIT_ARCH64 0xc000003e
+#define LX_AUDIT_ARCH32 0x40000003
+
+/*
+ * See Linux include/uapi/linux/audit.h, AUDIT_MESSAGE_TEXT_MAX is 8560.
+ * The auditd src has MAX_AUDIT_MESSAGE_LENGTH as 8970.
+ * Until necessary, we'll limit ourselves to a smaller length.
+ *
+ * Every queued record's message buffer is allocated and freed with exactly
+ * this size, and this is also the length handed to the emit function.
+ */
+#define LX_AUDIT_MESSAGE_TEXT_MAX 1024
+
+/*
+ * Audit feature description exchanged with user-level.
+ * NOTE(review): presumably mirrors the Linux struct audit_features layout
+ * (version, mask, features, lock) - confirm against the Linux header.
+ */
+typedef struct lx_audit_features {
+ uint32_t lxaf_version;
+ uint32_t lxaf_mask;
+ uint32_t lxaf_features;
+ uint32_t lxaf_lock;
+} lx_audit_features_t;
+
+/*
+ * Audit status as exchanged with user-level. lxas_mask holds
+ * LX_AUDIT_STATUS_* bits.
+ * NOTE(review): presumably mirrors the Linux struct audit_status layout and
+ * the mask selects which members are valid on a "set" - confirm.
+ */
+typedef struct lx_audit_status {
+ uint32_t lxas_mask;
+ uint32_t lxas_enabled;
+ uint32_t lxas_failure;
+ uint32_t lxas_pid;
+ uint32_t lxas_rate_limit;
+ uint32_t lxas_backlog_limit;
+ uint32_t lxas_lost;
+ uint32_t lxas_backlog;
+ /* The same 32-bit slot is readable as a version or a feature bitmap. */
+ /* LINTED: E_ANONYMOUS_UNION_DECL */
+ union {
+ uint32_t lxas_version;
+ uint32_t lxas_feature_bitmap;
+ };
+ uint32_t lxas_backlog_wait_time;
+} lx_audit_status_t;
+
+/*
+ * An audit rule as passed in/out of the kernel over netlink. The members are
+ * described in detail in the block comment at the top of this file. The
+ * fixed-size part is immediately followed by lxar_buflen bytes of string
+ * data (lxar_buf) which is not NUL terminated.
+ */
+typedef struct lx_audit_rule {
+ uint32_t lxar_flag; /* rule list, e.g. LX_AUDIT_FILTER_EXIT */
+ uint32_t lxar_action; /* e.g. LX_AUDIT_ACT_ALWAYS */
+ uint32_t lxar_fld_cnt; /* entries used in the 3 arrays below */
+ uint32_t lxar_mask[LX_AUDIT_BITMASK_SIZE]; /* syscall number bitmask */
+ uint32_t lxar_fields[LX_AUDIT_RULE_MAX_FIELDS]; /* LX_RF_* codes */
+ uint32_t lxar_values[LX_AUDIT_RULE_MAX_FIELDS]; /* value or str len */
+ uint32_t lxar_fld_flag[LX_AUDIT_RULE_MAX_FIELDS]; /* LX_OF_* operator */
+ uint32_t lxar_buflen; /* bytes of data in lxar_buf */
+ /* LINTED: E_ZERO_OR_NEGATIVE_SUBSCRIPT */
+ char lxar_buf[0];
+} lx_audit_rule_t;
+
+/*
+ * Internal structure for an audit rule.
+ * Each rule is on the zone's top-level list of all rules (lxast_rules).
+ * This structure also holds the parsed character string fields from the
+ * original input rule (lxar_buf) so that we don't need to re-parse that
+ * data on every match.
+ */
+typedef struct lx_audit_rule_ent {
+ list_node_t lxare_link; /* linkage on lxast_rules */
+ lx_audit_rule_t lxare_rule; /* copy of the fixed part of the rule */
+ char *lxare_buf; /* copy of the rule's lxar_buf data */
+ boolean_t lxare_is32bit; /* rule applies to 32-bit syscalls */
+ boolean_t lxare_is64bit; /* rule applies to 64-bit syscalls */
+ char *lxare_key; /* NUL-terminated key, or NULL if none */
+} lx_audit_rule_ent_t;
+
+/*
+ * What to do when an audit record cannot be queued (see lx_audit_fail()).
+ * In every mode the lost-record count is incremented.
+ */
+typedef enum lx_audit_fail {
+ LXAE_SILENT, /* only count the lost record */
+ LXAE_PRINT, /* default */
+ LXAE_PANIC /* reboot the zone */
+} lx_audit_fail_t;
+
+/*
+ * One queued audit record waiting on lxast_ev_queue for the worker thread.
+ * Note that the members share the "lxar_" prefix with lx_audit_rule_t but
+ * the two structures are unrelated.
+ */
+typedef struct lx_audit_record {
+ list_node_t lxar_link; /* linkage on lxast_ev_queue */
+ uint32_t lxar_type; /* message type, e.g. LX_AUDIT_SYSCALL */
+ char *lxar_msg; /* LX_AUDIT_MESSAGE_TEXT_MAX byte buffer */
+} lx_audit_record_t;
+
+/*
+ * Per-zone audit state
+ * Lazy allocated when first needed.
+ *
+ * lxast_rate_limit
+ * Currently unused, but can be get/set. Linux default is 0.
+ * lxast_backlog_limit
+ * The maximum number of outstanding audit events allowed (the Linux kernel
+ * default is 64). If the limit is reached, lxast_failure determines what
+ * to do.
+ * lxast_backlog_wait_time
+ * Currently unused, but can be get/set. Linux default is 60HZ.
+ * lxast_sys32_rulep, lxast_sys64_rulep
+ * Indexed by syscall number. A non-NULL entry points at the first rule on
+ * lxast_rules applicable to that syscall; NULL means the syscall is not
+ * audited. lx_audit_syscall_exit() peeks at these without the lock as a
+ * fast check and re-reads them once the lock is held.
+ */
+typedef struct lx_audit_state {
+ lx_audit_fail_t lxast_failure; /* failure behavior */
+ uint32_t lxast_rate_limit;
+ uint32_t lxast_backlog_limit;
+ uint32_t lxast_backlog_wait_time;
+ lx_audit_rule_ent_t *lxast_sys32_rulep[LX_NSYSCALLS];
+ lx_audit_rule_ent_t *lxast_sys64_rulep[LX_NSYSCALLS];
+ kcondvar_t lxast_worker_cv; /* worker waits here for work */
+ kmutex_t lxast_lock; /* protects members below */
+ pid_t lxast_pid; /* auditd pid */
+ uint64_t lxast_seq; /* event sequence num */
+ uint32_t lxast_backlog; /* num of queued events */
+ uint32_t lxast_lost; /* num of lost events */
+ void *lxast_sock; /* auditd lx_netlink_sock_t */
+ boolean_t lxast_exit; /* taskq worker should quit */
+ boolean_t lxast_panicing; /* audit forcing reboot? */
+ kthread_t *lxast_worker;
+ list_t lxast_ev_queue; /* audit record queue */
+ list_t lxast_rules; /* the list of rules */
+} lx_audit_state_t;
+
+/*
+ * Function pointer to netlink function used by audit worker threads to send
+ * audit messages up to the user-level auditd.
+ *
+ * The arguments, as used by lx_audit_emit_syscall_event(), are the auditd
+ * netlink socket, the message type, the message text and its length. A zero
+ * return is treated as success by the caller.
+ * NOTE(review): the pointer is assigned outside this part of the file, and
+ * lx_audit_emit_syscall_event() calls it without taking lx_audit_em_lock -
+ * confirm that it cannot be NULL or change while a worker is running.
+ */
+static int (*lx_audit_emit_msg)(void *, uint_t, const char *, uint_t);
+static kmutex_t lx_audit_em_lock; /* protects emit_msg above */
+
+/* From uts/common/brand/lx/syscall/lx_socket.c */
+extern long lx_socket(int, int, int);
+/* From uts/common/syscall/close.c */
+extern int close(int);
+
+/*
+ * Hand one audit record to the registered netlink emit function and, if that
+ * succeeds, follow it with an empty message (type 0, NULL text, length 0).
+ *
+ * The record is always sent with a length of LX_AUDIT_MESSAGE_TEXT_MAX, which
+ * is the size of every record buffer.
+ *
+ * Returns 0 on success, otherwise the error from the first emit call that
+ * failed.
+ */
+static int
+lx_audit_emit_syscall_event(uint_t mtype, void *lxsock, const char *msg)
+{
+	int rv = lx_audit_emit_msg(lxsock, mtype, msg,
+	    LX_AUDIT_MESSAGE_TEXT_MAX);
+
+	if (rv == 0) {
+		/* The record went out; send the trailing empty message. */
+		rv = lx_audit_emit_msg(lxsock, 0, NULL, 0);
+	}
+
+	return (rv);
+}
+
+/*
+ * Worker thread which drains the zone's audit record queue and sends each
+ * record up to the user-level auditd.
+ *
+ * The thread runs with lxast_lock held, except while it is blocked on
+ * lxast_worker_cv or pausing before a retry. It sleeps whenever there is
+ * nothing queued or no auditd (socket and pid) is registered, and it runs
+ * until lxast_exit is set. On the way out it clears lxast_exit so that a
+ * later worker starts from a clean state.
+ *
+ * 'a' is the zone's lx_audit_state_t and must not be NULL.
+ */
+static void
+lx_audit_worker(void *a)
+{
+	lx_audit_state_t *asp = (lx_audit_state_t *)a;
+
+	VERIFY(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+
+	while (!asp->lxast_exit) {
+		lx_audit_record_t *rec;
+		int rv;
+
+		/* Nothing to send, or nobody to send it to: wait. */
+		if (asp->lxast_backlog == 0 || asp->lxast_sock == NULL ||
+		    asp->lxast_pid == 0) {
+			cv_wait(&asp->lxast_worker_cv, &asp->lxast_lock);
+			continue;
+		}
+
+		rec = list_remove_head(&asp->lxast_ev_queue);
+		asp->lxast_backlog--;
+
+		rv = lx_audit_emit_syscall_event(rec->lxar_type,
+		    asp->lxast_sock, rec->lxar_msg);
+
+		if (rv == ENOMEM || rv == ENOSPC) {
+			/*
+			 * A transient resource shortage. Put the record back
+			 * at the front of the queue, then release the mutex
+			 * so that any other audit-related action (such as
+			 * socket deletion) can make progress while we pause
+			 * for 1/10th of a second before trying again.
+			 */
+			list_insert_head(&asp->lxast_ev_queue, rec);
+			asp->lxast_backlog++;
+			mutex_exit(&asp->lxast_lock);
+			delay(drv_usectohz(100000));
+			mutex_enter(&asp->lxast_lock);
+			continue;
+		}
+
+		/*
+		 * Success, or an error we do not retry; either way we are
+		 * done with this record.
+		 */
+		kmem_free(rec->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX);
+		kmem_free(rec, sizeof (lx_audit_record_t));
+	}
+
+	/* Leave state ready for new worker when auditing restarted */
+	asp->lxast_exit = B_FALSE;
+	mutex_exit(&asp->lxast_lock);
+
+	thread_exit();
+}
+
+/*
+ * Replace the zone's audit worker.
+ *
+ * Any existing worker (identified by a registered socket) is stopped first.
+ * If 'pid' is non-zero, 'lxsock' is then registered as the auditd socket, the
+ * callback 'cb' is invoked on it with B_TRUE, and a new worker thread is
+ * created. A 'pid' of zero only stops the existing worker.
+ *
+ * The caller must hold lxast_lock. The lock is dropped and re-taken around
+ * each attempt to stop an existing worker.
+ */
+static void
+lx_audit_set_worker(uint32_t pid, void *lxsock,
+    void (*cb)(void *, boolean_t))
+{
+	lx_audit_state_t *asp = ztolxzd(curzone)->lxzd_audit_state;
+
+	ASSERT(asp != NULL);
+	ASSERT(MUTEX_HELD(&asp->lxast_lock));
+
+	/*
+	 * Stop any existing worker thread. It is unlikely that we go around
+	 * more than once, but looping handles racing setters.
+	 */
+	while (asp->lxast_sock != NULL) {
+		mutex_exit(&asp->lxast_lock);
+		lx_audit_stop_worker(NULL, cb);
+		mutex_enter(&asp->lxast_lock);
+	}
+
+	/* At this point there must be no trace of a previous worker. */
+	VERIFY(asp->lxast_pid == 0);
+	VERIFY(asp->lxast_sock == NULL);
+	VERIFY(asp->lxast_exit == B_FALSE);
+	VERIFY(asp->lxast_worker == NULL);
+
+	if (pid == 0)
+		return;
+
+	/* Start a worker with the new socket */
+	asp->lxast_sock = lxsock;
+	cb(asp->lxast_sock, B_TRUE);
+	asp->lxast_pid = pid;
+	asp->lxast_worker = thread_create(NULL, 0, lx_audit_worker, asp, 0,
+	    curzone->zone_zsched, TS_RUN, minclsyspri);
+}
+
+/*
+ * Apply a rule field operator: evaluate "curval <op> ruleval".
+ *
+ * 'op' must be exactly one of the LX_OF_AUDIT_* operator values; any other
+ * value yields B_FALSE. 'ruleval' is the value from the rule and 'curval' is
+ * the live value being tested. Both are compared as unsigned 32-bit numbers.
+ */
+static boolean_t
+lx_audit_match_val(uint32_t op, uint32_t ruleval, uint32_t curval)
+{
+	boolean_t matched = B_FALSE;
+
+	switch (op) {
+	case LX_OF_AUDIT_EQ:
+		matched = (curval == ruleval);
+		break;
+	case LX_OF_AUDIT_NE:
+		matched = (curval != ruleval);
+		break;
+	case LX_OF_AUDIT_LT:
+		matched = (curval < ruleval);
+		break;
+	case LX_OF_AUDIT_LE:
+		matched = (curval <= ruleval);
+		break;
+	case LX_OF_AUDIT_GT:
+		matched = (curval > ruleval);
+		break;
+	case LX_OF_AUDIT_GE:
+		matched = (curval >= ruleval);
+		break;
+	case LX_OF_AUDIT_BM:
+		/* bit mask - is any rule bit set? */
+		matched = ((curval & ruleval) != 0);
+		break;
+	case LX_OF_AUDIT_BT:
+		/* bit test - every rule bit must be set */
+		matched = ((curval & ruleval) == ruleval);
+		break;
+	default:
+		/* unknown operator never matches */
+		break;
+	}
+
+	return (matched);
+}
+
+/*
+ * Return the value we report as the Linux "auid" (audit login uid) for the
+ * current process.
+ *
+ * Per the Linux audit.rules(7) man page, a rule with an auid of -1 means the
+ * process does not have a loginuid. We'll use the absence of a session on the
+ * process to mimic this behavior: a process with a session reports the saved
+ * uid from its credential, and one without reports UINT32_MAX (i.e. -1).
+ */
+static uint32_t
+lx_audit_get_auid(void)
+{
+	sess_t *s;
+	uint32_t v;
+
+	/*
+	 * A process with no session has:
+	 * s_dev == 0xffffffffffffffff
+	 * s_vp == NULL
+	 * s_cred == NULL
+	 */
+	s = curproc->p_sessp;
+	if (s != NULL && s->s_vp != NULL) {
+		v = crgetsuid(CRED());
+	} else {
+		v = UINT32_MAX;	/* emulate auid of -1 */
+	}
+
+	return (v);
+}
+
+/*
+ * Decide whether the current process satisfies the selection criteria of the
+ * given rule.
+ *
+ * Only LX_RF_AUDIT_LOGINUID (-F auid) fields are evaluated at present; every
+ * other field type is skipped. All evaluated fields must match (they are
+ * ANDed), so a rule with no auid fields always matches. As support for more
+ * field types is added, this function should be enhanced.
+ */
+static boolean_t
+lx_audit_syscall_rule_match(lx_audit_rule_ent_t *erp)
+{
+	lx_audit_rule_t *rule = &erp->lxare_rule;
+	uint32_t idx;
+
+	for (idx = 0; idx < rule->lxar_fld_cnt; idx++) {
+		if (rule->lxar_fields[idx] == LX_RF_AUDIT_LOGINUID &&
+		    !lx_audit_match_val(rule->lxar_fld_flag[idx],
+		    rule->lxar_values[idx], lx_audit_get_auid())) {
+			/* one failed criterion rejects the whole rule */
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Write the NUL-terminated string 'msg' (without its terminator) to the open
+ * file 'fp' from kernel space, starting at the file's current offset.
+ *
+ * Returns whatever lx_write_common() returns; the byte count it reports is
+ * discarded.
+ */
+static int
+lx_audit_write(file_t *fp, const char *msg)
+{
+	struct iovec iov;
+	struct uio uio;
+	size_t written = 0;
+	ssize_t len = strlen(msg);
+
+	/* a single segment covering the whole message */
+	iov.iov_base = (void *) msg;
+	iov.iov_len = len;
+
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_resid = len;
+	uio.uio_loffset = fp->f_offset;
+	uio.uio_llimit = curproc->p_fsz_ctl;
+	uio.uio_fmode = fp->f_flag;
+	uio.uio_segflg = UIO_SYSSPACE;	/* msg is a kernel address */
+	uio.uio_extflg = UIO_COPY_DEFAULT;
+
+	return (lx_write_common(fp, &uio, &written, B_FALSE));
+}
+
+/*
+ * Report an audit failure message from inside the zone.
+ *
+ * We first try to send the msg out to the zone's logging service (a datagram
+ * on the /dev/log AF_UNIX socket), then fallback to the zone's console,
+ * although in practice, that is unlikely to be useful to most users.
+ *
+ * This is best-effort: if neither destination can be written the message is
+ * silently dropped. Both paths temporarily allocate a file descriptor in the
+ * calling process and close it again before returning.
+ */
+static void
+lx_audit_log_msg(const char *msg)
+{
+	int fd;
+	struct sockaddr_un addr;
+	struct sonode *so;
+	uint_t alen;
+	uint_t sizediff = (sizeof (addr) - sizeof (addr.sun_path));
+	file_t *fp;
+	int err;
+	vnode_t *vp;
+
+	/* lx_socket reports failure through the lwp's errno */
+	ttolwp(curthread)->lwp_errno = 0;
+	fd = lx_socket(LX_AF_UNIX, LX_SOCK_DGRAM, 0);
+	if (ttolwp(curthread)->lwp_errno != 0)
+		goto trycons;
+
+	bzero((char *)&addr, sizeof (addr));
+	addr.sun_family = AF_UNIX;
+	(void) strncpy(addr.sun_path, "/dev/log", sizeof (addr.sun_path) - 1);
+	/* address length: header plus the path and its terminating NUL */
+	alen = strlen(addr.sun_path) + 1 + sizediff;
+
+	/*
+	 * We can't use lx_connect here since that expects to be called from
+	 * user-land, so we do the (streamlined) connect ourselves.
+	 */
+	if ((so = getsonode(fd, &err, &fp)) == NULL) {
+		(void) close(fd);
+		goto trycons;
+	}
+
+	err = socket_connect(so, (struct sockaddr *)&addr, alen, fp->f_flag,
+	    _SOCONNECT_XPG4_2, CRED());
+
+	if (err == 0)
+		err = lx_audit_write(fp, msg);
+
+	releasef(fd);	/* release getsonode hold */
+	(void) close(fd);
+
+	if (err == 0)
+		return;
+
+trycons:
+	/* "open" the console device */
+	if (lookupnameatcred("/dev/console", UIO_SYSSPACE, FOLLOW, NULLVPP,
+	    &vp, NULL, CRED()) != 0)
+		return;
+
+	if (falloc(vp, FWRITE, &fp, &fd) != 0) {
+		VN_RELE(vp);
+		return;
+	}
+	mutex_exit(&fp->f_tlock);
+	setf(fd, fp);
+
+	/* nothing left to do if console write fails */
+	(void) lx_audit_write(fp, msg);
+	/* as with the other close() calls here, the result is not useful */
+	(void) close(fd);
+}
+
+/*
+ * Handle the loss of an audit record according to the zone's configured
+ * failure mode, and count the record as lost.
+ *
+ * LXAE_SILENT only counts. LXAE_PRINT and LXAE_PANIC also log 'msg', but only
+ * when the lost count is a multiple of 100. LXAE_PANIC additionally reboots
+ * the zone, once.
+ *
+ * The caller must hold lxast_lock. The lock is dropped and re-taken while the
+ * zone reboot is being initiated.
+ */
+static void
+lx_audit_fail(lx_audit_state_t *asp, const char *msg)
+{
+	ASSERT(MUTEX_HELD(&asp->lxast_lock));
+
+	if (asp->lxast_failure != LXAE_PRINT &&
+	    asp->lxast_failure != LXAE_PANIC) {
+		/* silent mode: just account for the loss */
+		asp->lxast_lost++;
+		return;
+	}
+
+	/*
+	 * Linux can ratelimit the amount of log spam here, so we'll do
+	 * something similar, especially since this could be called on many
+	 * syscall returns if the audit daemon is down or not consuming audit
+	 * records for some other reason.
+	 */
+	if (asp->lxast_lost % 100 == 0)
+		lx_audit_log_msg(msg);
+
+	if (asp->lxast_failure == LXAE_PANIC && !asp->lxast_panicing) {
+		/*
+		 * Reboot the zone so that no audit records are lost. We delay
+		 * a second to give the zone's logger a chance to handle the
+		 * log message. We have to drop the lock here in case the
+		 * zone's logger itself is making syscalls which would be
+		 * audited, although that wouldn't be the ideal configuration.
+		 */
+		asp->lxast_panicing = B_TRUE;
+		mutex_exit(&asp->lxast_lock);
+		lx_audit_log_msg("audit: panic");
+		delay(drv_usectohz(1000000));
+		zone_kadmin(A_SHUTDOWN, AD_BOOT, NULL, kcred);
+		mutex_enter(&asp->lxast_lock);
+	}
+
+	asp->lxast_lost++;
+}
+
+/*
+ * This formats the input string into a format that matches Linux. The input
+ * strings are small right now (<= PSARGSZ) so for simplicity we're using
+ * a temporary buffer of adequate size.
+ *
+ * Only the first space-delimited word of 'str' is used. If that word contains
+ * no double-quote it is written to 'dst' wrapped in double-quotes; otherwise
+ * it is written as a string of two-digit hex values, one per byte, with no
+ * quotes. 'dst' is always NUL terminated and at most 'dlen' bytes are
+ * written.
+ */
+static void
+lx_audit_fmt_str(char *dst, char *str, uint_t dlen)
+{
+	char *sp, tmp[100];
+
+	/* work on a private copy cut off at the first space */
+	(void) strlcpy(tmp, str, sizeof (tmp));
+	if ((sp = strchr(tmp, ' ')) != NULL)
+		*sp = '\0';
+
+	if (strchr(tmp, '"') == NULL) {
+		(void) snprintf(dst, dlen, "\"%s\"", tmp);
+	} else {
+		char *p, *dp;
+		uint_t olen = 0;
+
+		ASSERT(dlen > 2);
+		dlen -= 2;	/* leave room for terminating nul */
+		dp = dst;
+		/*
+		 * Encode the same truncated word that the quoted form would
+		 * have emitted. Each byte must be converted as an unsigned
+		 * value: a plain (signed) char >= 0x80 would be sign-extended
+		 * and print as eight hex digits instead of two, running past
+		 * the space accounted for here.
+		 */
+		for (p = tmp; *p != '\0' && olen < dlen; p++) {
+			(void) snprintf(dp, 3, "%02x", (unsigned char)*p);
+			dp += 2;
+			olen += 2;
+		}
+		*dp = '\0';
+	}
+}
+
+/*
+ * Format and enqueue a syscall audit record.
+ *
+ * Builds one LX_AUDIT_SYSCALL record describing the current lwp's syscall
+ * 'sysnum' (with return value 'ret') for the matching rule 'erp', stamps it
+ * with the event serial number 'seq' and timestamp 'tsp', and appends it to
+ * the zone's event queue. The worker is signalled if the queue was empty.
+ *
+ * 'arch' is LX_AUDIT_ARCH32 or LX_AUDIT_ARCH64. The caller must hold
+ * lxast_lock.
+ *
+ * If the backlog limit has been reached, or memory for the record cannot be
+ * allocated without sleeping, nothing is queued and the loss is handled by
+ * lx_audit_fail().
+ */
+static void
+lx_audit_syscall_fmt_rcd(int sysnum, uint32_t arch, long ret,
+    lx_audit_state_t *asp, lx_audit_rule_ent_t *erp, uint64_t seq,
+    timestruc_t *tsp)
+{
+	klwp_t *lwp;
+	proc_t *p;
+	uint32_t items, sessid;
+	lx_lwp_data_t *lwpd;
+	lx_audit_record_t *rp;
+	cred_t *cr = CRED();
+	minor_t minor;
+	/*
+	 * Room for key="%s" formatting of a maximum-length key: 5 bytes for
+	 * the leading key=", 1 for the closing quote and 1 for the NUL.
+	 */
+	char key[LX_AUDIT_MAX_KEY_LEN + 7];
+	char exe[PSARGSZ * 2 + 8], comm[MAXCOMLEN * 2 + 8];
+
+	ASSERT(MUTEX_HELD(&asp->lxast_lock));
+
+	if (asp->lxast_backlog >= asp->lxast_backlog_limit) {
+		lx_audit_fail(asp, "audit: backlog limit exceeded");
+		return;
+	}
+
+	/* "items" is the syscall's argument count, capped at 4 */
+	if (arch == LX_AUDIT_ARCH32) {
+		items = MIN(4, lx_sysent32[sysnum].sy_narg);
+	} else {
+		ASSERT3U(arch, ==, LX_AUDIT_ARCH64);
+		items = MIN(4, lx_sysent64[sysnum].sy_narg);
+	}
+
+	lwp = ttolwp(curthread);
+	lwpd = lwptolxlwp(lwp);
+	p = curproc;
+
+	/*
+	 * For the key, if no key has been set on the rule, Linux formats the
+	 * string "(null)" (with no quotes - i.e. key=(null)).
+	 */
+	if (erp->lxare_key != NULL) {
+		(void) snprintf(key, sizeof (key), "key=\"%s\"",
+		    erp->lxare_key);
+	} else {
+		(void) snprintf(key, sizeof (key), "key=(null)");
+	}
+
+	/* We hold lxast_lock, so these allocations must not sleep. */
+	rp = kmem_alloc(sizeof (lx_audit_record_t), KM_NOSLEEP);
+	if (rp == NULL) {
+		lx_audit_fail(asp, "audit: no kernel memory");
+		return;
+	}
+	rp->lxar_msg = kmem_zalloc(LX_AUDIT_MESSAGE_TEXT_MAX, KM_NOSLEEP);
+	if (rp->lxar_msg == NULL) {
+		kmem_free(rp, sizeof (lx_audit_record_t));
+		lx_audit_fail(asp, "audit: no kernel memory");
+		return;
+	}
+	rp->lxar_type = LX_AUDIT_SYSCALL;
+
+	mutex_enter(&p->p_splock);
+	sessid = p->p_sessp->s_sid;
+	minor = getminor(p->p_sessp->s_dev);
+	mutex_exit(&p->p_splock);
+
+	mutex_enter(&p->p_lock);
+	lx_audit_fmt_str(exe, p->p_user.u_psargs, sizeof (exe));
+	lx_audit_fmt_str(comm, p->p_user.u_comm, sizeof (comm));
+	mutex_exit(&p->p_lock);
+
+	/*
+	 * See Linux audit_log_exit() for how a syscall exit record is
+	 * formatted.
+	 *
+	 * For "arch" value, see Linux AUDIT_ARCH_IA64, AUDIT_ARCH_I386,
+	 * __AUDIT_ARCH_64BIT and __AUDIT_ARCH_LE definitions.
+	 *
+	 * For fsuid/fsgid, see lx_setfsuid/lx_setfsgid for how we handle that.
+	 *
+	 * NOTE(review): a0-a3 are printed in decimal here; Linux appears to
+	 * print them in hex - confirm which form auditd consumers expect.
+	 */
+	(void) snprintf(rp->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX,
+	    "audit(%lu.%03lu:%lu): arch=%x syscall=%u "
+	    "success=%s exit=%ld a0=%lu a1=%lu a2=%lu a3=%lu items=%u "
+	    "ppid=%u pid=%u auid=%u uid=%u gid=%u euid=%u suid=%u "
+	    "fsuid=%u egid=%u sgid=%u fsgid=%u tty=pts%u ses=%u "
+	    "comm=%s exe=%s %s",
+	    (uint64_t)tsp->tv_sec,		/* zone's timestamp */
+	    (uint64_t)tsp->tv_nsec / 1000000,
+	    seq,				/* serial number */
+	    arch,				/* arch */
+	    sysnum,				/* syscall */
+	    (lwp->lwp_errno == 0 ? "yes" : "no"),	/* success */
+	    ret,				/* exit */
+	    lwpd->br_syscall_args[0],		/* a0 */
+	    lwpd->br_syscall_args[1],		/* a1 */
+	    lwpd->br_syscall_args[2],		/* a2 */
+	    lwpd->br_syscall_args[3],		/* a3 */
+	    items,				/* items */
+	    lx_lwp_ppid(lwp, NULL, NULL),	/* ppid */
+	    (lwpd->br_pid == curzone->zone_proc_initpid ? 1 : lwpd->br_pid),
+	    lx_audit_get_auid(),		/* auid */
+	    crgetruid(cr),			/* uid */
+	    crgetrgid(cr),			/* gid */
+	    crgetuid(cr),			/* euid */
+	    crgetsuid(cr),			/* saved uid */
+	    crgetuid(cr),			/* fsuid */
+	    crgetgid(cr),			/* egid */
+	    crgetsgid(cr),			/* saved gid */
+	    crgetgid(cr),			/* fsgid */
+	    minor,				/* tty */
+	    sessid,				/* ses */
+	    comm,				/* comm */
+	    exe,				/* exe */
+	    key);				/* key="VAL" */
+
+	list_insert_tail(&asp->lxast_ev_queue, rp);
+	/* the worker only sleeps on an empty queue, so wake it on 0 -> 1 */
+	if (asp->lxast_backlog == 0)
+		cv_signal(&asp->lxast_worker_cv);
+	asp->lxast_backlog++;
+}
+
+/*
+ * Starting after 'erp', walk the zone's rule list and return the next rule
+ * which covers the given architecture and has the bit for syscall 'sysnum'
+ * set in its mask. Returns NULL once the end of the list is reached.
+ *
+ * Only the arch and syscall mask are examined here; the rule's field
+ * criteria are not evaluated. The caller must hold lxast_lock.
+ */
+static lx_audit_rule_ent_t *
+lx_audit_next_applicable_rule(int sysnum, uint32_t arch, lx_audit_state_t *asp,
+    lx_audit_rule_ent_t *erp)
+{
+	lx_audit_rule_ent_t *cand;
+
+	ASSERT(MUTEX_HELD(&asp->lxast_lock));
+
+	cand = list_next(&asp->lxast_rules, erp);
+	while (cand != NULL) {
+		boolean_t wrong_arch;
+
+		/* Does the rule exclude the caller's architecture? */
+		wrong_arch =
+		    (arch == LX_AUDIT_ARCH32 && !cand->lxare_is32bit) ||
+		    (arch == LX_AUDIT_ARCH64 && !cand->lxare_is64bit);
+
+		/* Same arch and the syscall's bit is set: this is the one. */
+		if (!wrong_arch &&
+		    BT_TEST32(cand->lxare_rule.lxar_mask, sysnum))
+			return (cand);
+
+		cand = list_next(&asp->lxast_rules, cand);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Syscall-return audit hook.
+ *
+ * Called with the Linux syscall number and its return value. If auditing is
+ * enabled for the zone and at least one rule covers this syscall for the
+ * caller's data model, every applicable rule is checked in list order and an
+ * audit record is queued for each one whose criteria match. All records
+ * queued by one call share the same serial number and timestamp.
+ *
+ * Syscall numbers outside of [0, LX_NSYSCALLS) are ignored.
+ */
+void
+lx_audit_syscall_exit(int sysnum, long ret)
+{
+	lx_zone_data_t *lxzd = ztolxzd(curzone);
+	lx_audit_state_t *asp;
+	lx_audit_rule_ent_t **rulepp;
+	lx_audit_rule_ent_t *erp;
+	uint64_t seq;
+	timestruc_t ts;
+	uint32_t arch;
+
+	if (lxzd->lxzd_audit_enabled == LXAE_DISABLED)
+		return;
+
+	/* sysnum is signed and is used to index the per-syscall arrays */
+	if (sysnum < 0 || sysnum >= LX_NSYSCALLS)
+		return;
+
+	asp = lxzd->lxzd_audit_state;
+	ASSERT(asp != NULL);
+
+	if (get_udatamodel() == DATAMODEL_ILP32) {
+		arch = LX_AUDIT_ARCH32;
+		rulepp = asp->lxast_sys32_rulep;
+	} else {
+		ASSERT(get_udatamodel() == DATAMODEL_LP64);
+		arch = LX_AUDIT_ARCH64;
+		rulepp = asp->lxast_sys64_rulep;
+	}
+
+	/*
+	 * Fast top-level check to see if we're auditing this syscall.
+	 * We don't take the mutex for this since there is no need.
+	 */
+	if (rulepp[sysnum] == NULL)
+		return;
+
+	mutex_enter(&asp->lxast_lock);
+	erp = rulepp[sysnum];
+	if (erp == NULL) {
+		/* Hit a race and the syscall is no longer being audited */
+		mutex_exit(&asp->lxast_lock);
+		return;
+	}
+
+	/*
+	 * All of the records in the set (i.e. same serial number) have
+	 * the same timestamp. The timestamp is relative to the zone's boot
+	 * time.
+	 */
+	seq = asp->lxast_seq++;
+	gethrestime(&ts);
+	ts.tv_sec -= curzone->zone_boot_time;
+
+	/*
+	 * We have to determine if the first rule associated with the syscall,
+	 * or any subsequent applicable rules, match.
+	 *
+	 * The first rule associated with the syscall may (or may not) match,
+	 * but there can be additional rules which might also match. The first
+	 * possible rule is always the one that enables the syscall auditing,
+	 * but we also have to iterate to the end of the list to see if any
+	 * other rules are applicable to this syscall.
+	 */
+	for (; erp != NULL;
+	    erp = lx_audit_next_applicable_rule(sysnum, arch, asp, erp)) {
+		if (!lx_audit_syscall_rule_match(erp))
+			continue;
+
+		lx_audit_syscall_fmt_rcd(sysnum, arch, ret, asp, erp, seq, &ts);
+	}
+
+	/*
+	 * TODO: Currently we only output a single SYSCALL record.
+	 * Real Linux emits a set of audit records for a syscall exit event
+	 * (e.g. for an unlink syscall):
+	 * type=SYSCALL
+	 * type=CWD
+	 * type=PATH - one for the parent dir
+	 * type=PATH - one for the actual file unlinked
+	 * type=PROCTITLE - (this one seems worthless)
+	 * followed by an AUDIT_EOE message (which seems to be ignored).
+	 *
+	 * For syscalls that don't change files in the file system (e.g. ioctl)
+	 * there are no PATH records.
+	 */
+	mutex_exit(&asp->lxast_lock);
+}
+
+/*
+ * Determine which syscalls this rule applies to and set up a fast pointer for
+ * each such syscall to enable its rule match.
+ *
+ * Every bit in the rule's syscall mask is examined. For each syscall selected
+ * by the mask, the 32-bit and/or 64-bit per-syscall pointer (depending on
+ * which architectures the rule covers) is pointed at 'rp', but only if that
+ * slot is still empty; an earlier rule keeps its place as the first
+ * applicable rule.
+ *
+ * The caller must hold lxast_lock.
+ */
+static void
+lx_enable_syscall_rule(lx_audit_state_t *asp, lx_audit_rule_t *rulep,
+    lx_audit_rule_ent_t *rp)
+{
+	uint_t n;
+
+	ASSERT(MUTEX_HELD(&asp->lxast_lock));
+
+	for (n = 0; n < LX_NSYSCALLS; n++) {
+		if (!BT_TEST32(rulep->lxar_mask, n))
+			continue;
+
+		if (rp->lxare_is32bit && asp->lxast_sys32_rulep[n] == NULL)
+			asp->lxast_sys32_rulep[n] = rp;
+
+		if (rp->lxare_is64bit && asp->lxast_sys64_rulep[n] == NULL)
+			asp->lxast_sys64_rulep[n] = rp;
+	}
+}
+
+/*
+ * Validate a rule handed down by the netlink code and, if we can support it,
+ * add it to the zone's rule list and enable it for the syscalls it selects.
+ *
+ * 'r' points to an lx_audit_rule_t whose trailing lxar_buf holds the rule's
+ * string data; 'datalen' is the total number of bytes available at 'r'.
+ *
+ * Returns 0 on success, EPERM when the audit configuration is locked, EINVAL
+ * for a malformed rule, and ENOTSUP for a rule which uses a filter, action,
+ * field, or operator that we do not implement, or which selects none of the
+ * syscalls we know about.
+ */
+int
+lx_audit_append_rule(void *r, uint_t datalen)
+{
+	lx_audit_rule_t *rulep = (lx_audit_rule_t *)r;
+	char *datap;
+	uint_t i;
+	lx_audit_rule_ent_t *rp;
+	lx_audit_state_t *asp;
+	boolean_t is_32bit = B_TRUE, is_64bit = B_TRUE, sys_found = B_FALSE;
+	boolean_t key_seen = B_FALSE;
+	char *tdp;
+	char key[LX_AUDIT_MAX_KEY_LEN + 1];
+	uint32_t tlen;
+
+	if (ztolxzd(curproc->p_zone)->lxzd_audit_enabled == LXAE_LOCKED)
+		return (EPERM);
+
+	/* The fixed-size header must be present before we look inside it. */
+	if (datalen < sizeof (lx_audit_rule_t))
+		return (EINVAL);
+	datalen -= sizeof (lx_audit_rule_t);
+
+	if (rulep->lxar_fld_cnt > LX_AUDIT_RULE_MAX_FIELDS)
+		return (EINVAL);
+
+	/* The claimed string data must fit in what follows the header. */
+	if (rulep->lxar_buflen > datalen)
+		return (EINVAL);
+
+	datap = rulep->lxar_buf;
+
+	/*
+	 * First check the rule to determine if we support the flag, actions,
+	 * and all of the fields specified (since currently, our rule support
+	 * is incomplete).
+	 *
+	 * NOTE: We currently only handle syscall exit rules.
+	 */
+	if (rulep->lxar_flag != LX_AUDIT_FILTER_EXIT ||
+	    rulep->lxar_action != LX_AUDIT_ACT_ALWAYS)
+		return (ENOTSUP);
+
+	/*
+	 * tdp/tlen track the unconsumed portion of the string data as fields
+	 * which carry a string (currently only the filter key) are parsed.
+	 */
+	tdp = datap;
+	tlen = rulep->lxar_buflen;
+	key[0] = '\0';
+	for (i = 0; i < rulep->lxar_fld_cnt; i++) {
+		uint32_t ftype, fval, fop;
+
+		fop = rulep->lxar_fld_flag[i];
+		ftype = rulep->lxar_fields[i];
+		fval = rulep->lxar_values[i];
+		DTRACE_PROBE3(lx__audit__field, uint32_t, fop,
+		    uint32_t, ftype, uint32_t, fval);
+
+		if (ftype == LX_RF_AUDIT_ARCH) {
+			if (fop != LX_OF_AUDIT_EQ)
+				return (ENOTSUP);
+			/* Only a single arch field is allowed per rule. */
+			if (!is_32bit || !is_64bit)
+				return (EINVAL);
+			if (fval == LX_AUDIT_ARCH64) {
+				is_32bit = B_FALSE;
+			} else if (fval == LX_AUDIT_ARCH32) {
+				is_64bit = B_FALSE;
+			} else {
+				return (ENOTSUP);
+			}
+		} else if (ftype == LX_RF_AUDIT_LOGINUID) {
+			if ((fop & LX_OF_AUDIT_ALL) == 0)
+				return (ENOTSUP);
+		} else if (ftype == LX_RF_AUDIT_FILTERKEY) {
+			if (fop != LX_OF_AUDIT_EQ)
+				return (ENOTSUP);
+			/* For a key, the field value is the key's length. */
+			if (tlen < fval || fval > LX_AUDIT_MAX_KEY_LEN)
+				return (EINVAL);
+			/*
+			 * Only one key is allowed per rule. Track this with an
+			 * explicit flag rather than by inspecting key[0], so
+			 * that an empty (or NUL-leading) first key cannot be
+			 * silently replaced by a second one.
+			 */
+			if (key_seen)
+				return (EINVAL);
+			key_seen = B_TRUE;
+			/* while we're here, save the parsed key */
+			bcopy(tdp, key, fval);
+			key[fval] = '\0';
+			tdp += fval;
+			tlen -= fval;
+		} else {
+			/*
+			 * TODO: expand the support for additional Linux field
+			 * options.
+			 */
+			return (ENOTSUP);
+		}
+	}
+	for (i = 0; i < LX_NSYSCALLS; i++) {
+		if (BT_TEST32(rulep->lxar_mask, i)) {
+			/* At least one syscall enabled in this mask entry */
+			sys_found = B_TRUE;
+			break;
+		}
+	}
+	if (!sys_found)
+		return (ENOTSUP);
+
+	asp = ztolxzd(curzone)->lxzd_audit_state;
+	ASSERT(asp != NULL);
+
+	/*
+	 * We have confirmed that we can handle the rule specified.
+	 * Before taking the lock, allocate and setup the internal rule struct.
+	 */
+	rp = kmem_alloc(sizeof (lx_audit_rule_ent_t), KM_SLEEP);
+	bcopy(rulep, &rp->lxare_rule, sizeof (lx_audit_rule_t));
+	rp->lxare_buf = kmem_alloc(rulep->lxar_buflen, KM_SLEEP);
+	bcopy(datap, rp->lxare_buf, rulep->lxar_buflen);
+	rp->lxare_is32bit = is_32bit;
+	rp->lxare_is64bit = is_64bit;
+	if (key[0] == '\0') {
+		rp->lxare_key = NULL;
+	} else {
+		/*
+		 * The key is stored as a C string sized by strlen(); the code
+		 * which frees a rule relies on the same strlen() + 1 size.
+		 */
+		size_t slen = strlen(key);
+
+		rp->lxare_key = kmem_alloc(slen + 1, KM_SLEEP);
+		(void) strlcpy(rp->lxare_key, key, slen + 1);
+	}
+
+	mutex_enter(&asp->lxast_lock);
+	/* Save the rule on our top-level list. */
+	list_insert_tail(&asp->lxast_rules, rp);
+	/* Enable tracing on the relevant syscalls. */
+	lx_enable_syscall_rule(asp, rulep, rp);
+	mutex_exit(&asp->lxast_lock);
+
+	return (0);
+}
+
+/*
+ * Return B_TRUE when the caller-supplied rule (header plus string data) is
+ * identical to the stored rule entry: same flag, action, field count, syscall
+ * mask, per-field type/value/operator, and string buffer.
+ */
+static boolean_t
+lx_audit_rule_ent_matches(lx_audit_rule_t *rulep, char *datap,
+    lx_audit_rule_ent_t *erp)
+{
+	lx_audit_rule_t *cand = &erp->lxare_rule;
+	uint_t i;
+
+	if (rulep->lxar_flag != cand->lxar_flag ||
+	    rulep->lxar_action != cand->lxar_action ||
+	    rulep->lxar_fld_cnt != cand->lxar_fld_cnt)
+		return (B_FALSE);
+
+	for (i = 0; i < LX_AUDIT_BITMASK_SIZE; i++) {
+		if (rulep->lxar_mask[i] != cand->lxar_mask[i])
+			return (B_FALSE);
+	}
+
+	for (i = 0; i < rulep->lxar_fld_cnt; i++) {
+		if (rulep->lxar_fields[i] != cand->lxar_fields[i] ||
+		    rulep->lxar_values[i] != cand->lxar_values[i] ||
+		    rulep->lxar_fld_flag[i] != cand->lxar_fld_flag[i])
+			return (B_FALSE);
+	}
+
+	if (rulep->lxar_buflen != cand->lxar_buflen)
+		return (B_FALSE);
+
+	if (bcmp(datap, erp->lxare_buf, cand->lxar_buflen) != 0)
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/*
+ * Remove the stored rule which exactly matches the rule passed in by the
+ * netlink code, fix up the per-syscall fast pointers that referred to it, and
+ * free it.
+ *
+ * Returns 0 on success, EPERM when the audit configuration is locked, EINVAL
+ * when the message is too short or internally inconsistent, and ENOENT when
+ * no stored rule matches.
+ */
+int
+lx_audit_delete_rule(void *r, uint_t datalen)
+{
+	lx_audit_rule_t *rulep = (lx_audit_rule_t *)r;
+	lx_audit_rule_ent_t *erp;
+	lx_audit_state_t *asp;
+	char *datap;
+	uint_t sc;
+
+	if (ztolxzd(curproc->p_zone)->lxzd_audit_enabled == LXAE_LOCKED)
+		return (EPERM);
+
+	if (datalen < sizeof (lx_audit_rule_t))
+		return (EINVAL);
+	datalen -= sizeof (lx_audit_rule_t);
+
+	if (rulep->lxar_fld_cnt > LX_AUDIT_RULE_MAX_FIELDS)
+		return (EINVAL);
+
+	if (rulep->lxar_buflen > datalen)
+		return (EINVAL);
+
+	datap = rulep->lxar_buf;
+
+	asp = ztolxzd(curzone)->lxzd_audit_state;
+	ASSERT(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+
+	/* Search the rule list for the first entry identical to the request */
+	erp = list_head(&asp->lxast_rules);
+	while (erp != NULL && !lx_audit_rule_ent_matches(rulep, datap, erp))
+		erp = list_next(&asp->lxast_rules, erp);
+
+	if (erp == NULL) {
+		/* Nothing on the list matched */
+		mutex_exit(&asp->lxast_lock);
+		return (ENOENT);
+	}
+
+	/*
+	 * For every syscall the rule selects, if the rule being removed is the
+	 * one the syscall's fast pointer refers to (likely, since there is
+	 * usually only one rule per syscall), repoint it at the next
+	 * applicable rule on the list, or disable tracing for that syscall if
+	 * there is none.
+	 */
+	for (sc = 0; sc < LX_NSYSCALLS; sc++) {
+		if (!BT_TEST32(rulep->lxar_mask, sc))
+			continue;
+
+		if (erp->lxare_is32bit &&
+		    asp->lxast_sys32_rulep[sc] == erp) {
+			asp->lxast_sys32_rulep[sc] =
+			    lx_audit_next_applicable_rule(
+			    sc, LX_AUDIT_ARCH32, asp, erp);
+		}
+		if (erp->lxare_is64bit &&
+		    asp->lxast_sys64_rulep[sc] == erp) {
+			asp->lxast_sys64_rulep[sc] =
+			    lx_audit_next_applicable_rule(
+			    sc, LX_AUDIT_ARCH64, asp, erp);
+		}
+	}
+
+	/* Unlink the entry and release everything it owns */
+	list_remove(&asp->lxast_rules, erp);
+
+	kmem_free(erp->lxare_buf, erp->lxare_rule.lxar_buflen);
+	if (erp->lxare_key != NULL)
+		kmem_free(erp->lxare_key, strlen(erp->lxare_key) + 1);
+	kmem_free(erp, sizeof (lx_audit_rule_ent_t));
+
+	mutex_exit(&asp->lxast_lock);
+	return (0);
+}
+
+/*
+ * Queue a user-supplied audit message of type 'mtype' for the audit worker.
+ *
+ * 'datap' holds 'len' bytes of message text. The text is truncated to fit
+ * and wrapped as msg='...' after a standard "audit(time:serial): pid= uid=
+ * auid= ses=" prefix. The message is dropped (and reported via
+ * lx_audit_fail) if the backlog limit has been reached or memory cannot be
+ * allocated without sleeping.
+ */
+void
+lx_audit_emit_user_msg(uint_t mtype, uint_t len, char *datap)
+{
+	lx_zone_data_t *lxzd = ztolxzd(curzone);
+	lx_lwp_data_t *lwpd = lwptolxlwp(ttolwp(curthread));
+	proc_t *p = curproc;
+	lx_audit_state_t *asp;
+	lx_audit_record_t *rec;
+	timestruc_t now;
+	uint_t sid;
+	uint_t hdrlen, room;
+	char text[LX_AUDIT_MESSAGE_TEXT_MAX];
+
+	/*
+	 * User messages can arrive before auditing has been initialized for
+	 * the zone; silently ignore them in that case.
+	 */
+	if (lxzd->lxzd_audit_enabled == LXAE_DISABLED ||
+	    lxzd->lxzd_audit_state == NULL)
+		return;
+
+	/* Take a NUL-terminated, size-clamped private copy of the text. */
+	if (len >= sizeof (text))
+		len = sizeof (text) - 1;
+	bcopy(datap, text, len);
+	text[len] = '\0';
+
+	mutex_enter(&p->p_splock);
+	sid = p->p_sessp->s_sid;
+	mutex_exit(&p->p_splock);
+
+	asp = lxzd->lxzd_audit_state;
+	ASSERT(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+
+	if (asp->lxast_backlog >= asp->lxast_backlog_limit) {
+		lx_audit_fail(asp, "audit: backlog limit exceeded");
+		mutex_exit(&asp->lxast_lock);
+		return;
+	}
+
+	/* We hold lxast_lock, so neither allocation is allowed to sleep. */
+	rec = kmem_alloc(sizeof (lx_audit_record_t), KM_NOSLEEP);
+	if (rec != NULL) {
+		rec->lxar_msg = kmem_zalloc(LX_AUDIT_MESSAGE_TEXT_MAX,
+		    KM_NOSLEEP);
+	}
+	if (rec == NULL || rec->lxar_msg == NULL) {
+		lx_audit_fail(asp, "audit: no kernel memory");
+		mutex_exit(&asp->lxast_lock);
+		if (rec != NULL)
+			kmem_free(rec, sizeof (lx_audit_record_t));
+		return;
+	}
+	rec->lxar_type = mtype;
+
+	/* Timestamps are reported relative to the zone's boot time. */
+	gethrestime(&now);
+	now.tv_sec -= curzone->zone_boot_time;
+
+	(void) snprintf(rec->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX,
+	    "audit(%lu.%03lu:%lu): pid=%u uid=%u auid=%u ses=%u msg=\'",
+	    (uint64_t)now.tv_sec,		/* zone's timestamp */
+	    (uint64_t)now.tv_nsec / 1000000,
+	    asp->lxast_seq++,			/* serial number */
+	    (lwpd->br_pid == curzone->zone_proc_initpid ? 1 : lwpd->br_pid),
+	    crgetruid(CRED()),			/* uid */
+	    lx_audit_get_auid(),		/* auid */
+	    sid);				/* ses */
+
+	/*
+	 * Append the user text after the prefix, holding back room for the
+	 * closing quote, then close the quoted string.
+	 */
+	hdrlen = strlen(rec->lxar_msg);
+	room = LX_AUDIT_MESSAGE_TEXT_MAX - hdrlen - 2;
+	(void) strlcat(rec->lxar_msg + hdrlen, text, room);
+	(void) strlcat(rec->lxar_msg, "\'", LX_AUDIT_MESSAGE_TEXT_MAX);
+
+	/* Queue the record; wake the worker if the queue was empty. */
+	list_insert_tail(&asp->lxast_ev_queue, rec);
+	if (asp->lxast_backlog == 0)
+		cv_signal(&asp->lxast_worker_cv);
+	asp->lxast_backlog++;
+	mutex_exit(&asp->lxast_lock);
+}
+
+/*
+ * Hand every rule on the zone's rule list to 'cb', in list order. For each
+ * rule the callback receives 'reply', the stored lx_audit_rule_t and its
+ * size, and the rule's string buffer and its length. The callback is invoked
+ * with lxast_lock held.
+ */
+void
+lx_audit_list_rules(void *reply,
+    void (*cb)(void *, void *, uint_t, void *, uint_t))
+{
+	lx_audit_state_t *asp = ztolxzd(curzone)->lxzd_audit_state;
+	lx_audit_rule_ent_t *ent;
+
+	ASSERT(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+	ent = list_head(&asp->lxast_rules);
+	while (ent != NULL) {
+		cb(reply, &ent->lxare_rule, sizeof (lx_audit_rule_t),
+		    ent->lxare_buf, ent->lxare_rule.lxar_buflen);
+		ent = list_next(&asp->lxast_rules, ent);
+	}
+	mutex_exit(&asp->lxast_lock);
+}
+
+/*
+ * Report the audit feature set through 'cb'. The reply is fixed: the feature
+ * version, a mask of all ones, and no features enabled or locked.
+ */
+void
+lx_audit_get_feature(void *reply, void (*cb)(void *, void *, uint_t))
+{
+	lx_audit_features_t feat;
+
+	feat.lxaf_lock = 0;
+	feat.lxaf_features = 0;
+	feat.lxaf_mask = 0xffffffff;
+	feat.lxaf_version = LX_AUDIT_FEATURE_VERSION;
+
+	cb(reply, &feat, sizeof (feat));
+}
+
+/*
+ * Take a snapshot of the zone's audit status under lxast_lock and pass it to
+ * 'cb' once the lock has been dropped. Fields which are not explicitly filled
+ * in are reported as zero.
+ */
+void
+lx_audit_get(void *reply, void (*cb)(void *, void *, uint_t))
+{
+	lx_zone_data_t *lxzd = ztolxzd(curproc->p_zone);
+	lx_audit_state_t *asp = lxzd->lxzd_audit_state;
+	lx_audit_status_t snap;
+
+	ASSERT(asp != NULL);
+
+	bzero(&snap, sizeof (snap));
+
+	mutex_enter(&asp->lxast_lock);
+	snap.lxas_enabled = lxzd->lxzd_audit_enabled;
+	snap.lxas_failure = asp->lxast_failure;
+	snap.lxas_pid = asp->lxast_pid;
+	snap.lxas_rate_limit = asp->lxast_rate_limit;
+	snap.lxas_backlog_limit = asp->lxast_backlog_limit;
+	snap.lxas_backlog_wait_time = asp->lxast_backlog_wait_time;
+	snap.lxas_backlog = asp->lxast_backlog;
+	snap.lxas_lost = asp->lxast_lost;
+	snap.lxas_feature_bitmap = LX_AUDIT_FEATURE_ALL;
+	mutex_exit(&asp->lxast_lock);
+
+	cb(reply, &snap, sizeof (snap));
+}
+
+/*
+ * Apply the audit status settings selected by lxas_mask in the
+ * lx_audit_status_t at 's' ('datalen' bytes long). 'lxsock' and 'cb' are
+ * passed on to lx_audit_set_worker() when the auditd pid is being set.
+ *
+ * Settings are applied in order, so an error part-way through leaves the
+ * earlier settings in place. Returns 0 on success, EINVAL for a short
+ * message or an invalid value, and EPERM when the configuration is locked and
+ * anything other than the pid is being changed.
+ */
+int
+lx_audit_set(void *lxsock, void *s, uint_t datalen,
+    void (*cb)(void *, boolean_t))
+{
+	lx_audit_status_t *statusp = (lx_audit_status_t *)s;
+	lx_zone_data_t *lxzd;
+	lx_audit_state_t *asp;
+	int err = 0;
+
+	/*
+	 * Unfortunately, some user-level code does not send down a full
+	 * lx_audit_status_t structure in the message (e.g. this occurs on
+	 * CentOS7). Only the structure up to, but not including, the embedded
+	 * union is being sent in, apparently because the user-level code was
+	 * built for older versions of the kernel. To accommodate this, the
+	 * last 8 bytes are not required here; the full size is revalidated
+	 * below if LX_AUDIT_STATUS_BACKLOG_WAIT_TIME is set in the mask.
+	 */
+	if (datalen < sizeof (lx_audit_status_t) - 8)
+		return (EINVAL);
+
+	lxzd = ztolxzd(curproc->p_zone);
+	asp = lxzd->lxzd_audit_state;
+	ASSERT(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+
+	/* Once the config is locked, we only allow changing the auditd pid */
+	if (lxzd->lxzd_audit_enabled == LXAE_LOCKED &&
+	    (statusp->lxas_mask & ~LX_AUDIT_STATUS_PID)) {
+		err = EPERM;
+		goto out;
+	}
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_FAILURE) {
+		if (statusp->lxas_failure != LXAE_SILENT &&
+		    statusp->lxas_failure != LXAE_PRINT &&
+		    statusp->lxas_failure != LXAE_PANIC) {
+			err = EINVAL;
+			goto out;
+		}
+		asp->lxast_failure = statusp->lxas_failure;
+	}
+
+	/*
+	 * The process that sets the pid is the daemon, so this is the socket
+	 * we'll write audit records out to.
+	 */
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_PID)
+		lx_audit_set_worker(statusp->lxas_pid, lxsock, cb);
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_RATE_LIMIT)
+		asp->lxast_rate_limit = statusp->lxas_rate_limit;
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_BACKLOG_LIMIT)
+		asp->lxast_backlog_limit = statusp->lxas_backlog_limit;
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_BACKLOG_WAIT_TIME) {
+		/*
+		 * Only the shorter struct was validated on entry, so the full
+		 * struct size must be confirmed before reading this member.
+		 */
+		if (datalen < sizeof (lx_audit_status_t)) {
+			err = EINVAL;
+			goto out;
+		}
+		asp->lxast_backlog_wait_time = statusp->lxas_backlog_wait_time;
+	}
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_LOST)
+		asp->lxast_lost = statusp->lxas_lost;
+
+	if (statusp->lxas_mask & LX_AUDIT_STATUS_ENABLED) {
+		switch (statusp->lxas_enabled) {
+		case 0:
+			lxzd->lxzd_audit_enabled = LXAE_DISABLED;
+			break;
+		case 1:
+			lxzd->lxzd_audit_enabled = LXAE_ENABLED;
+			break;
+		case 2:
+			lxzd->lxzd_audit_enabled = LXAE_LOCKED;
+			break;
+		default:
+			err = EINVAL;
+			goto out;
+		}
+	}
+
+out:
+	mutex_exit(&asp->lxast_lock);
+	return (err);
+}
+
+/*
+ * Detach the audit daemon from the zone's audit state: forget its socket and
+ * pid and, if a worker thread exists, tell it to exit and wait for it.
+ *
+ * 's' is the daemon's socket, or NULL to use whichever socket is currently
+ * registered; a non-NULL 's' must be the registered socket. If there is a
+ * socket, 'cb' is invoked on it with B_FALSE while lxast_lock is held.
+ */
+void
+lx_audit_stop_worker(void *s, void (*cb)(void *, boolean_t))
+{
+	lx_audit_state_t *asp = ztolxzd(curzone)->lxzd_audit_state;
+	kt_did_t worker_did = 0;
+
+	ASSERT(asp != NULL);
+
+	mutex_enter(&asp->lxast_lock);
+
+	if (s != NULL) {
+		VERIFY(s == asp->lxast_sock);
+	} else {
+		s = asp->lxast_sock;
+	}
+
+	asp->lxast_pid = 0;
+	asp->lxast_sock = NULL;
+
+	if (asp->lxast_worker != NULL) {
+		/* Note the thread's id so we can join it after unlocking. */
+		worker_did = asp->lxast_worker->t_did;
+		asp->lxast_worker = NULL;
+		asp->lxast_exit = B_TRUE;
+		cv_signal(&asp->lxast_worker_cv);
+	}
+
+	if (s != NULL)
+		cb(s, B_FALSE);
+
+	mutex_exit(&asp->lxast_lock);
+
+	/* Wait for the worker with the lock dropped. */
+	if (worker_did != 0)
+		thread_join(worker_did);
+}
+
+/*
+ * Lazily allocate the zone's audit state; called when an audit netlink
+ * message is received. If the zone already has audit state this is a no-op.
+ *
+ * When the state is newly created we also cache 'cb', the netlink callback
+ * the audit worker thread uses to send messages up to the auditd, unless a
+ * callback has already been cached.
+ */
+void
+lx_audit_init(int (*cb)(void *, uint_t, const char *, uint_t))
+{
+	lx_zone_data_t *lxzd = ztolxzd(curzone);
+	lx_audit_state_t *state;
+
+	mutex_enter(&lxzd->lxzd_lock);
+
+	if (lxzd->lxzd_audit_state != NULL) {
+		/* Already initialized for this zone. */
+		mutex_exit(&lxzd->lxzd_lock);
+		return;
+	}
+
+	state = kmem_zalloc(sizeof (lx_audit_state_t), KM_SLEEP);
+
+	/* Defaults for the tunables that are not zero. */
+	state->lxast_failure = LXAE_PRINT;
+	state->lxast_backlog_limit = LX_AUDIT_DEF_BACKLOG_LIMIT;
+	state->lxast_backlog_wait_time = LX_AUDIT_DEF_WAIT_TIME;
+
+	mutex_init(&state->lxast_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&state->lxast_worker_cv, NULL, CV_DEFAULT, NULL);
+
+	list_create(&state->lxast_rules, sizeof (lx_audit_rule_ent_t),
+	    offsetof(lx_audit_rule_ent_t, lxare_link));
+	list_create(&state->lxast_ev_queue, sizeof (lx_audit_record_t),
+	    offsetof(lx_audit_record_t, lxar_link));
+
+	/* Publish the fully constructed state. */
+	lxzd->lxzd_audit_state = state;
+
+	mutex_exit(&lxzd->lxzd_lock);
+
+	mutex_enter(&lx_audit_em_lock);
+	if (lx_audit_emit_msg == NULL)
+		lx_audit_emit_msg = cb;
+	mutex_exit(&lx_audit_em_lock);
+}
+
+/*
+ * Forget the cached netlink callback which the audit worker thread uses to
+ * send messages up to the auditd. Called when the netlink module is
+ * unloading. The pointer is cleared under lx_audit_em_lock, the same lock
+ * under which it is installed.
+ */
+void
+lx_audit_cleanup(void)
+{
+	mutex_enter(&lx_audit_em_lock);
+	lx_audit_emit_msg = NULL;
+	mutex_exit(&lx_audit_em_lock);
+}
+
+/*
+ * Tear down the zone's audit state. Called when the zone is being destroyed,
+ * not when auditing is being disabled. Note that zsched has already exited
+ * and any lxast_worker thread has exited.
+ *
+ * The caller must hold lxzd_lock. Any queued audit records and all rules are
+ * freed along with the state itself; this is a no-op if the zone never had
+ * audit state allocated.
+ */
+void
+lx_audit_fini(zone_t *zone)
+{
+	lx_zone_data_t *lxzd = ztolxzd(zone);
+	lx_audit_state_t *asp;
+	lx_audit_record_t *rec;
+	lx_audit_rule_ent_t *ent;
+
+	ASSERT(MUTEX_HELD(&lxzd->lxzd_lock));
+
+	asp = lxzd->lxzd_audit_state;
+	if (asp == NULL)
+		return;
+
+	mutex_enter(&asp->lxast_lock);
+
+	VERIFY(asp->lxast_worker == NULL);
+
+	/* Discard any records which were never delivered. */
+	while ((rec = list_remove_head(&asp->lxast_ev_queue)) != NULL) {
+		kmem_free(rec->lxar_msg, LX_AUDIT_MESSAGE_TEXT_MAX);
+		kmem_free(rec, sizeof (lx_audit_record_t));
+	}
+	list_destroy(&asp->lxast_ev_queue);
+	asp->lxast_backlog = 0;
+	asp->lxast_pid = 0;
+
+	/* Free every rule along with its string buffer and key. */
+	while ((ent = list_remove_head(&asp->lxast_rules)) != NULL) {
+		kmem_free(ent->lxare_buf, ent->lxare_rule.lxar_buflen);
+		if (ent->lxare_key != NULL)
+			kmem_free(ent->lxare_key, strlen(ent->lxare_key) + 1);
+		kmem_free(ent, sizeof (lx_audit_rule_ent_t));
+	}
+	list_destroy(&asp->lxast_rules);
+
+	mutex_exit(&asp->lxast_lock);
+
+	cv_destroy(&asp->lxast_worker_cv);
+	mutex_destroy(&asp->lxast_lock);
+	lxzd->lxzd_audit_state = NULL;
+	kmem_free(asp, sizeof (lx_audit_state_t));
+}
+
+/*
+ * Audit initialization/cleanup when lx brand module is loaded and
+ * unloaded.
+ *
+ * On load, set up lx_audit_em_lock, the mutex taken around accesses to the
+ * cached netlink emit callback.
+ */
+void
+lx_audit_ld(void)
+{
+	mutex_init(&lx_audit_em_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Counterpart of lx_audit_ld(): destroy lx_audit_em_lock when the lx brand
+ * module is unloaded.
+ */
+void
+lx_audit_unld(void)
+{
+	mutex_destroy(&lx_audit_em_lock);
+}