diff options
author | Dan McDonald <danmcd@mnx.io> | 2022-11-16 11:19:58 -0500 |
---|---|---|
committer | Dan McDonald <danmcd@mnx.io> | 2022-11-16 11:19:58 -0500 |
commit | 499546739d4558c1c4e8fd25d572b92ddf77883c (patch) | |
tree | 12f4d01c377962369d0882fe765a16009003ccac | |
parent | cd75df2bda342d38890513e04267a306c9666ad3 (diff) | |
parent | 3cfbf5be38df79575cc7d2705bb059b2feca1332 (diff) | |
download | illumos-joyent-release-20221117.tar.gz |
[illumos-gate merge]release-20221117
commit 3cfbf5be38df79575cc7d2705bb059b2feca1332
15155 WPTS BVT_SMB2Basic_Query_FileNormalizedNameInformation
commit bf4554a2fffed30a37fb5ff4467267a2eaeea07f
15154 WPTS FSA BVT_AlternateDataStream_ListStreams_File
commit 2266458a6b75760fe455b5b5b6ab3421ea102c97
15153 WPTS FileInfo_Query_FileIdInformation_Dir/File fails
commit b219643fc2667abe7d09cda95ef286f8b16dedc6
15150 WPTS FileInfo_Query_FileAccessInformation_DataSuffix
commit 9621f52ed119bdb5188286f4d81d713cf7ce8a08
15127 libnsl: '&' within '^'
commit 4ac713da4ff2c45287699af975f8c98142bbd9d3
14251 operationalize bhyve dirty page tracking
24 files changed, 431 insertions, 226 deletions
diff --git a/usr/src/lib/libnsl/rpc/clnt_vc.c b/usr/src/lib/libnsl/rpc/clnt_vc.c index 77c6d0b290..19112b52e6 100644 --- a/usr/src/lib/libnsl/rpc/clnt_vc.c +++ b/usr/src/lib/libnsl/rpc/clnt_vc.c @@ -133,9 +133,9 @@ struct ct_data { ushort_t ct_blocking_mode; uint_t ct_bufferSize; /* Total size of the buffer. */ uint_t ct_bufferPendingSize; /* Size of unsent data. */ - char *ct_buffer; /* Pointer to the buffer. */ - char *ct_bufferWritePtr; /* Ptr to the first free byte. */ - char *ct_bufferReadPtr; /* Ptr to the first byte of data. */ + char *ct_buffer; /* Pointer to the buffer. */ + char *ct_bufferWritePtr; /* Ptr to the first free byte. */ + char *ct_bufferReadPtr; /* Ptr to the first byte of data. */ }; struct nb_reg_node { @@ -216,7 +216,7 @@ set_blocking_connection(struct ct_data *ct, bool_t blocking) */ CLIENT * clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog, - const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz) + const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz) { return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz, recvsz, NULL)); @@ -235,7 +235,7 @@ clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog, */ CLIENT * _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog, - rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp) + rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp) { CLIENT *cl; /* client handle */ struct ct_data *ct; /* private data */ @@ -395,7 +395,6 @@ _set_tcp_conntime(int fd, int optval) int *ip; char buf[TCPOPT_BUFSIZE]; - /* LINTED pointer cast */ opt = (struct opthdr *)buf; opt->level = IPPROTO_TCP; opt->name = TCP_CONN_ABORT_THRESHOLD; @@ -404,7 +403,6 @@ _set_tcp_conntime(int fd, int optval) req.flags = T_NEGOTIATE; req.opt.len = sizeof (struct opthdr) + opt->len; req.opt.buf = (char *)opt; - /* LINTED pointer cast */ ip = (int *)((char *)buf + sizeof (struct opthdr)); *ip = optval; @@ -429,7 +427,6 @@ _get_tcp_conntime(int fd) int *ip, retval; char buf[TCPOPT_BUFSIZE]; - /* LINTED pointer cast */ opt = (struct opthdr *)buf; opt->level = IPPROTO_TCP; opt->name = TCP_CONN_ABORT_THRESHOLD; @@ -438,7 +435,6 @@ _get_tcp_conntime(int fd) req.flags = T_CURRENT; req.opt.len = sizeof (struct opthdr) + opt->len; req.opt.buf = (char *)opt; - /* LINTED pointer cast */ ip = (int *)((char *)buf + sizeof (struct opthdr)); *ip = 0; @@ -449,7 +445,6 @@ _get_tcp_conntime(int fd) return (-1); } - /* LINTED pointer cast */ ip = (int *)((char *)buf + sizeof (struct opthdr)); retval = *ip; return (retval); @@ -485,7 +480,6 @@ set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct, /* * Connect only if state is IDLE and svcaddr known */ -/* LINTED pointer alignment */ rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR); if (rcvcall == NULL) { rpc_createerr.cf_stat = RPC_TLIERROR; @@ -642,14 +636,12 @@ set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct, static enum clnt_stat clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr, - xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout) + xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout) { -/* LINTED pointer alignment */ struct ct_data *ct = (struct ct_data *)cl->cl_private; XDR *xdrs = &(ct->ct_xdrs); struct rpc_msg reply_msg; uint32_t x_id; -/* LINTED pointer alignment */ uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall); /* yuk */ bool_t shipnow; int refreshes = 2; @@ -702,7 +694,6 @@ call_again: return (rpc_callerr.re_status); } } else { -/* LINTED pointer alignment */ uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos]; IXDR_PUT_U_INT32(u, proc); if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall, @@ -810,11 +801,9 @@ call_again: static enum clnt_stat clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr) { -/* LINTED pointer alignment */ struct ct_data *ct = (struct ct_data *)cl->cl_private; XDR *xdrs = &(ct->ct_xdrs); uint32_t x_id; -/* LINTED pointer alignment */ uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall); /* yuk */ if (rpc_fd_lock(vctbl, ct->ct_fd)) { @@ -847,7 +836,6 @@ clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr) return (rpc_callerr.re_status); } } else { -/* LINTED pointer alignment */ uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos]; IXDR_PUT_U_INT32(u, proc); if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall, @@ -882,7 +870,6 @@ clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp) static bool_t clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr) { -/* LINTED pointer alignment */ struct ct_data *ct = (struct ct_data *)cl->cl_private; XDR *xdrs = &(ct->ct_xdrs); bool_t stat; @@ -899,12 +886,10 @@ clnt_vc_abort(void) { } -/*ARGSUSED*/ static bool_t clnt_vc_control(CLIENT *cl, int request, char *info) { bool_t ret; -/* LINTED pointer alignment */ struct ct_data *ct = (struct ct_data *)cl->cl_private; if (rpc_fd_lock(vctbl, ct->ct_fd)) { @@ -925,9 +910,7 @@ clnt_vc_control(CLIENT *cl, int request, char *info) if (ct->ct_io_mode == RPC_CL_NONBLOCKING) { int res; res = do_flush(ct, (info == NULL || - /* LINTED pointer cast */ *(int *)info == RPC_CL_DEFAULT_FLUSH)? - /* LINTED pointer cast */ ct->ct_blocking_mode: *(int *)info); ret = (0 == res); } else { @@ -944,31 +927,25 @@ clnt_vc_control(CLIENT *cl, int request, char *info) } switch (request) { case CLSET_TIMEOUT: -/* LINTED pointer alignment */ if (time_not_ok((struct timeval *)info)) { rpc_fd_unlock(vctbl, ct->ct_fd); return (FALSE); } -/* LINTED pointer alignment */ ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info); ct->ct_waitset = TRUE; break; case CLGET_TIMEOUT: -/* LINTED pointer alignment */ ((struct timeval *)info)->tv_sec = ct->ct_wait / 1000; -/* LINTED pointer alignment */ ((struct timeval *)info)->tv_usec = (ct->ct_wait % 1000) * 1000; break; case CLGET_SERVER_ADDR: /* For compatibility only */ (void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len); break; case CLGET_FD: -/* LINTED pointer alignment */ *(int *)info = ct->ct_fd; break; case CLGET_SVC_ADDR: /* The caller should not free this memory area */ -/* LINTED pointer alignment */ *(struct netbuf *)info = ct->ct_addr; break; case CLSET_SVC_ADDR: /* set to new address */ @@ -999,12 +976,10 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * first element in the call structure * This will get the xid of the PREVIOUS call */ -/* LINTED pointer alignment */ *(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall); break; case CLSET_XID: /* This will set the xid of the NEXT call */ -/* LINTED pointer alignment */ *(uint32_t *)ct->ct_mcall = htonl(*(uint32_t *)info + 1); /* increment by 1 as clnt_vc_call() decrements once */ break; @@ -1015,15 +990,12 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * begining of the RPC header. MUST be changed if the * call_struct is changed */ -/* LINTED pointer alignment */ *(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT)); break; case CLSET_VERS: -/* LINTED pointer alignment */ *(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) = -/* LINTED pointer alignment */ htonl(*(uint32_t *)info); break; @@ -1034,20 +1006,16 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * begining of the RPC header. MUST be changed if the * call_struct is changed */ -/* LINTED pointer alignment */ *(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT)); break; case CLSET_PROG: -/* LINTED pointer alignment */ *(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) = -/* LINTED pointer alignment */ htonl(*(uint32_t *)info); break; case CLSET_IO_MODE: - /* LINTED pointer cast */ if (!set_io_mode(ct, *(int *)info)) { rpc_fd_unlock(vctbl, ct->ct_fd); return (FALSE); @@ -1055,19 +1023,16 @@ clnt_vc_control(CLIENT *cl, int request, char *info) break; case CLSET_FLUSH_MODE: /* Set a specific FLUSH_MODE */ - /* LINTED pointer cast */ if (!set_flush_mode(ct, *(int *)info)) { rpc_fd_unlock(vctbl, ct->ct_fd); return (FALSE); } break; case CLGET_FLUSH_MODE: - /* LINTED pointer cast */ *(rpcflushmode_t *)info = ct->ct_blocking_mode; break; case CLGET_IO_MODE: - /* LINTED pointer cast */ *(rpciomode_t *)info = ct->ct_io_mode; break; @@ -1076,7 +1041,6 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * Returns the current amount of memory allocated * to pending requests */ - /* LINTED pointer cast */ *(int *)info = ct->ct_bufferPendingSize; break; @@ -1090,11 +1054,9 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * If the new size is equal to the current size, * there is nothing to do. */ - /* LINTED pointer cast */ if (ct->ct_bufferSize == *(uint_t *)info) break; - /* LINTED pointer cast */ ct->ct_bufferSize = *(uint_t *)info; if (ct->ct_buffer) { free(ct->ct_buffer); @@ -1108,7 +1070,6 @@ clnt_vc_control(CLIENT *cl, int request, char *info) * Returns the size of buffer allocated * to pending requests */ - /* LINTED pointer cast */ *(uint_t *)info = ct->ct_bufferSize; break; @@ -1123,7 +1084,6 @@ clnt_vc_control(CLIENT *cl, int request, char *info) static void clnt_vc_destroy(CLIENT *cl) { -/* LINTED pointer alignment */ struct ct_data *ct = (struct ct_data *)cl->cl_private; int ct_fd = ct->ct_fd; @@ -1675,12 +1635,18 @@ do_flush(struct ct_data *ct, uint_t flush_mode) * Non blocking send. */ +/* + * Test if this is last fragment. See comment in front of xdr_rec.c + * for details. + */ +#define LAST_FRAG(x) ((ntohl(*(uint32_t *)x) & (1U << 31)) == (1U << 31)) + static int nb_send(struct ct_data *ct, void *buff, unsigned int nBytes) { int result; - if (!(ntohl(*(uint32_t *)buff) & 2^31)) { + if (!LAST_FRAG(buff)) { return (-1); } diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index 921e0aca80..b59d10ba67 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -62,7 +62,8 @@ tests = [ 'rdmsr', 'wrmsr', 'triple_fault', - 'exit_paging' + 'exit_paging', + 'page_dirty' ] [/opt/bhyve-tests/tests/viona] diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/common/common.c index ca34dc8cb2..ca34dc8cb2 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.c +++ b/usr/src/test/bhyve-tests/tests/common/common.c diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/common/common.h index a1147395ef..a1147395ef 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.h +++ b/usr/src/test/bhyve-tests/tests/common/common.h diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c index 7d27cf194d..42df592bc8 100644 --- a/usr/src/test/bhyve-tests/tests/common/in_guest.c +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c @@ -123,7 +123,7 @@ populate_desc_tables(struct vmctx *ctx) } -static void +void test_cleanup(bool is_failure) { if (test_vmctx != NULL) { @@ -144,6 +144,7 @@ test_cleanup(bool is_failure) if (!is_failure || !keep_on_fail) { vm_destroy(test_vmctx); } + test_name = NULL; test_vmctx = NULL; } } @@ -309,6 +310,12 @@ load_payload(struct vmctx *ctx) struct vmctx * test_initialize(const char *tname) { + return (test_initialize_flags(tname, 0)); +} + +struct vmctx * +test_initialize_flags(const char *tname, uint64_t create_flags) +{ char vm_name[VM_MAX_NAMELEN]; int err; struct vmctx *ctx; @@ -320,7 +327,7 @@ test_initialize(const char *tname) (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", test_name, getpid()); - err = vm_create(vm_name, 0); + err = vm_create(vm_name, create_flags); if (err != 0) { test_fail_errno(err, "Could not create VM"); } diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.h b/usr/src/test/bhyve-tests/tests/common/in_guest.h index 8d6e04a6da..fdacd6f540 100644 --- a/usr/src/test/bhyve-tests/tests/common/in_guest.h +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.h @@ -19,6 +19,8 @@ #include "payload_common.h" struct vmctx *test_initialize(const char *); +struct vmctx *test_initialize_flags(const char *, uint64_t); +void test_cleanup(bool); void test_fail_errno(int err, const char *msg); void test_fail_msg(const char *fmt, ...); void test_fail_vmexit(const struct vm_exit *vexit); diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/Makefile b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile index f01a9c3bee..88f4d17119 100644 --- a/usr/src/test/bhyve-tests/tests/inst_emul/Makefile +++ b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile @@ -32,7 +32,7 @@ CPAYLOADS = cpuid PAYLOADS = $(PROG) include ../Makefile.in_guest -COMMON_OBJS = in_guest.o +COMMON_OBJS = in_guest.o common.o CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) payload_utils.o CLOBBERFILES = $(PROG) diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c b/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c index 2e3a06bf47..36b9721701 100644 --- a/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c +++ b/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c @@ -29,6 +29,7 @@ #include <sys/vmm_dev.h> #include <vmmapi.h> +#include "common.h" #include "in_guest.h" #define PAGE_SZ 4096 @@ -70,6 +71,73 @@ count_dirty_pages(const uint8_t *bitmap) return (count); } +void +check_supported(const char *test_suite_name) +{ + char name[VM_MAX_NAMELEN]; + int err; + + name_test_vm(test_suite_name, name); + + err = vm_create(name, VCF_TRACK_DIRTY); + if (err == 0) { + /* + * We created the VM successfully, so we know that dirty page + * tracking is supported. + */ + err = destroy_instance(test_suite_name); + if (err != 0) { + (void) fprintf(stderr, + "Could not destroy VM: %s\n", strerror(errno)); + (void) printf("FAIL %s\n", test_suite_name); + exit(EXIT_FAILURE); + } + } else if (errno == ENOTSUP) { + (void) printf( + "Skipping test: dirty page tracking not supported\n"); + (void) printf("PASS %s\n", test_suite_name); + exit(EXIT_SUCCESS); + } else { + /* + * Ignore any other errors, they'll be caught by subsequent + * test routines. + */ + } +} + +void +test_dirty_tracking_disabled(const char *test_suite_name) +{ + struct vmctx *ctx = NULL; + int err; + + uint8_t dirty_bitmap[DIRTY_BITMAP_SZ] = { 0 }; + struct vmm_dirty_tracker track = { + .vdt_start_gpa = 0, + .vdt_len = MEM_TOTAL_SZ, + .vdt_pfns = (void *)dirty_bitmap, + }; + + /* Create VM without VCF_TRACK_DIRTY flag */ + ctx = test_initialize_flags(test_suite_name, 0); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + /* Try to query for dirty pages */ + err = ioctl(vm_get_device_fd(ctx), VM_TRACK_DIRTY_PAGES, &track); + if (err == 0) { + test_fail_msg("VM_TRACK_DIRTY_PAGES succeeded unexpectedly\n"); + } else if (errno != EPERM) { + test_fail_errno(errno, + "VM_TRACK_DIRTY_PAGES failed with unexpected error"); + } + + test_cleanup(false); +} + int main(int argc, char *argv[]) { @@ -77,12 +145,13 @@ main(int argc, char *argv[]) struct vmctx *ctx = NULL; int err; - ctx = test_initialize(test_suite_name); + /* Skip test if CPU doesn't support HW A/D tracking */ + check_supported(test_suite_name); + + /* Test for expected error with dirty tracking disabled */ + test_dirty_tracking_disabled(test_suite_name); - /* Until #14251 is fixed, warn the user of the test requirement */ - (void) fprintf(stderr, - "Ensure that 'gpt_track_dirty' is set to 1 via mdb -kw\n" - "The reasoning is described in illumos #14251\n"); + ctx = test_initialize_flags(test_suite_name, VCF_TRACK_DIRTY); err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); if (err != 0) { diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile index 3b0528644c..658a6b6b64 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/Makefile +++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile @@ -50,7 +50,8 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ $(CPPFLAGS.master) \ -I$(SRC)/uts/intel/io/vmm \ - -I$(SRC)/uts/intel + -I$(SRC)/uts/intel \ + -I../common $(PROG) := LDLIBS += -lvmmapi all: $(PROG) @@ -72,6 +73,10 @@ $(TESTDIR): $(TESTDIR)/%: % $(INS.file) +%.o: ../common/%.c + $(COMPILE.c) -o $@ $^ + $(POST_PROCESS) + %: %.o $(LINK.c) -o $@ $< $(COMMON_OBJS) $(LDLIBS) $(POST_PROCESS) diff --git a/usr/src/uts/common/fs/smbsrv/smb2_qinfo_file.c b/usr/src/uts/common/fs/smbsrv/smb2_qinfo_file.c index 00198aa31f..dcfd771baa 100644 --- a/usr/src/uts/common/fs/smbsrv/smb2_qinfo_file.c +++ b/usr/src/uts/common/fs/smbsrv/smb2_qinfo_file.c @@ -32,6 +32,7 @@ static uint32_t smb2_qif_internal(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_ea_size(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_access(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_name(smb_request_t *, smb_queryinfo_t *); +static uint32_t smb2_qif_normalized_name(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_position(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_full_ea(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_mode(smb_request_t *, smb_queryinfo_t *); @@ -45,6 +46,7 @@ static uint32_t smb2_qif_pipe_rem(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_compr(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_opens(smb_request_t *, smb_queryinfo_t *); static uint32_t smb2_qif_tags(smb_request_t *, smb_queryinfo_t *); +static uint32_t smb2_qif_id_info(smb_request_t *, smb_queryinfo_t *); uint32_t @@ -80,6 +82,7 @@ smb2_qinfo_file(smb_request_t *sr, smb_queryinfo_t *qi) break; case FileNameInformation: + case FileNormalizedNameInformation: getname = B_TRUE; break; @@ -99,6 +102,11 @@ smb2_qinfo_file(smb_request_t *sr, smb_queryinfo_t *qi) case FileNetworkOpenInformation: mask = SMB_AT_BASIC | SMB_AT_STANDARD; + break; + + case FileIdInformation: + mask = SMB_AT_NODEID; + break; default: break; @@ -141,6 +149,9 @@ smb2_qinfo_file(smb_request_t *sr, smb_queryinfo_t *qi) case FileNameInformation: status = smb2_qif_name(sr, qi); break; + case FileNormalizedNameInformation: + status = smb2_qif_normalized_name(sr, qi); + break; case FilePositionInformation: status = smb2_qif_position(sr, qi); break; @@ -180,6 +191,9 @@ smb2_qinfo_file(smb_request_t *sr, smb_queryinfo_t *qi) case FileAttributeTagInformation: status = smb2_qif_tags(sr, qi); break; + case FileIdInformation: + status = smb2_qif_id_info(sr, qi); + break; default: status = NT_STATUS_INVALID_INFO_CLASS; break; @@ -392,15 +406,51 @@ smb2_qif_access(smb_request_t *sr, smb_queryinfo_t *qi) static uint32_t smb2_qif_name(smb_request_t *sr, smb_queryinfo_t *qi) { + char *name; + uint32_t nlen; int rc; - ASSERT(qi->qi_namelen > 0); + /* SMB2 leaves off the leading / */ + nlen = qi->qi_namelen; + name = qi->qi_name; + if (qi->qi_name[0] == '\\') { + name++; + nlen -= 2; + } rc = smb_mbc_encodef( &sr->raw_data, "llU", 0, /* FileIndex (l) */ - qi->qi_namelen, /* l */ - qi->qi_name); /* U */ + nlen, /* l */ + name); /* U */ + if (rc != 0) + return (NT_STATUS_BUFFER_OVERFLOW); + + return (0); +} + +/* + * FileNormalizedNameInformation + */ +static uint32_t +smb2_qif_normalized_name(smb_request_t *sr, smb_queryinfo_t *qi) +{ + char *name; + uint32_t nlen; + int rc; + + /* SMB2 leaves off the leading / */ + nlen = qi->qi_namelen; + name = qi->qi_name; + if (qi->qi_name[0] == '\\') { + name++; + nlen -= 2; + } + + rc = smb_mbc_encodef( + &sr->raw_data, "lU", + nlen, /* l */ + name); /* U */ if (rc != 0) return (NT_STATUS_BUFFER_OVERFLOW); @@ -645,3 +695,46 @@ smb2_qif_tags(smb_request_t *sr, smb_queryinfo_t *qi) return (0); } + +/* + * FileIdInformation + * + * Returns a A FILE_ID_INFORMATION + * VolumeSerialNumber (8 bytes) + * FileId (16 bytes) + * + * Take the volume serial from the share root, + * and compose the FileId from the nodeid and fsid + * of the file (in case we crossed mounts) + */ +static uint32_t +smb2_qif_id_info(smb_request_t *sr, smb_queryinfo_t *qi) +{ + smb_attr_t *sa = &qi->qi_attr; + smb_ofile_t *of = sr->fid_ofile; + smb_tree_t *tree = sr->tid_tree; + vfs_t *f_vfs; // file + vfs_t *s_vfs; // share + uint64_t nodeid; + int rc; + + ASSERT((sa->sa_mask & SMB_AT_NODEID) != 0); + if (of->f_ftype != SMB_FTYPE_DISK) + return (NT_STATUS_INVALID_INFO_CLASS); + + s_vfs = SMB_NODE_VFS(tree->t_snode); + f_vfs = SMB_NODE_VFS(of->f_node); + nodeid = (uint64_t)sa->sa_vattr.va_nodeid; + + rc = smb_mbc_encodef( + &sr->raw_data, "llqll", + s_vfs->vfs_fsid.val[0], /* l */ + s_vfs->vfs_fsid.val[1], /* l */ + nodeid, /* q */ + f_vfs->vfs_fsid.val[0], /* l */ + f_vfs->vfs_fsid.val[1]); /* l */ + if (rc != 0) + return (NT_STATUS_INFO_LENGTH_MISMATCH); + + return (0); +} diff --git a/usr/src/uts/common/fs/smbsrv/smb_pathname.c b/usr/src/uts/common/fs/smbsrv/smb_pathname.c index 3dd99c9a61..5edbecb733 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_pathname.c +++ b/usr/src/uts/common/fs/smbsrv/smb_pathname.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. - * Copyright 2021 RackTop Systems, Inc. + * Copyright 2022 RackTop Systems, Inc. */ #include <smbsrv/smb_kproto.h> @@ -804,47 +804,99 @@ smb_pathname_init(smb_request_t *sr, smb_pathname_t *pn, char *path) /* parse pn->pn_path into its constituent parts */ pname = pn->pn_path; - fname = strrchr(pn->pn_path, '\\'); - if (fname) { + /* + * Split the string between the directory and filename. + * Either part may be empty. + * + * Fill in pn->pn_pname (the path name) + */ + fname = strrchr(pname, '\\'); + if (fname != NULL) { if (fname == pname) { + /* + * Last '/' is at start of string. + * No directory part (dir is root) + */ pn->pn_pname = NULL; } else { + /* + * Directory part ends at the last '/' + * Temporarily truncate and copy + */ *fname = '\0'; pn->pn_pname = smb_pathname_strdup(sr, pname); *fname = '\\'; } ++fname; + /* fname is just after the '/' */ } else { + /* + * No '/' at all in the string. + * It's all filename + */ fname = pname; pn->pn_pname = NULL; } - if (fname[0] == '\0') { - pn->pn_fname = NULL; + /* + * Find end of the filename part of the string, + * which may be the null terminator, or may be + * the start of the optional :sname suffix. + */ + sname = strchr(fname, ':'); + if (sname == NULL) { + /* + * No :sname suffix. We're done. + */ + pn->pn_fname = smb_pathname_strdup(sr, fname); return; } - if (!smb_is_stream_name(fname)) { + /* + * We have a stream name, and maybe a stream type. + * Can't use smb_is_stream_name(fname) here because + * we need to allow sname="::$DATA" + */ + if (sname == fname) { + /* + * The ":sname" part is at the start of + * the file name, which means that the + * file name is "" and this pathname + * refers to a stream on the directory. + */ + pn->pn_fname = NULL; + } else { + /* + * The filename part ends at the ':' + * Temporarily truncate and copy + */ + *sname = '\0'; pn->pn_fname = smb_pathname_strdup(sr, fname); - return; + *sname = ':'; } /* - * find sname and stype in fname. - * sname can't be NULL smb_is_stream_name checks this + * Special case "::$DATA" which "points to" + * the "unnamed" stream (the file itself). + * Basically ignore the "::$DATA" */ - sname = strchr(fname, ':'); - if (sname == fname) - fname = NULL; - else { + if (strcasecmp(sname, "::$DATA") == 0) { + ASSERT(sname >= pname && + sname < (pname + strlen(pname))); *sname = '\0'; - pn->pn_fname = - smb_pathname_strdup(sr, fname); - *sname = ':'; + return; } + /* + * sname points to ":sname:stype" in pn_path + * If ":stype" is missing, add it, then set + * pn_stype to point after the 2nd ':' + * + * Caller knows pn_stype is NOT allocated. + * Allocations here are free'd via smb_srm_fini + */ pn->pn_sname = smb_pathname_strdup(sr, sname); pn->pn_stype = strchr(pn->pn_sname + 1, ':'); if (pn->pn_stype) { @@ -1065,6 +1117,9 @@ smb_validate_dirname(smb_request_t *sr, smb_pathname_t *pn) } } + if (pn->pn_sname) + return (smb_validate_stream_name(sr, pn)); + return (B_TRUE); } @@ -1230,14 +1285,6 @@ smb_validate_stream_name(smb_request_t *sr, smb_pathname_t *pn) ASSERT(pn); ASSERT(pn->pn_sname); - if ((!(pn->pn_sname)) || - ((pn->pn_pname) && !(pn->pn_fname))) { - smbsr_error(sr, NT_STATUS_OBJECT_NAME_INVALID, - ERRDOS, ERROR_INVALID_NAME); - return (B_FALSE); - } - - if (pn->pn_stype != NULL) { for (i = 0; i < sizeof (strmtype) / sizeof (strmtype[0]); ++i) { if (strcasecmp(pn->pn_stype, strmtype[i]) == 0) diff --git a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c index 6baef805aa..6f903cb2e0 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c +++ b/usr/src/uts/common/fs/smbsrv/smb_query_fileinfo.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2022 Tintri by DDN, Inc. All rights reserved. + * Copyright 2022 RackTop Systems, Inc. */ #include <smbsrv/smb_kproto.h> @@ -88,8 +89,6 @@ static boolean_t smb_query_pipe_valid_infolev(smb_request_t *, uint16_t); static int smb_query_encode_response(smb_request_t *, smb_xa_t *, uint16_t, smb_queryinfo_t *); -static boolean_t smb_stream_fits(smb_request_t *, mbuf_chain_t *, - char *, uint32_t); static int smb_query_pathname(smb_request_t *, smb_node_t *, boolean_t, smb_queryinfo_t *); @@ -599,10 +598,9 @@ smb_query_encode_response(smb_request_t *sr, smb_xa_t *xa, * those streams but there should not be an entry for the unnamed * stream. * - * Note that the stream name lengths exclude the null terminator but - * the field lengths (i.e. next offset calculations) need to include - * the null terminator and be padded to a multiple of 8 bytes. The - * last entry does not seem to need any padding. + * Note that the stream names are NOT null terminated, and the lengths + * reflect that. Entries are aligned on 8-byte boundaries with padding + * and the "next offset" tells where the next entry begins. * * If an error is encountered when trying to read the stream entries * (smb_odir_read_streaminfo) it is treated as if there are no [more] @@ -619,19 +617,14 @@ smb_query_stream_info(smb_request_t *sr, mbuf_chain_t *mbc, smb_queryinfo_t *qinfo) { char *stream_name; - uint32_t next_offset; uint32_t stream_nlen; - uint32_t pad; - u_offset_t datasz, allocsz; - smb_streaminfo_t *sinfo, *sinfo_next; + smb_streaminfo_t *sinfo; int rc = 0; - boolean_t done = B_FALSE; - boolean_t eos = B_FALSE; + int prev_ent_off; + int cur_ent_off; smb_odir_t *od = NULL; uint32_t status = 0; - smb_node_t *fnode = qinfo->qi_node; - smb_attr_t *attr = &qinfo->qi_attr; ASSERT(fnode); if (SMB_IS_STREAM(fnode)) { @@ -641,10 +634,50 @@ smb_query_stream_info(smb_request_t *sr, mbuf_chain_t *mbc, ASSERT(fnode->n_magic == SMB_NODE_MAGIC); ASSERT(fnode->n_state != SMB_NODE_STATE_DESTROYING); - sinfo = kmem_alloc(sizeof (smb_streaminfo_t), KM_SLEEP); - sinfo_next = kmem_alloc(sizeof (smb_streaminfo_t), KM_SLEEP); - datasz = attr->sa_vattr.va_size; - allocsz = attr->sa_allocsz; + sinfo = smb_srm_alloc(sr, sizeof (smb_streaminfo_t)); + + /* + * Keep track of where the last entry starts so we can + * come back and poke the NextEntryOffset field. Also, + * after enumeration finishes, the caller uses this to + * poke the last entry again with zero to mark it as + * the end of the enumeration. + */ + ASSERT(mbc->chain_offset == 0); + cur_ent_off = prev_ent_off = 0; + + /* + * If the unnamed stream is a file, encode an entry for + * the unnamed stream. Note we can't generally get the + * size or allocsize from qi_attr because those may be + * from one of the named streams. Get the sizes. + */ + if (smb_node_is_file(fnode)) { + smb_attr_t attr; + uint64_t datasz, allocsz; + + bzero(&attr, sizeof (attr)); + attr.sa_mask = SMB_AT_SIZE | SMB_AT_ALLOCSZ; + rc = smb_node_getattr(sr, fnode, sr->user_cr, NULL, &attr); + if (rc != 0) { + status = smb_errno2status(rc); + goto out; + } + + stream_name = "::$DATA"; + stream_nlen = smb_ascii_or_unicode_strlen(sr, stream_name); + datasz = attr.sa_vattr.va_size; + allocsz = attr.sa_allocsz; + /* Leave NextEntryOffset=0, set later. */ + rc = smb_mbc_encodef(mbc, "%llqq#u", sr, + 0, stream_nlen, datasz, allocsz, + stream_nlen, stream_name); + if (rc != 0) { + /* Ran out of room. */ + status = NT_STATUS_BUFFER_OVERFLOW; + goto out; + } + } status = smb_odir_openat(sr, fnode, &od, B_TRUE); switch (status) { @@ -655,88 +688,61 @@ smb_query_stream_info(smb_request_t *sr, mbuf_chain_t *mbc, case NT_STATUS_NOT_SUPPORTED: /* No streams. */ status = 0; - done = B_TRUE; - break; + goto out; default: - return (status); + goto out; } - if (!done) { + for (;;) { + boolean_t eos = B_FALSE; rc = smb_odir_read_streaminfo(sr, od, sinfo, &eos); - if ((rc != 0) || (eos)) - done = B_TRUE; - } - - /* If not a directory, encode an entry for the unnamed stream. */ - if (qinfo->qi_isdir == 0) { - stream_name = "::$DATA"; - stream_nlen = smb_ascii_or_unicode_strlen(sr, stream_name); - next_offset = SMB_STREAM_ENCODE_FIXED_SZ + stream_nlen + - smb_ascii_or_unicode_null_len(sr); - - /* Can unnamed stream fit in response buffer? */ - if (MBC_ROOM_FOR(mbc, next_offset) == 0) { - done = B_TRUE; - status = NT_STATUS_BUFFER_OVERFLOW; - } else { - /* Can first named stream fit in rsp buffer? */ - if (!done && !smb_stream_fits(sr, mbc, sinfo->si_name, - next_offset)) { - done = B_TRUE; - status = NT_STATUS_BUFFER_OVERFLOW; - } - - if (done) - next_offset = 0; - - (void) smb_mbc_encodef(mbc, "%llqqu", sr, - next_offset, stream_nlen, datasz, allocsz, - stream_name); - } - } - - /* - * If there is no next entry, or there is not enough space in - * the response buffer for the next entry, the next_offset and - * padding are 0. - */ - while (!done) { - stream_nlen = smb_ascii_or_unicode_strlen(sr, sinfo->si_name); - sinfo_next->si_name[0] = 0; - - rc = smb_odir_read_streaminfo(sr, od, sinfo_next, &eos); if ((rc != 0) || (eos)) { - done = B_TRUE; - } else { - next_offset = SMB_STREAM_ENCODE_FIXED_SZ + - stream_nlen + - smb_ascii_or_unicode_null_len(sr); - pad = smb_pad_align(next_offset, 8); - next_offset += pad; - - /* Can next named stream fit in response buffer? */ - if (!smb_stream_fits(sr, mbc, sinfo_next->si_name, - next_offset)) { - done = B_TRUE; - status = NT_STATUS_BUFFER_OVERFLOW; - } + status = 0; + break; // normal termination } - if (done) { - next_offset = 0; - pad = 0; + /* + * We have a directory entry to process. + * Align before encoding. + */ + rc = smb_mbc_put_align(mbc, 8); + if (rc != 0) { + status = NT_STATUS_BUFFER_OVERFLOW; + break; } + cur_ent_off = mbc->chain_offset; - (void) smb_mbc_encodef(mbc, "%llqqu#.", - sr, next_offset, stream_nlen, + /* + * Encode it. + */ + stream_name = sinfo->si_name; + stream_nlen = smb_ascii_or_unicode_strlen(sr, stream_name); + /* Leave NextEntryOffset=0, set later. */ + rc = smb_mbc_encodef(mbc, "%llqq#u", sr, + 0, stream_nlen, sinfo->si_size, sinfo->si_alloc_size, - sinfo->si_name, pad); + stream_nlen, stream_name); + if (rc != 0) { + status = NT_STATUS_BUFFER_OVERFLOW; + break; + } - (void) memcpy(sinfo, sinfo_next, sizeof (smb_streaminfo_t)); + /* + * We succeeded encoding the current entry, so + * fill in NextEntryOffset in the previous entry. + * When listing streams on a file, we're always at + * the 2nd or later entry due to "::$DATA" above. + * However, when listing streams on a directory, + * there might not be previous entry. + */ + if (cur_ent_off > 0) { + (void) smb_mbc_poke(mbc, prev_ent_off, "l", + cur_ent_off - prev_ent_off); + } + prev_ent_off = cur_ent_off; } - kmem_free(sinfo, sizeof (smb_streaminfo_t)); - kmem_free(sinfo_next, sizeof (smb_streaminfo_t)); +out: if (od) { smb_odir_close(od); smb_odir_release(od); @@ -746,33 +752,6 @@ smb_query_stream_info(smb_request_t *sr, mbuf_chain_t *mbc, } /* - * smb_stream_fits - * - * Check if the named stream entry can fit in the response buffer. - * - * Required space = - * offset (size of current entry) - * + SMB_STREAM_ENCODE_FIXED_SIZE - * + length of encoded stream name - * + length of null terminator - * + alignment padding - */ -static boolean_t -smb_stream_fits(smb_request_t *sr, mbuf_chain_t *mbc, - char *name, uint32_t offset) -{ - uint32_t len, pad; - - len = SMB_STREAM_ENCODE_FIXED_SZ + - smb_ascii_or_unicode_strlen(sr, name) + - smb_ascii_or_unicode_null_len(sr); - pad = smb_pad_align(len, 8); - len += pad; - - return (MBC_ROOM_FOR(mbc, offset + len) != 0); -} - -/* * smb_query_fileinfo * * Populate smb_queryinfo_t structure for SMB_FTYPE_DISK diff --git a/usr/src/uts/common/smbsrv/ntifs.h b/usr/src/uts/common/smbsrv/ntifs.h index 7c1d837c08..6057cd95c6 100644 --- a/usr/src/uts/common/smbsrv/ntifs.h +++ b/usr/src/uts/common/smbsrv/ntifs.h @@ -24,6 +24,7 @@ * * Copyright 2017 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright 2022 RackTop Systems, Inc. */ #ifndef _SMBSRV_NTIFS_H @@ -274,6 +275,11 @@ typedef enum _FILE_INFORMATION_CLASS { FileInformationReserved52, /* 52 */ FileInformationReserved53, /* 53 */ FileStandardLinkInformation, /* 54 */ + FileInformationReserved55, /* 55 */ + FileInformationReserved56, /* 56 */ + FileInformationReserved57, /* 57 */ + FileInformationReserved58, /* 58 */ + FileIdInformation, /* 59 */ FileMaximumInformation } FILE_INFORMATION_CLASS; diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h index a425fb53ec..2692c6bec8 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h @@ -11,7 +11,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_GPT_H @@ -52,6 +52,7 @@ enum vmm_gpt_node_level { * cleared. Returns non-zero if the previous value of the bit was set. * vpeo_get_pmtp: Generate a properly formatted PML4 (EPTP/nCR3), given the root * PFN for the GPT. + * vpeo_hw_ad_supported: Returns true IFF hardware A/D tracking is supported. */ typedef struct vmm_pte_ops vmm_pte_ops_t; struct vmm_pte_ops { @@ -62,7 +63,8 @@ struct vmm_pte_ops { uint_t (*vpeo_pte_prot)(uint64_t); uint_t (*vpeo_reset_dirty)(uint64_t *, bool); uint_t (*vpeo_reset_accessed)(uint64_t *, bool); - uint64_t (*vpeo_get_pmtp)(pfn_t); + uint64_t (*vpeo_get_pmtp)(pfn_t, bool); + bool (*vpeo_hw_ad_supported)(void); }; extern vmm_pte_ops_t ept_pte_ops; @@ -82,7 +84,7 @@ void vmm_gpt_vacate_region(vmm_gpt_t *, uint64_t, uint64_t); bool vmm_gpt_map(vmm_gpt_t *, uint64_t, pfn_t, uint_t, uint8_t); bool vmm_gpt_unmap(vmm_gpt_t *, uint64_t); size_t vmm_gpt_unmap_region(vmm_gpt_t *, uint64_t, uint64_t); -uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *); +uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *, bool); bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t *, pfn_t *, uint_t *); uint_t vmm_gpt_reset_accessed(vmm_gpt_t *, uint64_t *, bool); diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index 1ef2d48adf..8af6df5c66 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -138,7 +138,7 @@ bool vm_is_paused(struct vm *); /* * APIs that race against hardware. */ -void vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); +int vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); /* * APIs that modify the guest memory map require all vcpus to be frozen. diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h index 57d0ec8b00..6edba02bc5 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_VM_H @@ -40,7 +40,7 @@ vm_client_t *vmspace_client_alloc(vmspace_t *); uint64_t vmspace_table_root(vmspace_t *); uint64_t vmspace_table_gen(vmspace_t *); uint64_t vmspace_resident_count(vmspace_t *); -void vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); +int vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); /* vm_client_t operations */ vm_page_t *vmc_hold(vm_client_t *, uintptr_t, int); diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 136c38c5ab..11f09c2102 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ b/usr/src/uts/intel/io/vmm/vmm.c @@ -551,12 +551,6 @@ vm_init(struct vm *vm, bool create) uint_t cores_per_package = 1; uint_t threads_per_core = 1; -/* - * Debugging tunable to enable dirty-page-tracking. - * (Remains off by default for now) - */ -bool gpt_track_dirty = false; - int vm_create(uint64_t flags, struct vm **retvm) { @@ -570,7 +564,11 @@ vm_create(uint64_t flags, struct vm **retvm) if (!vmm_initialized) return (ENXIO); - vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, gpt_track_dirty); + bool track_dirty = (flags & VCF_TRACK_DIRTY) != 0; + if (track_dirty && !pte_ops->vpeo_hw_ad_supported()) + return (ENOTSUP); + + vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, track_dirty); if (vmspace == NULL) return (ENOMEM); @@ -1357,11 +1355,11 @@ vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec) return (0); } -void +int vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap) { vmspace_t *vms = vm_get_vmspace(vm); - vmspace_track_dirty(vms, gpa, len, bitmap); + return (vmspace_track_dirty(vms, gpa, len, bitmap)); } static void diff --git a/usr/src/uts/intel/io/vmm/vmm_gpt.c b/usr/src/uts/intel/io/vmm/vmm_gpt.c index a542dba055..a464f8e964 100644 --- a/usr/src/uts/intel/io/vmm/vmm_gpt.c +++ b/usr/src/uts/intel/io/vmm/vmm_gpt.c @@ -579,7 +579,8 @@ vmm_gpt_reset_dirty(vmm_gpt_t *gpt, uint64_t *entry, bool on) * Get properly formatted PML4 (EPTP/nCR3) for GPT. */ uint64_t -vmm_gpt_get_pmtp(vmm_gpt_t *gpt) +vmm_gpt_get_pmtp(vmm_gpt_t *gpt, bool track_dirty) { - return (gpt->vgpt_pte_ops->vpeo_get_pmtp(gpt->vgpt_root->vgn_host_pfn)); + const pfn_t root_pfn = gpt->vgpt_root->vgn_host_pfn; + return (gpt->vgpt_pte_ops->vpeo_get_pmtp(root_pfn, track_dirty)); } diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index c66f1ce17a..d89bca5ce0 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -1703,9 +1703,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8; bitmap = kmem_zalloc(len, KM_SLEEP); - vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, + error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, tracker.vdt_len, bitmap); - if (ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { + if (error == 0 && + ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { error = EFAULT; } kmem_free(bitmap, len); diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c index fde4a030ce..c34ec4684e 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -40,6 +40,8 @@ #define EPT_MAX_LEVELS 4 CTASSERT(EPT_MAX_LEVELS <= MAX_GPT_LEVEL); +#define EPTP_FLAG_ACCESSED_DIRTY (1 << 6) + CTASSERT(EPT_R == PROT_READ); CTASSERT(EPT_W == PROT_WRITE); CTASSERT(EPT_X == PROT_EXEC); @@ -120,13 +122,20 @@ ept_reset_accessed(uint64_t *entry, bool on) } static uint64_t -ept_get_pmtp(pfn_t root_pfn) +ept_get_pmtp(pfn_t root_pfn, bool track_dirty) { - /* TODO: enable AD tracking when required */ - return ((root_pfn << PAGESHIFT | + const uint64_t ad_flag = track_dirty ? EPTP_FLAG_ACCESSED_DIRTY : 0; + return ((root_pfn << PAGESHIFT | ad_flag | (EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB)); } +static bool +ept_hw_ad_supported(void) +{ + uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + return ((ept_caps & IA32_VMX_EPT_VPID_HW_AD) != 0); +} + vmm_pte_ops_t ept_pte_ops = { .vpeo_map_table = ept_map_table, .vpeo_map_page = ept_map_page, @@ -136,4 +145,5 @@ vmm_pte_ops_t ept_pte_ops = { .vpeo_reset_dirty = ept_reset_dirty, .vpeo_reset_accessed = ept_reset_accessed, .vpeo_get_pmtp = ept_get_pmtp, + .vpeo_hw_ad_supported = ept_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c index 8b45782d25..fb7afd069b 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -140,11 +140,18 @@ rvi_reset_accessed(uint64_t *entry, bool on) } static uint64_t -rvi_get_pmtp(pfn_t root_pfn) +rvi_get_pmtp(pfn_t root_pfn, bool track_dirty) { return (root_pfn << PAGESHIFT); } +static bool +rvi_hw_ad_supported(void) +{ + return (true); +} + + vmm_pte_ops_t rvi_pte_ops = { .vpeo_map_table = rvi_map_table, .vpeo_map_page = rvi_map_page, @@ -154,4 +161,5 @@ vmm_pte_ops_t rvi_pte_ops = { .vpeo_reset_dirty = rvi_reset_dirty, .vpeo_reset_accessed = rvi_reset_accessed, .vpeo_get_pmtp = rvi_get_pmtp, + .vpeo_hw_ad_supported = rvi_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_vm.c b/usr/src/uts/intel/io/vmm/vmm_vm.c index 42d963a53c..609f034d10 100644 --- a/usr/src/uts/intel/io/vmm/vmm_vm.c +++ b/usr/src/uts/intel/io/vmm/vmm_vm.c @@ -292,9 +292,12 @@ vmspace_resident_count(vmspace_t *vms) return (vms->vms_pages_mapped); } -void +int vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) { + if (!vms->vms_track_dirty) + return (EPERM); + /* * Accumulate dirty bits into the given bit vector. Note that this * races both against hardware writes from running vCPUs and @@ -327,6 +330,8 @@ vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen); } vmspace_hold_exit(vms, true); + + return (0); } static pfn_t @@ -839,7 +844,7 @@ vmspace_client_alloc(vmspace_t *vms) uint64_t vmspace_table_root(vmspace_t *vms) { - return (vmm_gpt_get_pmtp(vms->vms_gpt)); + return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty)); } /* diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h index 45e0fe8f34..8a0b45ca5c 100644 --- a/usr/src/uts/intel/sys/vmm.h +++ b/usr/src/uts/intel/sys/vmm.h @@ -404,6 +404,11 @@ enum vm_create_flags { * rather than attempting to create transient allocations. */ VCF_RESERVOIR_MEM = (1 << 0), + + /* + * Enable dirty page tracking for the guest. + */ + VCF_TRACK_DIRTY = (1 << 1), }; /* diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h index 6fe04a633e..c0d07ad049 100644 --- a/usr/src/uts/intel/sys/vmm_dev.h +++ b/usr/src/uts/intel/sys/vmm_dev.h @@ -402,7 +402,7 @@ struct vm_legacy_cpuid { * best-effort activity. Nothing is to be inferred about the magnitude of a * change when the version is modified. It follows no rules like semver. */ -#define VMM_CURRENT_INTERFACE_VERSION 7 +#define VMM_CURRENT_INTERFACE_VERSION 8 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) |