diff options
author | Jason King <jason.brian.king@gmail.com> | 2021-04-17 09:08:24 +0000 |
---|---|---|
committer | Andy Fiddaman <omnios@citrus-it.co.uk> | 2021-10-07 09:11:03 +0000 |
commit | aa693e996c2928c92cccd8a3efe91373e85a6967 (patch) | |
tree | 23d7431e48a5194bf8ae93968c3caedc6c8bc7a6 | |
parent | 2d2dd8359f765a17f6caaa2d37d86837c0c40915 (diff) | |
download | illumos-gate-aa693e996c2928c92cccd8a3efe91373e85a6967.tar.gz |
13380 Add virtio-9p (aka VirtFS) filesystem sharing to bhyve
Portions contributed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Jorge Schrauwen <sjorge@blackdot.be>
Approved by: Robert Mustacchi <rm@fingolfin.org>
44 files changed, 12810 insertions, 9 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle index bf1856d5f0..3b15aa6700 100644 --- a/exception_lists/cstyle +++ b/exception_lists/cstyle @@ -1357,6 +1357,7 @@ usr/src/cmd/bhyve/pci_lpc.[ch] usr/src/cmd/bhyve/pci_nvme.c usr/src/cmd/bhyve/pci_passthru.c usr/src/cmd/bhyve/pci_uart.c +usr/src/cmd/bhyve/pci_virtio_9p.c usr/src/cmd/bhyve/pci_virtio_block.c usr/src/cmd/bhyve/pci_virtio_console.c usr/src/cmd/bhyve/pci_virtio_net.c @@ -1390,3 +1391,4 @@ usr/src/uts/i86pc/io/vmm/amd/amdvi_*.[ch] usr/src/uts/i86pc/io/vmm/amd/ivrs_*.c usr/src/uts/i86pc/sys/vmm.h usr/src/uts/i86pc/sys/vmm_dev.h +usr/src/lib/lib9p/common/* diff --git a/exception_lists/hdrchk b/exception_lists/hdrchk index fc022b3782..0c9c154ff0 100644 --- a/exception_lists/hdrchk +++ b/exception_lists/hdrchk @@ -433,3 +433,4 @@ usr/src/uts/i86pc/io/vmm/vmm_util.h usr/src/uts/i86pc/io/vmm/x86.h usr/src/uts/i86pc/sys/vmm.h usr/src/uts/i86pc/sys/vmm_dev.h +usr/src/lib/lib9p/common/* diff --git a/exception_lists/packaging b/exception_lists/packaging index 591b4b9711..47acb0988a 100644 --- a/exception_lists/packaging +++ b/exception_lists/packaging @@ -862,6 +862,12 @@ usr/lib/sparcv9/libdwarf.so sparc usr/lib/libdwarf.so # +# lib9p is private +# +usr/include/lib9p.h +usr/lib/amd64/lib9p.so i386 + +# # We're not quite ready to ship ctfconvert and ctfmerge # usr/bin/ctfconvert diff --git a/exception_lists/wscheck b/exception_lists/wscheck index 462546802f..fdebb77910 100644 --- a/exception_lists/wscheck +++ b/exception_lists/wscheck @@ -69,6 +69,7 @@ usr/src/cmd/bhyve/pci_lpc.[ch] usr/src/cmd/bhyve/pci_nvme.c usr/src/cmd/bhyve/pci_passthru.c usr/src/cmd/bhyve/pci_uart.c +usr/src/cmd/bhyve/pci_virtio_9p.c usr/src/cmd/bhyve/pci_virtio_block.c usr/src/cmd/bhyve/pci_virtio_console.c usr/src/cmd/bhyve/pci_virtio_net.c @@ -95,3 +96,4 @@ usr/src/cmd/bhyve/xmsr.[ch] usr/src/cmd/bhyvectl/bhyvectl.c usr/src/contrib/bhyve/* usr/src/lib/libvmmapi/common/vmmapi.[ch] +usr/src/lib/lib9p/common/* diff --git 
a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index bbc966d67f..4e54c6be42 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -56,6 +56,7 @@ SRCS = acpi.c \ pci_nvme.c \ pci_passthru.c \ pci_uart.c \ + pci_virtio_9p.c \ pci_virtio_block.c \ pci_virtio_console.c \ pci_virtio_net.c \ @@ -115,6 +116,7 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ -I$(CONTRIB)/bhyve/dev/usb/controller \ -I$(CONTRIB)/bhyve/dev/mii \ + -I$(SRC)/lib/lib9p/common \ -I$(SRC)/uts/common/io/e1000api \ $(CPPFLAGS.master) \ -I$(SRC)/uts/i86pc/io/vmm \ @@ -128,6 +130,8 @@ pci_nvme.o := SMOFF += kmalloc_wrong_size pci_passthru.o := CERRWARN += -_gcc10=-Wno-address-of-packed-member +pci_virtio_9p.o := SMOFF += kmalloc_wrong_size + pci_xhci.o := CERRWARN += -_gcc10=-Wno-address-of-packed-member SMOFF += all_func_returns,leaks,no_if_block @@ -136,6 +140,7 @@ SMOFF += all_func_returns,leaks,no_if_block CSTD= $(CSTD_GNU99) $(PROG) := LDLIBS += \ + -l9p \ -lsocket \ -lnsl \ -ldlpi \ diff --git a/usr/src/cmd/bhyve/README.sync b/usr/src/cmd/bhyve/README.sync index 4f71c1420e..bec61410ee 100644 --- a/usr/src/cmd/bhyve/README.sync +++ b/usr/src/cmd/bhyve/README.sync @@ -24,12 +24,6 @@ The draft Save/Restore functionality, added in FreeBSD commit yet. It is not built by default in FreeBSD, so we're not interested in taking it until it successfully endures more in-depth testing. -The VirtFS filesystem sharing feature, added in FreeBSD commit -100353cfbf882e23c911300ebd0cb458bd3ee975, has not been synced into illumos bhyve -yet. It depends on the userland lib9p which needs a fair amount of work to -build and run on illumos. The integration of this feature is being tracked in -https://www.illumos.org/issues/13380 - The stub usr/src/compat/bhyve/stdatomic.h file only includes enough glue to satisfy the use of <stdatomic.h> in usr/src/cmd/bhyve/rfb.c, and in particular assumes that atomic variables are sized as an int. 
If other bhyve diff --git a/usr/src/cmd/bhyve/pci_virtio_9p.c b/usr/src/cmd/bhyve/pci_virtio_9p.c new file mode 100644 index 0000000000..b3fdb2db2c --- /dev/null +++ b/usr/src/cmd/bhyve/pci_virtio_9p.c @@ -0,0 +1,406 @@ +/*- + * Copyright (c) 2015 iXsystems Inc. + * Copyright (c) 2017-2018 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * VirtIO filesystem passthrough using 9p protocol. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/linker_set.h> +#include <sys/uio.h> +#ifndef WITHOUT_CAPSICUM +#include <sys/capsicum.h> +#endif + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <pthread.h> + +#include <lib9p.h> +#include <backend/fs.h> + +#include "bhyverun.h" +#include "config.h" +#include "debug.h" +#include "pci_emul.h" +#include "virtio.h" + +#ifndef __FreeBSD__ +#include "privileges.h" +#endif + +#define VT9P_MAX_IOV 128 +#define VT9P_RINGSZ 256 +#define VT9P_MAXTAGSZ 256 +#define VT9P_CONFIGSPACESZ (VT9P_MAXTAGSZ + sizeof(uint16_t)) + +static int pci_vt9p_debug; +#define DPRINTF(params) if (pci_vt9p_debug) printf params +#define WPRINTF(params) printf params + +/* + * Per-device softc + */ +struct pci_vt9p_softc { + struct virtio_softc vsc_vs; + struct vqueue_info vsc_vq; + pthread_mutex_t vsc_mtx; + uint64_t vsc_cfg; + uint64_t vsc_features; + char * vsc_rootpath; + struct pci_vt9p_config * vsc_config; + struct l9p_backend * vsc_fs_backend; + struct l9p_server * vsc_server; + struct l9p_connection * vsc_conn; +}; + +struct pci_vt9p_request { + struct pci_vt9p_softc * vsr_sc; + struct iovec * vsr_iov; + size_t vsr_niov; + size_t vsr_respidx; + size_t vsr_iolen; + uint16_t vsr_idx; +}; + +struct pci_vt9p_config { + uint16_t tag_len; + char tag[0]; +} __attribute__((packed)); + +static int pci_vt9p_send(struct l9p_request *, const struct iovec *, + const size_t, const size_t, void *); +static void pci_vt9p_drop(struct l9p_request *, const struct iovec *, size_t, + void *); +static void pci_vt9p_reset(void *); +static void pci_vt9p_notify(void *, struct vqueue_info *); +static int pci_vt9p_cfgread(void *, int, int, uint32_t *); +static void pci_vt9p_neg_features(void *, uint64_t); + +static struct virtio_consts vt9p_vi_consts = { + "vt9p", /* our name */ + 1, /* we support 1 virtqueue */ + 
VT9P_CONFIGSPACESZ, /* config reg size */ + pci_vt9p_reset, /* reset */ + pci_vt9p_notify, /* device-wide qnotify */ + pci_vt9p_cfgread, /* read virtio config */ + NULL, /* write virtio config */ + pci_vt9p_neg_features, /* apply negotiated features */ + (1 << 0), /* our capabilities */ +}; + + +static void +pci_vt9p_reset(void *vsc) +{ + struct pci_vt9p_softc *sc; + + sc = vsc; + + DPRINTF(("vt9p: device reset requested !\n")); + vi_reset_dev(&sc->vsc_vs); +} + +static void +pci_vt9p_neg_features(void *vsc, uint64_t negotiated_features) +{ + struct pci_vt9p_softc *sc = vsc; + + sc->vsc_features = negotiated_features; +} + +static int +pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval) +{ + struct pci_vt9p_softc *sc = vsc; + void *ptr; + + ptr = (uint8_t *)sc->vsc_config + offset; + memcpy(retval, ptr, size); + return (0); +} + +static int +pci_vt9p_get_buffer(struct l9p_request *req, struct iovec *iov, size_t *niov, + void *arg) +{ + struct pci_vt9p_request *preq = req->lr_aux; + size_t n = preq->vsr_niov - preq->vsr_respidx; + + memcpy(iov, preq->vsr_iov + preq->vsr_respidx, + n * sizeof(struct iovec)); + *niov = n; + return (0); +} + +static int +pci_vt9p_send(struct l9p_request *req, const struct iovec *iov, + const size_t niov, const size_t iolen, void *arg) +{ + struct pci_vt9p_request *preq = req->lr_aux; + struct pci_vt9p_softc *sc = preq->vsr_sc; + + preq->vsr_iolen = iolen; + + pthread_mutex_lock(&sc->vsc_mtx); + vq_relchain(&sc->vsc_vq, preq->vsr_idx, preq->vsr_iolen); + vq_endchains(&sc->vsc_vq, 1); + pthread_mutex_unlock(&sc->vsc_mtx); + free(preq); + return (0); +} + +static void +pci_vt9p_drop(struct l9p_request *req, const struct iovec *iov, size_t niov, + void *arg) +{ + struct pci_vt9p_request *preq = req->lr_aux; + struct pci_vt9p_softc *sc = preq->vsr_sc; + + pthread_mutex_lock(&sc->vsc_mtx); + vq_relchain(&sc->vsc_vq, preq->vsr_idx, 0); + vq_endchains(&sc->vsc_vq, 1); + pthread_mutex_unlock(&sc->vsc_mtx); + free(preq); +} + 
+static void +pci_vt9p_notify(void *vsc, struct vqueue_info *vq) +{ + struct iovec iov[VT9P_MAX_IOV]; + struct pci_vt9p_softc *sc; + struct pci_vt9p_request *preq; + uint16_t idx, n, i; + uint16_t flags[VT9P_MAX_IOV]; + + sc = vsc; + + while (vq_has_descs(vq)) { + n = vq_getchain(vq, &idx, iov, VT9P_MAX_IOV, flags); + preq = calloc(1, sizeof(struct pci_vt9p_request)); +#ifndef __FreeBSD__ + if (preq == NULL) { + EPRINTLN("virtio-9p: allocation failure: %s", + strerror(errno)); + break; + } +#endif + preq->vsr_sc = sc; + preq->vsr_idx = idx; + preq->vsr_iov = iov; + preq->vsr_niov = n; + preq->vsr_respidx = 0; + + /* Count readable descriptors */ + for (i = 0; i < n; i++) { + if (flags[i] & VRING_DESC_F_WRITE) + break; + + preq->vsr_respidx++; + } + + for (int i = 0; i < n; i++) { + DPRINTF(("vt9p: vt9p_notify(): desc%d base=%p, " + "len=%zu, flags=0x%04x\r\n", i, iov[i].iov_base, + iov[i].iov_len, flags[i])); + } + + l9p_connection_recv(sc->vsc_conn, iov, preq->vsr_respidx, preq); + } +} + +static int +pci_vt9p_legacy_config(nvlist_t *nvl, const char *opts) +{ + char *sharename = NULL, *tofree, *token, *tokens; + + if (opts == NULL) + return (0); + + tokens = tofree = strdup(opts); + while ((token = strsep(&tokens, ",")) != NULL) { + if (strchr(token, '=') != NULL) { + if (sharename != NULL) { + EPRINTLN( + "virtio-9p: more than one share name given"); + return (-1); + } + + sharename = strsep(&token, "="); + set_config_value_node(nvl, "sharename", sharename); + set_config_value_node(nvl, "path", token); + } else + set_config_bool_node(nvl, token, true); + } + free(tofree); + + return (0); +} + +static int +pci_vt9p_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) +{ + struct pci_vt9p_softc *sc; + const char *value; + const char *sharename; + int rootfd; + bool ro; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rootcap; +#endif + + ro = get_config_bool_node_default(nvl, "ro", false); + +#ifndef __FreeBSD__ + illumos_priv_add_min(PRIV_FILE_DAC_READ, "vt9p"); 
+ illumos_priv_add_min(PRIV_FILE_DAC_SEARCH, "vt9p"); + + if (!ro) { + illumos_priv_add_min(PRIV_FILE_CHOWN, "vt9p"); + illumos_priv_add_min(PRIV_FILE_CHOWN_SELF, "vt9p"); + illumos_priv_add_min(PRIV_FILE_WRITE, "vt9p"); + illumos_priv_add_min(PRIV_FILE_DAC_WRITE, "vt9p"); + illumos_priv_add_min(PRIV_FILE_OWNER, "vt9p"); + illumos_priv_add_min(PRIV_FILE_LINK_ANY, "vt9p"); + } +#endif + + value = get_config_value_node(nvl, "path"); + if (value == NULL) { + EPRINTLN("virtio-9p: path required"); + return (1); + } + rootfd = open(value, O_DIRECTORY); + if (rootfd < 0) { + EPRINTLN("virtio-9p: failed to open '%s': %s", value, + strerror(errno)); + return (-1); + } + + sharename = get_config_value_node(nvl, "sharename"); + if (sharename == NULL) { + EPRINTLN("virtio-9p: share name required"); + return (1); + } + if (strlen(sharename) > VT9P_MAXTAGSZ) { + EPRINTLN("virtio-9p: share name too long"); + return (1); + } + + sc = calloc(1, sizeof(struct pci_vt9p_softc)); +#ifndef __FreeBSD__ + if (sc == NULL) { + EPRINTLN("virtio-9p: soft state allocation failure: %s", + strerror(errno)); + return (1); + } +#endif + sc->vsc_config = calloc(1, sizeof(struct pci_vt9p_config) + + VT9P_MAXTAGSZ); +#ifndef __FreeBSD__ + if (sc->vsc_config == NULL) { /* test the config buffer, not sc */ + EPRINTLN("virtio-9p: vsc_config allocation failure: %s", + strerror(errno)); + return (1); + } +#endif + + pthread_mutex_init(&sc->vsc_mtx, NULL); + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rootcap, + CAP_LOOKUP, CAP_ACL_CHECK, CAP_ACL_DELETE, CAP_ACL_GET, + CAP_ACL_SET, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSTAT, + CAP_CREATE, CAP_FCHMODAT, CAP_FCHOWNAT, CAP_FTRUNCATE, + CAP_LINKAT_SOURCE, CAP_LINKAT_TARGET, CAP_MKDIRAT, CAP_MKNODAT, + CAP_PREAD, CAP_PWRITE, CAP_RENAMEAT_SOURCE, CAP_RENAMEAT_TARGET, + CAP_SEEK, CAP_SYMLINKAT, CAP_UNLINKAT, CAP_EXTATTR_DELETE, + CAP_EXTATTR_GET, CAP_EXTATTR_LIST, CAP_EXTATTR_SET, + CAP_FUTIMES, CAP_FSTATFS, CAP_FSYNC, CAP_FPATHCONF); + + if (cap_rights_limit(rootfd, &rootcap) != 0) + return (1); +#endif + + 
sc->vsc_config->tag_len = (uint16_t)strlen(sharename); + memcpy(sc->vsc_config->tag, sharename, sc->vsc_config->tag_len); + + if (l9p_backend_fs_init(&sc->vsc_fs_backend, rootfd, ro) != 0) { + errno = ENXIO; + return (1); + } + + if (l9p_server_init(&sc->vsc_server, sc->vsc_fs_backend) != 0) { + errno = ENXIO; + return (1); + } + + if (l9p_connection_init(sc->vsc_server, &sc->vsc_conn) != 0) { + errno = EIO; + return (1); + } + + sc->vsc_conn->lc_msize = L9P_MAX_IOV * PAGE_SIZE; + sc->vsc_conn->lc_lt.lt_get_response_buffer = pci_vt9p_get_buffer; + sc->vsc_conn->lc_lt.lt_send_response = pci_vt9p_send; + sc->vsc_conn->lc_lt.lt_drop_response = pci_vt9p_drop; + + vi_softc_linkup(&sc->vsc_vs, &vt9p_vi_consts, sc, pi, &sc->vsc_vq); + sc->vsc_vs.vs_mtx = &sc->vsc_mtx; + sc->vsc_vq.vq_qsize = VT9P_RINGSZ; + + /* initialize config space */ + pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P); + pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_9P); + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); + + if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) + return (1); + vi_set_io_bar(&sc->vsc_vs, 0); + + return (0); +} + +struct pci_devemu pci_de_v9p = { + .pe_emu = "virtio-9p", + .pe_legacy_config = pci_vt9p_legacy_config, + .pe_init = pci_vt9p_init, + .pe_barwrite = vi_pci_write, + .pe_barread = vi_pci_read +}; +PCI_EMUL_SET(pci_de_v9p); diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index 5c796c3caf..2673e008d8 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -274,6 +274,7 @@ SUBDIRS += \ $($(MACH)_SUBDIRS) i386_SUBDIRS= \ + lib9p \ libfdisk \ libppt \ libsaveargs \ @@ -489,6 +490,7 @@ HDRSUBDIRS= \ $($(MACH)_HDRSUBDIRS) i386_HDRSUBDIRS= \ + lib9p \ libfdisk \ libppt \ libsaveargs \ @@ -581,6 +583,7 @@ gss_mechs/mech_krb5: libgss libresolv2 pkcs11 libkstat gss_mechs/mech_spnego: gss_mechs/mech_krb5 hal: dbusdeps krb5: gss_mechs/mech_krb5 
libtecla libldap5 +lib9p: libsec libcustr libads: libnsl libadt_jni: libbsm libadutils: libldap5 libresolv2 diff --git a/usr/src/lib/lib9p/COPYRIGHT b/usr/src/lib/lib9p/COPYRIGHT new file mode 100644 index 0000000000..b02f09aabd --- /dev/null +++ b/usr/src/lib/lib9p/COPYRIGHT @@ -0,0 +1,47 @@ +Copyright 2016 Jakub Klama <jceel@FreeBSD.org> +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted providing that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +Some parts of the code are based on libixp (http://libs.suckless.org/libixp) +library code released under following license: + +© 2005-2006 Anselm R. 
Garbe <garbeam@gmail.com> +© 2006-2010 Kris Maglione <maglione.k at Gmail> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/usr/src/lib/lib9p/COPYRIGHT.descrip b/usr/src/lib/lib9p/COPYRIGHT.descrip new file mode 100644 index 0000000000..d854795482 --- /dev/null +++ b/usr/src/lib/lib9p/COPYRIGHT.descrip @@ -0,0 +1 @@ +lib9p library diff --git a/usr/src/lib/lib9p/Makefile b/usr/src/lib/lib9p/Makefile new file mode 100644 index 0000000000..65f8a88fae --- /dev/null +++ b/usr/src/lib/lib9p/Makefile @@ -0,0 +1,41 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2021 OmniOS Community Edition (OmniOSce) Association. +# + +include ../Makefile.lib + +$(BUILD64)SUBDIRS += $(MACH64) + +HDRS = lib9p.h +HDRDIR = common +CHECKHDRS = + +all:= TARGET= all +install:= TARGET= install +clean:= TARGET= clean +clobber:= TARGET= clobber + +.KEEP_STATE: + +all install clean clobber: $(SUBDIRS) + +install_h: $(ROOTHDRS) +check: $(CHECKHDRS) + +$(SUBDIRS): FRC + cd $@; pwd; $(MAKE) $(TARGET) + +FRC: + +include ../Makefile.targ diff --git a/usr/src/lib/lib9p/Makefile.com b/usr/src/lib/lib9p/Makefile.com new file mode 100644 index 0000000000..b04b210796 --- /dev/null +++ b/usr/src/lib/lib9p/Makefile.com @@ -0,0 +1,77 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2021 OmniOS Community Edition (OmniOSce) Association. +# + +LIBRARY= lib9p.a +VERS= .1 + +OBJECTS= backend/fs.o \ + connection.o \ + genacl.o \ + hashtable.o \ + log.o \ + pack.o \ + request.o \ + rfuncs.o \ + sbuf/sbuf.o \ + threadpool.o \ + transport/socket.o \ + utils.o +HDRS = lib9p.h + +LOBJDIRS= backend transport sbuf + +include ../../Makefile.lib + +LIBS = $(DYNLIB) +LDLIBS += -lc -lcustr -lsocket -lsec -lnvpair + +SRCDIR = .. 
+ +CSTD = $(CSTD_GNU99) + +CFLAGS += $(CCVERBOSE) + +CPPFLAGS += -D__illumos__ +CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D__EXTENSIONS__ +CPPFLAGS += -I../common -I../common/backend +$(NOT_RELEASE_BUILD)CPPFLAGS += -DL9P_DEBUG=L9P_DEBUG + +SMOFF += all_func_returns + +.KEEP_STATE: + +all: $(LIBS) + +$(LIBS): mkpicdirs + +mkpicdirs: + @mkdir -p $(LOBJDIRS:%=pics/%) + +pics/%.o: ../common/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/backend/%.o: ../common/backend/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +pics/transport/%.o: ../common/transport/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) + +$(ROOTHDRDIR)/%.h: ../common/%.h + $(INS.file) + +include ../../Makefile.targ diff --git a/usr/src/lib/lib9p/amd64/Makefile b/usr/src/lib/lib9p/amd64/Makefile new file mode 100644 index 0000000000..c3510fdb62 --- /dev/null +++ b/usr/src/lib/lib9p/amd64/Makefile @@ -0,0 +1,19 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2021 OmniOS Community Edition (OmniOSce) Association. +# + +include ../Makefile.com +include ../../Makefile.lib.64 + +install: all $(ROOTLIBS64) $(ROOTLINKS64) diff --git a/usr/src/lib/lib9p/common/backend/backend.h b/usr/src/lib/lib9p/common/backend/backend.h new file mode 100644 index 0000000000..2b4bf2d8e4 --- /dev/null +++ b/usr/src/lib/lib9p/common/backend/backend.h @@ -0,0 +1,69 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#ifndef LIB9P_BACKEND_H +#define LIB9P_BACKEND_H + +struct l9p_backend { + void *softc; + void (*freefid)(void *, struct l9p_fid *); + int (*attach)(void *, struct l9p_request *); + int (*clunk)(void *, struct l9p_fid *); + int (*create)(void *, struct l9p_request *); + int (*open)(void *, struct l9p_request *); + int (*read)(void *, struct l9p_request *); + int (*remove)(void *, struct l9p_fid *); + int (*stat)(void *, struct l9p_request *); + int (*walk)(void *, struct l9p_request *); + int (*write)(void *, struct l9p_request *); + int (*wstat)(void *, struct l9p_request *); + int (*statfs)(void *, struct l9p_request *); + int (*lopen)(void *, struct l9p_request *); + int (*lcreate)(void *, struct l9p_request *); + int (*symlink)(void *, struct l9p_request *); + int (*mknod)(void *, struct l9p_request *); + int (*rename)(void *, struct l9p_request *); + int (*readlink)(void *, struct l9p_request *); + int (*getattr)(void *, struct l9p_request *); + int (*setattr)(void *, struct l9p_request *); + int (*xattrwalk)(void *, struct l9p_request *); + int (*xattrcreate)(void *, struct l9p_request *); + int (*xattrread)(void *, struct l9p_request *); + int (*xattrwrite)(void *, struct l9p_request *); + int (*xattrclunk)(void *, struct l9p_fid *); + int (*readdir)(void *, struct l9p_request *); + int (*fsync)(void *, struct l9p_request *); + int (*lock)(void *, struct l9p_request *); + int (*getlock)(void *, struct l9p_request *); + int (*link)(void *, struct l9p_request *); + int (*mkdir)(void *, struct l9p_request *); + int (*renameat)(void *, struct l9p_request *); + int (*unlinkat)(void *, struct l9p_request *); +}; + +#endif /* LIB9P_BACKEND_H */ diff --git a/usr/src/lib/lib9p/common/backend/fs.c b/usr/src/lib/lib9p/common/backend/fs.c new file mode 100644 index 0000000000..4b7764cd86 --- /dev/null +++ b/usr/src/lib/lib9p/common/backend/fs.c @@ -0,0 +1,3238 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * 
Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Copyright 2021 Joyent, Inc. 
+ */ + +/* + * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail> + */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <dirent.h> +#include <pwd.h> +#include <grp.h> +#include <libgen.h> +#include <pthread.h> +#include "../lib9p.h" +#include "../lib9p_impl.h" +#include "../fid.h" +#include "../log.h" +#include "../rfuncs.h" +#include "../genacl.h" +#include "backend.h" +#include "fs.h" + +#if defined(WITH_CASPER) + #include <libcasper.h> + #include <casper/cap_pwd.h> + #include <casper/cap_grp.h> +#endif + +#if defined(__FreeBSD__) + #include <sys/param.h> + #if __FreeBSD_version >= 1000000 + #define HAVE_BINDAT + #endif +#endif + +#if defined(__FreeBSD__) + #define HAVE_BIRTHTIME +#endif + +#if defined(__APPLE__) + #include <sys/syscall.h> + #include "Availability.h" + #define ACL_TYPE_NFS4 ACL_TYPE_EXTENDED +#endif + +#if defined (__illumos__) + #include <sys/sysmacros.h> + #include <sys/statvfs.h> + #include <sys/un.h> + #include <attr.h> + #include <sys/nvpair.h> +#endif + +struct fs_softc { + int fs_rootfd; + bool fs_readonly; +#if defined(__illumos__) + /* + * On illumos, the file creation time (birthtime) is stored (on + * supported filesystems -- i.e. zfs) in an extended attribute. + * If for some reason the fs doesn't support extended attributes, + * we skip trying to read the creation time. 
+ */ + bool fs_hasxattr; +#endif +#if defined(WITH_CASPER) + cap_channel_t *fs_cappwd; + cap_channel_t *fs_capgrp; +#endif +}; + +struct fs_fid { + DIR *ff_dir; + int ff_dirfd; + int ff_fd; + int ff_flags; + char *ff_name; + struct fs_authinfo *ff_ai; + pthread_mutex_t ff_mtx; + struct l9p_acl *ff_acl; /* cached ACL if any */ +}; + +#if defined(__FreeBSD__) +# define STATFS_FSID(_s) \ + (((uint64_t)(_s)->f_fsid.val[0] << 32) | (uint64_t)(_s)->f_fsid.val[1]) + +# define STAT_ATIME(_s) ((_s)->st_atimespec) +# define STAT_MTIME(_s) ((_s)->st_mtimespec) +# define STAT_CTIME(_s) ((_s)->st_ctimespec) +#elif defined (__illumos__) +# define STATFS_FSID(_s) ((_s)->f_fsid) + +# define STAT_ATIME(_s) ((_s)->st_atim) +# define STAT_MTIME(_s) ((_s)->st_mtim) +# define STAT_CTIME(_s) ((_s)->st_ctim) +#else +#error "Port me" +#endif + +#define FF_NO_NFSV4_ACL 0x01 /* don't go looking for NFSv4 ACLs */ +/* FF_NO_POSIX_ACL 0x02 -- not yet */ + +/* + * Our authinfo consists of: + * + * - a reference count + * - a uid + * - a gid-set + * + * The "default" gid is the first gid in the gid-set, provided the + * set size is at least 1. The set-size may be zero, though. + * + * Adjustments to the ref-count must be atomic, once it's shared. + * It would be nice to use C11 atomics here but they are not common + * enough to all systems just yet; for now, we use a mutex. + * + * Note that some ops (Linux style ones) pass an effective gid for + * the op, in which case, that gid may override. To achieve this + * effect, permissions testing functions also take an extra gid. + * If this gid is (gid_t)-1 it is not used and only the remaining + * gids take part. + * + * The uid may also be (uid_t)-1, meaning "no uid was available + * at all at attach time". In this case, new files inherit parent + * directory uids. + * + * The refcount is simply the number of "openfile"s using this + * authinfo (so that when the last ref goes away, we can free it). 
+ * + * There are also master ACL flags (same as in ff_flags). + */ +struct fs_authinfo { + pthread_mutex_t ai_mtx; /* lock for refcnt */ + uint32_t ai_refcnt; + int ai_flags; + uid_t ai_uid; + int ai_ngids; + gid_t ai_gids[]; /* NB: flexible array member */ +}; + +/* + * We have a global-static mutex for single-threading Tattach + * requests, which use getpwnam (and indirectly, getgr* functions) + * which are not reentrant. + */ +static bool fs_attach_mutex_inited; +static pthread_mutex_t fs_attach_mutex; + +static pthread_mutexattr_t fs_mutexattr; + +/* + * Internal functions (except inline functions). + */ +static struct passwd *fs_getpwuid(struct fs_softc *, uid_t, struct r_pgdata *); +static struct group *fs_getgrgid(struct fs_softc *, gid_t, struct r_pgdata *); +static int fs_buildname(struct l9p_fid *, char *, char *, size_t); +static int fs_pdir(struct fs_softc *, struct l9p_fid *, char *, size_t, + struct stat *st); +static int fs_dpf(char *, char *, size_t); +static int fs_oflags_dotu(int, int *); +static int fs_oflags_dotl(uint32_t, int *, enum l9p_omode *); +static int fs_nde(struct fs_softc *, struct l9p_fid *, bool, gid_t, + struct stat *, uid_t *, gid_t *); +static struct fs_fid *open_fid(int, const char *, struct fs_authinfo *, bool); +static void dostat(struct fs_softc *, struct l9p_stat *, char *, + struct stat *, bool dotu); +#ifdef __illumos__ +static void getcrtime(struct fs_softc *, int, const char *, uint64_t *, + uint64_t *); +static void dostatfs(struct l9p_statfs *, struct statvfs *, long); +#define ACL_TYPE_NFS4 1 +acl_t *acl_get_fd_np(int fd, int type); +#else +static void dostatfs(struct l9p_statfs *, struct statfs *, long); +#endif +static void fillacl(struct fs_fid *ff); +static struct l9p_acl *getacl(struct fs_fid *ff, int fd, const char *path); +static void dropacl(struct fs_fid *ff); +static struct l9p_acl *look_for_nfsv4_acl(struct fs_fid *ff, int fd, + const char *path); +static int check_access(int32_t, + struct l9p_acl *, 
struct stat *, struct l9p_acl *, struct stat *, + struct fs_authinfo *, gid_t); +static void generate_qid(struct stat *, struct l9p_qid *); + +static int fs_icreate(void *, struct l9p_fid *, char *, int, + bool, mode_t, gid_t, struct stat *); +static int fs_iopen(void *, struct l9p_fid *, int, enum l9p_omode, + gid_t, struct stat *); +static int fs_imkdir(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_imkfifo(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_imknod(void *, struct l9p_fid *, char *, + bool, mode_t, dev_t, gid_t, struct stat *); +static int fs_imksocket(void *, struct l9p_fid *, char *, + bool, mode_t, gid_t, struct stat *); +static int fs_isymlink(void *, struct l9p_fid *, char *, char *, + gid_t, struct stat *); + +/* + * Internal functions implementing backend. + */ +static int fs_attach(void *, struct l9p_request *); +static int fs_clunk(void *, struct l9p_fid *); +static int fs_create(void *, struct l9p_request *); +static int fs_open(void *, struct l9p_request *); +static int fs_read(void *, struct l9p_request *); +static int fs_remove(void *, struct l9p_fid *); +static int fs_stat(void *, struct l9p_request *); +static int fs_walk(void *, struct l9p_request *); +static int fs_write(void *, struct l9p_request *); +static int fs_wstat(void *, struct l9p_request *); +static int fs_statfs(void *, struct l9p_request *); +static int fs_lopen(void *, struct l9p_request *); +static int fs_lcreate(void *, struct l9p_request *); +static int fs_symlink(void *, struct l9p_request *); +static int fs_mknod(void *, struct l9p_request *); +static int fs_rename(void *, struct l9p_request *); +static int fs_readlink(void *, struct l9p_request *); +static int fs_getattr(void *, struct l9p_request *); +static int fs_setattr(void *, struct l9p_request *); +static int fs_xattrwalk(void *, struct l9p_request *); +static int fs_xattrcreate(void *, struct l9p_request *); +static int 
fs_readdir(void *, struct l9p_request *); +static int fs_fsync(void *, struct l9p_request *); +static int fs_lock(void *, struct l9p_request *); +static int fs_getlock(void *, struct l9p_request *); +static int fs_link(void *, struct l9p_request *); +static int fs_renameat(void *, struct l9p_request *); +static int fs_unlinkat(void *, struct l9p_request *); +static void fs_freefid(void *, struct l9p_fid *); + +/* + * Convert from 9p2000 open/create mode to Unix-style O_* flags. + * This includes 9p2000.u extensions, but not 9p2000.L protocol, + * which has entirely different open, create, etc., flag bits. + * + * The <mode> given here is the one-byte (uint8_t) "mode" + * argument to Tcreate or Topen, so it can have at most 8 bits. + * + * https://swtch.com/plan9port/man/man9/open.html and + * http://plan9.bell-labs.com/magic/man2html/5/open + * both say: + * + * The [low two bits of the] mode field determines the + * type of I/O ... [I]f mode has the OTRUNC (0x10) bit + * set, the file is to be truncated, which requires write + * permission ...; if the mode has the ORCLOSE (0x40) bit + * set, the file is to be removed when the fid is clunked, + * which requires permission to remove the file from its + * directory. All other bits in mode should be zero. It + * is illegal to write a directory, truncate it, or + * attempt to remove it on close. + * + * 9P2000.u may add ODIRECT (0x80); this is not completely clear. + * The fcall.h header defines OCEXEC (0x20) as well, but it makes + * no sense to send this to a server. There seem to be no bits + * 0x04 and 0x08. + * + * We always turn on O_NOCTTY since as a server, we never want + * to gain a controlling terminal. We always turn on O_NOFOLLOW + * for reasons described elsewhere. 
+ */ +static int +fs_oflags_dotu(int mode, int *aflags) +{ + int flags; +#define CONVERT(theirs, ours) \ + do { \ + if (mode & (theirs)) { \ + mode &= ~(theirs); \ + flags |= ours; \ + } \ + } while (0) + + switch (mode & L9P_OACCMODE) { + + case L9P_OREAD: + default: + flags = O_RDONLY; + break; + + case L9P_OWRITE: + flags = O_WRONLY; + break; + + case L9P_ORDWR: + flags = O_RDWR; + break; + + case L9P_OEXEC: + if (mode & L9P_OTRUNC) + return (EINVAL); + flags = O_RDONLY; + break; + } + + flags |= O_NOCTTY | O_NOFOLLOW; + + CONVERT(L9P_OTRUNC, O_TRUNC); + + /* + * Now take away some flags locally: + * the access mode (already translated) + * ORCLOSE - caller only + * OCEXEC - makes no sense in server + * ODIRECT - not applicable here + * If there are any flag bits left after this, + * we were unable to translate them. For now, let's + * treat this as EINVAL so that we can catch problems. + */ + mode &= ~(L9P_OACCMODE | L9P_ORCLOSE | L9P_OCEXEC | L9P_ODIRECT); + if (mode != 0) { + L9P_LOG(L9P_INFO, + "fs_oflags_dotu: untranslated bits: %#x", + (unsigned)mode); + return (EINVAL); + } + + *aflags = flags; + return (0); +#undef CONVERT +} + +/* + * Convert from 9P2000.L (Linux) open mode bits to O_* flags. + * See fs_oflags_dotu above. + * + * Linux currently does not have open-for-exec, but there is a + * proposal for it using O_PATH|O_NOFOLLOW, now handled here. + * + * We may eventually also set L9P_ORCLOSE for L_O_TMPFILE. + */ +static int +fs_oflags_dotl(uint32_t l_mode, int *aflags, enum l9p_omode *ap9) +{ + int flags; + enum l9p_omode p9; +#define CLEAR(theirs) l_mode &= ~(uint32_t)(theirs) +#define CONVERT(theirs, ours) \ + do { \ + if (l_mode & (theirs)) { \ + CLEAR(theirs); \ + flags |= ours; \ + } \ + } while (0) + + /* + * Linux O_RDONLY, O_WRONLY, O_RDWR (0,1,2) match BSD/MacOS. 
+ */ + flags = l_mode & O_ACCMODE; + if (flags == 3) + return (EINVAL); + CLEAR(O_ACCMODE); + + if ((l_mode & (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) == + (L9P_L_O_PATH | L9P_L_O_NOFOLLOW)) { + CLEAR(L9P_L_O_PATH | L9P_L_O_NOFOLLOW); + p9 = L9P_OEXEC; + } else { + /* + * Slightly dirty, but same dirt, really, as + * setting flags from l_mode & O_ACCMODE. + */ + p9 = (enum l9p_omode)flags; /* slightly dirty */ + } + + /* turn L_O_TMPFILE into L9P_ORCLOSE in *p9? */ + if (l_mode & L9P_L_O_TRUNC) + p9 |= L9P_OTRUNC; /* but don't CLEAR yet */ + + flags |= O_NOCTTY | O_NOFOLLOW; + + /* + * L_O_CREAT seems to be noise, since we get separate open + * and create. But it is actually set sometimes. We just + * throw it out here; create ops must set it themselves and + * open ops have no permissions bits and hence cannot create. + * + * L_O_EXCL does make sense on create ops, i.e., we can + * take a create op with or without L_O_EXCL. We pass that + * through. + */ + CLEAR(L9P_L_O_CREAT); + CONVERT(L9P_L_O_EXCL, O_EXCL); + CONVERT(L9P_L_O_TRUNC, O_TRUNC); + CONVERT(L9P_L_O_DIRECTORY, O_DIRECTORY); + CONVERT(L9P_L_O_APPEND, O_APPEND); + CONVERT(L9P_L_O_NONBLOCK, O_NONBLOCK); + + /* + * Discard these as useless noise at our (server) end. + * (NOATIME might be useful but we can only set it on a + * per-mount basis.) 
+ */ + CLEAR(L9P_L_O_CLOEXEC); + CLEAR(L9P_L_O_DIRECT); + CLEAR(L9P_L_O_DSYNC); + CLEAR(L9P_L_O_FASYNC); + CLEAR(L9P_L_O_LARGEFILE); + CLEAR(L9P_L_O_NOATIME); + CLEAR(L9P_L_O_NOCTTY); + CLEAR(L9P_L_O_NOFOLLOW); + CLEAR(L9P_L_O_SYNC); + + if (l_mode != 0) { + L9P_LOG(L9P_INFO, + "fs_oflags_dotl: untranslated bits: %#x", + (unsigned)l_mode); + return (EINVAL); + } + + *aflags = flags; + *ap9 = p9; + return (0); +#undef CLEAR +#undef CONVERT +} + +static struct passwd * +fs_getpwuid(struct fs_softc *sc, uid_t uid, struct r_pgdata *pg) +{ +#if defined(WITH_CASPER) + return (r_cap_getpwuid(sc->fs_cappwd, uid, pg)); +#else + (void)sc; + return (r_getpwuid(uid, pg)); +#endif +} + +static struct group * +fs_getgrgid(struct fs_softc *sc, gid_t gid, struct r_pgdata *pg) +{ +#if defined(WITH_CASPER) + return (r_cap_getgrgid(sc->fs_capgrp, gid, pg)); +#else + (void)sc; + return (r_getgrgid(gid, pg)); +#endif +} + +/* + * Build full name of file by appending given name to directory name. + */ +static int +fs_buildname(struct l9p_fid *dir, char *name, char *buf, size_t size) +{ + struct fs_fid *dirf = dir->lo_aux; + size_t dlen, nlen1; + + assert(dirf != NULL); + dlen = strlen(dirf->ff_name); + nlen1 = strlen(name) + 1; /* +1 for '\0' */ + if (dlen + 1 + nlen1 > size) + return (ENAMETOOLONG); + memcpy(buf, dirf->ff_name, dlen); + buf[dlen] = '/'; + memcpy(buf + dlen + 1, name, nlen1); + return (0); +} + +/* + * Build parent name of file by splitting it off. Return an error + * if the given fid represents the root, so that there is no such + * parent, or if the discovered parent is not a directory. 
+ */ +static int +fs_pdir(struct fs_softc *sc __unused, struct l9p_fid *fid, char *buf, + size_t size, struct stat *st) +{ + struct fs_fid *ff; + char *path; + + ff = fid->lo_aux; + assert(ff != NULL); + path = ff->ff_name; + path = r_dirname(path, buf, size); + if (path == NULL) + return (ENAMETOOLONG); + if (fstatat(ff->ff_dirfd, path, st, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(st->st_mode)) + return (ENOTDIR); + return (0); +} + +/* + * Like fs_buildname() but for adding a file name to a buffer + * already holding a directory name. Essentially does + * strcat(dbuf, "/"); + * strcat(dbuf, fname); + * but with size checking and an ENAMETOOLONG error as needed. + * + * (Think of the function name as "directory plus-equals file".) + */ +static int +fs_dpf(char *dbuf, char *fname, size_t size) +{ + size_t dlen, nlen1; + + dlen = strlen(dbuf); + nlen1 = strlen(fname) + 1; + if (dlen + 1 + nlen1 > size) + return (ENAMETOOLONG); + dbuf[dlen] = '/'; + memcpy(dbuf + dlen + 1, fname, nlen1); + return (0); +} + +/* + * Prepare to create a new directory entry (open with O_CREAT, + * mkdir, etc -- any operation that creates a new inode), + * operating in parent data <dir>, based on authinfo <ai> and + * effective gid <egid>. + * + * The new entity should be owned by user/group <*nuid, *ngid>, + * if it's really a new entity. It will be a directory if isdir. + * + * Returns an error number if the entry should not be created + * (e.g., read-only file system or no permission to write in + * parent directory). Always sets *nuid and *ngid on success: + * in the worst case, when there is no available ID, this will + * use the parent directory's IDs. Fills in <*st> on success. 
+ */ +static int +fs_nde(struct fs_softc *sc, struct l9p_fid *dir, bool isdir, gid_t egid, + struct stat *st, uid_t *nuid, gid_t *ngid) +{ + struct fs_fid *dirf; + struct fs_authinfo *ai; + int32_t op; + int error; + + if (sc->fs_readonly) + return (EROFS); + dirf = dir->lo_aux; + assert(dirf != NULL); + if (fstatat(dirf->ff_dirfd, dirf->ff_name, st, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(st->st_mode)) + return (ENOTDIR); + dirf = dir->lo_aux; + ai = dirf->ff_ai; + fillacl(dirf); + op = isdir ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, dirf->ff_acl, st, NULL, NULL, ai, egid); + if (error) + return (EPERM); + + *nuid = ai->ai_uid != (uid_t)-1 ? ai->ai_uid : st->st_uid; + *ngid = egid != (gid_t)-1 ? egid : + ai->ai_ngids > 0 ? ai->ai_gids[0] : st->st_gid; + return (0); +} + +/* + * Allocate new open-file data structure to attach to a fid. + * + * The new file's authinfo is the same as the old one's, and + * we gain a reference. + */ +static struct fs_fid * +open_fid(int dirfd, const char *path, struct fs_authinfo *ai, bool creating) +{ + struct fs_fid *ret; + uint32_t newcount; + int error; + + ret = l9p_calloc(1, sizeof(*ret)); +#ifdef __illumos__ + error = pthread_mutex_init(&ret->ff_mtx, &fs_mutexattr); +#else + error = pthread_mutex_init(&ret->ff_mtx, NULL); +#endif + if (error) { + free(ret); + return (NULL); + } + ret->ff_fd = -1; + ret->ff_dirfd = dirfd; + ret->ff_name = strdup(path); + if (ret->ff_name == NULL) { + (void) pthread_mutex_destroy(&ret->ff_mtx); + free(ret); + return (NULL); + } + if (pthread_mutex_lock(&ai->ai_mtx) != 0) { + (void) pthread_mutex_destroy(&ret->ff_mtx); + free(ret->ff_name); + free(ret); + return (NULL); + } + newcount = ++ai->ai_refcnt; + (void) pthread_mutex_unlock(&ai->ai_mtx); + /* + * If we just incremented the count to 1, we're the *first* + * reference. This is only allowed when creating the authinfo, + * otherwise it means something has gone wrong. 
This cannot + * catch every bad (re)use of a freed authinfo but it may catch + * a few. + */ + assert(newcount > 1 || creating); + L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu", + (void *)ai, (u_long)newcount); + ret->ff_ai = ai; + return (ret); +} + +static void +dostat(struct fs_softc *sc, struct l9p_stat *s, char *name, + struct stat *buf, bool dotu) +{ + struct passwd *user; + struct group *group; + + memset(s, 0, sizeof(struct l9p_stat)); + + generate_qid(buf, &s->qid); + + s->type = 0; + s->dev = 0; + s->mode = buf->st_mode & 0777; + + if (S_ISDIR(buf->st_mode)) + s->mode |= L9P_DMDIR; + + if (S_ISLNK(buf->st_mode) && dotu) + s->mode |= L9P_DMSYMLINK; + + if (S_ISCHR(buf->st_mode) || S_ISBLK(buf->st_mode)) + s->mode |= L9P_DMDEVICE; + + if (S_ISSOCK(buf->st_mode)) + s->mode |= L9P_DMSOCKET; + + if (S_ISFIFO(buf->st_mode)) + s->mode |= L9P_DMNAMEDPIPE; + + s->atime = (uint32_t)buf->st_atime; + s->mtime = (uint32_t)buf->st_mtime; + s->length = (uint64_t)buf->st_size; + + s->name = r_basename(name, NULL, 0); + + if (!dotu) { + struct r_pgdata udata, gdata; + + user = fs_getpwuid(sc, buf->st_uid, &udata); + group = fs_getgrgid(sc, buf->st_gid, &gdata); + s->uid = user != NULL ? strdup(user->pw_name) : NULL; + s->gid = group != NULL ? strdup(group->gr_name) : NULL; + s->muid = user != NULL ? strdup(user->pw_name) : NULL; + r_pgfree(&udata); + r_pgfree(&gdata); + } else { + /* + * When using 9P2000.u, we don't need to bother about + * providing user and group names in textual form. + * + * NB: if the asprintf()s fail, s->extension should + * be unset so we can ignore these. 
+ */ + s->n_uid = buf->st_uid; + s->n_gid = buf->st_gid; + s->n_muid = buf->st_uid; + + if (S_ISLNK(buf->st_mode)) { + char target[MAXPATHLEN]; + ssize_t ret = readlink(name, target, MAXPATHLEN); + + if (ret < 0) { + s->extension = NULL; + return; + } + + s->extension = strndup(target, (size_t)ret); + } + + if (S_ISBLK(buf->st_mode)) { + asprintf(&s->extension, "b %d %d", major(buf->st_rdev), + minor(buf->st_rdev)); + } + + if (S_ISCHR(buf->st_mode)) { + asprintf(&s->extension, "c %d %d", major(buf->st_rdev), + minor(buf->st_rdev)); + } + } +} + +#ifndef __illumos__ +static void +dostatfs(struct l9p_statfs *out, struct statfs *in, long namelen) +#else +static void +dostatfs(struct l9p_statfs *out, struct statvfs *in, long namelen) +#endif +{ + + out->type = L9P_FSTYPE; + out->bsize = in->f_bsize; +#ifndef __illumos__ + out->blocks = in->f_blocks; + out->bfree = in->f_bfree; + out->bavail = in->f_bavail; +#else + out->blocks = in->f_blocks * in->f_frsize / in->f_bsize; + out->bfree = in->f_bfree * in->f_frsize / in->f_bsize; + out->bavail = in->f_bavail * in->f_frsize / in->f_bsize; +#endif + out->files = in->f_files; + out->ffree = in->f_ffree; + out->namelen = (uint32_t)namelen; + out->fsid = STATFS_FSID(in); +} + +static void +generate_qid(struct stat *buf, struct l9p_qid *qid) +{ + qid->path = buf->st_ino; + qid->version = 0; + + if (S_ISREG(buf->st_mode)) + qid->type |= L9P_QTFILE; + + if (S_ISDIR(buf->st_mode)) + qid->type |= L9P_QTDIR; + + if (S_ISLNK(buf->st_mode)) + qid->type |= L9P_QTSYMLINK; +} + +/* + * Fill in ff->ff_acl if it's not set yet. Skip if the "don't use + * ACLs" flag is set, and use the flag to remember failure so + * we don't bother retrying either. 
+ */ +static void +fillacl(struct fs_fid *ff) +{ + + if (ff->ff_acl == NULL && (ff->ff_flags & FF_NO_NFSV4_ACL) == 0) { + ff->ff_acl = look_for_nfsv4_acl(ff, ff->ff_fd, ff->ff_name); + if (ff->ff_acl == NULL) + ff->ff_flags |= FF_NO_NFSV4_ACL; + } +} + +/* + * Get an ACL given fd and/or path name. We check for the "don't get + * ACL" flag in the given ff_fid data structure first, but don't set + * the flag here. The fillacl() code is similar but will set the + * flag; it also uses the ff_fd and ff_name directly. + * + * (This is used to get ACLs for parent directories, for instance.) + */ +static struct l9p_acl * +getacl(struct fs_fid *ff, int fd, const char *path) +{ + + if (ff->ff_flags & FF_NO_NFSV4_ACL) + return (NULL); + return look_for_nfsv4_acl(ff, fd, path); +} + +/* + * Drop cached ff->ff_acl, e.g., after moving from one directory to + * another, where inherited ACLs might change. + */ +static void +dropacl(struct fs_fid *ff) +{ + + l9p_acl_free(ff->ff_acl); + ff->ff_acl = NULL; + ff->ff_flags = ff->ff_ai->ai_flags; +} + +/* + * Check to see if we can find NFSv4 ACLs for the given file. + * If we have an open fd, we can use that, otherwise we need + * to use the path. + */ +static struct l9p_acl * +look_for_nfsv4_acl(struct fs_fid *ff, int fd, const char *path) +{ + struct l9p_acl *acl; +#ifdef __illumos__ + acl_t *sysacl; +#else + acl_t sysacl; +#endif + int doclose = 0; + + if (fd < 0) { + fd = openat(ff->ff_dirfd, path, 0); + doclose = 1; + } + + sysacl = acl_get_fd_np(fd, ACL_TYPE_NFS4); + if (sysacl == NULL) { + /* + * EINVAL means no NFSv4 ACLs apply for this file. + * Other error numbers indicate some kind of problem. 
+ */ + if (errno != EINVAL) { + L9P_LOG(L9P_ERROR, + "error retrieving NFSv4 ACL from " + "fdesc %d (%s): %s", fd, + path, strerror(errno)); + } + + if (doclose) + close(fd); + + return (NULL); + } +#if defined(HAVE_FREEBSD_ACLS) + acl = l9p_freebsd_nfsv4acl_to_acl(sysacl); +#elif defined(HAVE__ILLUMOS_ACLS) + acl = l9p_illumos_nfsv4acl_to_acl(sysacl); +#else + acl = NULL; /* XXX need a l9p_darwin_acl_to_acl */ +#endif + acl_free(sysacl); + + if (doclose) + close(fd); + + return (acl); +} + +/* + * Verify that the user whose authinfo is in <ai> and effective + * group ID is <egid> ((gid_t)-1 means no egid supplied) has + * permission to do something. + * + * The "something" may be rather complex: we allow NFSv4 style + * operation masks here, and provide parent and child ACLs and + * stat data. At most one of pacl+pst and cacl+cst can be NULL, + * unless ACLs are not supported; then pacl and cacl can both + * be NULL but pst or cst must be non-NULL depending on the + * operation. + */ +static int +check_access(int32_t opmask, + struct l9p_acl *pacl, struct stat *pst, + struct l9p_acl *cacl, struct stat *cst, + struct fs_authinfo *ai, gid_t egid) +{ + struct l9p_acl_check_args args; + + /* + * If we have ACLs, use them exclusively, ignoring Unix + * permissions. Otherwise, fall back on stat st_mode + * bits, and allow super-user as well. + */ + args.aca_uid = ai->ai_uid; + args.aca_gid = egid; + args.aca_groups = ai->ai_gids; + args.aca_ngroups = (size_t)ai->ai_ngids; + args.aca_parent = pacl; + args.aca_pstat = pst; + args.aca_child = cacl; + args.aca_cstat = cst; + args.aca_aclmode = pacl == NULL && cacl == NULL + ? 
L9P_ACM_STAT_MODE + : L9P_ACM_NFS_ACL | L9P_ACM_ZFS_ACL; + + args.aca_superuser = true; + return (l9p_acl_check_access(opmask, &args)); +} + +static int +fs_attach(void *softc, struct l9p_request *req) +{ + struct fs_authinfo *ai; + struct fs_softc *sc = (struct fs_softc *)softc; + struct fs_fid *file; + struct passwd *pwd; + struct stat st; + struct r_pgdata udata; + uint32_t n_uname; + gid_t *gids; + uid_t uid; + int error; + int ngroups; + + assert(req->lr_fid != NULL); + + /* + * Single-thread pwd/group related items. We have a reentrant + * r_getpwuid but not a reentrant r_getpwnam, and l9p_getgrlist + * may use non-reentrant C library getgr* routines. + */ + if ((error = pthread_mutex_lock(&fs_attach_mutex)) != 0) + return (error); + + n_uname = req->lr_req.tattach.n_uname; + if (n_uname != L9P_NONUNAME) { + uid = (uid_t)n_uname; + pwd = fs_getpwuid(sc, uid, &udata); +#if defined(L9P_DEBUG) + if (pwd == NULL) + L9P_LOG(L9P_DEBUG, + "Tattach: uid %ld: no such user", (long)uid); +#endif + } else { + uid = (uid_t)-1; +#if defined(WITH_CASPER) + pwd = cap_getpwnam(sc->fs_cappwd, req->lr_req.tattach.uname); +#else + pwd = getpwnam(req->lr_req.tattach.uname); +#endif +#if defined(L9P_DEBUG) + if (pwd == NULL) + L9P_LOG(L9P_DEBUG, + "Tattach: %s: no such user", + req->lr_req.tattach.uname); +#endif + } + + /* + * If caller didn't give a numeric UID, pick it up from pwd + * if possible. If that doesn't work we can't continue. + * + * Note that pwd also supplies the group set. This assumes + * the server has the right mapping; this needs improvement. + * We do at least support ai->ai_ngids==0 properly now though. 
+ */ + if (uid == (uid_t)-1 && pwd != NULL) + uid = pwd->pw_uid; + if (uid == (uid_t)-1) + error = EPERM; + else { + error = 0; + if (fstat(sc->fs_rootfd, &st) != 0) + error = errno; + else if (!S_ISDIR(st.st_mode)) + error = ENOTDIR; + } + if (error) { + (void) pthread_mutex_unlock(&fs_attach_mutex); + L9P_LOG(L9P_DEBUG, + "Tattach: denying uid=%ld access to rootdir: %s", + (long)uid, strerror(error)); + /* + * Pass ENOENT and ENOTDIR through for diagnosis; + * others become EPERM. This should not leak too + * much security. + */ + return (error == ENOENT || error == ENOTDIR ? error : EPERM); + } + + if (pwd != NULL) { + /* + * This either succeeds and fills in ngroups and + * returns non-NULL, or fails and sets ngroups to 0 + * and returns NULL. Either way ngroups is correct. + */ + gids = l9p_getgrlist(pwd->pw_name, pwd->pw_gid, &ngroups); + } else { + gids = NULL; + ngroups = 0; + } + + /* + * Done with pwd and group related items that may use + * non-reentrant C library routines; allow other threads in. 
+ */ + (void) pthread_mutex_unlock(&fs_attach_mutex); + + ai = malloc(sizeof(*ai) + (size_t)ngroups * sizeof(gid_t)); + if (ai == NULL) { + free(gids); + return (ENOMEM); + } +#ifdef __illumos__ + error = pthread_mutex_init(&ai->ai_mtx, &fs_mutexattr); +#else + error = pthread_mutex_init(&ai->ai_mtx, NULL); +#endif + if (error) { + free(gids); + free(ai); + return (error); + } + ai->ai_refcnt = 0; + ai->ai_uid = uid; + ai->ai_flags = 0; /* XXX for now */ + ai->ai_ngids = ngroups; + memcpy(ai->ai_gids, gids, (size_t)ngroups * sizeof(gid_t)); + free(gids); + + file = open_fid(sc->fs_rootfd, ".", ai, true); + if (file == NULL) { + (void) pthread_mutex_destroy(&ai->ai_mtx); + free(ai); + return (ENOMEM); + } + + req->lr_fid->lo_aux = file; + generate_qid(&st, &req->lr_resp.rattach.qid); + return (0); +} + +static int +fs_clunk(void *softc __unused, struct l9p_fid *fid) +{ + struct fs_fid *file; + + file = fid->lo_aux; + assert(file != NULL); + + if (file->ff_dir) { + closedir(file->ff_dir); + file->ff_dir = NULL; + } else if (file->ff_fd != -1) { + close(file->ff_fd); + file->ff_fd = -1; + } + + return (0); +} + +/* + * Create ops. + * + * We are to create a new file under some existing path, + * where the new file's name is in the Tcreate request and the + * existing path is due to a fid-based file (req->lr_fid). + * + * One op (create regular file) sets file->fd, the rest do not. 
+ */ +static int +fs_create(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + uint32_t dmperm; + mode_t perm; + char *name; + int error; + + dir = req->lr_fid; + name = req->lr_req.tcreate.name; + dmperm = req->lr_req.tcreate.perm; + perm = (mode_t)(dmperm & 0777); + + if (dmperm & L9P_DMDIR) + error = fs_imkdir(softc, dir, name, true, + perm, (gid_t)-1, &st); + else if (dmperm & L9P_DMSYMLINK) + error = fs_isymlink(softc, dir, name, + req->lr_req.tcreate.extension, (gid_t)-1, &st); + else if (dmperm & L9P_DMNAMEDPIPE) + error = fs_imkfifo(softc, dir, name, true, + perm, (gid_t)-1, &st); + else if (dmperm & L9P_DMSOCKET) + error = fs_imksocket(softc, dir, name, true, + perm, (gid_t)-1, &st); + else if (dmperm & L9P_DMDEVICE) { + unsigned int major, minor; + char type; + dev_t dev; + + /* + * ??? Should this be testing < 3? For now, allow a single + * integer mode with minor==0 implied. + */ + minor = 0; + if (sscanf(req->lr_req.tcreate.extension, "%c %u %u", + &type, &major, &minor) < 2) { + return (EINVAL); + } + + switch (type) { + case 'b': + perm |= S_IFBLK; + break; + case 'c': + perm |= S_IFCHR; + break; + default: + return (EINVAL); + } + dev = makedev(major, minor); + error = fs_imknod(softc, dir, name, true, perm, dev, + (gid_t)-1, &st); + } else { + enum l9p_omode p9; + int flags; + + p9 = req->lr_req.tcreate.mode; + error = fs_oflags_dotu(p9, &flags); + if (error) + return (error); + error = fs_icreate(softc, dir, name, flags, + true, perm, (gid_t)-1, &st); + req->lr_resp.rcreate.iounit = req->lr_conn->lc_max_io_size; + } + + if (error == 0) + generate_qid(&st, &req->lr_resp.rcreate.qid); + + return (error); +} + +/* + * https://swtch.com/plan9port/man/man9/open.html and + * http://plan9.bell-labs.com/magic/man2html/5/open + * say that permissions are actually + * perm & (~0666 | (dir.perm & 0666)) + * for files, and + * perm & (~0777 | (dir.perm & 0777)) + * for directories. 
That is, the parent directory may + * take away permissions granted by the operation. + * + * This seems a bit restrictive; probably + * there should be a control knob for this. + */ +static inline mode_t +fs_p9perm(mode_t perm, mode_t dir_perm, bool isdir) +{ + + if (isdir) + perm &= ~0777 | (dir_perm & 0777); + else + perm &= ~0666 | (dir_perm & 0666); + return (perm); +} + +/* + * Internal form of create (plain file). + * + * Our caller takes care of splitting off all the special + * types of create (mknod, etc), so this is purely for files. + * We receive the fs_softc <softc>, the directory fid <dir> + * in which the new file is to be created, the name of the + * new file, a flag <isp9> indicating whether to do plan9 style + * permissions or Linux style permissions, the permissions <perm>, + * an effective group id <egid>, and a pointer to a stat structure + * <st> to fill in describing the final result on success. + * + * On successful create, the fid switches to the newly created + * file, which is now open; its associated file-name changes too. + * + * Note that the original (dir) fid is never currently open, + * so there is nothing to close. + */ +static int +fs_icreate(void *softc, struct l9p_fid *dir, char *name, int flags, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *file; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error, fd; + + file = dir->lo_aux; + + /* + * Build full path name from directory + file name. We'll + * check permissions on the parent directory, then race to + * create the file before anything bad happens like symlinks. + * + * (To close this race we need to use openat(), which is + * left for a later version of this code.) + */ + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + /* In case of success, we will need a new file->ff_name. 
*/ + name = strdup(newname); + if (name == NULL) + return (ENOMEM); + + /* Check create permission and compute new file ownership. */ + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) { + free(name); + return (error); + } + + /* Adjust new-file permissions for Plan9 protocol. */ + if (isp9) + perm = fs_p9perm(perm, st->st_mode, false); + + /* Create is always exclusive so O_TRUNC is irrelevant. */ + fd = openat(file->ff_dirfd, newname, flags | O_CREAT | O_EXCL, perm); + if (fd < 0) { + error = errno; + free(name); + return (error); + } + + /* Fix permissions and owner. */ + if (fchmod(fd, perm) != 0 || + fchown(fd, uid, gid) != 0 || + fstat(fd, st) != 0) { + error = errno; + (void) close(fd); + /* unlink(newname); ? */ + free(name); + return (error); + } + + /* It *was* a directory; now it's a file, and it's open. */ + free(file->ff_name); + file->ff_name = name; + file->ff_fd = fd; + return (0); +} + +/* + * Internal form of open: stat file and verify permissions (from p9 + * argument), then open the file-or-directory, leaving the internal + * fs_fid fields set up. If we cannot open the file, return a + * suitable error number, and leave everything unchanged. + * + * To mitigate the race between permissions testing and the actual + * open, we can stat the file twice (once with lstat() before open, + * then with fstat() after). We assume O_NOFOLLOW is set in flags, + * so if some other race-winner substitutes in a symlink we won't + * open it here. (However, embedded symlinks, if they occur, are + * still an issue. Ideally we would like to have an O_NEVERFOLLOW + * that fails on embedded symlinks, and a way to pass this to + * lstat() as well.) + * + * When we use opendir() we cannot pass O_NOFOLLOW, so we must rely + * on substitution-detection via fstat(). To simplify the code we + * just always re-check. 
+ * + * (For a proper fix in the future, we can require openat(), keep + * each parent directory open during walk etc, and allow only final + * name components with O_NOFOLLOW.) + * + * On successful return, st has been filled in. + */ +static int +fs_iopen(void *softc, struct l9p_fid *fid, int flags, enum l9p_omode p9, + gid_t egid __unused, struct stat *st) +{ + struct fs_softc *sc = softc; + struct fs_fid *file; + struct stat first; + int32_t op; + char *name; + int error; + int fd; + DIR *dirp; + + /* Forbid write ops on read-only file system. */ + if (sc->fs_readonly) { + if ((flags & O_TRUNC) != 0) + return (EROFS); + if ((flags & O_ACCMODE) != O_RDONLY) + return (EROFS); + if (p9 & L9P_ORCLOSE) + return (EROFS); + } + + file = fid->lo_aux; + assert(file != NULL); + name = file->ff_name; + + if (fstatat(file->ff_dirfd, name, &first, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (S_ISLNK(first.st_mode)) + return (EPERM); + + /* Can we rely on O_APPEND here? Best not, can be cleared. */ + switch (flags & O_ACCMODE) { + case O_RDONLY: + op = L9P_ACE_READ_DATA; + break; + case O_WRONLY: + op = L9P_ACE_WRITE_DATA; + break; + case O_RDWR: + op = L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA; + break; + default: + return (EINVAL); + } + fillacl(file); + error = check_access(op, NULL, NULL, file->ff_acl, &first, + file->ff_ai, (gid_t)-1); + if (error) + return (error); + + if (S_ISDIR(first.st_mode)) { + /* Forbid write or truncate on directory. */ + if ((flags & O_ACCMODE) != O_RDONLY || (flags & O_TRUNC)) + return (EPERM); + fd = openat(file->ff_dirfd, name, O_DIRECTORY); + dirp = fdopendir(fd); + if (dirp == NULL) + return (EPERM); + fd = dirfd(dirp); + } else { + dirp = NULL; + fd = openat(file->ff_dirfd, name, flags); + if (fd < 0) + return (EPERM); + } + + /* + * We have a valid fd, and maybe non-null dirp. Re-check + * the file, and fail if st_dev or st_ino changed. 
+ */ + if (fstat(fd, st) != 0 || + first.st_dev != st->st_dev || + first.st_ino != st->st_ino) { + if (dirp != NULL) + (void) closedir(dirp); + else + (void) close(fd); + return (EPERM); + } + if (dirp != NULL) + file->ff_dir = dirp; + else + file->ff_fd = fd; + return (0); +} + +/* + * Internal form of mkdir (common code for all forms). + * We receive the fs_softc <softc>, the directory fid <dir> + * in which the new entry is to be created, the name of the + * new entry, a flag <isp9> indicating whether to do plan9 style + * permissions or Linux style permissions, the permissions <perm>, + * an effective group id <egid>, and a pointer to a stat structure + * <st> to fill in describing the final result on success. + * + * See also fs_icreate() above. + */ +static int +fs_imkdir(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error, fd; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, true, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) + perm = fs_p9perm(perm, st->st_mode, true); + + if (mkdirat(ff->ff_dirfd, newname, perm) != 0) + return (errno); + + fd = openat(ff->ff_dirfd, newname, + O_DIRECTORY | O_RDONLY | O_NOFOLLOW); + if (fd < 0 || + fchown(fd, uid, gid) != 0 || + fchmod(fd, perm) != 0 || + fstat(fd, st) != 0) { + error = errno; + /* rmdir(newname) ? */ + } + if (fd >= 0) + (void) close(fd); + + return (error); +} + +#ifdef __APPLE__ +/* + * This is an undocumented OS X syscall. It would be best to avoid it, + * but there doesn't seem to be another safe way to implement mknodat. + * Dear Apple, please implement mknodat before you remove this syscall. 
+ */ +static int fs_ifchdir_thread_local(int fd) +{ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + return syscall(SYS___pthread_fchdir, fd); +#pragma clang diagnostic pop +} +#endif + +/* + * Internal form of mknod (special device). + * + * The device type (S_IFBLK, S_IFCHR) is included in the <mode> parameter. + */ +static int +fs_imknod(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t mode, dev_t dev, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + mode_t perm; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) { + perm = fs_p9perm(mode & 0777, st->st_mode, false); + mode = (mode & ~0777) | perm; + } else { + perm = mode & 0777; + } + +#ifdef __APPLE__ + if (fs_ifchdir_thread_local(ff->ff_dirfd) < 0) { + return -1; + } + error = mknod(newname, mode, dev); + int preserved_errno = errno; + /* Stop using the thread-local cwd */ + fs_ifchdir_thread_local(-1); + if (error < 0) { + errno = preserved_errno; + return errno; + } +#else + if (mknodat(ff->ff_dirfd, newname, mode, dev) != 0) + return (errno); +#endif + + /* We cannot open the new name; race to use l* syscalls. */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if ((st->st_mode & S_IFMT) != (mode & S_IFMT)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + + return (error); +} + +/* + * Internal form of mkfifo. 
+ */ +static int +fs_imkfifo(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) + perm = fs_p9perm(perm, st->st_mode, false); + + if (mkfifo(newname, perm) != 0) + return (errno); + + /* We cannot open the new name; race to use l* syscalls. */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if (!S_ISFIFO(st->st_mode)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + + return (error); +} + +/* + * Internal form of mksocket. + * + * This is a bit different because of the horrible socket naming + * system (bind() with sockaddr_un sun_path). + */ +static int +fs_imksocket(void *softc, struct l9p_fid *dir, char *name, + bool isp9, mode_t perm, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + struct sockaddr_un un; + char *path; + char newname[MAXPATHLEN]; + gid_t gid; + uid_t uid; + int error = 0, s, fd, slen; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (isp9) + perm = fs_p9perm(perm, st->st_mode, false); + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) + return (errno); + + path = newname; + fd = -1; +#ifdef HAVE_BINDAT + /* Try bindat() if needed. 
*/ + if (strlen(path) >= sizeof(un.sun_path)) { + fd = openat(ff->ff_dirfd, ff->ff_name, + O_RDONLY | O_DIRECTORY | O_NOFOLLOW); + if (fd >= 0) + path = name; + } +#endif + + /* + * Can only create the socket if the path will fit. + * Even if we are using bindat() there are limits + * (the API for AF_UNIX sockets is ... not good). + * + * Note: in theory we can fill sun_path to the end + * (omitting a terminating '\0') but in at least one + * Unix-like system, this was known to behave oddly, + * so we test for ">=" rather than just ">". + */ + if (strlen(path) >= sizeof(un.sun_path)) { + error = ENAMETOOLONG; + goto out; + } + un.sun_family = AF_UNIX; +#ifndef __illumos__ + slen = un.sun_len = sizeof(struct sockaddr_un); +#else + slen = SUN_LEN(&un); +#endif + + strncpy(un.sun_path, path, sizeof(un.sun_path)); + +#ifdef HAVE_BINDAT + if (fd >= 0) { + if (bindat(fd, s, (struct sockaddr *)&un, slen) < 0) + error = errno; + goto out; /* done now, for good or ill */ + } +#endif + + if (bind(s, (struct sockaddr *)&un, slen) < 0) + error = errno; +out: + + if (error == 0) { + /* + * We believe we created the socket-inode. Fix + * permissions etc. Note that we cannot use + * fstat() on the socket descriptor: it succeeds, + * but we get bogus data! + */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fchmodat(ff->ff_dirfd, newname, perm, 0) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if (!S_ISSOCK(st->st_mode)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + } + + /* + * It's not clear which error should override, although + * ideally we should never see either close() call fail. + * In any case we do want to try to close both fd and s, + * always. Let's set error only if it is not already set, + * so that all exit paths can use the same code. 
+ */ + if (fd >= 0 && close(fd) != 0) + if (error == 0) + error = errno; + if (close(s) != 0) + if (error == 0) + error = errno; + + return (error); +} + +/* + * Internal form of symlink. + * + * Note that symlinks are presumed to carry no permission bits. + * They do have owners, however (who may be charged for quotas). + */ +static int +fs_isymlink(void *softc, struct l9p_fid *dir, char *name, + char *symtgt, gid_t egid, struct stat *st) +{ + struct fs_fid *ff; + gid_t gid; + uid_t uid; + char newname[MAXPATHLEN]; + int error; + + ff = dir->lo_aux; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + error = fs_nde(softc, dir, false, egid, st, &uid, &gid); + if (error) + return (error); + + if (symlinkat(symtgt, ff->ff_dirfd, newname) != 0) + return (errno); + + /* We cannot open the new name; race to use l* syscalls. */ + if (fchownat(ff->ff_dirfd, newname, uid, gid, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(ff->ff_dirfd, newname, st, AT_SYMLINK_NOFOLLOW) != 0) + error = errno; + else if (!S_ISLNK(st->st_mode)) + error = EPERM; /* ??? lost a race anyway */ + + /* if (error) unlink(newname) ? */ + + return (error); +} + +static int +fs_open(void *softc, struct l9p_request *req) +{ + struct l9p_fid *fid = req->lr_fid; + struct stat st; + enum l9p_omode p9; + int error, flags; + + p9 = req->lr_req.topen.mode; + error = fs_oflags_dotu(p9, &flags); + if (error) + return (error); + + error = fs_iopen(softc, fid, flags, p9, (gid_t)-1, &st); + if (error) + return (error); + + generate_qid(&st, &req->lr_resp.ropen.qid); + req->lr_resp.ropen.iounit = req->lr_conn->lc_max_io_size; + return (0); +} + +/* + * Helper for directory read. We want to run an lstat on each + * file name within the directory. This is a lot faster if we + * have lstatat (or fstatat with AT_SYMLINK_NOFOLLOW), but not + * all systems do, so hide the ifdef-ed code in an inline function. 
+ */ +static inline int +fs_lstatat(struct fs_fid *file, char *name, struct stat *st) +{ + + return (fstatat(dirfd(file->ff_dir), name, st, AT_SYMLINK_NOFOLLOW)); +} + +static int +fs_read(void *softc, struct l9p_request *req) +{ + struct l9p_stat l9stat; + struct fs_softc *sc; + struct fs_fid *file; + bool dotu = req->lr_conn->lc_version >= L9P_2000U; + ssize_t ret; + + sc = softc; + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (file->ff_dir != NULL) { + struct dirent *d; + struct stat st; + struct l9p_message msg; + long o; + int err; + + if ((err = pthread_mutex_lock(&file->ff_mtx)) != 0) + return (err); + + /* + * Must use telldir before readdir since seekdir + * takes cookie values. Unfortunately this wastes + * a lot of time (and memory) building unneeded + * cookies that can only be flushed by closing + * the directory. + * + * NB: FreeBSD libc seekdir has SINGLEUSE defined, + * so in fact, we can discard the cookies by + * calling seekdir on them. This clears up wasted + * memory at the cost of even more wasted time... + * + * XXX: readdir/telldir/seekdir not thread safe + */ + l9p_init_msg(&msg, req, L9P_PACK); + for (;;) { + o = telldir(file->ff_dir); + d = readdir(file->ff_dir); + if (d == NULL) + break; + if (fs_lstatat(file, d->d_name, &st)) + continue; + dostat(sc, &l9stat, d->d_name, &st, dotu); + if (l9p_pack_stat(&msg, req, &l9stat) != 0) { + seekdir(file->ff_dir, o); + break; + } +#if defined(__FreeBSD__) + seekdir(file->ff_dir, o); + (void) readdir(file->ff_dir); +#endif + } + + (void) pthread_mutex_unlock(&file->ff_mtx); + } else { + size_t niov = l9p_truncate_iov(req->lr_data_iov, + req->lr_data_niov, req->lr_req.io.count); + +#if defined(__FreeBSD__) || defined(__illumos__) + ret = preadv(file->ff_fd, req->lr_data_iov, niov, + req->lr_req.io.offset); +#else + /* XXX: not thread safe, should really use aio_listio. 
*/ + if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0) + return (errno); + + ret = (uint32_t)readv(file->ff_fd, req->lr_data_iov, (int)niov); +#endif + + if (ret < 0) + return (errno); + + req->lr_resp.io.count = (uint32_t)ret; + } + + return (0); +} + +static int +fs_remove(void *softc, struct l9p_fid *fid) +{ + struct fs_softc *sc = softc; + struct l9p_acl *parent_acl; + struct fs_fid *file; + struct stat pst, cst; + char dirname[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + error = fs_pdir(sc, fid, dirname, sizeof(dirname), &pst); + if (error) + return (error); + + file = fid->lo_aux; + if (fstatat(file->ff_dirfd, file->ff_name, &cst, AT_SYMLINK_NOFOLLOW) != 0) + return (error); + + parent_acl = getacl(file, -1, dirname); + fillacl(file); + + error = check_access(L9P_ACOP_UNLINK, + parent_acl, &pst, file->ff_acl, &cst, file->ff_ai, (gid_t)-1); + l9p_acl_free(parent_acl); + if (error) + return (error); + + if (unlinkat(file->ff_dirfd, file->ff_name, + S_ISDIR(cst.st_mode) ? 
AT_REMOVEDIR : 0) != 0) + error = errno; + + return (error); +} + +static int +fs_stat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc; + struct fs_fid *file; + struct stat st; + bool dotu = req->lr_conn->lc_version >= L9P_2000U; + + sc = softc; + file = req->lr_fid->lo_aux; + assert(file); + + if (fstatat(file->ff_dirfd, file->ff_name, &st, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + dostat(sc, &req->lr_resp.rstat.stat, file->ff_name, &st, dotu); + return (0); +} + +static int +fs_walk(void *softc, struct l9p_request *req) +{ + struct l9p_acl *acl; + struct fs_authinfo *ai; + struct fs_fid *file = req->lr_fid->lo_aux; + struct fs_fid *newfile; + struct stat st; + size_t clen, namelen, need; + char *comp, *succ, *next, *swtmp; + bool atroot; + bool dotdot; + int i, nwname; + int error = 0; + char namebufs[2][MAXPATHLEN]; + + /* + * https://swtch.com/plan9port/man/man9/walk.html: + * + * It is legal for nwname to be zero, in which case newfid + * will represent the same file as fid and the walk will + * usually succeed; this is equivalent to walking to dot. + * [Aside: it's not clear if we should test S_ISDIR here.] + * ... + * The name ".." ... represents the parent directory. + * The name "." ... is not used in the protocol. + * ... A walk of the name ".." in the root directory + * of the server is equivalent to a walk with no name + * elements. + * + * Note that req.twalk.nwname never exceeds L9P_MAX_WELEM, + * so it is safe to convert to plain int. + * + * We are to return an error only if the first walk fails, + * else stop at the end of the names or on the first error. + * The final fid is based on the last name successfully + * walked. + * + * Note that we *do* get Twalk requests with nwname==0 on files. + * + * Set up "successful name" buffer pointer with base fid name, + * initially. We'll swap each new success into it as we go. + * + * Invariant: atroot and stat data correspond to current + * (succ) path. 
+ */ + succ = namebufs[0]; + next = namebufs[1]; + namelen = strlcpy(succ, file->ff_name, MAXPATHLEN); + if (namelen >= MAXPATHLEN) + return (ENAMETOOLONG); + if (fstatat(file->ff_dirfd, succ, &st, AT_SYMLINK_NOFOLLOW) < 0) + return (errno); + ai = file->ff_ai; + atroot = strlen(succ) == 0; /* XXX? */ + fillacl(file); + acl = file->ff_acl; + + nwname = (int)req->lr_req.twalk.nwname; + + for (i = 0; i < nwname; i++) { + /* + * Must have execute permission to search a directory. + * Then, look up each component in its directory-so-far. + * Check for ".." along the way, handlng specially + * as needed. Forbid "/" in name components. + * + */ + if (!S_ISDIR(st.st_mode)) { + error = ENOTDIR; + goto out; + } + error = check_access(L9P_ACE_EXECUTE, + NULL, NULL, acl, &st, ai, (gid_t)-1); + if (error) { + L9P_LOG(L9P_DEBUG, + "Twalk: denying dir-walk on \"%s\" for uid %u", + succ, (unsigned)ai->ai_uid); + error = EPERM; + goto out; + } + comp = req->lr_req.twalk.wname[i]; + if (strchr(comp, '/') != NULL) { + error = EINVAL; + break; + } + + clen = strlen(comp); + dotdot = false; + + /* + * Build next pathname (into "next"). If "..", + * just strip one name component off the success + * name so far. Since we know this name fits, the + * stripped down version also fits. Otherwise, + * the name is the base name plus '/' plus the + * component name plus terminating '\0'; this may + * or may not fit. + */ + if (comp[0] == '.') { + if (clen == 1) { + error = EINVAL; + break; + } + if (comp[1] == '.' && clen == 2) + dotdot = true; + } + if (dotdot) { + /* + * It's not clear how ".." at root should + * be handled when i > 0. Obeying the man + * page exactly, we reset i to 0 and stop, + * declaring terminal success. + * + * Otherwise, we just climbed up one level + * so adjust "atroot". + */ + if (atroot) { + i = 0; + break; + } + (void) r_dirname(succ, next, MAXPATHLEN); + namelen = strlen(next); + atroot = strlen(next) == 0; /* XXX? 
*/ + } else { + need = namelen + 1 + clen + 1; + if (need > MAXPATHLEN) { + error = ENAMETOOLONG; + break; + } + memcpy(next, succ, namelen); + next[namelen++] = '/'; + memcpy(&next[namelen], comp, clen + 1); + namelen += clen; + /* + * Since name is never ".", we are necessarily + * descending below the root now. + */ + atroot = false; + } + + if (fstatat(file->ff_dirfd, next, &st, AT_SYMLINK_NOFOLLOW) < 0) { + error = ENOENT; + break; + } + + /* + * Success: generate qid and swap this + * successful name into place. Update acl. + */ + generate_qid(&st, &req->lr_resp.rwalk.wqid[i]); + swtmp = succ; + succ = next; + next = swtmp; + if (acl != NULL && acl != file->ff_acl) + l9p_acl_free(acl); + acl = getacl(file, -1, next); + } + + /* + * Fail only if we failed on the first name. + * Otherwise we succeeded on something, and "succ" + * points to the last successful name in namebufs[]. + */ + if (error) { + if (i == 0) + goto out; + error = 0; + } + + newfile = open_fid(file->ff_dirfd, succ, ai, false); + if (newfile == NULL) { + error = ENOMEM; + goto out; + } + if (req->lr_newfid == req->lr_fid) { + /* + * Before overwriting fid->lo_aux, free the old value. + * Note that this doesn't free the l9p_fid data, + * just the fs_fid data. (But it does ditch ff_acl.) 
+ */ + if (acl == file->ff_acl) + acl = NULL; + fs_freefid(softc, req->lr_fid); + file = NULL; + } + req->lr_newfid->lo_aux = newfile; + if (file != NULL && acl != file->ff_acl) { + newfile->ff_acl = acl; + acl = NULL; + } + req->lr_resp.rwalk.nwqid = (uint16_t)i; +out: + if (file != NULL && acl != file->ff_acl) + l9p_acl_free(acl); + return (error); +} + +static int +fs_write(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct fs_fid *file; + ssize_t ret; + + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (sc->fs_readonly) + return (EROFS); + + size_t niov = l9p_truncate_iov(req->lr_data_iov, + req->lr_data_niov, req->lr_req.io.count); + +#if defined(__FreeBSD__) || defined(__illumos__) + ret = pwritev(file->ff_fd, req->lr_data_iov, niov, + req->lr_req.io.offset); +#else + /* XXX: not thread safe, should really use aio_listio. */ + if (lseek(file->ff_fd, (off_t)req->lr_req.io.offset, SEEK_SET) < 0) + return (errno); + + ret = writev(file->ff_fd, req->lr_data_iov, + (int)niov); +#endif + + if (ret < 0) + return (errno); + + req->lr_resp.io.count = (uint32_t)ret; + return (0); +} + +static int +fs_wstat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_stat *l9stat = &req->lr_req.twstat.stat; + struct l9p_fid *fid; + struct fs_fid *file; + int error = 0; + + fid = req->lr_fid; + file = fid->lo_aux; + assert(file != NULL); + + /* + * XXX: + * + * stat(9P) sez: + * + * Either all the changes in wstat request happen, or none of them + * does: if the request succeeds, all changes were made; if it fails, + * none were. + * + * Atomicity is clearly missing in current implementation. 
+ */ + + if (sc->fs_readonly) + return (EROFS); + + if (l9stat->atime != (uint32_t)~0) { + /* XXX: not implemented, ignore */ + } + + if (l9stat->mtime != (uint32_t)~0) { + /* XXX: not implemented, ignore */ + } + + if (l9stat->dev != (uint32_t)~0) { + error = EPERM; + goto out; + } + + if (l9stat->length != (uint64_t)~0) { + if (file->ff_dir != NULL) { + error = EINVAL; + goto out; + } + + if (truncate(file->ff_name, (off_t)l9stat->length) != 0) { + error = errno; + goto out; + } + } + + if (req->lr_conn->lc_version >= L9P_2000U) { + if (fchownat(file->ff_dirfd, file->ff_name, l9stat->n_uid, + l9stat->n_gid, AT_SYMLINK_NOFOLLOW) != 0) { + error = errno; + goto out; + } + } + + if (l9stat->mode != (uint32_t)~0) { + if (fchmodat(file->ff_dirfd, file->ff_name, + l9stat->mode & 0777, 0) != 0) { + error = errno; + goto out; + } + } + + if (strlen(l9stat->name) > 0) { + struct l9p_acl *parent_acl; + struct stat st; + char *tmp; + char newname[MAXPATHLEN]; + + /* + * Rename-within-directory: it's not deleting anything, + * but we need write permission on the directory. This + * should suffice. + */ + error = fs_pdir(softc, fid, newname, sizeof(newname), &st); + if (error) + goto out; + parent_acl = getacl(file, -1, newname); + error = check_access(L9P_ACE_ADD_FILE, + parent_acl, &st, NULL, NULL, file->ff_ai, (gid_t)-1); + l9p_acl_free(parent_acl); + if (error) + goto out; + error = fs_dpf(newname, l9stat->name, sizeof(newname)); + if (error) + goto out; + tmp = strdup(newname); + if (tmp == NULL) { + error = ENOMEM; + goto out; + } + if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, + tmp) != 0) { + error = errno; + free(tmp); + goto out; + } + /* Successful rename, update file->ff_name. ACL can stay. 
*/ + free(file->ff_name); + file->ff_name = tmp; + } +out: + return (error); +} + +static int +fs_statfs(void *softc __unused, struct l9p_request *req) +{ + struct fs_fid *file; + struct stat st; +#ifdef __illumos__ + struct statvfs f; +#else + struct statfs f; +#endif + long name_max; + int error; + int fd; + + file = req->lr_fid->lo_aux; + assert(file); + + if (fstatat(file->ff_dirfd, file->ff_name, &st, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + /* + * Not entirely clear what access to require; we'll go + * for "read data". + */ + fillacl(file); + error = check_access(L9P_ACE_READ_DATA, NULL, NULL, + file->ff_acl, &st, file->ff_ai, (gid_t)-1); + if (error) + return (error); + + fd = openat(file->ff_dirfd, file->ff_name, 0); + if (fd < 0) + return (errno); + +#ifdef __illumos__ + if (fstatvfs(fd, &f) != 0) + return (errno); +#else + if (fstatfs(fd, &f) != 0) + return (errno); +#endif + + name_max = fpathconf(fd, _PC_NAME_MAX); + error = errno; + close(fd); + + if (name_max == -1) + return (error); + + dostatfs(&req->lr_resp.rstatfs.statfs, &f, name_max); + + return (0); +} + +static int +fs_lopen(void *softc, struct l9p_request *req) +{ + struct l9p_fid *fid = req->lr_fid; + struct stat st; + enum l9p_omode p9; + gid_t gid; + int error, flags; + + error = fs_oflags_dotl(req->lr_req.tlopen.flags, &flags, &p9); + if (error) + return (error); + + gid = req->lr_req.tlopen.gid; + error = fs_iopen(softc, fid, flags, p9, gid, &st); + if (error) + return (error); + + generate_qid(&st, &req->lr_resp.rlopen.qid); + req->lr_resp.rlopen.iounit = req->lr_conn->lc_max_io_size; + return (0); +} + +static int +fs_lcreate(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + enum l9p_omode p9; + char *name; + mode_t perm; + gid_t gid; + int error, flags; + + dir = req->lr_fid; + name = req->lr_req.tlcreate.name; + + error = fs_oflags_dotl(req->lr_req.tlcreate.flags, &flags, &p9); + if (error) + return (error); + + perm = 
(mode_t)req->lr_req.tlcreate.mode & 0777; /* ? set-id bits? */ + gid = req->lr_req.tlcreate.gid; + error = fs_icreate(softc, dir, name, flags, false, perm, gid, &st); + if (error == 0) + generate_qid(&st, &req->lr_resp.rlcreate.qid); + req->lr_resp.rlcreate.iounit = req->lr_conn->lc_max_io_size; + return (error); +} + +static int +fs_symlink(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + gid_t gid; + char *name, *symtgt; + int error; + + dir = req->lr_fid; + name = req->lr_req.tsymlink.name; + symtgt = req->lr_req.tsymlink.symtgt; + gid = req->lr_req.tsymlink.gid; + error = fs_isymlink(softc, dir, name, symtgt, gid, &st); + if (error == 0) + generate_qid(&st, &req->lr_resp.rsymlink.qid); + return (error); +} + +static int +fs_mknod(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + uint32_t mode, major, minor; + dev_t dev; + gid_t gid; + char *name; + int error; + + dir = req->lr_fid; + name = req->lr_req.tmknod.name; + mode = req->lr_req.tmknod.mode; + gid = req->lr_req.tmknod.gid; + + switch (mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + mode = (mode & S_IFMT) | (mode & 0777); /* ??? 
*/ + major = req->lr_req.tmknod.major; + minor = req->lr_req.tmknod.major; + dev = makedev(major, minor); + error = fs_imknod(softc, dir, name, false, + (mode_t)mode, dev, gid, &st); + break; + + case S_IFIFO: + error = fs_imkfifo(softc, dir, name, false, + (mode_t)(mode & 0777), gid, &st); + break; + + case S_IFSOCK: + error = fs_imksocket(softc, dir, name, false, + (mode_t)(mode & 0777), gid, &st); + break; + + default: + error = EINVAL; + break; + } + if (error == 0) + generate_qid(&st, &req->lr_resp.rmknod.qid); + return (error); +} + +static int +fs_rename(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct fs_authinfo *ai; + struct l9p_acl *oparent_acl; + struct l9p_fid *fid, *f2; + struct fs_fid *file, *f2ff; + struct stat cst, opst, npst; + int32_t op; + bool reparenting; + char *tmp; + char olddir[MAXPATHLEN], newname[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + /* + * Note: lr_fid represents the file that is to be renamed, + * so we must locate its parent directory and verify that + * both this parent directory and the new directory f2 are + * writable. But if the new parent directory is the same + * path as the old parent directory, our job is simpler. + */ + fid = req->lr_fid; + file = fid->lo_aux; + assert(file != NULL); + ai = file->ff_ai; + + error = fs_pdir(sc, fid, olddir, sizeof(olddir), &opst); + if (error) + return (error); + + f2 = req->lr_fid2; + f2ff = f2->lo_aux; + assert(f2ff != NULL); + + reparenting = strcmp(olddir, f2ff->ff_name) != 0; + + fillacl(file); + fillacl(f2ff); + + if (fstatat(file->ff_dirfd, file->ff_name, &cst, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + /* + * Are we moving from olddir? If so, we're unlinking + * from it, in terms of ACL access. 
+ */ + if (reparenting) { + oparent_acl = getacl(file, -1, olddir); + error = check_access(L9P_ACOP_UNLINK, + oparent_acl, &opst, file->ff_acl, &cst, ai, (gid_t)-1); + l9p_acl_free(oparent_acl); + if (error) + return (error); + } + + /* + * Now check that we're allowed to "create" a file or directory in + * f2. (Should we do this, too, only if reparenting? Maybe check + * for dir write permission if not reparenting -- but that's just + * add-file/add-subdir, which means doing this always.) + */ + if (fstatat(f2ff->ff_dirfd, f2ff->ff_name, &npst, + AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + op = S_ISDIR(cst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, f2ff->ff_acl, &npst, NULL, NULL, + ai, (gid_t)-1); + if (error) + return (error); + + /* + * Directories OK, file systems not R/O, etc; build final name. + * f2ff->ff_name cannot exceed MAXPATHLEN, but out of general + * paranoia, let's double check anyway. + */ + if (strlcpy(newname, f2ff->ff_name, sizeof(newname)) >= sizeof(newname)) + return (ENAMETOOLONG); + error = fs_dpf(newname, req->lr_req.trename.name, sizeof(newname)); + if (error) + return (error); + tmp = strdup(newname); + if (tmp == NULL) + return (ENOMEM); + + if (renameat(file->ff_dirfd, file->ff_name, file->ff_dirfd, tmp) != 0) { + error = errno; + free(tmp); + return (error); + } + + /* file has been renamed but old fid is not clunked */ + free(file->ff_name); + file->ff_name = tmp; + + dropacl(file); + return (0); +} + +static int +fs_readlink(void *softc __unused, struct l9p_request *req) +{ + struct fs_fid *file; + ssize_t linklen; + char buf[MAXPATHLEN]; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + + linklen = readlinkat(file->ff_dirfd, file->ff_name, buf, sizeof(buf)); + if (linklen < 0) + error = errno; + else if ((size_t)linklen >= sizeof(buf)) + error = ENOMEM; /* todo: allocate dynamically */ + else if ((req->lr_resp.rreadlink.target = strndup(buf, + (size_t)linklen)) == NULL) 
+ error = ENOMEM; + return (error); +} + +static int +fs_getattr(void *softc __unused, struct l9p_request *req) +{ + uint64_t mask, valid; + struct fs_fid *file; + struct stat st; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + + valid = 0; + if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) { + error = errno; + goto out; + } + /* ?? Can we provide items not-requested? If so, can skip tests. */ + mask = req->lr_req.tgetattr.request_mask; + if (mask & L9PL_GETATTR_MODE) { + /* It is not clear if we need any translations. */ + req->lr_resp.rgetattr.mode = st.st_mode; + valid |= L9PL_GETATTR_MODE; + } + if (mask & L9PL_GETATTR_NLINK) { + req->lr_resp.rgetattr.nlink = st.st_nlink; + valid |= L9PL_GETATTR_NLINK; + } + if (mask & L9PL_GETATTR_UID) { + /* provide st_uid, or file->ff_uid? */ + req->lr_resp.rgetattr.uid = st.st_uid; + valid |= L9PL_GETATTR_UID; + } + if (mask & L9PL_GETATTR_GID) { + /* provide st_gid, or file->ff_gid? */ + req->lr_resp.rgetattr.gid = st.st_gid; + valid |= L9PL_GETATTR_GID; + } + if (mask & L9PL_GETATTR_RDEV) { + /* It is not clear if we need any translations. 
*/ + req->lr_resp.rgetattr.rdev = (uint64_t)st.st_rdev; + valid |= L9PL_GETATTR_RDEV; + } + if (mask & L9PL_GETATTR_ATIME) { + req->lr_resp.rgetattr.atime_sec = + (uint64_t)STAT_ATIME(&st).tv_sec; + req->lr_resp.rgetattr.atime_nsec = + (uint64_t)STAT_ATIME(&st).tv_nsec; + valid |= L9PL_GETATTR_ATIME; + } + if (mask & L9PL_GETATTR_MTIME) { + req->lr_resp.rgetattr.mtime_sec = + (uint64_t)STAT_MTIME(&st).tv_sec; + req->lr_resp.rgetattr.mtime_nsec = + (uint64_t)STAT_MTIME(&st).tv_nsec; + valid |= L9PL_GETATTR_MTIME; + } + if (mask & L9PL_GETATTR_CTIME) { + req->lr_resp.rgetattr.ctime_sec = + (uint64_t)STAT_CTIME(&st).tv_sec; + req->lr_resp.rgetattr.ctime_nsec = + (uint64_t)STAT_CTIME(&st).tv_nsec; + valid |= L9PL_GETATTR_CTIME; + } + if (mask & L9PL_GETATTR_BTIME) { +#if defined(HAVE_BIRTHTIME) + req->lr_resp.rgetattr.btime_sec = + (uint64_t)st.st_birthtim.tv_sec; + req->lr_resp.rgetattr.btime_nsec = + (uint64_t)st.st_birthtim.tv_nsec; +#elif defined(__illumos__) + getcrtime(softc, file->ff_dirfd, file->ff_name, + &req->lr_resp.rgetattr.btime_sec, + &req->lr_resp.rgetattr.btime_nsec); +#else + req->lr_resp.rgetattr.btime_sec = 0; + req->lr_resp.rgetattr.btime_nsec = 0; +#endif + valid |= L9PL_GETATTR_BTIME; + } + if (mask & L9PL_GETATTR_INO) + valid |= L9PL_GETATTR_INO; + if (mask & L9PL_GETATTR_SIZE) { + req->lr_resp.rgetattr.size = (uint64_t)st.st_size; + valid |= L9PL_GETATTR_SIZE; + } + if (mask & L9PL_GETATTR_BLOCKS) { + req->lr_resp.rgetattr.blksize = (uint64_t)st.st_blksize; + req->lr_resp.rgetattr.blocks = (uint64_t)st.st_blocks; + valid |= L9PL_GETATTR_BLOCKS; + } +#ifndef __illumos__ + if (mask & L9PL_GETATTR_GEN) { + req->lr_resp.rgetattr.gen = st.st_gen; + valid |= L9PL_GETATTR_GEN; + } +#endif + /* don't know what to do with data version yet */ + + generate_qid(&st, &req->lr_resp.rgetattr.qid); +out: + req->lr_resp.rgetattr.valid = valid; + return (error); +} + +/* + * Should combine some of this with wstat code. 
+ */ +static int +fs_setattr(void *softc, struct l9p_request *req) +{ + uint64_t mask; + struct fs_softc *sc = softc; + struct timespec ts[2]; + struct fs_fid *file; + struct stat st; + int error = 0; + uid_t uid, gid; + + file = req->lr_fid->lo_aux; + assert(file); + + if (sc->fs_readonly) + return (EROFS); + + /* + * As with WSTAT we have atomicity issues. + */ + mask = req->lr_req.tsetattr.valid; + + if (fstatat(file->ff_dirfd, file->ff_name, &st, AT_SYMLINK_NOFOLLOW)) { + error = errno; + goto out; + } + + if ((mask & L9PL_SETATTR_SIZE) && S_ISDIR(st.st_mode)) { + error = EISDIR; + goto out; + } + + if (mask & L9PL_SETATTR_MODE) { + if (fchmodat(file->ff_dirfd, file->ff_name, + req->lr_req.tsetattr.mode & 0777, + 0)) { + error = errno; + goto out; + } + } + + if (mask & (L9PL_SETATTR_UID | L9PL_SETATTR_GID)) { + uid = mask & L9PL_SETATTR_UID + ? req->lr_req.tsetattr.uid + : (uid_t)-1; + + gid = mask & L9PL_SETATTR_GID + ? req->lr_req.tsetattr.gid + : (gid_t)-1; + + if (fchownat(file->ff_dirfd, file->ff_name, uid, gid, + AT_SYMLINK_NOFOLLOW)) { + error = errno; + goto out; + } + } + + if (mask & L9PL_SETATTR_SIZE) { + /* Truncate follows symlinks, is this OK? 
*/ + int fd = openat(file->ff_dirfd, file->ff_name, O_RDWR); + if (ftruncate(fd, (off_t)req->lr_req.tsetattr.size)) { + error = errno; + (void) close(fd); + goto out; + } + (void) close(fd); + } + + if (mask & (L9PL_SETATTR_ATIME | L9PL_SETATTR_MTIME)) { + ts[0].tv_sec = STAT_ATIME(&st).tv_sec; + ts[0].tv_nsec = STAT_ATIME(&st).tv_nsec; + ts[1].tv_sec = STAT_MTIME(&st).tv_sec; + ts[1].tv_nsec = STAT_MTIME(&st).tv_nsec; + + if (mask & L9PL_SETATTR_ATIME) { + if (mask & L9PL_SETATTR_ATIME_SET) { + ts[0].tv_sec = req->lr_req.tsetattr.atime_sec; + ts[0].tv_nsec = req->lr_req.tsetattr.atime_nsec; + } else { + if (clock_gettime(CLOCK_REALTIME, &ts[0]) != 0) { + error = errno; + goto out; + } + } + } + + if (mask & L9PL_SETATTR_MTIME) { + if (mask & L9PL_SETATTR_MTIME_SET) { + ts[1].tv_sec = req->lr_req.tsetattr.mtime_sec; + ts[1].tv_nsec = req->lr_req.tsetattr.mtime_nsec; + } else { + if (clock_gettime(CLOCK_REALTIME, &ts[1]) != 0) { + error = errno; + goto out; + } + } + } + + if (utimensat(file->ff_dirfd, file->ff_name, ts, + AT_SYMLINK_NOFOLLOW)) { + error = errno; + goto out; + } + } +out: + return (error); +} + +static int +fs_xattrwalk(void *softc __unused, struct l9p_request *req __unused) +{ + return (EOPNOTSUPP); +} + +static int +fs_xattrcreate(void *softc __unused, struct l9p_request *req __unused) +{ + return (EOPNOTSUPP); +} + +static int +fs_readdir(void *softc __unused, struct l9p_request *req) +{ + struct l9p_message msg; + struct l9p_dirent de; + struct fs_fid *file; + struct dirent *dp; + struct stat st; + uint32_t count; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + + if (file->ff_dir == NULL) + return (ENOTDIR); + + if ((error = pthread_mutex_lock(&file->ff_mtx)) != 0) + return (error); + + /* + * It's not clear whether we can use the same trick for + * discarding offsets here as we do in fs_read. 
It + * probably should work, we'll have to see if some + * client(s) use the zero-offset thing to rescan without + * clunking the directory first. + * + * Probably the thing to do is switch to calling + * getdirentries() / getdents() directly, instead of + * going through libc. + */ + if (req->lr_req.io.offset == 0) + rewinddir(file->ff_dir); + else + seekdir(file->ff_dir, (long)req->lr_req.io.offset); + + l9p_init_msg(&msg, req, L9P_PACK); + count = (uint32_t)msg.lm_size; /* in case we get no entries */ + while ((dp = readdir(file->ff_dir)) != NULL) { + /* + * Although "." is forbidden in naming and ".." is + * special cased, testing shows that we must transmit + * them through readdir. (For ".." at root, we + * should perhaps alter the inode number, but not + * yet.) + */ + + /* + * TODO: we do a full lstat here; could use dp->d_* + * to construct the qid more efficiently, as long + * as dp->d_type != DT_UNKNOWN. + */ + if (fs_lstatat(file, dp->d_name, &st)) + continue; + + de.qid.type = 0; + generate_qid(&st, &de.qid); + de.offset = (uint64_t)telldir(file->ff_dir); +#ifdef __illumos__ + de.type = st.st_mode & S_IFMT; +#else + de.type = dp->d_type; +#endif + de.name = dp->d_name; + + /* Update count only if we completely pack the dirent. */ + if (l9p_pudirent(&msg, &de) < 0) + break; + count = (uint32_t)msg.lm_size; + } + + (void) pthread_mutex_unlock(&file->ff_mtx); + req->lr_resp.io.count = count; + return (error); +} + +static int +fs_fsync(void *softc __unused, struct l9p_request *req) +{ + struct fs_fid *file; + int error = 0; + + file = req->lr_fid->lo_aux; + assert(file); + if (fsync(file->ff_dir != NULL ? 
dirfd(file->ff_dir) : file->ff_fd)) + error = errno; + return (error); +} + +static int +fs_lock(void *softc __unused, struct l9p_request *req) +{ + + switch (req->lr_req.tlock.type) { + case L9PL_LOCK_TYPE_RDLOCK: + case L9PL_LOCK_TYPE_WRLOCK: + case L9PL_LOCK_TYPE_UNLOCK: + break; + default: + return (EINVAL); + } + + req->lr_resp.rlock.status = L9PL_LOCK_SUCCESS; + return (0); +} + +static int +fs_getlock(void *softc __unused, struct l9p_request *req) +{ + + /* + * Client wants to see if a request to lock a region would + * block. This is, of course, not atomic anyway, so the + * op is useless. QEMU simply says "unlocked!", so we do + * too. + */ + switch (req->lr_req.getlock.type) { + case L9PL_LOCK_TYPE_RDLOCK: + case L9PL_LOCK_TYPE_WRLOCK: + case L9PL_LOCK_TYPE_UNLOCK: + break; + default: + return (EINVAL); + } + + req->lr_resp.getlock = req->lr_req.getlock; + req->lr_resp.getlock.type = L9PL_LOCK_TYPE_UNLOCK; + req->lr_resp.getlock.client_id = strdup(""); /* XXX what should go here? */ + return (0); +} + +static int +fs_link(void *softc __unused, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct fs_fid *file; + struct fs_fid *dirf; + struct stat fst, tdst; + int32_t op; + char *name; + char newname[MAXPATHLEN]; + int error; + + /* N.B.: lr_fid is the file to link, lr_fid2 is the target dir */ + dir = req->lr_fid2; + dirf = dir->lo_aux; + assert(dirf != NULL); + + name = req->lr_req.tlink.name; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + + file = req->lr_fid->lo_aux; + assert(file != NULL); + + if (fstatat(dirf->ff_dirfd, dirf->ff_name, &tdst, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(file->ff_dirfd, file->ff_name, &fst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (S_ISDIR(fst.st_mode)) + return (EISDIR); + fillacl(dirf); + op = S_ISDIR(fst.st_mode) ? 
L9P_ACE_ADD_SUBDIRECTORY : L9P_ACE_ADD_FILE; + error = check_access(op, + dirf->ff_acl, &tdst, NULL, NULL, file->ff_ai, (gid_t)-1); + if (error) + return (error); + + if (linkat(file->ff_dirfd, file->ff_name, file->ff_dirfd, + newname, 0) != 0) + error = errno; + else + dropacl(file); + + return (error); +} + +static int +fs_mkdir(void *softc, struct l9p_request *req) +{ + struct l9p_fid *dir; + struct stat st; + mode_t perm; + gid_t gid; + char *name; + int error; + + dir = req->lr_fid; + name = req->lr_req.tmkdir.name; + perm = (mode_t)req->lr_req.tmkdir.mode; + gid = req->lr_req.tmkdir.gid; + + error = fs_imkdir(softc, dir, name, false, perm, gid, &st); + if (error == 0) + generate_qid(&st, &req->lr_resp.rmkdir.qid); + return (error); +} + +static int +fs_renameat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_fid *olddir, *newdir; + struct l9p_acl *facl; + struct fs_fid *off, *nff; + struct stat odst, ndst, fst; + int32_t op; + bool reparenting; + char *onp, *nnp; + char onb[MAXPATHLEN], nnb[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + olddir = req->lr_fid; + newdir = req->lr_fid2; + assert(olddir != NULL && newdir != NULL); + off = olddir->lo_aux; + nff = newdir->lo_aux; + assert(off != NULL && nff != NULL); + + onp = req->lr_req.trenameat.oldname; + nnp = req->lr_req.trenameat.newname; + error = fs_buildname(olddir, onp, onb, sizeof(onb)); + if (error) + return (error); + error = fs_buildname(newdir, nnp, nnb, sizeof(nnb)); + if (error) + return (error); + if (fstatat(off->ff_dirfd, onb, &fst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + + reparenting = olddir != newdir && + strcmp(off->ff_name, nff->ff_name) != 0; + + if (fstatat(off->ff_dirfd, off->ff_name, &odst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + if (!S_ISDIR(odst.st_mode)) + return (ENOTDIR); + fillacl(off); + + if (reparenting) { + if (fstatat(nff->ff_dirfd, nff->ff_name, &ndst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + 
if (!S_ISDIR(ndst.st_mode)) + return (ENOTDIR); + facl = getacl(off, -1, onb); + fillacl(nff); + + error = check_access(L9P_ACOP_UNLINK, + off->ff_acl, &odst, facl, &fst, off->ff_ai, (gid_t)-1); + l9p_acl_free(facl); + if (error) + return (error); + op = S_ISDIR(fst.st_mode) ? L9P_ACE_ADD_SUBDIRECTORY : + L9P_ACE_ADD_FILE; + error = check_access(op, + nff->ff_acl, &ndst, NULL, NULL, nff->ff_ai, (gid_t)-1); + if (error) + return (error); + } + + if (renameat(off->ff_dirfd, onb, nff->ff_dirfd, nnb)) + error = errno; + + return (error); +} + +/* + * Unlink file in given directory, or remove directory in given + * directory, based on flags. + */ +static int +fs_unlinkat(void *softc, struct l9p_request *req) +{ + struct fs_softc *sc = softc; + struct l9p_acl *facl; + struct l9p_fid *dir; + struct fs_fid *dirff; + struct stat dirst, fst; + char *name; + char newname[MAXPATHLEN]; + int error; + + if (sc->fs_readonly) + return (EROFS); + + dir = req->lr_fid; + dirff = dir->lo_aux; + assert(dirff != NULL); + name = req->lr_req.tunlinkat.name; + error = fs_buildname(dir, name, newname, sizeof(newname)); + if (error) + return (error); + if (fstatat(dirff->ff_dirfd, newname, &fst, AT_SYMLINK_NOFOLLOW) != 0 || + fstatat(dirff->ff_dirfd, dirff->ff_name, &dirst, AT_SYMLINK_NOFOLLOW) != 0) + return (errno); + fillacl(dirff); + facl = getacl(dirff, -1, newname); + error = check_access(L9P_ACOP_UNLINK, + dirff->ff_acl, &dirst, facl, &fst, dirff->ff_ai, (gid_t)-1); + l9p_acl_free(facl); + if (error) + return (error); + + if (req->lr_req.tunlinkat.flags & L9PL_AT_REMOVEDIR) { + if (unlinkat(dirff->ff_dirfd, newname, AT_REMOVEDIR) != 0) + error = errno; + } else { + if (unlinkat(dirff->ff_dirfd, newname, 0) != 0) + error = errno; + } + return (error); +} + +static void +fs_freefid(void *softc __unused, struct l9p_fid *fid) +{ + struct fs_fid *f = fid->lo_aux; + struct fs_authinfo *ai; + uint32_t newcount; + + if (f == NULL) { + /* Nothing to do here */ + return; + } + + if (f->ff_fd != 
-1) + close(f->ff_fd); + + if (f->ff_dir) + closedir(f->ff_dir); + + (void) pthread_mutex_destroy(&f->ff_mtx); + free(f->ff_name); + ai = f->ff_ai; + l9p_acl_free(f->ff_acl); + free(f); + (void) pthread_mutex_lock(&ai->ai_mtx); + newcount = --ai->ai_refcnt; + (void) pthread_mutex_unlock(&ai->ai_mtx); + if (newcount == 0) { + /* + * We *were* the last ref, no one can have gained a ref. + */ + L9P_LOG(L9P_DEBUG, "dropped last ref to authinfo %p", + (void *)ai); + (void) pthread_mutex_destroy(&ai->ai_mtx); + free(ai); + } else { + L9P_LOG(L9P_DEBUG, "authinfo %p now used by %lu", + (void *)ai, (u_long)newcount); + } +} + +int +l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro) +{ + struct l9p_backend *backend; + struct fs_softc *sc; + int error; +#if defined(WITH_CASPER) + cap_channel_t *capcas; +#endif + + if (!fs_attach_mutex_inited) { +#ifdef __illumos__ + if ((error = pthread_mutexattr_init(&fs_mutexattr)) != 0) { + errno = error; + return (-1); + } + if ((error = pthread_mutexattr_settype(&fs_mutexattr, + PTHREAD_MUTEX_ERRORCHECK)) != 0) { + errno = error; + return (-1); + } + error = pthread_mutex_init(&fs_attach_mutex, &fs_mutexattr); +#else + error = pthread_mutex_init(&fs_attach_mutex, NULL); +#endif + if (error) { + errno = error; + return (-1); + } + fs_attach_mutex_inited = true; + } + + backend = l9p_malloc(sizeof(*backend)); + backend->attach = fs_attach; + backend->clunk = fs_clunk; + backend->create = fs_create; + backend->open = fs_open; + backend->read = fs_read; + backend->remove = fs_remove; + backend->stat = fs_stat; + backend->walk = fs_walk; + backend->write = fs_write; + backend->wstat = fs_wstat; + backend->statfs = fs_statfs; + backend->lopen = fs_lopen; + backend->lcreate = fs_lcreate; + backend->symlink = fs_symlink; + backend->mknod = fs_mknod; + backend->rename = fs_rename; + backend->readlink = fs_readlink; + backend->getattr = fs_getattr; + backend->setattr = fs_setattr; + backend->xattrwalk = fs_xattrwalk; + 
backend->xattrcreate = fs_xattrcreate; + backend->readdir = fs_readdir; + backend->fsync = fs_fsync; + backend->lock = fs_lock; + backend->getlock = fs_getlock; + backend->link = fs_link; + backend->mkdir = fs_mkdir; + backend->renameat = fs_renameat; + backend->unlinkat = fs_unlinkat; + backend->freefid = fs_freefid; + + sc = l9p_malloc(sizeof(*sc)); + sc->fs_rootfd = rootfd; + sc->fs_readonly = ro; + backend->softc = sc; + +#if defined(__illumos__) + if (fpathconf(rootfd, _PC_XATTR_ENABLED) > 0) + sc->fs_hasxattr = 1; +#endif + +#if defined(WITH_CASPER) + capcas = cap_init(); + if (capcas == NULL) + return (-1); + + sc->fs_cappwd = cap_service_open(capcas, "system.pwd"); + if (sc->fs_cappwd == NULL) + return (-1); + + sc->fs_capgrp = cap_service_open(capcas, "system.grp"); + if (sc->fs_capgrp == NULL) + return (-1); + + cap_setpassent(sc->fs_cappwd, 1); + cap_setgroupent(sc->fs_capgrp, 1); + cap_close(capcas); +#elif defined(__illumos__) + setpwent(); +#else + setpassent(1); +#endif + + *backendp = backend; + return (0); +} + +#ifdef __illumos__ +acl_t * +acl_get_fd_np(int fd, int type) +{ + acl_t *acl; + int flag, ret; + + flag = 0; + if (type == ACL_TYPE_NFS4) + flag = ACL_NO_TRIVIAL; + + ret = facl_get(fd, flag, &acl); + if (ret != 0) + return (NULL); + + return (acl); +} + +static void +getcrtime(struct fs_softc *sc, int dirfd, const char *fname, uint64_t *secp, + uint64_t *nsp) +{ + nvlist_t *nvl = NULL; + uint64_t *vals = NULL; + uint_t nvals = 0; + int error; + + *secp = 0; + *nsp = 0; + + if (!sc->fs_hasxattr) + return; + + if ((error = getattrat(dirfd, XATTR_VIEW_READWRITE, fname, &nvl)) != 0) + return; + + if (nvlist_lookup_uint64_array(nvl, "crtime", &vals, &nvals) != 0) + goto done; + + if (nvals != 2) + goto done; + + *secp = vals[0]; + *nsp = vals[1]; + +done: + nvlist_free(nvl); +} +#endif diff --git a/usr/src/lib/lib9p/common/backend/fs.h b/usr/src/lib/lib9p/common/backend/fs.h new file mode 100644 index 0000000000..84b37171c2 --- /dev/null +++ 
b/usr/src/lib/lib9p/common/backend/fs.h @@ -0,0 +1,37 @@ + +/* + * Copyright 2016 Chris Torek <torek@ixsystems.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef LIB9P_BACKEND_FS_H +#define LIB9P_BACKEND_FS_H + +#include <stdbool.h> +#include "backend.h" + +int l9p_backend_fs_init(struct l9p_backend **backendp, int rootfd, bool ro); + +#endif /* LIB9P_BACKEND_FS_H */ diff --git a/usr/src/lib/lib9p/common/connection.c b/usr/src/lib/lib9p/common/connection.c new file mode 100644 index 0000000000..20c27796b8 --- /dev/null +++ b/usr/src/lib/lib9p/common/connection.c @@ -0,0 +1,215 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/queue.h> +#include "lib9p.h" +#include "lib9p_impl.h" +#include "fid.h" +#include "hashtable.h" +#include "log.h" +#include "threadpool.h" +#include "backend/backend.h" + +int +l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend) +{ + struct l9p_server *server; + + server = l9p_calloc(1, sizeof (*server)); + server->ls_max_version = L9P_2000L; + server->ls_backend = backend; + LIST_INIT(&server->ls_conns); + + *serverp = server; + return (0); +} + +int +l9p_connection_init(struct l9p_server *server, struct l9p_connection **conn) +{ + struct l9p_connection *newconn; + + assert(server != NULL); + assert(conn != NULL); + + newconn = calloc(1, sizeof (*newconn)); + if (newconn == NULL) + return (-1); + newconn->lc_server = server; + newconn->lc_msize = L9P_DEFAULT_MSIZE; + if (l9p_threadpool_init(&newconn->lc_tp, L9P_NUMTHREADS)) { + free(newconn); + return (-1); + } + ht_init(&newconn->lc_files, 100); + ht_init(&newconn->lc_requests, 100); + LIST_INSERT_HEAD(&server->ls_conns, newconn, lc_link); + *conn = newconn; + + return (0); +} + +void +l9p_connection_free(struct l9p_connection *conn) +{ + + LIST_REMOVE(conn, lc_link); + free(conn); +} + +void +l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov, + const size_t niov, void *aux) +{ + struct l9p_request *req; + int error; + + req = l9p_calloc(1, sizeof (struct l9p_request)); + req->lr_aux = aux; + req->lr_conn = conn; + + req->lr_req_msg.lm_mode = L9P_UNPACK; + req->lr_req_msg.lm_niov = niov; + memcpy(req->lr_req_msg.lm_iov, iov, sizeof (struct iovec) * niov); + + req->lr_resp_msg.lm_mode = L9P_PACK; + + if (l9p_pufcall(&req->lr_req_msg, &req->lr_req, conn->lc_version) != 0) { + L9P_LOG(L9P_WARNING, "cannot unpack received message"); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + if (ht_add(&conn->lc_requests, req->lr_req.hdr.tag, req)) { + 
L9P_LOG(L9P_WARNING, "client reusing outstanding tag %d", + req->lr_req.hdr.tag); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + error = conn->lc_lt.lt_get_response_buffer(req, + req->lr_resp_msg.lm_iov, + &req->lr_resp_msg.lm_niov, + conn->lc_lt.lt_aux); + if (error) { + L9P_LOG(L9P_WARNING, "cannot obtain buffers for response"); + ht_remove(&conn->lc_requests, req->lr_req.hdr.tag); + l9p_freefcall(&req->lr_req); + free(req); + return; + } + + /* + * NB: it's up to l9p_threadpool_run to decide whether + * to queue the work or to run it immediately and wait + * (it must do the latter for Tflush requests). + */ + l9p_threadpool_run(&conn->lc_tp, req); +} + +void +l9p_connection_close(struct l9p_connection *conn) +{ + struct ht_iter iter; + struct l9p_fid *fid; + struct l9p_request *req; + + L9P_LOG(L9P_DEBUG, "waiting for thread pool to shut down"); + l9p_threadpool_shutdown(&conn->lc_tp); + + /* Drain pending requests (if any) */ + L9P_LOG(L9P_DEBUG, "draining pending requests"); + ht_iter(&conn->lc_requests, &iter); + while ((req = ht_next(&iter)) != NULL) { +#ifdef notyet + /* XXX would be good to know if there is anyone listening */ + if (anyone listening) { + /* XXX crude - ops like Tclunk should succeed */ + req->lr_error = EINTR; + l9p_respond(req, false, false); + } else +#endif + l9p_respond(req, true, false); /* use no-answer path */ + ht_remove_at_iter(&iter); + } + + /* Close opened files (if any) */ + L9P_LOG(L9P_DEBUG, "closing opened files"); + ht_iter(&conn->lc_files, &iter); + while ((fid = ht_next(&iter)) != NULL) { + conn->lc_server->ls_backend->freefid( + conn->lc_server->ls_backend->softc, fid); + free(fid); + ht_remove_at_iter(&iter); + } + + ht_destroy(&conn->lc_requests); + ht_destroy(&conn->lc_files); +} + +struct l9p_fid * +l9p_connection_alloc_fid(struct l9p_connection *conn, uint32_t fid) +{ + struct l9p_fid *file; + + file = l9p_calloc(1, sizeof (struct l9p_fid)); + file->lo_fid = fid; + /* + * Note that the new fid is not 
marked valid yet. + * The insert here will fail if the fid number is + * in use, otherwise we have an invalid fid in the + * table (as desired). + */ + + if (ht_add(&conn->lc_files, fid, file) != 0) { + free(file); + return (NULL); + } + + return (file); +} + +void +l9p_connection_remove_fid(struct l9p_connection *conn, struct l9p_fid *fid) +{ + struct l9p_backend *be; + + /* fid should be marked invalid by this point */ + assert(!l9p_fid_isvalid(fid)); + + be = conn->lc_server->ls_backend; + be->freefid(be->softc, fid); + + ht_remove(&conn->lc_files, fid->lo_fid); + free(fid); +} diff --git a/usr/src/lib/lib9p/common/fcall.h b/usr/src/lib/lib9p/common/fcall.h new file mode 100644 index 0000000000..f779ea6ad5 --- /dev/null +++ b/usr/src/lib/lib9p/common/fcall.h @@ -0,0 +1,624 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail> + */ + +#ifndef LIB9P_FCALL_H +#define LIB9P_FCALL_H + +#include <stdint.h> + +#define L9P_MAX_WELEM 256 + +/* + * Function call/reply (Tfoo/Rfoo) numbers. + * + * These are protocol code numbers, so the exact values + * matter. However, __FIRST and __LAST_PLUS_ONE are for + * debug code, and just need to encompass the entire range. + * + * Note that we rely (in the debug code) on Rfoo == Tfoo+1. 
+ */ +enum l9p_ftype { + L9P__FIRST = 6, /* NB: must be <= all legal values */ + L9P_TLERROR = 6, /* illegal; exists for parity with Rlerror */ + L9P_RLERROR, + L9P_TSTATFS = 8, + L9P_RSTATFS, + L9P_TLOPEN = 12, + L9P_RLOPEN, + L9P_TLCREATE = 14, + L9P_RLCREATE, + L9P_TSYMLINK = 16, + L9P_RSYMLINK, + L9P_TMKNOD = 18, + L9P_RMKNOD, + L9P_TRENAME = 20, + L9P_RRENAME, + L9P_TREADLINK = 22, + L9P_RREADLINK, + L9P_TGETATTR = 24, + L9P_RGETATTR, + L9P_TSETATTR = 26, + L9P_RSETATTR, + L9P_TXATTRWALK = 30, + L9P_RXATTRWALK, + L9P_TXATTRCREATE = 32, + L9P_RXATTRCREATE, + L9P_TREADDIR = 40, + L9P_RREADDIR, + L9P_TFSYNC = 50, + L9P_RFSYNC, + L9P_TLOCK = 52, + L9P_RLOCK, + L9P_TGETLOCK = 54, + L9P_RGETLOCK, + L9P_TLINK = 70, + L9P_RLINK, + L9P_TMKDIR = 72, + L9P_RMKDIR, + L9P_TRENAMEAT = 74, + L9P_RRENAMEAT, + L9P_TUNLINKAT = 76, + L9P_RUNLINKAT, + L9P_TVERSION = 100, + L9P_RVERSION, + L9P_TAUTH = 102, + L9P_RAUTH, + L9P_TATTACH = 104, + L9P_RATTACH, + L9P_TERROR = 106, /* illegal */ + L9P_RERROR, + L9P_TFLUSH = 108, + L9P_RFLUSH, + L9P_TWALK = 110, + L9P_RWALK, + L9P_TOPEN = 112, + L9P_ROPEN, + L9P_TCREATE = 114, + L9P_RCREATE, + L9P_TREAD = 116, + L9P_RREAD, + L9P_TWRITE = 118, + L9P_RWRITE, + L9P_TCLUNK = 120, + L9P_RCLUNK, + L9P_TREMOVE = 122, + L9P_RREMOVE, + L9P_TSTAT = 124, + L9P_RSTAT, + L9P_TWSTAT = 126, + L9P_RWSTAT, + L9P__LAST_PLUS_1, /* NB: must be last */ +}; + +/* + * When a Tfoo request comes over the wire, we decode it + * (pack.c) from wire format into a request laid out in + * a "union l9p_fcall" object. This object is not in wire + * format, but rather in something more convenient for us + * to operate on. + * + * We then dispatch the request (request.c, backend/fs.c) and + * use another "union l9p_fcall" object to build a reply. + * The reply is converted to wire format on the way back out + * (pack.c again). 
+ * + * All sub-objects start with a header containing the request + * or reply type code and two-byte tag, and whether or not it + * is needed, a four-byte fid. + * + * What this means here is that the data structures within + * the union can be shared across various requests and replies. + * For instance, replies to OPEN, CREATE, LCREATE, LOPEN, MKDIR, and + * SYMLINK are all fairly similar (providing a qid and sometimes + * an iounit) and hence can all use the l9p_f_ropen structure. + * Which structures are used for which operations is somewhat + * arbitrary; for programming ease, if an operation shares a + * data structure, it still has its own name: there are union + * members named ropen, rcreate, rlcreate, rlopen, rmkdir, and + * rsymlink, even though all use struct l9p_f_ropen. + * + * The big exception to the above rule is struct l9p_f_io, which + * is used as both request and reply for all of READ, WRITE, and + * READDIR. Moreover, the READDIR reply must be pre-packed into + * wire format (it is handled like raw data a la READ). + * + * Some request messages (e.g., TREADLINK) fit in a header, having + * just type code, tag, and fid. These have no separate data + * structure, nor union member name. Similarly, some reply + * messages (e.g., RCLUNK, RREMOVE, RRENAME) have just the type + * code and tag. + */ + +/* + * Type code bits in (the first byte of) a qid. + */ +enum l9p_qid_type { + L9P_QTDIR = 0x80, /* type bit for directories */ + L9P_QTAPPEND = 0x40, /* type bit for append only files */ + L9P_QTEXCL = 0x20, /* type bit for exclusive use files */ + L9P_QTMOUNT = 0x10, /* type bit for mounted channel */ + L9P_QTAUTH = 0x08, /* type bit for authentication file */ + L9P_QTTMP = 0x04, /* type bit for non-backed-up file */ + L9P_QTSYMLINK = 0x02, /* type bit for symbolic link */ + L9P_QTFILE = 0x00 /* type bits for plain file */ +}; + +/* + * Extra permission bits in create and file modes (stat). 
+ */ +#define L9P_DMDIR 0x80000000 +enum { + L9P_DMAPPEND = 0x40000000, + L9P_DMEXCL = 0x20000000, + L9P_DMMOUNT = 0x10000000, + L9P_DMAUTH = 0x08000000, + L9P_DMTMP = 0x04000000, + L9P_DMSYMLINK = 0x02000000, + /* 9P2000.u extensions */ + L9P_DMDEVICE = 0x00800000, + L9P_DMNAMEDPIPE = 0x00200000, + L9P_DMSOCKET = 0x00100000, + L9P_DMSETUID = 0x00080000, + L9P_DMSETGID = 0x00040000, +}; + +/* + * Open/create mode bits in 9P2000 and 9P2000.u operations + * (not Linux lopen and lcreate flags, which are different). + * Note that the mode field is only one byte wide. + */ +enum l9p_omode { + L9P_OREAD = 0, /* open for read */ + L9P_OWRITE = 1, /* write */ + L9P_ORDWR = 2, /* read and write */ + L9P_OEXEC = 3, /* execute, == read but check execute permission */ + L9P_OACCMODE = 3, /* mask for the above access-mode bits */ + L9P_OTRUNC = 16, /* or'ed in (except for exec), truncate file first */ + L9P_OCEXEC = 32, /* or'ed in, close on exec */ + L9P_ORCLOSE = 64, /* or'ed in, remove on close */ + L9P_ODIRECT = 128, /* or'ed in, direct access */ +}; + +/* + * Flag bits in 9P2000.L operations (Tlopen, Tlcreate). These are + * basically just the Linux L_* flags. The bottom 3 bits are the + * same as for l9p_omode, although open-for-exec is not used: + * instead, the client does a Tgetattr and checks the mode for + * execute bits, then just opens for reading. + * + * Each L_O_xxx is just value O_xxx has on Linux in <fcntl.h>; + * not all are necessarily used. From observation, we do get + * L_O_CREAT and L_O_EXCL when creating with exclusive, and always + * get L_O_LARGEFILE. We do get L_O_APPEND when opening for + * append. We also get both L_O_DIRECT and L_O_DIRECTORY set + * when opening directories. + * + * We probably never get L_O_NOCTTY which makes no sense, and + * some of the other options may need to be handled on the client. 
+ */ +enum l9p_l_o_flags { + L9P_L_O_CREAT = 000000100U, + L9P_L_O_EXCL = 000000200U, + L9P_L_O_NOCTTY = 000000400U, + L9P_L_O_TRUNC = 000001000U, + L9P_L_O_APPEND = 000002000U, + L9P_L_O_NONBLOCK = 000004000U, + L9P_L_O_DSYNC = 000010000U, + L9P_L_O_FASYNC = 000020000U, + L9P_L_O_DIRECT = 000040000U, + L9P_L_O_LARGEFILE = 000100000U, + L9P_L_O_DIRECTORY = 000200000U, + L9P_L_O_NOFOLLOW = 000400000U, + L9P_L_O_NOATIME = 001000000U, + L9P_L_O_CLOEXEC = 002000000U, + L9P_L_O_SYNC = 004000000U, + L9P_L_O_PATH = 010000000U, + L9P_L_O_TMPFILE = 020000000U, +}; + +struct l9p_hdr { + uint8_t type; + uint16_t tag; + uint32_t fid; +}; + +struct l9p_qid { + uint8_t type; + uint32_t version; + uint64_t path; +}; + +struct l9p_stat { + uint16_t type; + uint32_t dev; + struct l9p_qid qid; + uint32_t mode; + uint32_t atime; + uint32_t mtime; + uint64_t length; + char *name; + char *uid; + char *gid; + char *muid; + char *extension; + uint32_t n_uid; + uint32_t n_gid; + uint32_t n_muid; +}; + +#define L9P_FSTYPE 0x01021997 + +struct l9p_statfs { + uint32_t type; /* file system type */ + uint32_t bsize; /* block size for I/O */ + uint64_t blocks; /* file system size (bsize-byte blocks) */ + uint64_t bfree; /* free blocks in fs */ + uint64_t bavail; /* free blocks avail to non-superuser*/ + uint64_t files; /* file nodes in file system (# inodes) */ + uint64_t ffree; /* free file nodes in fs */ + uint64_t fsid; /* file system identifier */ + uint32_t namelen; /* maximum length of filenames */ +}; + +struct l9p_f_version { + struct l9p_hdr hdr; + uint32_t msize; + char *version; +}; + +struct l9p_f_tflush { + struct l9p_hdr hdr; + uint16_t oldtag; +}; + +struct l9p_f_error { + struct l9p_hdr hdr; + char *ename; + uint32_t errnum; +}; + +struct l9p_f_ropen { + struct l9p_hdr hdr; + struct l9p_qid qid; + uint32_t iounit; +}; + +struct l9p_f_rauth { + struct l9p_hdr hdr; + struct l9p_qid aqid; +}; + +struct l9p_f_attach { + struct l9p_hdr hdr; + uint32_t afid; + char *uname; + char 
*aname; + uint32_t n_uname; +}; +#define L9P_NOFID ((uint32_t)-1) /* in Tattach, no auth fid */ +#define L9P_NONUNAME ((uint32_t)-1) /* in Tattach, no n_uname */ + +struct l9p_f_tcreate { + struct l9p_hdr hdr; + uint32_t perm; + char *name; + uint8_t mode; /* +Topen */ + char *extension; +}; + +struct l9p_f_twalk { + struct l9p_hdr hdr; + uint32_t newfid; + uint16_t nwname; + char *wname[L9P_MAX_WELEM]; +}; + +struct l9p_f_rwalk { + struct l9p_hdr hdr; + uint16_t nwqid; + struct l9p_qid wqid[L9P_MAX_WELEM]; +}; + +struct l9p_f_io { + struct l9p_hdr hdr; + uint64_t offset; /* Tread, Twrite, Treaddir */ + uint32_t count; /* Tread, Twrite, Rread, Treaddir, Rreaddir */ +}; + +struct l9p_f_rstat { + struct l9p_hdr hdr; + struct l9p_stat stat; +}; + +struct l9p_f_twstat { + struct l9p_hdr hdr; + struct l9p_stat stat; +}; + +struct l9p_f_rstatfs { + struct l9p_hdr hdr; + struct l9p_statfs statfs; +}; + +/* Used for Tlcreate, Tlopen, Tmkdir, Tunlinkat. */ +struct l9p_f_tlcreate { + struct l9p_hdr hdr; + char *name; /* Tlcreate, Tmkdir, Tunlinkat */ + uint32_t flags; /* Tlcreate, Tlopen, Tmkdir, Tunlinkat */ + uint32_t mode; /* Tlcreate, Tmkdir */ + uint32_t gid; /* Tlcreate, Tmkdir */ +}; + +struct l9p_f_tsymlink { + struct l9p_hdr hdr; + char *name; + char *symtgt; + uint32_t gid; +}; + +struct l9p_f_tmknod { + struct l9p_hdr hdr; + char *name; + uint32_t mode; + uint32_t major; + uint32_t minor; + uint32_t gid; +}; + +struct l9p_f_trename { + struct l9p_hdr hdr; + uint32_t dfid; + char *name; +}; + +struct l9p_f_rreadlink { + struct l9p_hdr hdr; + char *target; +}; + +struct l9p_f_tgetattr { + struct l9p_hdr hdr; + uint64_t request_mask; +}; + +struct l9p_f_rgetattr { + struct l9p_hdr hdr; + uint64_t valid; + struct l9p_qid qid; + uint32_t mode; + uint32_t uid; + uint32_t gid; + uint64_t nlink; + uint64_t rdev; + uint64_t size; + uint64_t blksize; + uint64_t blocks; + uint64_t atime_sec; + uint64_t atime_nsec; + uint64_t mtime_sec; + uint64_t mtime_nsec; + uint64_t 
ctime_sec; + uint64_t ctime_nsec; + uint64_t btime_sec; + uint64_t btime_nsec; + uint64_t gen; + uint64_t data_version; +}; + +/* Fields in req->request_mask and reply->valid for Tgetattr, Rgetattr. */ +enum l9pl_getattr_flags { + L9PL_GETATTR_MODE = 0x00000001, + L9PL_GETATTR_NLINK = 0x00000002, + L9PL_GETATTR_UID = 0x00000004, + L9PL_GETATTR_GID = 0x00000008, + L9PL_GETATTR_RDEV = 0x00000010, + L9PL_GETATTR_ATIME = 0x00000020, + L9PL_GETATTR_MTIME = 0x00000040, + L9PL_GETATTR_CTIME = 0x00000080, + L9PL_GETATTR_INO = 0x00000100, + L9PL_GETATTR_SIZE = 0x00000200, + L9PL_GETATTR_BLOCKS = 0x00000400, + /* everything up to and including BLOCKS is BASIC */ + L9PL_GETATTR_BASIC = L9PL_GETATTR_MODE | + L9PL_GETATTR_NLINK | + L9PL_GETATTR_UID | + L9PL_GETATTR_GID | + L9PL_GETATTR_RDEV | + L9PL_GETATTR_ATIME | + L9PL_GETATTR_MTIME | + L9PL_GETATTR_CTIME | + L9PL_GETATTR_INO | + L9PL_GETATTR_SIZE | + L9PL_GETATTR_BLOCKS, + L9PL_GETATTR_BTIME = 0x00000800, + L9PL_GETATTR_GEN = 0x00001000, + L9PL_GETATTR_DATA_VERSION = 0x00002000, + /* BASIC + birthtime + gen + data-version = ALL */ + L9PL_GETATTR_ALL = L9PL_GETATTR_BASIC | + L9PL_GETATTR_BTIME | + L9PL_GETATTR_GEN | + L9PL_GETATTR_DATA_VERSION, +}; + +struct l9p_f_tsetattr { + struct l9p_hdr hdr; + uint32_t valid; + uint32_t mode; + uint32_t uid; + uint32_t gid; + uint64_t size; + uint64_t atime_sec; /* if valid & L9PL_SETATTR_ATIME_SET */ + uint64_t atime_nsec; /* (else use on-server time) */ + uint64_t mtime_sec; /* if valid & L9PL_SETATTR_MTIME_SET */ + uint64_t mtime_nsec; /* (else use on-server time) */ +}; + +/* Fields in req->valid for Tsetattr. 
*/ +enum l9pl_setattr_flags { + L9PL_SETATTR_MODE = 0x00000001, + L9PL_SETATTR_UID = 0x00000002, + L9PL_SETATTR_GID = 0x00000004, + L9PL_SETATTR_SIZE = 0x00000008, + L9PL_SETATTR_ATIME = 0x00000010, + L9PL_SETATTR_MTIME = 0x00000020, + L9PL_SETATTR_CTIME = 0x00000040, + L9PL_SETATTR_ATIME_SET = 0x00000080, + L9PL_SETATTR_MTIME_SET = 0x00000100, +}; + +struct l9p_f_txattrwalk { + struct l9p_hdr hdr; + uint32_t newfid; + char *name; +}; + +struct l9p_f_rxattrwalk { + struct l9p_hdr hdr; + uint64_t size; +}; + +struct l9p_f_txattrcreate { + struct l9p_hdr hdr; + char *name; + uint64_t attr_size; + uint32_t flags; +}; + +struct l9p_f_tlock { + struct l9p_hdr hdr; + uint8_t type; /* from l9pl_lock_type */ + uint32_t flags; /* from l9pl_lock_flags */ + uint64_t start; + uint64_t length; + uint32_t proc_id; + char *client_id; +}; + +enum l9pl_lock_type { + L9PL_LOCK_TYPE_RDLOCK = 0, + L9PL_LOCK_TYPE_WRLOCK = 1, + L9PL_LOCK_TYPE_UNLOCK = 2, +}; + +enum l9pl_lock_flags { + L9PL_LOCK_TYPE_BLOCK = 1, + L9PL_LOCK_TYPE_RECLAIM = 2, +}; + +struct l9p_f_rlock { + struct l9p_hdr hdr; + uint8_t status; /* from l9pl_lock_status */ +}; + +enum l9pl_lock_status { + L9PL_LOCK_SUCCESS = 0, + L9PL_LOCK_BLOCKED = 1, + L9PL_LOCK_ERROR = 2, + L9PL_LOCK_GRACE = 3, +}; + +struct l9p_f_getlock { + struct l9p_hdr hdr; + uint8_t type; /* from l9pl_lock_type */ + uint64_t start; + uint64_t length; + uint32_t proc_id; + char *client_id; +}; + +struct l9p_f_tlink { + struct l9p_hdr hdr; + uint32_t dfid; + char *name; +}; + +struct l9p_f_trenameat { + struct l9p_hdr hdr; + char *oldname; + uint32_t newdirfid; + char *newname; +}; + +/* + * Flags in Tunlinkat (which re-uses f_tlcreate data structure but + * with different meaning). 
+ */ +enum l9p_l_unlinkat_flags { + /* not sure if any other AT_* flags are passed through */ + L9PL_AT_REMOVEDIR = 0x0200, +}; + +union l9p_fcall { + struct l9p_hdr hdr; + struct l9p_f_version version; + struct l9p_f_tflush tflush; + struct l9p_f_ropen ropen; + struct l9p_f_ropen rcreate; + struct l9p_f_ropen rattach; + struct l9p_f_error error; + struct l9p_f_rauth rauth; + struct l9p_f_attach tattach; + struct l9p_f_attach tauth; + struct l9p_f_tcreate tcreate; + struct l9p_f_tcreate topen; + struct l9p_f_twalk twalk; + struct l9p_f_rwalk rwalk; + struct l9p_f_twstat twstat; + struct l9p_f_rstat rstat; + struct l9p_f_rstatfs rstatfs; + struct l9p_f_tlcreate tlopen; + struct l9p_f_ropen rlopen; + struct l9p_f_tlcreate tlcreate; + struct l9p_f_ropen rlcreate; + struct l9p_f_tsymlink tsymlink; + struct l9p_f_ropen rsymlink; + struct l9p_f_tmknod tmknod; + struct l9p_f_ropen rmknod; + struct l9p_f_trename trename; + struct l9p_f_rreadlink rreadlink; + struct l9p_f_tgetattr tgetattr; + struct l9p_f_rgetattr rgetattr; + struct l9p_f_tsetattr tsetattr; + struct l9p_f_txattrwalk txattrwalk; + struct l9p_f_rxattrwalk rxattrwalk; + struct l9p_f_txattrcreate txattrcreate; + struct l9p_f_tlock tlock; + struct l9p_f_rlock rlock; + struct l9p_f_getlock getlock; + struct l9p_f_tlink tlink; + struct l9p_f_tlcreate tmkdir; + struct l9p_f_ropen rmkdir; + struct l9p_f_trenameat trenameat; + struct l9p_f_tlcreate tunlinkat; + struct l9p_f_io io; +}; + +#endif /* LIB9P_FCALL_H */ diff --git a/usr/src/lib/lib9p/common/fid.h b/usr/src/lib/lib9p/common/fid.h new file mode 100644 index 0000000000..cdfdd7ec93 --- /dev/null +++ b/usr/src/lib/lib9p/common/fid.h @@ -0,0 +1,160 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_FID_H +#define LIB9P_FID_H + +#include <stdbool.h> + +/* + * Data structure for a fid. All active fids in one session + * are stored in a hash table; the hash table provides the + * iterator to process them. (See also l9p_connection in lib9p.h.) + * + * The back-end code has additional data per fid, found via + * lo_aux. Currently this is allocated with a separate calloc(). + * + * Most fids represent a file or directory, but a few are special + * purpose, including the auth fid from Tauth+Tattach, and the + * fids used for extended attributes. We have our own set of + * flags here in lo_flags. + * + * Note that all new fids start as potentially-valid (reserving + * their 32-bit fid value), but not actually-valid. If another + * (threaded) op is invoked on a not-yet-valid fid, the fid cannot + * be used. 
A fid can also be locked against other threads, in + * which case they must wait for it: this happens during create + * and open, which on success result in the fid changing from a + * directory to a file. (At least, all this applies in principle + * -- we're currently single-threaded per connection so the locks + * are nop-ed out and the valid bit is mainly just for debug.) + * + * Fids that are "open" (the underlying file or directory is open) + * are marked as well. + * + * Locking is managed by the front end (request.c); validation + * and type-marking can be done by either side as needed. + * + * Fid types and validity are manipulated by set* and unset* + * functions, and tested by is* ops. Note that we only + * distinguish between "directory" and "not directory" at this + * level, i.e., symlinks and devices are just "not a directory + * fid". Also, fids cannot be unset as auth or xattr fids, + * nor can an open fid become closed, except by being clunked. + * While files should not normally become directories, it IS normal + * for directory fids to become file fids due to Twalk operations. + * + * (These accessor functions are just to leave wiggle room for + * different future implementations.) + */ +struct l9p_fid { + void *lo_aux; + uint32_t lo_fid; + uint32_t lo_flags; /* volatile atomic_t when threaded? 
*/ +}; + +enum l9p_lo_flags { + L9P_LO_ISAUTH = 0x01, + L9P_LO_ISDIR = 0x02, + L9P_LO_ISOPEN = 0x04, + L9P_LO_ISVALID = 0x08, + L9P_LO_ISXATTR = 0x10, +}; + +static inline bool +l9p_fid_isauth(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISAUTH) != 0); +} + +static inline void +l9p_fid_setauth(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISAUTH; +} + +static inline bool +l9p_fid_isdir(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISDIR) != 0); +} + +static inline void +l9p_fid_setdir(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISDIR; +} + +static inline void +l9p_fid_unsetdir(struct l9p_fid *fid) +{ + fid->lo_flags &= ~(uint32_t)L9P_LO_ISDIR; +} + +static inline bool +l9p_fid_isopen(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISOPEN) != 0); +} + +static inline void +l9p_fid_setopen(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISOPEN; +} + +static inline bool +l9p_fid_isvalid(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISVALID) != 0); +} + +static inline void +l9p_fid_setvalid(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISVALID; +} + +static inline void +l9p_fid_unsetvalid(struct l9p_fid *fid) +{ + fid->lo_flags &= ~(uint32_t)L9P_LO_ISVALID; +} + +static inline bool +l9p_fid_isxattr(struct l9p_fid *fid) +{ + return ((fid->lo_flags & L9P_LO_ISXATTR) != 0); +} + +static inline void +l9p_fid_setxattr(struct l9p_fid *fid) +{ + fid->lo_flags |= L9P_LO_ISXATTR; +} + +#endif /* LIB9P_FID_H */ diff --git a/usr/src/lib/lib9p/common/genacl.c b/usr/src/lib/lib9p/common/genacl.c new file mode 100644 index 0000000000..a7be17ca9b --- /dev/null +++ b/usr/src/lib/lib9p/common/genacl.c @@ -0,0 +1,806 @@ +/* + * Copyright 2016 Chris Torek <torek@ixsystems.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/acl.h> +#include <sys/stat.h> + +#ifdef __illumos__ +#include <sys/sysmacros.h> +#endif + +#include "lib9p.h" +#include "lib9p_impl.h" +#include "genacl.h" +#include "fid.h" +#include "log.h" + +#ifndef __illumos__ +typedef int econvertfn(acl_entry_t, struct l9p_ace *); +#endif + +#ifdef __FreeBSD__ +static struct l9p_acl *l9p_new_acl(uint32_t acetype, uint32_t aceasize); +static struct l9p_acl *l9p_growacl(struct l9p_acl *acl, uint32_t aceasize); +static int l9p_count_aces(acl_t sysacl); +static struct l9p_acl *l9p_sysacl_to_acl(int, acl_t, econvertfn *); +#endif +static bool l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids); +static int l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st, + uid_t uid, gid_t gid, gid_t *gids, size_t ngids); + +void +l9p_acl_free(struct l9p_acl *acl) +{ + + free(acl); +} + +/* + * Is the given group ID tid (test-id) any of the gid's in agids? + */ +static bool +l9p_ingroup(gid_t tid, gid_t gid, gid_t *gids, size_t ngids) +{ + size_t i; + + if (tid == gid) + return (true); + for (i = 0; i < ngids; i++) + if (tid == gids[i]) + return (true); + return (false); +} + +/* #define ACE_DEBUG */ + +/* + * Note that NFSv4 tests are done on a "first match" basis. + * That is, we check each ACE sequentially until we run out + * of ACEs, or find something explicitly denied (DENIED!), + * or have cleared out all our attempt-something bits. Once + * we come across an ALLOW entry for the bits we're trying, + * we clear those from the bits we're still looking for, in + * the order they appear. + * + * The result is either "definitely allowed" (we cleared + * all the bits), "definitely denied" (we hit a deny with + * some or all of the bits), or "unspecified". We + * represent these three states as +1 (positive = yes = allow), + * -1 (negative = no = denied), or 0 (no strong answer). 
+ * + * For our caller's convenience, if we are called with a + * mask of 0, we return 0 (no answer). + */ +static int +l9p_check_aces(int32_t mask, struct l9p_acl *acl, struct stat *st, + uid_t uid, gid_t gid, gid_t *gids, size_t ngids) +{ + uint32_t i; + struct l9p_ace *ace; +#ifdef ACE_DEBUG + const char *acetype, *allowdeny; + bool show_tid; +#endif + bool match; + uid_t tid; + + if (mask == 0) + return (0); + + for (i = 0; mask != 0 && i < acl->acl_nace; i++) { + ace = &acl->acl_aces[i]; + switch (ace->ace_type) { + case L9P_ACET_ACCESS_ALLOWED: + case L9P_ACET_ACCESS_DENIED: + break; + default: + /* audit, alarm - ignore */ + continue; + } +#ifdef ACE_DEBUG + show_tid = false; +#endif + if (ace->ace_flags & L9P_ACEF_OWNER) { +#ifdef ACE_DEBUG + acetype = "OWNER@"; +#endif + match = st->st_uid == uid; + } else if (ace->ace_flags & L9P_ACEF_GROUP) { +#ifdef ACE_DEBUG + acetype = "GROUP@"; +#endif + match = l9p_ingroup(st->st_gid, gid, gids, ngids); + } else if (ace->ace_flags & L9P_ACEF_EVERYONE) { +#ifdef ACE_DEBUG + acetype = "EVERYONE@"; +#endif + match = true; + } else { + if (ace->ace_idsize != sizeof(tid)) + continue; +#ifdef ACE_DEBUG + show_tid = true; +#endif + memcpy(&tid, &ace->ace_idbytes, sizeof(tid)); + if (ace->ace_flags & L9P_ACEF_IDENTIFIER_GROUP) { +#ifdef ACE_DEBUG + acetype = "group"; +#endif + match = l9p_ingroup(tid, gid, gids, ngids); + } else { +#ifdef ACE_DEBUG + acetype = "user"; +#endif + match = tid == uid; + } + } + /* + * If this ACE applies to us, check remaining bits. + * If any of those bits also apply, check the type: + * DENY means "stop now", ALLOW means allow these bits + * and keep checking. + */ +#ifdef ACE_DEBUG + allowdeny = ace->ace_type == L9P_ACET_ACCESS_DENIED ? 
+ "deny" : "allow"; +#endif + if (match && (ace->ace_mask & (uint32_t)mask) != 0) { +#ifdef ACE_DEBUG + if (show_tid) + L9P_LOG(L9P_DEBUG, + "ACE: %s %s %d: mask 0x%x ace_mask 0x%x", + allowdeny, acetype, (int)tid, + (u_int)mask, (u_int)ace->ace_mask); + else + L9P_LOG(L9P_DEBUG, + "ACE: %s %s: mask 0x%x ace_mask 0x%x", + allowdeny, acetype, + (u_int)mask, (u_int)ace->ace_mask); +#endif + if (ace->ace_type == L9P_ACET_ACCESS_DENIED) + return (-1); + mask &= ~ace->ace_mask; +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, "clear 0x%x: now mask=0x%x", + (u_int)ace->ace_mask, (u_int)mask); +#endif + } else { +#ifdef ACE_DEBUG + if (show_tid) + L9P_LOG(L9P_DEBUG, + "ACE: SKIP %s %s %d: " + "match %d mask 0x%x ace_mask 0x%x", + allowdeny, acetype, (int)tid, + (int)match, (u_int)mask, + (u_int)ace->ace_mask); + else + L9P_LOG(L9P_DEBUG, + "ACE: SKIP %s %s: " + "match %d mask 0x%x ace_mask 0x%x", + allowdeny, acetype, + (int)match, (u_int)mask, + (u_int)ace->ace_mask); +#endif + } + } + + /* Return 1 if access definitely granted. */ +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, "ACE: end of ACEs, mask now 0x%x: %s", + mask, mask ? "no-definitive-answer" : "ALLOW"); +#endif + return (mask == 0 ? 1 : 0); +} + +/* + * Test against ACLs. + * + * The return value is normally 0 (access allowed) or EPERM + * (access denied), so it could just be a boolean.... + * + * For "make new dir in dir" and "remove dir in dir", you must + * set the mask to test the directory permissions (not ADD_FILE but + * ADD_SUBDIRECTORY, and DELETE_CHILD). For "make new file in dir" + * you must set the opmask to test file ADD_FILE. + * + * The L9P_ACE_DELETE flag means "can delete this thing"; it's not + * clear whether it should override the parent directory's ACL if + * any. In our case it does not, but a caller may try + * L9P_ACE_DELETE_CHILD (separately, on its own) and then a + * (second, separate) L9P_ACE_DELETE, to make the permissions work + * as "or" instead of "and". 
+ * + * Pass a NULL parent/pstat if they are not applicable, e.g., + * for doing operations on an existing file, such as reading or + * writing data or attributes. Pass in a null child/cstat if + * that's not applicable, such as creating a new file/dir. + * + * NB: it's probably wise to allow the owner of any file to update + * the ACLs of that file, but we leave that test to the caller. + */ +int l9p_acl_check_access(int32_t opmask, struct l9p_acl_check_args *args) +{ + struct l9p_acl *parent, *child; + struct stat *pstat, *cstat; + int32_t pop, cop; + size_t ngids; + uid_t uid; + gid_t gid, *gids; + int panswer, canswer; + + assert(opmask != 0); + parent = args->aca_parent; + pstat = args->aca_pstat; + child = args->aca_child; + cstat = args->aca_cstat; + uid = args->aca_uid; + gid = args->aca_gid; + gids = args->aca_groups; + ngids = args->aca_ngroups; + +#ifdef ACE_DEBUG + L9P_LOG(L9P_DEBUG, + "l9p_acl_check_access: opmask=0x%x uid=%ld gid=%ld ngids=%zd", + (u_int)opmask, (long)uid, (long)gid, ngids); +#endif + /* + * If caller said "superuser semantics", check that first. + * Note that we apply them regardless of ACLs. + */ + if (uid == 0 && args->aca_superuser) + return (0); + + /* + * If told to ignore ACLs and use only stat-based permissions, + * discard any non-NULL ACL pointers. + * + * This will need some fancying up when we support POSIX ACLs. + */ + if ((args->aca_aclmode & L9P_ACM_NFS_ACL) == 0) + parent = child = NULL; + + assert(parent == NULL || parent->acl_acetype == L9P_ACLTYPE_NFSv4); + assert(parent == NULL || pstat != NULL); + assert(child == NULL || child->acl_acetype == L9P_ACLTYPE_NFSv4); + assert(child == NULL || cstat != NULL); + assert(pstat != NULL || cstat != NULL); + + /* + * If the operation is UNLINK we should have either both ACLs + * or no ACLs, but we won't require that here. + * + * If a parent ACL is supplied, it's a directory by definition. + * Make sure we're allowed to do this there, whatever this is. 
+ * If a child ACL is supplied, check it too. Note that the + * DELETE permission only applies in the child though, not + * in the parent, and the DELETE_CHILD only applies in the + * parent. + */ + pop = cop = opmask; + if (parent != NULL || pstat != NULL) { + /* + * Remove child-only bits from parent op and + * parent-only bits from child op. + * + * L9P_ACE_DELETE is child-only. + * + * L9P_ACE_DELETE_CHILD is parent-only, and three data + * access bits overlap with three directory access bits. + * We should have child==NULL && cstat==NULL, so the + * three data bits should be redundant, but it's + * both trivial and safest to remove them anyway. + */ + pop &= ~L9P_ACE_DELETE; + cop &= ~(L9P_ACE_DELETE_CHILD | L9P_ACE_LIST_DIRECTORY | + L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY); + } else { + /* + * Remove child-only bits from parent op. We need + * not bother since we just found we have no parent + * and no pstat, and hence won't actually *use* pop. + * + * pop &= ~(L9P_ACE_READ_DATA | L9P_ACE_WRITE_DATA | + * L9P_ACE_APPEND_DATA); + */ + } + panswer = 0; + canswer = 0; + if (parent != NULL) + panswer = l9p_check_aces(pop, parent, pstat, + uid, gid, gids, ngids); + if (child != NULL) + canswer = l9p_check_aces(cop, child, cstat, + uid, gid, gids, ngids); + + if (panswer || canswer) { + /* + * Got a definitive answer from parent and/or + * child ACLs. We're not quite done yet though. + */ + if (opmask == L9P_ACOP_UNLINK) { + /* + * For UNLINK, we can get an allow from child + * and deny from parent, or vice versa. It's + * not 100% clear how to handle the two-answer + * case. ZFS says that if either says "allow", + * we allow, and if both definitely say "deny", + * we deny. This makes sense, so we do that + * here for all cases, even "strict". + */ + if (panswer > 0 || canswer > 0) + return (0); + if (panswer < 0 && canswer < 0) + return (EPERM); + /* non-definitive answer from one! 
move on */ + } else { + /* + * Have at least one definitive answer, and + * should have only one; obey whichever + * one it is. + */ + if (panswer) + return (panswer < 0 ? EPERM : 0); + return (canswer < 0 ? EPERM : 0); + } + } + + /* + * No definitive answer from ACLs alone. Check for ZFS style + * permissions checking and an "UNLINK" operation under ACLs. + * If so, find write-and-execute permission on parent. + * Note that WRITE overlaps with ADD_FILE -- that's ZFS's + * way of saying "allow write to dir" -- but EXECUTE is + * separate from LIST_DIRECTORY, so that's at least a little + * bit cleaner. + * + * Note also that only a definitive yes (both bits are + * explicitly allowed) results in granting unlink, and + * a definitive no (at least one bit explicitly denied) + * results in EPERM. Only "no answer" moves on. + */ + if ((args->aca_aclmode & L9P_ACM_ZFS_ACL) && + opmask == L9P_ACOP_UNLINK && parent != NULL) { + panswer = l9p_check_aces(L9P_ACE_ADD_FILE | L9P_ACE_EXECUTE, + parent, pstat, uid, gid, gids, ngids); + if (panswer) + return (panswer < 0 ? EPERM : 0); + } + + /* + * No definitive answer from ACLs. + * + * Try POSIX style rwx permissions if allowed. This should + * be rare, occurring mainly when caller supplied no ACLs + * or set the mode to suppress them. + * + * The stat to check is the parent's if we don't have a child + * (i.e., this is a dir op), or if the DELETE_CHILD bit is set + * (i.e., this is an unlink or similar). Otherwise it's the + * child's. + */ + if (args->aca_aclmode & L9P_ACM_STAT_MODE) { + struct stat *st; + int rwx, bits; + + rwx = l9p_ace_mask_to_rwx(opmask); + if ((st = cstat) == NULL || (opmask & L9P_ACE_DELETE_CHILD)) + st = pstat; + if (uid == st->st_uid) + bits = (st->st_mode >> 6) & 7; + else if (l9p_ingroup(st->st_gid, gid, gids, ngids)) + bits = (st->st_mode >> 3) & 7; + else + bits = st->st_mode & 7; + /* + * If all the desired bits are set, we're OK. 
+ */ + if ((rwx & bits) == rwx) + return (0); + } + + /* all methods have failed, return EPERM */ + return (EPERM); +} + +/* + * Collapse fancy ACL operation mask down to simple Unix bits. + * + * Directory operations don't map that well. However, listing + * a directory really does require read permission, and adding + * or deleting files really does require write permission, so + * this is probably sufficient. + * + * The result is the OR of 4 (any read-type bit requested), + * 2 (any write/add/delete-type bit requested) and 1 (execute + * requested); bits not listed below contribute nothing. + */ +int +l9p_ace_mask_to_rwx(int32_t opmask) +{ + int rwx = 0; + + if (opmask & + (L9P_ACE_READ_DATA | L9P_ACE_READ_NAMED_ATTRS | + L9P_ACE_READ_ATTRIBUTES | L9P_ACE_READ_ACL)) + rwx |= 4; + if (opmask & + (L9P_ACE_WRITE_DATA | L9P_ACE_APPEND_DATA | + L9P_ACE_ADD_FILE | L9P_ACE_ADD_SUBDIRECTORY | + L9P_ACE_DELETE | L9P_ACE_DELETE_CHILD | + L9P_ACE_WRITE_NAMED_ATTRS | L9P_ACE_WRITE_ATTRIBUTES | + L9P_ACE_WRITE_ACL)) + rwx |= 2; + if (opmask & L9P_ACE_EXECUTE) + rwx |= 1; + return (rwx); +} + +#if defined(__FreeBSD__) || defined(__illumos__) +/* + * Allocate new ACL holder and ACEs. + * + * The holder and room for aceasize ACEs come from a single + * malloc(). On success the ACE type and allocated size are + * recorded and acl_nace is initialized to 0; the ACE storage + * itself is left uninitialized. Returns NULL if malloc() fails. + */ +static struct l9p_acl * +l9p_new_acl(uint32_t acetype, uint32_t aceasize) +{ + struct l9p_acl *ret; + size_t asize, size; + + asize = aceasize * sizeof(struct l9p_ace); + size = sizeof(struct l9p_acl) + asize; + ret = malloc(size); + if (ret != NULL) { + ret->acl_acetype = acetype; + ret->acl_nace = 0; + ret->acl_aceasize = aceasize; + } + return (ret); +} +#endif + +#ifdef __FreeBSD__ +/* + * Expand ACL to accommodate more entries. + * + * Currently won't shrink, only grow, so it's a fast no-op until + * we hit the allocated size. After that, it's best to grow in + * big chunks, or this will be O(n**2). 
+ */ +static struct l9p_acl * +l9p_growacl(struct l9p_acl *acl, uint32_t aceasize) +{ + struct l9p_acl *tmp; + size_t asize, size; + + if (acl->acl_aceasize < aceasize) { + asize = aceasize * sizeof(struct l9p_ace); + size = sizeof(struct l9p_acl) + asize; + tmp = realloc(acl, size); + if (tmp == NULL) + free(acl); + acl = tmp; + } + return (acl); +} + +/* + * Annoyingly, there's no POSIX-standard way to count the number + * of ACEs in a system ACL other than to walk through them all. + * This is silly, but at least 2n is still O(n), and the walk is + * short. (If the system ACL mysteriously grows, we'll handle + * that OK via growacl(), too.) + */ +static int +l9p_count_aces(acl_t sysacl) +{ + acl_entry_t entry; + uint32_t n; + int id; + + id = ACL_FIRST_ENTRY; + for (n = 0; acl_get_entry(sysacl, id, &entry) == 1; n++) + id = ACL_NEXT_ENTRY; + + return ((int)n); +} + +/* + * Create ACL with ACEs from the given acl_t. We use the given + * convert function on each ACE. + */ +static struct l9p_acl * +l9p_sysacl_to_acl(int acetype, acl_t sysacl, econvertfn *convert) +{ + struct l9p_acl *acl; + acl_entry_t entry; + uint32_t n; + int error, id; + + acl = l9p_new_acl((uint32_t)acetype, (uint32_t)l9p_count_aces(sysacl)); + if (acl == NULL) + return (NULL); + id = ACL_FIRST_ENTRY; + for (n = 0;;) { + if (acl_get_entry(sysacl, id, &entry) != 1) + break; + acl = l9p_growacl(acl, n + 1); + if (acl == NULL) + return (NULL); + error = (*convert)(entry, &acl->acl_aces[n]); + id = ACL_NEXT_ENTRY; + if (error == 0) + n++; + } + acl->acl_nace = n; + return (acl); +} +#endif + +#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */ +struct l9p_acl * +l9p_posix_acl_to_acl(acl_t sysacl) +{ +} +#endif + +#if defined(HAVE_FREEBSD_ACLS) +static int +l9p_frombsdnfs4(acl_entry_t sysace, struct l9p_ace *ace) +{ + acl_tag_t tag; /* e.g., USER_OBJ, GROUP, etc */ + acl_entry_type_t entry_type; /* e.g., allow/deny */ + acl_permset_t absdperm; + acl_flagset_t absdflag; + acl_perm_t bsdperm; /* e.g., 
READ_DATA */ + acl_flag_t bsdflag; /* e.g., FILE_INHERIT_ACE */ + uint32_t flags, mask; + int error; + uid_t uid, *aid; + + error = acl_get_tag_type(sysace, &tag); + if (error == 0) + error = acl_get_entry_type_np(sysace, &entry_type); + if (error == 0) + error = acl_get_flagset_np(sysace, &absdflag); + if (error == 0) + error = acl_get_permset(sysace, &absdperm); + if (error) + return (error); + + flags = 0; + uid = 0; + aid = NULL; + + /* move user/group/everyone + id-is-group-id into flags */ + switch (tag) { + case ACL_USER_OBJ: + flags |= L9P_ACEF_OWNER; + break; + case ACL_GROUP_OBJ: + flags |= L9P_ACEF_GROUP; + break; + case ACL_EVERYONE: + flags |= L9P_ACEF_EVERYONE; + break; + case ACL_GROUP: + flags |= L9P_ACEF_IDENTIFIER_GROUP; + /* FALLTHROUGH */ + case ACL_USER: + aid = acl_get_qualifier(sysace); /* ugh, this malloc()s */ + if (aid == NULL) + return (ENOMEM); + uid = *(uid_t *)aid; + free(aid); + aid = &uid; + break; + default: + return (EINVAL); /* can't happen */ + } + + switch (entry_type) { + + case ACL_ENTRY_TYPE_ALLOW: + ace->ace_type = L9P_ACET_ACCESS_ALLOWED; + break; + + case ACL_ENTRY_TYPE_DENY: + ace->ace_type = L9P_ACET_ACCESS_DENIED; + break; + + case ACL_ENTRY_TYPE_AUDIT: + ace->ace_type = L9P_ACET_SYSTEM_AUDIT; + break; + + case ACL_ENTRY_TYPE_ALARM: + ace->ace_type = L9P_ACET_SYSTEM_ALARM; + break; + + default: + return (EINVAL); /* can't happen */ + } + + /* transform remaining BSD flags to internal NFS-y form */ + bsdflag = *absdflag; + if (bsdflag & ACL_ENTRY_FILE_INHERIT) + flags |= L9P_ACEF_FILE_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_DIRECTORY_INHERIT) + flags |= L9P_ACEF_DIRECTORY_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_NO_PROPAGATE_INHERIT) + flags |= L9P_ACEF_NO_PROPAGATE_INHERIT_ACE; + if (bsdflag & ACL_ENTRY_INHERIT_ONLY) + flags |= L9P_ACEF_INHERIT_ONLY_ACE; + if (bsdflag & ACL_ENTRY_SUCCESSFUL_ACCESS) + flags |= L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG; + if (bsdflag & ACL_ENTRY_FAILED_ACCESS) + flags |= 
L9P_ACEF_FAILED_ACCESS_ACE_FLAG; + ace->ace_flags = flags; + + /* + * Transform BSD permissions to ace_mask. Note that directory + * vs file bits are the same in both sets, so we don't need + * to worry about that, at least. + * + * There seem to be no BSD equivalents for WRITE_RETENTION + * and WRITE_RETENTION_HOLD. + */ + mask = 0; + bsdperm = *absdperm; + if (bsdperm & ACL_READ_DATA) + mask |= L9P_ACE_READ_DATA; + if (bsdperm & ACL_WRITE_DATA) + mask |= L9P_ACE_WRITE_DATA; + if (bsdperm & ACL_APPEND_DATA) + mask |= L9P_ACE_APPEND_DATA; + if (bsdperm & ACL_READ_NAMED_ATTRS) + mask |= L9P_ACE_READ_NAMED_ATTRS; + if (bsdperm & ACL_WRITE_NAMED_ATTRS) + mask |= L9P_ACE_WRITE_NAMED_ATTRS; + if (bsdperm & ACL_EXECUTE) + mask |= L9P_ACE_EXECUTE; + if (bsdperm & ACL_DELETE_CHILD) + mask |= L9P_ACE_DELETE_CHILD; + if (bsdperm & ACL_READ_ATTRIBUTES) + mask |= L9P_ACE_READ_ATTRIBUTES; + if (bsdperm & ACL_WRITE_ATTRIBUTES) + mask |= L9P_ACE_WRITE_ATTRIBUTES; + /* L9P_ACE_WRITE_RETENTION */ + /* L9P_ACE_WRITE_RETENTION_HOLD */ + /* 0x00800 */ + if (bsdperm & ACL_DELETE) + mask |= L9P_ACE_DELETE; + if (bsdperm & ACL_READ_ACL) + mask |= L9P_ACE_READ_ACL; + if (bsdperm & ACL_WRITE_ACL) + mask |= L9P_ACE_WRITE_ACL; + if (bsdperm & ACL_WRITE_OWNER) + mask |= L9P_ACE_WRITE_OWNER; + if (bsdperm & ACL_SYNCHRONIZE) + mask |= L9P_ACE_SYNCHRONIZE; + ace->ace_mask = mask; + + /* fill in variable-size user or group ID bytes */ + if (aid == NULL) + ace->ace_idsize = 0; + else { + ace->ace_idsize = sizeof(uid); + memcpy(&ace->ace_idbytes[0], aid, sizeof(uid)); + } + + return (0); +} + +struct l9p_acl * +l9p_freebsd_nfsv4acl_to_acl(acl_t sysacl) +{ + + return (l9p_sysacl_to_acl(L9P_ACLTYPE_NFSv4, sysacl, l9p_frombsdnfs4)); +} +#endif + +#if defined(HAVE_DARWIN_ACLS) && 0 /* not yet */ +struct l9p_acl * +l9p_darwin_nfsv4acl_to_acl(acl_t sysacl) +{ +} +#endif + +#if defined(HAVE__ILLUMOS_ACLS) + +static struct { + uint16_t ace_flag; + uint32_t l9_flag; +} ace_flag_tbl[] = { + { 
ACE_FILE_INHERIT_ACE, L9P_ACEF_FILE_INHERIT_ACE }, + { ACE_DIRECTORY_INHERIT_ACE, L9P_ACEF_DIRECTORY_INHERIT_ACE }, + { ACE_NO_PROPAGATE_INHERIT_ACE, L9P_ACEF_NO_PROPAGATE_INHERIT_ACE }, + { ACE_INHERIT_ONLY_ACE, L9P_ACEF_INHERIT_ONLY_ACE }, + { ACE_SUCCESSFUL_ACCESS_ACE_FLAG, + L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG }, + { ACE_FAILED_ACCESS_ACE_FLAG, + L9P_ACEF_FAILED_ACCESS_ACE_FLAG }, + { ACE_IDENTIFIER_GROUP, L9P_ACEF_IDENTIFIER_GROUP }, + /* There doesn't appear to be an equivalent for ACE_INHERITED_ACE */ + { ACE_OWNER, L9P_ACEF_OWNER }, + { ACE_GROUP, L9P_ACEF_GROUP }, + { ACE_EVERYONE, L9P_ACEF_EVERYONE } +}; + +/* + * Convert an illumos acl_t holding ACE_T (NFSv4-style) entries into + * a newly allocated l9p_acl with one l9p_ace per system entry. + * + * Returns NULL if the ACL is not of type ACE_T, if allocation fails, + * or if an entry has an unrecognized a_type (which is also logged). + * The result is released with l9p_acl_free(). + */ +struct l9p_acl * +l9p_illumos_nfsv4acl_to_acl(acl_t *sysacl) +{ + struct l9p_acl *l9acl; + struct l9p_ace *l9ace; + ace_t *ent; + int i, j; + + /* We only support NFSv4 ACLs.. so don't try this on UFS */ + if (sysacl->acl_type != ACE_T) + return (NULL); + + l9acl = l9p_new_acl(L9P_ACLTYPE_NFSv4, sysacl->acl_cnt); + if (l9acl == NULL) + return (NULL); + + ent = sysacl->acl_aclp; + l9ace = l9acl->acl_aces; + for (i = 0; i < sysacl->acl_cnt; i++, ent++, l9ace++) { + switch (ent->a_type) { + case ACE_ACCESS_ALLOWED_ACE_TYPE: + l9ace->ace_type = L9P_ACET_ACCESS_ALLOWED; + break; + case ACE_ACCESS_DENIED_ACE_TYPE: + l9ace->ace_type = L9P_ACET_ACCESS_DENIED; + break; + case ACE_SYSTEM_AUDIT_ACE_TYPE: + l9ace->ace_type = L9P_ACET_SYSTEM_AUDIT; + break; + case ACE_SYSTEM_ALARM_ACE_TYPE: + l9ace->ace_type = L9P_ACET_SYSTEM_ALARM; + break; + default: + L9P_LOG(L9P_ERROR, "invalid ACL type"); + l9p_acl_free(l9acl); + return (NULL); + } + + l9ace->ace_flags = 0; + for (j = 0; j < ARRAY_SIZE(ace_flag_tbl); j++) { + if ((ent->a_flags & ace_flag_tbl[j].ace_flag) != 0) + l9ace->ace_flags |= ace_flag_tbl[j].l9_flag; + } + + /* + * In a bit of good fortune, the bit values for ace_t masks + * and l9p masks are the same (l9p does have WRITE_RETENTION + * and WRITE_RETENTION_HOLD which aren't used -- we're also + * going ace_t->l9p so they don't matter in this context).
+ */ + l9ace->ace_mask = ent->a_access_mask; + l9ace->ace_idsize = sizeof (ent->a_who); + /* store the id in this entry's own identity bytes, not at the head of the ACE array */ + memcpy(&l9ace->ace_idbytes[0], &ent->a_who, sizeof (ent->a_who)); + } + + /* l9p_new_acl() starts acl_nace at 0; record how many ACEs were filled in */ + l9acl->acl_nace = (uint32_t)sysacl->acl_cnt; + + return (l9acl); +} +#endif diff --git a/usr/src/lib/lib9p/common/genacl.h b/usr/src/lib/lib9p/common/genacl.h new file mode 100644 index 0000000000..d74b543c19 --- /dev/null +++ b/usr/src/lib/lib9p/common/genacl.h @@ -0,0 +1,316 @@ +/* + * Copyright 2016 Chris Torek <torek@ixsystems.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * General ACL support for 9P2000.L. + * + * We mostly use Linux's xattr name space and nfs4 ACL bits, as + * these are the most general forms available. 
+ * + * Linux requests attributes named + * + * "system.posix_acl_default" + * "system.posix_acl_access" + * + * to get POSIX style ACLs, and: + * + * "system.nfs4_acl" + * + * to get NFSv4 style ACLs. The v9fs client does not explicitly + * ask for the latter, but if you use the Ubuntu nfs4-acl-tools + * package, it should be able to read and write these. + * + * For the record, the Linux kernel source code also shows: + * + * - Lustre uses "trusted.*", with "*" matching "lov", "lma", + * "lmv", "dmv", "link", "fid", "version", "som", "hsm", and + * "lfsck_namespace". + * + * - ceph has a name tree of the form "ceph.<type>.<name>" with + * <type,name> pairs like <"dir","entries">, <"dir","files">, + * <"file","layout">, and so on. + * + * - ext4 uses the POSIX names, plus some special ext4-specific + * goop that might not get externalized. + * + * - NFS uses both the POSIX names and the NFSv4 ACLs. However, + * what it mainly does is have nfsd generate fake NFSv4 ACLs + * from POSIX ACLs. If you run an NFS client, the client + * relies on the server actually implementing the ACLs, and + * lets nfs4-acl-tools read and write the system.nfs4_acl xattr + * data. If you run an NFS server off, e.g., an ext4 file system, + * the server looks for the system.nfs4_acl xattr, serves that + * out if found, and otherwise just generates the fakes. + * + * - "security.*" and "selinux.*" are reserved. + * + * - "security.capability" is the name for capabilities. + * + * - sockets use "system.sockprotoname". + */ + +#if defined(__APPLE__) + #define HAVE_POSIX_ACLS + #define HAVE_DARWIN_ACLS +#endif + +#if defined(__FreeBSD__) + #define HAVE_POSIX_ACLS + #define HAVE_FREEBSD_ACLS +#endif + +#if defined (__illumos__) + #define HAVE_POSIX_ACLS + #define HAVE__ILLUMOS_ACLS +#endif + +#include <sys/types.h> +#include <sys/acl.h> /* XXX assumes existence of sys/acl.h */ + +/* + * An ACL consists of a number of ACEs that grant some kind of + * "allow" or "deny" to some specific entity. 
+ * + * The number of ACEs is potentially unlimited, although in practice + * they tend not to be that long. + * + * It's the responsibility of the back-end to supply the ACL + * for each test. However, the ACL may be in some sort of + * system-specific form. It's the responsibility of some + * (system-specific) code to translate it to *this* form, after + * which the backend may use l9p_acl_check_access() to get + * access granted or denied (and, eventually, audits and alarms + * recorded and raises, although that's yet to be designed). + * + * The reason for all this faffing-about with formats is so that + * we can *report* the ACLs using Linux 9p style xattrs. + */ + +struct l9p_acl; +struct l9p_fid; + +void l9p_acl_free(struct l9p_acl *); + +/* + * An ACL is made up of ACEs. + * + * Each ACE has: + * + * - a type: allow, deny, audit, alarm + * - a set of flags + * - permissions bits: a "mask" + * - an optional, nominally-variable-length identity + * + * The last part is especially tricky and currently has limited + * support here: it's always a 16 byte field on Darwin, and just + * a uint32_t on BSD (should be larger, really). Linux supports + * very large, actually-variable-size values; we'll deal with + * this later, maybe. + * + * We will define the mask first, below, since these are also the bits + * passed in for the accmask argument to l9p_acl_check_access(). + */ + +/* + * ACL entry mask, and accmask argument flags. + * + * NB: not every bit is implemented, but they are all here because + * they are all defined as part of an NFSv4 ACL entry, which is + * more or less a superset of a POSIX ACL entry. This means you + * can put a complete NFSv4 ACL in and we can reproduce it. + * + * Note that the LIST_DIRECTORY, ADD_FILE, and ADD_SUBDIRECTORY bits + * apply only to a directory, while the READ_DATA, WRITE_DATA, and + * APPEND_DATA bits apply only to a file. See aca_parent/aca_child + * below. 
+ */ +#define L9P_ACE_READ_DATA 0x00001 +#define L9P_ACE_LIST_DIRECTORY 0x00001 /* same as READ_DATA */ +#define L9P_ACE_WRITE_DATA 0x00002 +#define L9P_ACE_ADD_FILE 0x00002 /* same as WRITE_DATA */ +#define L9P_ACE_APPEND_DATA 0x00004 +#define L9P_ACE_ADD_SUBDIRECTORY 0x00004 /* same as APPEND_DATA */ +#define L9P_ACE_READ_NAMED_ATTRS 0x00008 +#define L9P_ACE_WRITE_NAMED_ATTRS 0x00010 +#define L9P_ACE_EXECUTE 0x00020 +#define L9P_ACE_DELETE_CHILD 0x00040 +#define L9P_ACE_READ_ATTRIBUTES 0x00080 +#define L9P_ACE_WRITE_ATTRIBUTES 0x00100 +#define L9P_ACE_WRITE_RETENTION 0x00200 /* not used here */ +#define L9P_ACE_WRITE_RETENTION_HOLD 0x00400 /* not used here */ +/* 0x00800 unused? */ +#define L9P_ACE_DELETE 0x01000 +#define L9P_ACE_READ_ACL 0x02000 +#define L9P_ACE_WRITE_ACL 0x04000 +#define L9P_ACE_WRITE_OWNER 0x08000 +#define L9P_ACE_SYNCHRONIZE 0x10000 /* not used here */ + +/* + * This is not an ACE bit, but is used with the access checking + * below. It represents a request to unlink (delete child / + * delete) an entity, and is equivalent to asking for *either* + * (not both) permission. + */ +#define L9P_ACOP_UNLINK (L9P_ACE_DELETE_CHILD | L9P_ACE_DELETE) + +/* + * Access checking takes a lot of arguments, so they are + * collected into a "struct" here. + * + * The aca_parent and aca_pstat fields may/must be NULL if the + * operation itself does not involve "directory" permissions. + * The aca_child and aca_cstat fields may/must be NULL if the + * operation does not involve anything *but* a directory. This + * is how we decide whether you're interested in L9P_ACE_READ_DATA + * vs L9P_ACE_LIST_DIRECTORY, for instance. + * + * Note that it's OK for both parent and child to be directories + * (as is the case when we're adding or deleting a subdirectory). 
+ */ +struct l9p_acl_check_args { + uid_t aca_uid; /* the uid that is requesting access */ + gid_t aca_gid; /* the gid that is requesting access */ + gid_t *aca_groups; /* the additional group-set, if any */ + size_t aca_ngroups; /* number of groups in group-set */ + struct l9p_acl *aca_parent; /* ACLs associated with parent/dir */ + struct stat *aca_pstat; /* stat data for parent/dir */ + struct l9p_acl *aca_child; /* ACLs associated with file */ + struct stat *aca_cstat; /* stat data for file */ + int aca_aclmode; /* mode checking bits, see below */ + bool aca_superuser; /* always allow uid==0 in STAT_MODE */ +}; + +/* + * Access checking mode bits in aca_aclmode. If you enable + * ACLs, they are used first, optionally with ZFS style ACLs. + * This means that even if aca_superuser is set, if an ACL denies + * permission to uid 0, permission is really denied. + * + * NFS style ACLs run before POSIX style ACLs (though POSIX + * ACLs aren't done yet anyway). + * + * N.B.: you probably want L9P_ACM_ZFS_ACL, especially when operating + * with a ZFS file system on FreeBSD. + */ +#define L9P_ACM_NFS_ACL 0x0001 /* enable NFS ACL checking */ +#define L9P_ACM_ZFS_ACL 0x0002 /* use ZFS ACL unlink semantics */ +#define L9P_ACM_POSIX_ACL 0x0004 /* enable POSIX ACL checking (notyet) */ +#define L9P_ACM_STAT_MODE 0x0008 /* enable st_mode bits */ + +/* + * Requests to access some file or directory must provide: + * + * - An operation. This should usually be just one bit from the + * L9P_ACE_* bit-sets above, or our special L9P_ACOP_UNLINK. + * For a few file-open operations it may be multiple bits, + * e.g., both read and write data. + * - The identity of the accessor: uid + gid + gid-set. + * - The type of access desired: this may be multiple bits. + * - The parent directory, if applicable. + * - The child file/dir being accessed, if applicable. + * - stat data for parent and/or child, if applicable. 
+ * + * The ACLs and/or stat data of the parent and/or child get used + * here, so the caller must provide them. We should have a way to + * cache these on fids, but not yet. The parent and child + * arguments are a bit tricky; see the code in genacl.c. + */ +int l9p_acl_check_access(int32_t op, struct l9p_acl_check_args *args); + +/* + * When falling back to POSIX ACL or Unix-style permissions + * testing, it's nice to collapse the above detailed permissions + * into simple read/write/execute bits (value 0..7). We provide + * a small utility function that does this. + */ +int l9p_ace_mask_to_rwx(int32_t); + +/* + * The rest of the data in an ACE. + */ + +/* type in ace_type */ +#define L9P_ACET_ACCESS_ALLOWED 0 +#define L9P_ACET_ACCESS_DENIED 1 +#define L9P_ACET_SYSTEM_AUDIT 2 +#define L9P_ACET_SYSTEM_ALARM 3 + +/* flags in ace_flags */ +#define L9P_ACEF_FILE_INHERIT_ACE 0x001 +#define L9P_ACEF_DIRECTORY_INHERIT_ACE 0x002 +#define L9P_ACEF_NO_PROPAGATE_INHERIT_ACE 0x004 +#define L9P_ACEF_INHERIT_ONLY_ACE 0x008 +#define L9P_ACEF_SUCCESSFUL_ACCESS_ACE_FLAG 0x010 +#define L9P_ACEF_FAILED_ACCESS_ACE_FLAG 0x020 +#define L9P_ACEF_IDENTIFIER_GROUP 0x040 +#define L9P_ACEF_OWNER 0x080 +#define L9P_ACEF_GROUP 0x100 +#define L9P_ACEF_EVERYONE 0x200 + +#if defined(__APPLE__) +# define L9P_ACE_IDSIZE 16 /* but, how do we map Darwin uuid? 
*/ +#else +# define L9P_ACE_IDSIZE 4 +#endif + +struct l9p_ace { + uint16_t ace_type; /* ACL entry type */ + uint16_t ace_flags; /* ACL entry flags */ + uint32_t ace_mask; /* ACL entry mask */ + uint32_t ace_idsize; /* length of ace_idbytes */ + unsigned char ace_idbytes[L9P_ACE_IDSIZE]; +}; + +#define L9P_ACLTYPE_NFSv4 1 /* currently the only valid type */ +struct l9p_acl { + uint32_t acl_acetype; /* reserved for future expansion */ + uint32_t acl_nace; /* number of occupied ACEs */ + uint32_t acl_aceasize; /* actual size of ACE array */ + struct l9p_ace acl_aces[]; /* variable length ACE array */ +}; + +/* + * These are the system-specific converters. + * + * Right now the backend needs to just find BSD NFSv4 ACLs + * and convert them before each operation that needs to be + * tested. + */ +#if defined(HAVE_DARWIN_ACLS) +struct l9p_acl *l9p_darwin_nfsv4acl_to_acl(acl_t acl); +#endif + +#if defined(HAVE_FREEBSD_ACLS) +struct l9p_acl *l9p_freebsd_nfsv4acl_to_acl(acl_t acl); +#endif + +#if defined(HAVE__ILLUMOS_ACLS) +struct l9p_acl *l9p_illumos_nfsv4acl_to_acl(acl_t *acl); +#endif + +#if defined(HAVE_POSIX_ACLS) && 0 /* not yet */ +struct l9p_acl *l9p_posix_acl_to_acl(acl_t acl); +#endif diff --git a/usr/src/lib/lib9p/common/hashtable.c b/usr/src/lib/lib9p/common/hashtable.c new file mode 100644 index 0000000000..70db6bcc0e --- /dev/null +++ b/usr/src/lib/lib9p/common/hashtable.c @@ -0,0 +1,276 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/queue.h> +#include "lib9p_impl.h" +#include "hashtable.h" + +static struct ht_item *ht_iter_advance(struct ht_iter *, struct ht_item *); + /* + * Set up *h as an empty table with `size` buckets and a fresh rwlock. + * NOTE(review): a size of 0 leaves ht_nentries at 0, and the lookup paths + * compute hash % ht_nentries, so callers presumably must pass size > 0. + */ +void +ht_init(struct ht *h, ssize_t size) +{ + ssize_t i; + + memset(h, 0, sizeof(struct ht)); + h->ht_nentries = size; + h->ht_entries = l9p_calloc((size_t)size, sizeof(struct ht_entry)); + (void) pthread_rwlock_init(&h->ht_rwlock, NULL); + + for (i = 0; i < size; i++) + TAILQ_INIT(&h->ht_entries[i].hte_items); +} + /* + * Free every ht_item still linked in the table (the hti_data values are + * not freed here), then destroy the rwlock and release the bucket array. + */ +void +ht_destroy(struct ht *h) +{ + struct ht_entry *he; + struct ht_item *item, *tmp; + ssize_t i; + + for (i = 0; i < h->ht_nentries; i++) { + he = &h->ht_entries[i]; + TAILQ_FOREACH_SAFE(item, &he->hte_items, hti_link, tmp) { + free(item); + } + } + + (void) pthread_rwlock_destroy(&h->ht_rwlock); + free(h->ht_entries); + h->ht_entries = NULL; +} + /* + * Look up `hash` under the read lock. Returns the stored data pointer, or + * NULL if there is no match or the lock could not be taken. + */ +void * +ht_find(struct ht *h, uint32_t hash) +{ + void *result; + 
if (ht_rdlock(h) != 0) + return (NULL); + result = ht_find_locked(h, hash); + (void) ht_unlock(h); + return (result); +} + /* + * Lookup without taking the lock; ht_find() wraps this in a read lock. + * Returns the hti_data of the first item in the bucket whose hti_hash + * equals `hash`, or NULL if there is none. + */ +void * +ht_find_locked(struct ht *h, uint32_t hash) +{ + struct ht_entry *entry; + struct ht_item *item; + + entry = &h->ht_entries[hash % h->ht_nentries]; + + TAILQ_FOREACH(item, &entry->hte_items, hti_link) { + if (item->hti_hash == hash) + return (item->hti_data); + } + + return (NULL); +} + /* + * Insert `value` under key `hash`, holding the write lock. Returns 0 on + * success, -1 with errno set to EEXIST if the key is already present, or + * the nonzero result of ht_wrlock() if the lock could not be taken. + */ +int +ht_add(struct ht *h, uint32_t hash, void *value) +{ + struct ht_entry *entry; + struct ht_item *item; + int err; + + if ((err = ht_wrlock(h)) != 0) + return (err); + + entry = &h->ht_entries[hash % h->ht_nentries]; + + TAILQ_FOREACH(item, &entry->hte_items, hti_link) { + if (item->hti_hash == hash) { + errno = EEXIST; + (void) ht_unlock(h); + return (-1); + } + } + + item = l9p_calloc(1, sizeof(struct ht_item)); + item->hti_hash = hash; + item->hti_data = value; + TAILQ_INSERT_TAIL(&entry->hte_items, item, hti_link); + (void) ht_unlock(h); + + return (0); +} + /* + * Write-locked wrapper around ht_remove_locked(). Returns that function's + * result, or the nonzero result of ht_wrlock() if the lock could not be + * taken. + */ +int +ht_remove(struct ht *h, uint32_t hash) +{ + int result; + int err; + + if ((err = ht_wrlock(h)) != 0) + return (err); + result = ht_remove_locked(h, hash); + (void) ht_unlock(h); + return (result); +} + /* + * Unlink and free the item keyed by `hash` (its hti_data is not freed). + * Returns 0 on success, or -1 with errno set to ENOENT if nothing matches. + */ +int +ht_remove_locked(struct ht *h, uint32_t hash) +{ + struct ht_entry *entry; + struct ht_item *item, *tmp; + ssize_t slot = hash % h->ht_nentries; + + entry = &h->ht_entries[slot]; + + TAILQ_FOREACH_SAFE(item, &entry->hte_items, hti_link, tmp) { + if (item->hti_hash == hash) { + TAILQ_REMOVE(&entry->hte_items, item, hti_link); + free(item); + return (0); + } + } + + errno = ENOENT; + return (-1); +} + +/* + * Inner workings for advancing the iterator. + * + * If we have a current item, that tells us how to find the + * next item. If not, we get the first item from the next + * slot (well, the next slot with an item); in any case, we + * record the new slot and return the next item. + * + * For bootstrapping, iter->htit_slot can be -1 to start + * searching at slot 0. 
+ * + * Caller must hold a lock on the table. + */ +static struct ht_item * +ht_iter_advance(struct ht_iter *iter, struct ht_item *cur) +{ + struct ht_item *next; + struct ht *h; + ssize_t slot; + + h = iter->htit_parent; + + if (cur == NULL) + next = NULL; + else + next = TAILQ_NEXT(cur, hti_link); + + if (next == NULL) { + slot = iter->htit_slot; + while (++slot < h->ht_nentries) { + next = TAILQ_FIRST(&h->ht_entries[slot].hte_items); + if (next != NULL) + break; + } + iter->htit_slot = slot; + } + return (next); +} + +/* + * Remove the current item - there must be one, or this is an + * error. This (necessarily) pre-locates the next item, so callers + * must not use it on an actively-changing table. + * + * Returns 0 on success, -1 with errno set to EINVAL if the iterator has + * no current item, or the nonzero result of ht_wrlock() on lock failure. + */ +int +ht_remove_at_iter(struct ht_iter *iter) +{ + struct ht_item *item; + struct ht *h; + ssize_t slot; + int err; + + assert(iter != NULL); + + if ((item = iter->htit_curr) == NULL) { + errno = EINVAL; + return (-1); + } + + /* remove the item from the table, saving the NEXT one */ + h = iter->htit_parent; + if ((err = ht_wrlock(h)) != 0) + return (err); + slot = iter->htit_slot; + iter->htit_next = ht_iter_advance(iter, item); + TAILQ_REMOVE(&h->ht_entries[slot].hte_items, item, hti_link); + (void) ht_unlock(h); + + /* mark us as no longer on an item, then free it */ + iter->htit_curr = NULL; + free(item); + + return (0); +} + +/* + * Initialize iterator. Subsequent ht_next calls will find the + * first item, then the next, and so on. Callers should in general + * not use this on actively-changing tables, though we do our best + * to make it semi-sensible. + */ +void +ht_iter(struct ht *h, struct ht_iter *iter) +{ + + iter->htit_parent = h; + iter->htit_curr = NULL; + iter->htit_next = NULL; + iter->htit_slot = -1; /* which will increment to 0 */ +} + +/* + * Return the next item, which is the first item if we have not + * yet been called on this iterator, or the next item if we have. 
+ */ +void * +ht_next(struct ht_iter *iter) +{ + struct ht_item *item; + struct ht *h; + + if ((item = iter->htit_next) == NULL) { + /* no pre-loaded next; find next from current */ + h = iter->htit_parent; + if (ht_rdlock(h) != 0) + return (NULL); + item = ht_iter_advance(iter, iter->htit_curr); + (void) ht_unlock(h); + } else + iter->htit_next = NULL; + iter->htit_curr = item; + return (item == NULL ? NULL : item->hti_data); +} diff --git a/usr/src/lib/lib9p/common/hashtable.h b/usr/src/lib/lib9p/common/hashtable.h new file mode 100644 index 0000000000..60b8dfff7b --- /dev/null +++ b/usr/src/lib/lib9p/common/hashtable.h @@ -0,0 +1,107 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef LIB9P_HASHTABLE_H +#define LIB9P_HASHTABLE_H + +#include <pthread.h> +#include <sys/queue.h> + +struct ht { + struct ht_entry * ht_entries; + ssize_t ht_nentries; + pthread_rwlock_t ht_rwlock; +}; + +struct ht_entry { + TAILQ_HEAD(, ht_item) hte_items; +}; + +struct ht_item { + uint32_t hti_hash; + void * hti_data; + TAILQ_ENTRY(ht_item) hti_link; +}; + +struct ht_iter { + struct ht * htit_parent; + struct ht_item * htit_curr; + struct ht_item * htit_next; + ssize_t htit_slot; +}; + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wthread-safety-analysis" +#endif + +/* + * Obtain read-lock on hash table. + */ +static inline int +ht_rdlock(struct ht *h) +{ + + return (pthread_rwlock_rdlock(&h->ht_rwlock)); +} + +/* + * Obtain write-lock on hash table. + */ +static inline int +ht_wrlock(struct ht *h) +{ + + return (pthread_rwlock_wrlock(&h->ht_rwlock)); +} + +/* + * Release lock on hash table. + */ +static inline int +ht_unlock(struct ht *h) +{ + + return (pthread_rwlock_unlock(&h->ht_rwlock)); +} + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +void ht_init(struct ht *h, ssize_t size); +void ht_destroy(struct ht *h); +void *ht_find(struct ht *h, uint32_t hash); +void *ht_find_locked(struct ht *h, uint32_t hash); +int ht_add(struct ht *h, uint32_t hash, void *value); +int ht_remove(struct ht *h, uint32_t hash); +int ht_remove_locked(struct ht *h, uint32_t hash); +int ht_remove_at_iter(struct ht_iter *iter); +void ht_iter(struct ht *h, struct ht_iter *iter); +void *ht_next(struct ht_iter *iter); + +#endif /* LIB9P_HASHTABLE_H */ diff --git a/usr/src/lib/lib9p/common/illumos_endian.h b/usr/src/lib/lib9p/common/illumos_endian.h new file mode 100644 index 0000000000..ecb7874724 --- /dev/null +++ b/usr/src/lib/lib9p/common/illumos_endian.h @@ -0,0 +1,26 @@ +#ifndef __ILLUMOS_ENDIAN_H +#define __ILLUMOS_ENDIAN_H + +/* + * Shims to make illumos' endian headers and macros compatible + * with FreeBSD's 
<sys/endian.h> + */ + +# include <endian.h> + +# define _COMPAT_LITTLE_ENDIAN 0x12345678 +# define _COMPAT_BIG_ENDIAN 0x87654321 + +# ifdef _LITTLE_ENDIAN +# define _BYTE_ORDER _COMPAT_LITTLE_ENDIAN +# endif +# ifdef _BIG_ENDIAN +# define _BYTE_ORDER _COMPAT_BIG_ENDIAN +# endif + +# undef _LITTLE_ENDIAN +# undef _BIG_ENDIAN +# define _LITTLE_ENDIAN _COMPAT_LITTLE_ENDIAN +# define _BIG_ENDIAN _COMPAT_BIG_ENDIAN + +#endif /* __ILLUMOS_ENDIAN_H */ diff --git a/usr/src/lib/lib9p/common/lib9p.h b/usr/src/lib/lib9p/common/lib9p.h new file mode 100644 index 0000000000..3d62e99006 --- /dev/null +++ b/usr/src/lib/lib9p/common/lib9p.h @@ -0,0 +1,249 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + + +#ifndef LIB9P_LIB9P_H +#define LIB9P_LIB9P_H + +#include <stdbool.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/uio.h> +#include <pthread.h> + +#if defined(__FreeBSD__) +#include <sys/sbuf.h> +#else +#include "sbuf/sbuf.h" +#endif + +#include "fcall.h" +#include "threadpool.h" +#include "hashtable.h" + +#define L9P_DEFAULT_MSIZE 8192 +#define L9P_MAX_IOV 128 +#define L9P_NUMTHREADS 8 + +struct l9p_request; +struct l9p_backend; +struct l9p_fid; + +/* + * Functions to implement underlying transport for lib9p. + * + * The transport is responsible for: + * + * - allocating a response buffer (filling in the iovec and niov) + * (gets req, pointer to base of iov array of size L9P_MAX_IOV, + * pointer to niov, lt_aux) + * + * - sending a response, when a request has a reply ready + * (gets req, pointer to iov, niov, actual response length, lt_aux) + * + * - dropping the response buffer, when a request has been + * flushed or otherwise dropped without a response + * (gets req, pointer to iov, niov, lt_aux) + * + * The transport is of course also responsible for feeding in + * request-buffers, but that happens by the transport calling + * l9p_connection_recv(). + */ +struct l9p_transport { + void *lt_aux; + int (*lt_get_response_buffer)(struct l9p_request *, struct iovec *, + size_t *, void *); + int (*lt_send_response)(struct l9p_request *, const struct iovec *, + size_t, size_t, void *); + void (*lt_drop_response)(struct l9p_request *, const struct iovec *, + size_t, void *); +}; + +enum l9p_pack_mode { + L9P_PACK, + L9P_UNPACK +}; + +enum l9p_integer_type { + L9P_BYTE = 1, + L9P_WORD = 2, + L9P_DWORD = 4, + L9P_QWORD = 8 +}; + +enum l9p_version { + L9P_INVALID_VERSION = 0, + L9P_2000 = 1, + L9P_2000U = 2, + L9P_2000L = 3 +}; + +/* + * This structure is used for unpacking (decoding) incoming + * requests and packing (encoding) outgoing results. 
It has its + * own copy of the iov array, with its own counters for working + * through that array, but it borrows the actual DATA from the + * original iov array associated with the original request (see + * below). + */ +struct l9p_message { + enum l9p_pack_mode lm_mode; + struct iovec lm_iov[L9P_MAX_IOV]; + size_t lm_niov; + size_t lm_cursor_iov; + size_t lm_cursor_offset; + size_t lm_size; +}; + +/* + * Data structure for a request/response pair (Tfoo/Rfoo). + * + * Note that the response is not formatted out into raw data + * (overwriting the request raw data) until we are really + * responding, with the exception of read operations Tread + * and Treaddir, which overlay their result-data into the + * iov array in the process of reading. + * + * We have room for two incoming fids, in case we are + * using 9P2000.L protocol. Note that nothing that uses two + * fids also has an output fid (newfid), so we could have a + * union of lr_fid2 and lr_newfid, but keeping them separate + * is probably a bit less error-prone. (If we want to shave + * memory requirements there are more places to look.) + * + * (The fid, fid2, and newfid fields should be removed via + * reorganization, as they are only used for smuggling data + * between request.c and the backend and should just be + * parameters to backend ops.) 
+ */ +struct l9p_request { + struct l9p_message lr_req_msg; /* for unpacking the request */ + struct l9p_message lr_resp_msg; /* for packing the response */ + union l9p_fcall lr_req; /* the request, decoded/unpacked */ + union l9p_fcall lr_resp; /* the response, not yet packed */ + + struct l9p_fid *lr_fid; + struct l9p_fid *lr_fid2; + struct l9p_fid *lr_newfid; + + struct l9p_connection *lr_conn; /* containing connection */ + void *lr_aux; /* reserved for transport layer */ + + struct iovec lr_data_iov[L9P_MAX_IOV]; /* iovecs for req + resp */ + size_t lr_data_niov; /* actual size of data_iov */ + + int lr_error; /* result from l9p_dispatch_request */ + + /* protected by threadpool mutex */ + enum l9p_workstate lr_workstate; /* threadpool: work state */ + enum l9p_flushstate lr_flushstate; /* flush state if flushee */ + struct l9p_worker *lr_worker; /* threadpool: worker */ + STAILQ_ENTRY(l9p_request) lr_worklink; /* reserved to threadpool */ + + /* protected by tag hash table lock */ + struct l9p_request_queue lr_flushq; /* q of flushers */ + STAILQ_ENTRY(l9p_request) lr_flushlink; /* link w/in flush queue */ +}; + +/* N.B.: these dirents are variable length and for .L only */ +struct l9p_dirent { + struct l9p_qid qid; + uint64_t offset; + uint8_t type; + char *name; +}; + +/* + * The 9pfs protocol has the notion of a "session", which is + * traffic between any two "Tversion" requests. All fids + * (lc_files, below) are specific to one particular session. + * + * We need a data structure per connection (client/server + * pair). This data structure lasts longer than these 9pfs + * sessions, but contains the request/response pairs and fids. + * Logically, the per-session data should be separate, but + * most of the time that would just require an extra + * indirection. Instead, a new session simply clunks all + * fids, and otherwise keeps using this same connection. 
+ */ +struct l9p_connection { + struct l9p_server *lc_server; + struct l9p_transport lc_lt; + struct l9p_threadpool lc_tp; + enum l9p_version lc_version; + uint32_t lc_msize; + uint32_t lc_max_io_size; + struct ht lc_files; + struct ht lc_requests; + LIST_ENTRY(l9p_connection) lc_link; +}; + +struct l9p_server { + struct l9p_backend *ls_backend; + enum l9p_version ls_max_version; + LIST_HEAD(, l9p_connection) ls_conns; +}; + +int l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall, + enum l9p_version version); +ssize_t l9p_pustat(struct l9p_message *msg, struct l9p_stat *s, + enum l9p_version version); +uint16_t l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version); +int l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req, + struct l9p_stat *s); +ssize_t l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de); + +int l9p_server_init(struct l9p_server **serverp, struct l9p_backend *backend); + +int l9p_connection_init(struct l9p_server *server, + struct l9p_connection **connp); +void l9p_connection_free(struct l9p_connection *conn); +void l9p_connection_recv(struct l9p_connection *conn, const struct iovec *iov, + size_t niov, void *aux); +void l9p_connection_close(struct l9p_connection *conn); +struct l9p_fid *l9p_connection_alloc_fid(struct l9p_connection *conn, + uint32_t fid); +void l9p_connection_remove_fid(struct l9p_connection *conn, + struct l9p_fid *fid); + +int l9p_dispatch_request(struct l9p_request *req); +void l9p_respond(struct l9p_request *req, bool drop, bool rmtag); + +void l9p_init_msg(struct l9p_message *msg, struct l9p_request *req, + enum l9p_pack_mode mode); +void l9p_seek_iov(const struct iovec *iov1, size_t niov1, struct iovec *iov2, + size_t *niov2, size_t seek); +size_t l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length); +void l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version, + struct sbuf *sb); +void l9p_freefcall(union l9p_fcall *fcall); +void l9p_freestat(struct l9p_stat 
*stat); + +gid_t *l9p_getgrlist(const char *, gid_t, int *); + +#endif /* LIB9P_LIB9P_H */ diff --git a/usr/src/lib/lib9p/common/lib9p_impl.h b/usr/src/lib/lib9p/common/lib9p_impl.h new file mode 100644 index 0000000000..41ff07ae18 --- /dev/null +++ b/usr/src/lib/lib9p/common/lib9p_impl.h @@ -0,0 +1,78 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef LIB9P_LIB9P_IMPL_H +#define LIB9P_LIB9P_IMPL_H + +#include <stdio.h> +#include <stdlib.h> + +#ifndef _KERNEL +/* + * Allocation wrappers that never hand NULL back to the caller: when the + * underlying malloc/calloc/realloc returns NULL they print a message to + * stderr and abort(). The byte counts are size_t, hence %zu. + */ +static inline void * +l9p_malloc(size_t size) +{ + void *r = malloc(size); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + size); + abort(); + } + + return (r); +} + +static inline void * +l9p_calloc(size_t n, size_t size) +{ + void *r = calloc(n, size); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + n * size); + abort(); + } + + return (r); +} + +static inline void * +l9p_realloc(void *ptr, size_t newsize) +{ + void *r = realloc(ptr, newsize); + + if (r == NULL) { + fprintf(stderr, "cannot allocate %zu bytes: out of memory\n", + newsize); + abort(); + } + + return (r); +} +#endif /* _KERNEL */ + +#endif /* LIB9P_LIB9P_IMPL_H */ diff --git a/usr/src/lib/lib9p/common/linux_errno.h b/usr/src/lib/lib9p/common/linux_errno.h new file mode 100644 index 0000000000..72778daa23 --- /dev/null +++ b/usr/src/lib/lib9p/common/linux_errno.h @@ -0,0 +1,247 @@ +/* + * Copyright 2016 Chris Torek <torek@ixsystems.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_LINUX_ERRNO_H +#define LIB9P_LINUX_ERRNO_H + +/* + * Linux error numbers that are outside of the original base range + * (which ends with ERANGE). + * + * This is pretty much the same as Linux's errno.h except that the + * names are prefixed with "LINUX_", and we add _STR with the + * string name. + * + * The string expansions were obtained with a little program to + * print every strerror(). + * + * Note that BSD EDEADLK is 11 and BSD EAGAIN is 35, vs + * Linux / Plan9 EAGAIN at 11. So one value in the ERANGE + * range still needs translation too. 
+ */ + +#define LINUX_EAGAIN 11 +#define LINUX_EAGAIN_STR "Resource temporarily unavailable" + +#define LINUX_EDEADLK 35 +#define LINUX_EDEADLK_STR "Resource deadlock avoided" +#define LINUX_ENAMETOOLONG 36 +#define LINUX_ENAMETOOLONG_STR "File name too long" +#define LINUX_ENOLCK 37 +#define LINUX_ENOLCK_STR "No locks available" +#define LINUX_ENOSYS 38 +#define LINUX_ENOSYS_STR "Function not implemented" +#define LINUX_ENOTEMPTY 39 +#define LINUX_ENOTEMPTY_STR "Directory not empty" +#define LINUX_ELOOP 40 +#define LINUX_ELOOP_STR "Too many levels of symbolic links" +/* 41 unused */ +#define LINUX_ENOMSG 42 +#define LINUX_ENOMSG_STR "No message of desired type" +#define LINUX_EIDRM 43 +#define LINUX_EIDRM_STR "Identifier removed" +#define LINUX_ECHRNG 44 +#define LINUX_ECHRNG_STR "Channel number out of range" +#define LINUX_EL2NSYNC 45 +#define LINUX_EL2NSYNC_STR "Level 2 not synchronized" +#define LINUX_EL3HLT 46 +#define LINUX_EL3HLT_STR "Level 3 halted" +#define LINUX_EL3RST 47 +#define LINUX_EL3RST_STR "Level 3 reset" +#define LINUX_ELNRNG 48 +#define LINUX_ELNRNG_STR "Link number out of range" +#define LINUX_EUNATCH 49 +#define LINUX_EUNATCH_STR "Protocol driver not attached" +#define LINUX_ENOCSI 50 +#define LINUX_ENOCSI_STR "No CSI structure available" +#define LINUX_EL2HLT 51 +#define LINUX_EL2HLT_STR "Level 2 halted" +#define LINUX_EBADE 52 +#define LINUX_EBADE_STR "Invalid exchange" +#define LINUX_EBADR 53 +#define LINUX_EBADR_STR "Invalid request descriptor" +#define LINUX_EXFULL 54 +#define LINUX_EXFULL_STR "Exchange full" +#define LINUX_ENOANO 55 +#define LINUX_ENOANO_STR "No anode" +#define LINUX_EBADRQC 56 +#define LINUX_EBADRQC_STR "Invalid request code" +#define LINUX_EBADSLT 57 +#define LINUX_EBADSLT_STR "Invalid slot" +/* 58 unused */ +#define LINUX_EBFONT 59 +#define LINUX_EBFONT_STR "Bad font file format" +#define LINUX_ENOSTR 60 +#define LINUX_ENOSTR_STR "Device not a stream" +#define LINUX_ENODATA 61 +#define LINUX_ENODATA_STR "No data 
available" +#define LINUX_ETIME 62 +#define LINUX_ETIME_STR "Timer expired" +#define LINUX_ENOSR 63 +#define LINUX_ENOSR_STR "Out of streams resources" +#define LINUX_ENONET 64 +#define LINUX_ENONET_STR "Machine is not on the network" +#define LINUX_ENOPKG 65 +#define LINUX_ENOPKG_STR "Package not installed" +#define LINUX_EREMOTE 66 +#define LINUX_EREMOTE_STR "Object is remote" +#define LINUX_ENOLINK 67 +#define LINUX_ENOLINK_STR "Link has been severed" +#define LINUX_EADV 68 +#define LINUX_EADV_STR "Advertise error" +#define LINUX_ESRMNT 69 +#define LINUX_ESRMNT_STR "Srmount error" +#define LINUX_ECOMM 70 +#define LINUX_ECOMM_STR "Communication error on send" +#define LINUX_EPROTO 71 +#define LINUX_EPROTO_STR "Protocol error" +#define LINUX_EMULTIHOP 72 +#define LINUX_EMULTIHOP_STR "Multihop attempted" +#define LINUX_EDOTDOT 73 +#define LINUX_EDOTDOT_STR "RFS specific error" +#define LINUX_EBADMSG 74 +#define LINUX_EBADMSG_STR "Bad message" +#define LINUX_EOVERFLOW 75 +#define LINUX_EOVERFLOW_STR "Value too large for defined data type" +#define LINUX_ENOTUNIQ 76 +#define LINUX_ENOTUNIQ_STR "Name not unique on network" +#define LINUX_EBADFD 77 +#define LINUX_EBADFD_STR "File descriptor in bad state" +#define LINUX_EREMCHG 78 +#define LINUX_EREMCHG_STR "Remote address changed" +#define LINUX_ELIBACC 79 +#define LINUX_ELIBACC_STR "Can not access a needed shared library" +#define LINUX_ELIBBAD 80 +#define LINUX_ELIBBAD_STR "Accessing a corrupted shared library" +#define LINUX_ELIBSCN 81 +#define LINUX_ELIBSCN_STR ".lib section in a.out corrupted" +#define LINUX_ELIBMAX 82 +#define LINUX_ELIBMAX_STR "Attempting to link in too many shared libraries" +#define LINUX_ELIBEXEC 83 +#define LINUX_ELIBEXEC_STR "Cannot exec a shared library directly" +#define LINUX_EILSEQ 84 +#define LINUX_EILSEQ_STR "Invalid or incomplete multibyte or wide character" +#define LINUX_ERESTART 85 +#define LINUX_ERESTART_STR "Interrupted system call should be restarted" +#define LINUX_ESTRPIPE 86 
+#define LINUX_ESTRPIPE_STR "Streams pipe error" +#define LINUX_EUSERS 87 +#define LINUX_EUSERS_STR "Too many users" +#define LINUX_ENOTSOCK 88 +#define LINUX_ENOTSOCK_STR "Socket operation on non-socket" +#define LINUX_EDESTADDRREQ 89 +#define LINUX_EDESTADDRREQ_STR "Destination address required" +#define LINUX_EMSGSIZE 90 +#define LINUX_EMSGSIZE_STR "Message too long" +#define LINUX_EPROTOTYPE 91 +#define LINUX_EPROTOTYPE_STR "Protocol wrong type for socket" +#define LINUX_ENOPROTOOPT 92 +#define LINUX_ENOPROTOOPT_STR "Protocol not available" +#define LINUX_EPROTONOSUPPORT 93 +#define LINUX_EPROTONOSUPPORT_STR "Protocol not supported" +#define LINUX_ESOCKTNOSUPPORT 94 +#define LINUX_ESOCKTNOSUPPORT_STR "Socket type not supported" +#define LINUX_EOPNOTSUPP 95 +#define LINUX_EOPNOTSUPP_STR "Operation not supported" +#define LINUX_EPFNOSUPPORT 96 +#define LINUX_EPFNOSUPPORT_STR "Protocol family not supported" +#define LINUX_EAFNOSUPPORT 97 +#define LINUX_EAFNOSUPPORT_STR "Address family not supported by protocol" +#define LINUX_EADDRINUSE 98 +#define LINUX_EADDRINUSE_STR "Address already in use" +#define LINUX_EADDRNOTAVAIL 99 +#define LINUX_EADDRNOTAVAIL_STR "Cannot assign requested address" +#define LINUX_ENETDOWN 100 +#define LINUX_ENETDOWN_STR "Network is down" +#define LINUX_ENETUNREACH 101 +#define LINUX_ENETUNREACH_STR "Network is unreachable" +#define LINUX_ENETRESET 102 +#define LINUX_ENETRESET_STR "Network dropped connection on reset" +#define LINUX_ECONNABORTED 103 +#define LINUX_ECONNABORTED_STR "Software caused connection abort" +#define LINUX_ECONNRESET 104 +#define LINUX_ECONNRESET_STR "Connection reset by peer" +#define LINUX_ENOBUFS 105 +#define LINUX_ENOBUFS_STR "No buffer space available" +#define LINUX_EISCONN 106 +#define LINUX_EISCONN_STR "Transport endpoint is already connected" +#define LINUX_ENOTCONN 107 +#define LINUX_ENOTCONN_STR "Transport endpoint is not connected" +#define LINUX_ESHUTDOWN 108 +#define LINUX_ESHUTDOWN_STR "Cannot send 
after transport endpoint shutdown" +#define LINUX_ETOOMANYREFS 109 +#define LINUX_ETOOMANYREFS_STR "Too many references: cannot splice" +#define LINUX_ETIMEDOUT 110 +#define LINUX_ETIMEDOUT_STR "Connection timed out" +#define LINUX_ECONNREFUSED 111 +#define LINUX_ECONNREFUSED_STR "Connection refused" +#define LINUX_EHOSTDOWN 112 +#define LINUX_EHOSTDOWN_STR "Host is down" +#define LINUX_EHOSTUNREACH 113 +#define LINUX_EHOSTUNREACH_STR "No route to host" +#define LINUX_EALREADY 114 +#define LINUX_EALREADY_STR "Operation already in progress" +#define LINUX_EINPROGRESS 115 +#define LINUX_EINPROGRESS_STR "Operation now in progress" +#define LINUX_ESTALE 116 +#define LINUX_ESTALE_STR "Stale file handle" +#define LINUX_EUCLEAN 117 +#define LINUX_EUCLEAN_STR "Structure needs cleaning" +#define LINUX_ENOTNAM 118 +#define LINUX_ENOTNAM_STR "Not a XENIX named type file" +#define LINUX_ENAVAIL 119 +#define LINUX_ENAVAIL_STR "No XENIX semaphores available" +#define LINUX_EISNAM 120 +#define LINUX_EISNAM_STR "Is a named type file" +#define LINUX_EREMOTEIO 121 +#define LINUX_EREMOTEIO_STR "Remote I/O error" +#define LINUX_EDQUOT 122 +#define LINUX_EDQUOT_STR "Quota exceeded" +#define LINUX_ENOMEDIUM 123 +#define LINUX_ENOMEDIUM_STR "No medium found" +#define LINUX_EMEDIUMTYPE 124 +#define LINUX_EMEDIUMTYPE_STR "Wrong medium type" +#define LINUX_ECANCELED 125 +#define LINUX_ECANCELED_STR "Operation canceled" +#define LINUX_ENOKEY 126 +#define LINUX_ENOKEY_STR "Required key not available" +#define LINUX_EKEYEXPIRED 127 +#define LINUX_EKEYEXPIRED_STR "Key has expired" +#define LINUX_EKEYREVOKED 128 +#define LINUX_EKEYREVOKED_STR "Key has been revoked" +#define LINUX_EKEYREJECTED 129 +#define LINUX_EKEYREJECTED_STR "Key was rejected by service" +#define LINUX_EOWNERDEAD 130 +#define LINUX_EOWNERDEAD_STR "Owner died" +#define LINUX_ENOTRECOVERABLE 131 +#define LINUX_ENOTRECOVERABLE_STR "State not recoverable" +#define LINUX_ERFKILL 132 +#define LINUX_ERFKILL_STR "Operation not 
possible due to RF-kill" +#define LINUX_EHWPOISON 133 +#define LINUX_EHWPOISON_STR "Memory page has hardware error" + +#endif /* LIB9P_LINUX_ERRNO_H */ diff --git a/usr/src/lib/lib9p/common/log.c b/usr/src/lib/lib9p/common/log.c new file mode 100644 index 0000000000..fb2596a16f --- /dev/null +++ b/usr/src/lib/lib9p/common/log.c @@ -0,0 +1,67 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include "log.h" + +static const char *l9p_log_level_names[] = { + "DEBUG", + "INFO", + "WARN", + "ERROR" +}; + +void +l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...) 
+{ + const char *dest = NULL; + static FILE *stream = NULL; + va_list ap; + + if (stream == NULL) { + dest = getenv("LIB9P_LOGGING"); + if (dest == NULL) + return; + else if (!strcmp(dest, "stderr")) + stream = stderr; + else { + stream = fopen(dest, "a"); + if (stream == NULL) + return; + } + } + + va_start(ap, fmt); + fprintf(stream, "[%s]\t %s: ", l9p_log_level_names[level], func); + vfprintf(stream, fmt, ap); + fprintf(stream, "\n"); + fflush(stream); + va_end(ap); +} diff --git a/usr/src/lib/lib9p/common/log.h b/usr/src/lib/lib9p/common/log.h new file mode 100644 index 0000000000..b801d4017a --- /dev/null +++ b/usr/src/lib/lib9p/common/log.h @@ -0,0 +1,46 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_LOG_H +#define LIB9P_LOG_H + +enum l9p_log_level { + L9P_DEBUG, + L9P_INFO, + L9P_WARNING, + L9P_ERROR +}; + +void l9p_logf(enum l9p_log_level level, const char *func, const char *fmt, ...); + +#if defined(L9P_DEBUG) +#define L9P_LOG(level, fmt, ...) l9p_logf(level, __func__, fmt, ##__VA_ARGS__) +#else +#define L9P_LOG(level, fmt, ...) +#endif + +#endif /* LIB9P_LOG_H */ diff --git a/usr/src/lib/lib9p/common/pack.c b/usr/src/lib/lib9p/common/pack.c new file mode 100644 index 0000000000..13ec5f02b5 --- /dev/null +++ b/usr/src/lib/lib9p/common/pack.c @@ -0,0 +1,996 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Based on libixp code: ©2007-2010 Kris Maglione <maglione.k at Gmail> + */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/param.h> +#ifdef __APPLE__ +# include "apple_endian.h" +#elif __illumos__ +# include "illumos_endian.h" +# include <sys/sysmacros.h> +#else +# include <sys/endian.h> +#endif +#include <sys/uio.h> +#include "lib9p.h" +#include "lib9p_impl.h" +#include "log.h" + +#define N(ary) (sizeof(ary) / sizeof(*ary)) +#define STRING_SIZE(s) (L9P_WORD + (s != NULL ? (uint16_t)strlen(s) : 0)) +#define QID_SIZE (L9P_BYTE + L9P_DWORD + L9P_QWORD) + +static ssize_t l9p_iov_io(struct l9p_message *, void *, size_t); +static inline ssize_t l9p_pu8(struct l9p_message *, uint8_t *); +static inline ssize_t l9p_pu16(struct l9p_message *, uint16_t *); +static inline ssize_t l9p_pu32(struct l9p_message *, uint32_t *); +static inline ssize_t l9p_pu64(struct l9p_message *, uint64_t *); +static ssize_t l9p_pustring(struct l9p_message *, char **s); +static ssize_t l9p_pustrings(struct l9p_message *, uint16_t *, char **, size_t); +static ssize_t l9p_puqid(struct l9p_message *, struct l9p_qid *); +static ssize_t l9p_puqids(struct l9p_message *, uint16_t *, struct l9p_qid *q); + +/* + * Transfer data from incoming request, or to outgoing response, + * using msg to track position and direction within request/response. 
+ * + * Returns the number of bytes actually transferred (which is always + * just len itself, converted to signed), or -1 if we ran out of space. + * + * Note that if we return -1, subsequent l9p_iov_io() calls with + * the same (and not-reset) msg and len > 0 will also return -1. + * This means most users can just check the *last* call for failure. + */ +static ssize_t +l9p_iov_io(struct l9p_message *msg, void *buffer, size_t len) +{ + size_t done = 0; + size_t left = len; + + assert(msg != NULL); + + if (len == 0) + return (0); + + if (msg->lm_cursor_iov >= msg->lm_niov) + return (-1); + + assert(buffer != NULL); + + while (left > 0) { + size_t idx = msg->lm_cursor_iov; + size_t space = msg->lm_iov[idx].iov_len - msg->lm_cursor_offset; + size_t towrite = MIN(space, left); + + if (msg->lm_mode == L9P_PACK) { + memcpy((char *)msg->lm_iov[idx].iov_base + + msg->lm_cursor_offset, (char *)buffer + done, + towrite); + } + + if (msg->lm_mode == L9P_UNPACK) { + memcpy((char *)buffer + done, + (char *)msg->lm_iov[idx].iov_base + + msg->lm_cursor_offset, towrite); + } + + msg->lm_cursor_offset += towrite; + + done += towrite; + left -= towrite; + + if (space - towrite == 0) { + /* Advance to next iov */ + msg->lm_cursor_iov++; + msg->lm_cursor_offset = 0; + + if (msg->lm_cursor_iov >= msg->lm_niov && left > 0) + return (-1); + } + } + + msg->lm_size += done; + return ((ssize_t)done); +} + +/* + * Pack or unpack a byte (8 bits). + * + * Returns 1 (success, 1 byte) or -1 (error). + */ +static inline ssize_t +l9p_pu8(struct l9p_message *msg, uint8_t *val) +{ + + return (l9p_iov_io(msg, val, sizeof (uint8_t))); +} + +/* + * Pack or unpack 16-bit value. + * + * Returns 2 or -1. + */ +static inline ssize_t +l9p_pu16(struct l9p_message *msg, uint16_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + /* + * The ifdefs are annoying, but there is no need + * for all of this foolery on little-endian hosts, + * and I don't expect the compiler to optimize it + * all away. 
+ */ + uint16_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole16(*val); + return (l9p_iov_io(msg, ©, sizeof (uint16_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint16_t)); + *val = le16toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint16_t))); +#endif +} + +/* + * Pack or unpack 32-bit value. + * + * Returns 4 or -1. + */ +static inline ssize_t +l9p_pu32(struct l9p_message *msg, uint32_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + uint32_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole32(*val); + return (l9p_iov_io(msg, ©, sizeof (uint32_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint32_t)); + *val = le32toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint32_t))); +#endif +} + +/* + * Pack or unpack 64-bit value. + * + * Returns 8 or -1. + */ +static inline ssize_t +l9p_pu64(struct l9p_message *msg, uint64_t *val) +{ +#if _BYTE_ORDER != _LITTLE_ENDIAN + uint64_t copy; + ssize_t ret; + + if (msg->lm_mode == L9P_PACK) { + copy = htole64(*val); + return (l9p_iov_io(msg, ©, sizeof (uint64_t))); + } + ret = l9p_iov_io(msg, val, sizeof (uint32_t)); + *val = le64toh(*val); + return (ret); +#else + return (l9p_iov_io(msg, val, sizeof (uint64_t))); +#endif +} + +/* + * Pack or unpack a string, encoded as 2-byte length followed by + * string bytes. The returned length is 2 greater than the + * length of the string itself. + * + * When unpacking, this allocates a new string (NUL-terminated). + * + * Return -1 on error (not space, or failed to allocate string, + * or illegal string). + * + * Note that pustring (and hence pustrings) can return an error + * even when l9p_iov_io succeeds. + */ +static ssize_t +l9p_pustring(struct l9p_message *msg, char **s) +{ + uint16_t len; + + if (msg->lm_mode == L9P_PACK) + len = *s != NULL ? 
(uint16_t)strlen(*s) : 0; + + if (l9p_pu16(msg, &len) < 0) + return (-1); + + if (msg->lm_mode == L9P_UNPACK) { + *s = l9p_calloc(1, len + 1); + if (*s == NULL) + return (-1); + } + + if (l9p_iov_io(msg, *s, len) < 0) + return (-1); + + if (msg->lm_mode == L9P_UNPACK) { + /* + * An embedded NUL byte in a string is illegal. + * We don't necessarily have to check (we'll just + * treat it as a shorter string), but checking + * seems like a good idea. + */ + if (memchr(*s, '\0', len) != NULL) + return (-1); + } + + return ((ssize_t)len + 2); +} + +/* + * Pack or unpack a number (*num) of strings (but at most max of + * them). + * + * Returns the number of bytes transferred, including the packed + * number of strings. If packing and the packed number of strings + * was reduced, the original *num value is unchanged; only the + * wire-format number is reduced. If unpacking and the input + * number of strings exceeds the max, the incoming *num is reduced + * to lim, if needed. (NOTE ASYMMETRY HERE!) + * + * Returns -1 on error. + */ +static ssize_t +l9p_pustrings(struct l9p_message *msg, uint16_t *num, char **strings, + size_t max) +{ + size_t i, lim; + ssize_t r, ret; + uint16_t adjusted; + + if (msg->lm_mode == L9P_PACK) { + lim = *num; + if (lim > max) + lim = max; + adjusted = (uint16_t)lim; + r = l9p_pu16(msg, &adjusted); + } else { + r = l9p_pu16(msg, num); + lim = *num; + if (lim > max) + *num = (uint16_t)(lim = max); + } + if (r < 0) + return (-1); + + for (i = 0; i < lim; i++) { + ret = l9p_pustring(msg, &strings[i]); + if (ret < 1) + return (-1); + + r += ret; + } + + return (r); +} + +/* + * Pack or unpack a qid. + * + * Returns 13 (success) or -1 (error). 
+ */ +static ssize_t +l9p_puqid(struct l9p_message *msg, struct l9p_qid *qid) +{ + ssize_t r; + uint8_t type; + + if (msg->lm_mode == L9P_PACK) { + type = qid->type; + r = l9p_pu8(msg, &type); + } else { + r = l9p_pu8(msg, &type); + qid->type = type; + } + if (r > 0) + r = l9p_pu32(msg, &qid->version); + if (r > 0) + r = l9p_pu64(msg, &qid->path); + + return (r > 0 ? QID_SIZE : r); +} + +/* + * Pack or unpack *num qids. + * + * Returns 2 + 13 * *num (after possibly setting *num), or -1 on error. + */ +static ssize_t +l9p_puqids(struct l9p_message *msg, uint16_t *num, struct l9p_qid *qids) +{ + size_t i, lim; + ssize_t ret, r; + + r = l9p_pu16(msg, num); + if (r > 0) { + for (i = 0, lim = *num; i < lim; i++) { + ret = l9p_puqid(msg, &qids[i]); + if (ret < 0) + return (-1); + r += ret; + } + } + return (r); +} + +/* + * Pack or unpack a l9p_stat. + * + * These have variable size, and the size further depends on + * the protocol version. + * + * Returns the number of bytes packed/unpacked, or -1 on error. + */ +ssize_t +l9p_pustat(struct l9p_message *msg, struct l9p_stat *stat, + enum l9p_version version) +{ + ssize_t r = 0; + uint16_t size; + + /* The on-wire size field excludes the size of the size field. 
*/ + if (msg->lm_mode == L9P_PACK) + size = l9p_sizeof_stat(stat, version) - 2; + + r += l9p_pu16(msg, &size); + r += l9p_pu16(msg, &stat->type); + r += l9p_pu32(msg, &stat->dev); + r += l9p_puqid(msg, &stat->qid); + r += l9p_pu32(msg, &stat->mode); + r += l9p_pu32(msg, &stat->atime); + r += l9p_pu32(msg, &stat->mtime); + r += l9p_pu64(msg, &stat->length); + r += l9p_pustring(msg, &stat->name); + r += l9p_pustring(msg, &stat->uid); + r += l9p_pustring(msg, &stat->gid); + r += l9p_pustring(msg, &stat->muid); + + if (version >= L9P_2000U) { + r += l9p_pustring(msg, &stat->extension); + r += l9p_pu32(msg, &stat->n_uid); + r += l9p_pu32(msg, &stat->n_gid); + r += l9p_pu32(msg, &stat->n_muid); + } + + if (r < size + 2) + return (-1); + + return (r); +} + +/* + * Pack or unpack a variable-length dirent. + * + * If unpacking, the name field is malloc()ed and the caller must + * free it. + * + * Returns the wire-format length, or -1 if we ran out of room. + */ +ssize_t +l9p_pudirent(struct l9p_message *msg, struct l9p_dirent *de) +{ + ssize_t r, s; + + r = l9p_puqid(msg, &de->qid); + r += l9p_pu64(msg, &de->offset); + r += l9p_pu8(msg, &de->type); + s = l9p_pustring(msg, &de->name); + if (r < QID_SIZE + 8 + 1 || s < 0) + return (-1); + return (r + s); +} + +/* + * Pack or unpack a request or response (fcall). + * + * Returns 0 on success, -1 on error. (It's up to the caller + * to call l9p_freefcall on our failure.) + */ +int +l9p_pufcall(struct l9p_message *msg, union l9p_fcall *fcall, + enum l9p_version version) +{ + uint32_t length = 0; + ssize_t r; + + /* + * Get overall length, type, and tag, which should appear + * in all messages. If not even that works, abort immediately. + */ + l9p_pu32(msg, &length); + l9p_pu8(msg, &fcall->hdr.type); + r = l9p_pu16(msg, &fcall->hdr.tag); + if (r < 0) + return (-1); + + /* + * Decode remainder of message. When unpacking, this may + * allocate memory, even if we fail during the decode. 
+ * Note that the initial fcall is zeroed out, though, so + * we can just freefcall() to release whatever might have + * gotten allocated, if the unpack fails due to a short + * packet. + */ + switch (fcall->hdr.type) { + case L9P_TVERSION: + case L9P_RVERSION: + l9p_pu32(msg, &fcall->version.msize); + r = l9p_pustring(msg, &fcall->version.version); + break; + + case L9P_TAUTH: + l9p_pu32(msg, &fcall->tauth.afid); + r = l9p_pustring(msg, &fcall->tauth.uname); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tauth.aname); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->tauth.n_uname); + break; + + case L9P_RAUTH: + r = l9p_puqid(msg, &fcall->rauth.aqid); + break; + + case L9P_TATTACH: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->tattach.afid); + r = l9p_pustring(msg, &fcall->tattach.uname); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tattach.aname); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->tattach.n_uname); + break; + + case L9P_RATTACH: + r = l9p_puqid(msg, &fcall->rattach.qid); + break; + + case L9P_RERROR: + r = l9p_pustring(msg, &fcall->error.ename); + if (r < 0) + break; + if (version >= L9P_2000U) + r = l9p_pu32(msg, &fcall->error.errnum); + break; + + case L9P_RLERROR: + r = l9p_pu32(msg, &fcall->error.errnum); + break; + + case L9P_TFLUSH: + r = l9p_pu16(msg, &fcall->tflush.oldtag); + break; + + case L9P_RFLUSH: + break; + + case L9P_TWALK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->twalk.newfid); + r = l9p_pustrings(msg, &fcall->twalk.nwname, + fcall->twalk.wname, N(fcall->twalk.wname)); + break; + + case L9P_RWALK: + r = l9p_puqids(msg, &fcall->rwalk.nwqid, fcall->rwalk.wqid); + break; + + case L9P_TOPEN: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu8(msg, &fcall->topen.mode); + break; + + case L9P_ROPEN: + l9p_puqid(msg, &fcall->ropen.qid); + r = l9p_pu32(msg, &fcall->ropen.iounit); + break; + + case L9P_TCREATE: + l9p_pu32(msg, &fcall->hdr.fid); 
+ r = l9p_pustring(msg, &fcall->tcreate.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tcreate.perm); + r = l9p_pu8(msg, &fcall->tcreate.mode); + if (version >= L9P_2000U) + r = l9p_pustring(msg, &fcall->tcreate.extension); + break; + + case L9P_RCREATE: + l9p_puqid(msg, &fcall->rcreate.qid); + r = l9p_pu32(msg, &fcall->rcreate.iounit); + break; + + case L9P_TREAD: + case L9P_TREADDIR: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu64(msg, &fcall->io.offset); + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_RREAD: + case L9P_RREADDIR: + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_TWRITE: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu64(msg, &fcall->io.offset); + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_RWRITE: + r = l9p_pu32(msg, &fcall->io.count); + break; + + case L9P_TCLUNK: + case L9P_TSTAT: + case L9P_TREMOVE: + case L9P_TSTATFS: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RCLUNK: + case L9P_RREMOVE: + break; + + case L9P_RSTAT: + { + uint16_t size = l9p_sizeof_stat(&fcall->rstat.stat, + version); + l9p_pu16(msg, &size); + r = l9p_pustat(msg, &fcall->rstat.stat, version); + } + break; + + case L9P_TWSTAT: + { + uint16_t size; + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu16(msg, &size); + r = l9p_pustat(msg, &fcall->twstat.stat, version); + } + break; + + case L9P_RWSTAT: + break; + + case L9P_RSTATFS: + l9p_pu32(msg, &fcall->rstatfs.statfs.type); + l9p_pu32(msg, &fcall->rstatfs.statfs.bsize); + l9p_pu64(msg, &fcall->rstatfs.statfs.blocks); + l9p_pu64(msg, &fcall->rstatfs.statfs.bfree); + l9p_pu64(msg, &fcall->rstatfs.statfs.bavail); + l9p_pu64(msg, &fcall->rstatfs.statfs.files); + l9p_pu64(msg, &fcall->rstatfs.statfs.ffree); + l9p_pu64(msg, &fcall->rstatfs.statfs.fsid); + r = l9p_pu32(msg, &fcall->rstatfs.statfs.namelen); + break; + + case L9P_TLOPEN: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu32(msg, &fcall->tlopen.flags); + break; + + case L9P_RLOPEN: + l9p_puqid(msg, &fcall->rlopen.qid); + r = 
l9p_pu32(msg, &fcall->rlopen.iounit); + break; + + case L9P_TLCREATE: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tlcreate.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tlcreate.flags); + l9p_pu32(msg, &fcall->tlcreate.mode); + r = l9p_pu32(msg, &fcall->tlcreate.gid); + break; + + case L9P_RLCREATE: + l9p_puqid(msg, &fcall->rlcreate.qid); + r = l9p_pu32(msg, &fcall->rlcreate.iounit); + break; + + case L9P_TSYMLINK: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tsymlink.name); + if (r < 0) + break; + r = l9p_pustring(msg, &fcall->tsymlink.symtgt); + if (r < 0) + break; + r = l9p_pu32(msg, &fcall->tlcreate.gid); + break; + + case L9P_RSYMLINK: + r = l9p_puqid(msg, &fcall->rsymlink.qid); + break; + + case L9P_TMKNOD: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tmknod.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tmknod.mode); + l9p_pu32(msg, &fcall->tmknod.major); + l9p_pu32(msg, &fcall->tmknod.minor); + r = l9p_pu32(msg, &fcall->tmknod.gid); + break; + + case L9P_RMKNOD: + r = l9p_puqid(msg, &fcall->rmknod.qid); + break; + + case L9P_TRENAME: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->trename.dfid); + r = l9p_pustring(msg, &fcall->trename.name); + break; + + case L9P_RRENAME: + break; + + case L9P_TREADLINK: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RREADLINK: + r = l9p_pustring(msg, &fcall->rreadlink.target); + break; + + case L9P_TGETATTR: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pu64(msg, &fcall->tgetattr.request_mask); + break; + + case L9P_RGETATTR: + l9p_pu64(msg, &fcall->rgetattr.valid); + l9p_puqid(msg, &fcall->rgetattr.qid); + l9p_pu32(msg, &fcall->rgetattr.mode); + l9p_pu32(msg, &fcall->rgetattr.uid); + l9p_pu32(msg, &fcall->rgetattr.gid); + l9p_pu64(msg, &fcall->rgetattr.nlink); + l9p_pu64(msg, &fcall->rgetattr.rdev); + l9p_pu64(msg, &fcall->rgetattr.size); + l9p_pu64(msg, &fcall->rgetattr.blksize); + l9p_pu64(msg, &fcall->rgetattr.blocks); + 
l9p_pu64(msg, &fcall->rgetattr.atime_sec); + l9p_pu64(msg, &fcall->rgetattr.atime_nsec); + l9p_pu64(msg, &fcall->rgetattr.mtime_sec); + l9p_pu64(msg, &fcall->rgetattr.mtime_nsec); + l9p_pu64(msg, &fcall->rgetattr.ctime_sec); + l9p_pu64(msg, &fcall->rgetattr.ctime_nsec); + l9p_pu64(msg, &fcall->rgetattr.btime_sec); + l9p_pu64(msg, &fcall->rgetattr.btime_nsec); + l9p_pu64(msg, &fcall->rgetattr.gen); + r = l9p_pu64(msg, &fcall->rgetattr.data_version); + break; + + case L9P_TSETATTR: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->tsetattr.valid); + l9p_pu32(msg, &fcall->tsetattr.mode); + l9p_pu32(msg, &fcall->tsetattr.uid); + l9p_pu32(msg, &fcall->tsetattr.gid); + l9p_pu64(msg, &fcall->tsetattr.size); + l9p_pu64(msg, &fcall->tsetattr.atime_sec); + l9p_pu64(msg, &fcall->tsetattr.atime_nsec); + l9p_pu64(msg, &fcall->tsetattr.mtime_sec); + r = l9p_pu64(msg, &fcall->tsetattr.mtime_nsec); + break; + + case L9P_RSETATTR: + break; + + case L9P_TXATTRWALK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu32(msg, &fcall->txattrwalk.newfid); + r = l9p_pustring(msg, &fcall->txattrwalk.name); + break; + + case L9P_RXATTRWALK: + r = l9p_pu64(msg, &fcall->rxattrwalk.size); + break; + + case L9P_TXATTRCREATE: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->txattrcreate.name); + if (r < 0) + break; + l9p_pu64(msg, &fcall->txattrcreate.attr_size); + r = l9p_pu32(msg, &fcall->txattrcreate.flags); + break; + + case L9P_RXATTRCREATE: + break; + + case L9P_TFSYNC: + r = l9p_pu32(msg, &fcall->hdr.fid); + break; + + case L9P_RFSYNC: + break; + + case L9P_TLOCK: + l9p_pu32(msg, &fcall->hdr.fid); + l9p_pu8(msg, &fcall->tlock.type); + l9p_pu32(msg, &fcall->tlock.flags); + l9p_pu64(msg, &fcall->tlock.start); + l9p_pu64(msg, &fcall->tlock.length); + l9p_pu32(msg, &fcall->tlock.proc_id); + r = l9p_pustring(msg, &fcall->tlock.client_id); + break; + + case L9P_RLOCK: + r = l9p_pu8(msg, &fcall->rlock.status); + break; + + case L9P_TGETLOCK: + l9p_pu32(msg, &fcall->hdr.fid); + /* 
FALLTHROUGH */ + + case L9P_RGETLOCK: + l9p_pu8(msg, &fcall->getlock.type); + l9p_pu64(msg, &fcall->getlock.start); + l9p_pu64(msg, &fcall->getlock.length); + l9p_pu32(msg, &fcall->getlock.proc_id); + r = l9p_pustring(msg, &fcall->getlock.client_id); + break; + + case L9P_TLINK: + l9p_pu32(msg, &fcall->tlink.dfid); + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tlink.name); + break; + + case L9P_RLINK: + break; + + case L9P_TMKDIR: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tmkdir.name); + if (r < 0) + break; + l9p_pu32(msg, &fcall->tmkdir.mode); + r = l9p_pu32(msg, &fcall->tmkdir.gid); + break; + + case L9P_RMKDIR: + r = l9p_puqid(msg, &fcall->rmkdir.qid); + break; + + case L9P_TRENAMEAT: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->trenameat.oldname); + if (r < 0) + break; + l9p_pu32(msg, &fcall->trenameat.newdirfid); + r = l9p_pustring(msg, &fcall->trenameat.newname); + break; + + case L9P_RRENAMEAT: + break; + + case L9P_TUNLINKAT: + l9p_pu32(msg, &fcall->hdr.fid); + r = l9p_pustring(msg, &fcall->tunlinkat.name); + if (r < 0) + break; + r = l9p_pu32(msg, &fcall->tunlinkat.flags); + break; + + case L9P_RUNLINKAT: + break; + + default: + L9P_LOG(L9P_ERROR, "%s(): missing case for type %d", + __func__, fcall->hdr.type); + break; + } + + /* Check for over- or under-run, or pustring error. */ + if (r < 0) + return (-1); + + if (msg->lm_mode == L9P_PACK) { + /* Rewind to the beginning and install size at front. */ + uint32_t len = (uint32_t)msg->lm_size; + msg->lm_cursor_offset = 0; + msg->lm_cursor_iov = 0; + + /* + * Subtract 4 bytes from current size, becase we're + * overwriting size (rewinding message to the beginning) + * and writing again, which will increase it 4 more. 
+ */ + msg->lm_size -= sizeof(uint32_t); + + if (fcall->hdr.type == L9P_RREAD || + fcall->hdr.type == L9P_RREADDIR) + len += fcall->io.count; + + l9p_pu32(msg, &len); + } + + return (0); +} + +/* + * Free any strings or other data malloc'ed in the process of + * packing or unpacking an fcall. + */ +void +l9p_freefcall(union l9p_fcall *fcall) +{ + uint16_t i; + + switch (fcall->hdr.type) { + + case L9P_TVERSION: + case L9P_RVERSION: + free(fcall->version.version); + return; + + case L9P_TATTACH: + free(fcall->tattach.aname); + free(fcall->tattach.uname); + return; + + case L9P_TWALK: + for (i = 0; i < fcall->twalk.nwname; i++) + free(fcall->twalk.wname[i]); + return; + + case L9P_TCREATE: + case L9P_TOPEN: + free(fcall->tcreate.name); + free(fcall->tcreate.extension); + return; + + case L9P_RSTAT: + l9p_freestat(&fcall->rstat.stat); + return; + + case L9P_TWSTAT: + l9p_freestat(&fcall->twstat.stat); + return; + + case L9P_TLCREATE: + free(fcall->tlcreate.name); + return; + + case L9P_TSYMLINK: + free(fcall->tsymlink.name); + free(fcall->tsymlink.symtgt); + return; + + case L9P_TMKNOD: + free(fcall->tmknod.name); + return; + + case L9P_TRENAME: + free(fcall->trename.name); + return; + + case L9P_RREADLINK: + free(fcall->rreadlink.target); + return; + + case L9P_TXATTRWALK: + free(fcall->txattrwalk.name); + return; + + case L9P_TXATTRCREATE: + free(fcall->txattrcreate.name); + return; + + case L9P_TLOCK: + free(fcall->tlock.client_id); + return; + + case L9P_TGETLOCK: + case L9P_RGETLOCK: + free(fcall->getlock.client_id); + return; + + case L9P_TLINK: + free(fcall->tlink.name); + return; + + case L9P_TMKDIR: + free(fcall->tmkdir.name); + return; + + case L9P_TRENAMEAT: + free(fcall->trenameat.oldname); + free(fcall->trenameat.newname); + return; + + case L9P_TUNLINKAT: + free(fcall->tunlinkat.name); + return; + } +} + +void +l9p_freestat(struct l9p_stat *stat) +{ + free(stat->name); + free(stat->extension); + free(stat->uid); + free(stat->gid); + free(stat->muid); +} 
+ +uint16_t +l9p_sizeof_stat(struct l9p_stat *stat, enum l9p_version version) +{ + uint16_t size = L9P_WORD /* size */ + + L9P_WORD /* type */ + + L9P_DWORD /* dev */ + + QID_SIZE /* qid */ + + 3 * L9P_DWORD /* mode, atime, mtime */ + + L9P_QWORD /* length */ + + STRING_SIZE(stat->name) + + STRING_SIZE(stat->uid) + + STRING_SIZE(stat->gid) + + STRING_SIZE(stat->muid); + + if (version >= L9P_2000U) { + size += STRING_SIZE(stat->extension) + + 3 * L9P_DWORD; + } + + return (size); +} diff --git a/usr/src/lib/lib9p/common/request.c b/usr/src/lib/lib9p/common/request.c new file mode 100644 index 0000000000..99885690af --- /dev/null +++ b/usr/src/lib/lib9p/common/request.c @@ -0,0 +1,1446 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <sys/param.h> +#include <sys/uio.h> +#ifdef __illumos__ +#include <sys/sysmacros.h> +#endif +#if defined(__FreeBSD__) +#include <sys/sbuf.h> +#else +#include "sbuf/sbuf.h" +#endif +#include "lib9p.h" +#include "lib9p_impl.h" +#include "fcall.h" +#include "fid.h" +#include "hashtable.h" +#include "log.h" +#include "linux_errno.h" +#include "backend/backend.h" +#include "threadpool.h" + +#define N(x) (sizeof(x) / sizeof(x[0])) + +static int l9p_dispatch_tversion(struct l9p_request *req); +static int l9p_dispatch_tattach(struct l9p_request *req); +static int l9p_dispatch_tclunk(struct l9p_request *req); +static int l9p_dispatch_tcreate(struct l9p_request *req); +static int l9p_dispatch_topen(struct l9p_request *req); +static int l9p_dispatch_tread(struct l9p_request *req); +static int l9p_dispatch_tremove(struct l9p_request *req); +static int l9p_dispatch_tstat(struct l9p_request *req); +static int l9p_dispatch_twalk(struct l9p_request *req); +static int l9p_dispatch_twrite(struct l9p_request *req); +static int l9p_dispatch_twstat(struct l9p_request *req); +static int l9p_dispatch_tstatfs(struct l9p_request *req); +static int l9p_dispatch_tlopen(struct l9p_request *req); +static int l9p_dispatch_tlcreate(struct l9p_request *req); +static int l9p_dispatch_tsymlink(struct l9p_request *req); +static int l9p_dispatch_tmknod(struct l9p_request *req); +static int 
l9p_dispatch_trename(struct l9p_request *req); +static int l9p_dispatch_treadlink(struct l9p_request *req); +static int l9p_dispatch_tgetattr(struct l9p_request *req); +static int l9p_dispatch_tsetattr(struct l9p_request *req); +static int l9p_dispatch_txattrwalk(struct l9p_request *req); +static int l9p_dispatch_txattrcreate(struct l9p_request *req); +static int l9p_dispatch_treaddir(struct l9p_request *req); +static int l9p_dispatch_tfsync(struct l9p_request *req); +static int l9p_dispatch_tlock(struct l9p_request *req); +static int l9p_dispatch_tgetlock(struct l9p_request *req); +static int l9p_dispatch_tlink(struct l9p_request *req); +static int l9p_dispatch_tmkdir(struct l9p_request *req); +static int l9p_dispatch_trenameat(struct l9p_request *req); +static int l9p_dispatch_tunlinkat(struct l9p_request *req); + +/* + * Each Txxx handler has a "must run" flag. If it is false, + * we check for a flush request before calling the handler. + * If a flush is already requested we can instantly fail the + * request with EINTR. + * + * Tclunk and Tremove must run because they make their fids + * become invalid. Tversion and Tattach should never get + * a flush request applied (it makes no sense as the connection + * is not really running yet), so it should be harmless to + * set them either way, but for now we have them as must-run. + * Flushing a Tflush is not really allowed either so we keep + * these as must-run too (although they run without being done + * threaded anyway). 
+ */ +struct l9p_handler { + enum l9p_ftype type; + int (*handler)(struct l9p_request *); + bool must_run; +}; + +static const struct l9p_handler l9p_handlers_no_version[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, +}; + +static const struct l9p_handler l9p_handlers_base[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false} +}; +static const struct l9p_handler l9p_handlers_dotu[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false} +}; +static const struct l9p_handler l9p_handlers_dotL[] = { + {L9P_TVERSION, l9p_dispatch_tversion, true}, + {L9P_TATTACH, l9p_dispatch_tattach, true}, + {L9P_TCLUNK, l9p_dispatch_tclunk, true}, + {L9P_TFLUSH, l9p_threadpool_tflush, true}, + {L9P_TCREATE, l9p_dispatch_tcreate, false}, + {L9P_TOPEN, l9p_dispatch_topen, false}, + {L9P_TREAD, l9p_dispatch_tread, false}, + {L9P_TWRITE, l9p_dispatch_twrite, false}, + {L9P_TREMOVE, l9p_dispatch_tremove, true}, + {L9P_TSTAT, l9p_dispatch_tstat, false}, + {L9P_TWALK, l9p_dispatch_twalk, false}, + {L9P_TWSTAT, l9p_dispatch_twstat, false}, + 
{L9P_TSTATFS, l9p_dispatch_tstatfs, false}, + {L9P_TLOPEN, l9p_dispatch_tlopen, false}, + {L9P_TLCREATE, l9p_dispatch_tlcreate, false}, + {L9P_TSYMLINK, l9p_dispatch_tsymlink, false}, + {L9P_TMKNOD, l9p_dispatch_tmknod, false}, + {L9P_TRENAME, l9p_dispatch_trename, false}, + {L9P_TREADLINK, l9p_dispatch_treadlink, false}, + {L9P_TGETATTR, l9p_dispatch_tgetattr, false}, + {L9P_TSETATTR, l9p_dispatch_tsetattr, false}, + {L9P_TXATTRWALK, l9p_dispatch_txattrwalk, false}, + {L9P_TXATTRCREATE, l9p_dispatch_txattrcreate, false}, + {L9P_TREADDIR, l9p_dispatch_treaddir, false}, + {L9P_TFSYNC, l9p_dispatch_tfsync, false}, + {L9P_TLOCK, l9p_dispatch_tlock, true}, + {L9P_TGETLOCK, l9p_dispatch_tgetlock, true}, + {L9P_TLINK, l9p_dispatch_tlink, false}, + {L9P_TMKDIR, l9p_dispatch_tmkdir, false}, + {L9P_TRENAMEAT, l9p_dispatch_trenameat, false}, + {L9P_TUNLINKAT, l9p_dispatch_tunlinkat, false}, +}; + +/* + * NB: version index 0 is reserved for new connections, and + * is a protocol that handles only L9P_TVERSION. Once we get a + * valid version, we start a new session using its dispatch table. + */ +static const struct { + const char *name; + const struct l9p_handler *handlers; + int n_handlers; +} l9p_versions[] = { + { "<none>", l9p_handlers_no_version, N(l9p_handlers_no_version) }, + { "9P2000", l9p_handlers_base, N(l9p_handlers_base) }, + { "9P2000.u", l9p_handlers_dotu, N(l9p_handlers_dotu), }, + { "9P2000.L", l9p_handlers_dotL, N(l9p_handlers_dotL), }, +}; + +/* + * Run the appropriate handler for this request. + * It's our caller's responsibility to respond. 
+ */ +int +l9p_dispatch_request(struct l9p_request *req) +{ + struct l9p_connection *conn; +#if defined(L9P_DEBUG) + struct sbuf *sb; +#endif + size_t i, n; + const struct l9p_handler *handlers, *hp; + bool flush_requested; + + conn = req->lr_conn; + flush_requested = req->lr_flushstate == L9P_FLUSH_REQUESTED_PRE_START; + + handlers = l9p_versions[conn->lc_version].handlers; + n = (size_t)l9p_versions[conn->lc_version].n_handlers; + for (hp = handlers, i = 0; i < n; hp++, i++) + if (req->lr_req.hdr.type == hp->type) + goto found; + hp = NULL; +found: + +#if defined(L9P_DEBUG) + sb = sbuf_new_auto(); + if (flush_requested) { + sbuf_cat(sb, "FLUSH requested pre-dispatch"); + if (hp != NULL && hp->must_run) + sbuf_cat(sb, ", but must run"); + sbuf_cat(sb, ": "); + } + l9p_describe_fcall(&req->lr_req, conn->lc_version, sb); + sbuf_finish(sb); + + L9P_LOG(L9P_DEBUG, "%s", sbuf_data(sb)); + sbuf_delete(sb); +#endif + + if (hp != NULL) { + if (!flush_requested || hp->must_run) + return (hp->handler(req)); + return (EINTR); + } + + L9P_LOG(L9P_WARNING, "unknown request of type %d", + req->lr_req.hdr.type); + return (ENOSYS); +} + +/* + * Translate BSD errno to 9P2000/9P2000.u errno. + */ +static inline int +e29p(int errnum) +{ + static int const table[] = { + [ENOTEMPTY] = EPERM, + [EDQUOT] = EPERM, + [ENOSYS] = EPERM, /* ??? */ + }; + + if ((size_t)errnum < N(table) && table[errnum] != 0) + return (table[errnum]); + if (errnum <= ERANGE) + return (errnum); + return (EIO); /* ??? */ +} + +/* + * Translate BSD errno to Linux errno. 
+ */ +static inline int +e2linux(int errnum) +{ + static int const table[] = { + [EDEADLK] = LINUX_EDEADLK, + [EAGAIN] = LINUX_EAGAIN, + [EINPROGRESS] = LINUX_EINPROGRESS, + [EALREADY] = LINUX_EALREADY, + [ENOTSOCK] = LINUX_ENOTSOCK, + [EDESTADDRREQ] = LINUX_EDESTADDRREQ, + [EMSGSIZE] = LINUX_EMSGSIZE, + [EPROTOTYPE] = LINUX_EPROTOTYPE, + [ENOPROTOOPT] = LINUX_ENOPROTOOPT, + [EPROTONOSUPPORT] = LINUX_EPROTONOSUPPORT, + [ESOCKTNOSUPPORT] = LINUX_ESOCKTNOSUPPORT, + [EOPNOTSUPP] = LINUX_EOPNOTSUPP, + [EPFNOSUPPORT] = LINUX_EPFNOSUPPORT, + [EAFNOSUPPORT] = LINUX_EAFNOSUPPORT, + [EADDRINUSE] = LINUX_EADDRINUSE, + [EADDRNOTAVAIL] = LINUX_EADDRNOTAVAIL, + [ENETDOWN] = LINUX_ENETDOWN, + [ENETUNREACH] = LINUX_ENETUNREACH, + [ENETRESET] = LINUX_ENETRESET, + [ECONNABORTED] = LINUX_ECONNABORTED, + [ECONNRESET] = LINUX_ECONNRESET, + [ENOBUFS] = LINUX_ENOBUFS, + [EISCONN] = LINUX_EISCONN, + [ENOTCONN] = LINUX_ENOTCONN, + [ESHUTDOWN] = LINUX_ESHUTDOWN, + [ETOOMANYREFS] = LINUX_ETOOMANYREFS, + [ETIMEDOUT] = LINUX_ETIMEDOUT, + [ECONNREFUSED] = LINUX_ECONNREFUSED, + [ELOOP] = LINUX_ELOOP, + [ENAMETOOLONG] = LINUX_ENAMETOOLONG, + [EHOSTDOWN] = LINUX_EHOSTDOWN, + [EHOSTUNREACH] = LINUX_EHOSTUNREACH, + [ENOTEMPTY] = LINUX_ENOTEMPTY, +#ifndef __illumos__ + [EPROCLIM] = LINUX_EAGAIN, +#endif + [EUSERS] = LINUX_EUSERS, + [EDQUOT] = LINUX_EDQUOT, + [ESTALE] = LINUX_ESTALE, + [EREMOTE] = LINUX_EREMOTE, + /* EBADRPC = unmappable? */ + /* ERPCMISMATCH = unmappable? */ + /* EPROGUNAVAIL = unmappable? */ + /* EPROGMISMATCH = unmappable? */ + /* EPROCUNAVAIL = unmappable? */ + [ENOLCK] = LINUX_ENOLCK, + [ENOSYS] = LINUX_ENOSYS, + /* EFTYPE = unmappable? */ + /* EAUTH = unmappable? */ + /* ENEEDAUTH = unmappable? */ + [EIDRM] = LINUX_EIDRM, + [ENOMSG] = LINUX_ENOMSG, + [EOVERFLOW] = LINUX_EOVERFLOW, + [ECANCELED] = LINUX_ECANCELED, + [EILSEQ] = LINUX_EILSEQ, + /* EDOOFUS = unmappable? 
*/ + [EBADMSG] = LINUX_EBADMSG, + [EMULTIHOP] = LINUX_EMULTIHOP, + [ENOLINK] = LINUX_ENOLINK, + [EPROTO] = LINUX_EPROTO, + /* ENOTCAPABLE = unmappable? */ +#ifdef ECAPMODE + [ECAPMODE] = EPERM, +#endif +#ifdef ENOTRECOVERABLE + [ENOTRECOVERABLE] = LINUX_ENOTRECOVERABLE, +#endif +#ifdef EOWNERDEAD + [EOWNERDEAD] = LINUX_EOWNERDEAD, +#endif + }; + + /* + * In case we want to return a raw Linux errno, allow negative + * values a la Linux kernel internals. + * + * Values up to ERANGE are shared across systems (see + * linux_errno.h), except for EAGAIN. + */ + if (errnum < 0) + return (-errnum); + + if ((size_t)errnum < N(table) && table[errnum] != 0) + return (table[errnum]); + + if (errnum <= ERANGE) + return (errnum); + + L9P_LOG(L9P_WARNING, "cannot map errno %d to anything reasonable", + errnum); + + return (LINUX_ENOTRECOVERABLE); /* ??? */ +} + +/* + * Send response to request, or possibly just drop request. + * We also need to know whether to remove the request from + * the tag hash table. 
+ */ +void +l9p_respond(struct l9p_request *req, bool drop, bool rmtag) +{ + struct l9p_connection *conn = req->lr_conn; + size_t iosize; +#if defined(L9P_DEBUG) + struct sbuf *sb; + const char *ftype; +#endif + int error; + + req->lr_resp.hdr.tag = req->lr_req.hdr.tag; + + error = req->lr_error; + if (error == 0) + req->lr_resp.hdr.type = req->lr_req.hdr.type + 1; + else { + if (conn->lc_version == L9P_2000L) { + req->lr_resp.hdr.type = L9P_RLERROR; + req->lr_resp.error.errnum = (uint32_t)e2linux(error); + } else { + req->lr_resp.hdr.type = L9P_RERROR; + req->lr_resp.error.ename = strerror(error); + req->lr_resp.error.errnum = (uint32_t)e29p(error); + } + } + +#if defined(L9P_DEBUG) + sb = sbuf_new_auto(); + l9p_describe_fcall(&req->lr_resp, conn->lc_version, sb); + sbuf_finish(sb); + + switch (req->lr_flushstate) { + case L9P_FLUSH_NONE: + default: + ftype = ""; + break; + case L9P_FLUSH_REQUESTED_PRE_START: + ftype = "FLUSH requested pre-dispatch: "; + break; + case L9P_FLUSH_REQUESTED_POST_START: + ftype = "FLUSH requested while running: "; + break; + case L9P_FLUSH_TOOLATE: + ftype = "FLUSH requested too late: "; + break; + } + L9P_LOG(L9P_DEBUG, "%s%s%s", + drop ? "DROP: " : "", ftype, sbuf_data(sb)); + sbuf_delete(sb); +#endif + + error = drop ? 0 : + l9p_pufcall(&req->lr_resp_msg, &req->lr_resp, conn->lc_version); + if (rmtag) + ht_remove(&conn->lc_requests, req->lr_req.hdr.tag); + if (error != 0) { + L9P_LOG(L9P_ERROR, "cannot pack response"); + drop = true; + } + + if (drop) { + conn->lc_lt.lt_drop_response(req, + req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + conn->lc_lt.lt_aux); + } else { + iosize = req->lr_resp_msg.lm_size; + + /* + * Include I/O size in calculation for Rread and + * Rreaddir responses. 
+ */ + if (req->lr_resp.hdr.type == L9P_RREAD || + req->lr_resp.hdr.type == L9P_RREADDIR) + iosize += req->lr_resp.io.count; + + conn->lc_lt.lt_send_response(req, + req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + iosize, conn->lc_lt.lt_aux); + } + + l9p_freefcall(&req->lr_req); + l9p_freefcall(&req->lr_resp); + + free(req); +} + +/* + * This allows a caller to iterate through the data in a + * read or write request (creating the data if packing, + * scanning through it if unpacking). This is used for + * writing readdir entries, so mode should be L9P_PACK + * (but we allow L9P_UNPACK so that debug code can also scan + * through the data later, if desired). + * + * This relies on the Tread op having positioned the request's + * iov to the beginning of the data buffer (note the l9p_seek_iov + * in l9p_dispatch_tread). + */ +void +l9p_init_msg(struct l9p_message *msg, struct l9p_request *req, + enum l9p_pack_mode mode) +{ + + msg->lm_size = 0; + msg->lm_mode = mode; + msg->lm_cursor_iov = 0; + msg->lm_cursor_offset = 0; + msg->lm_niov = req->lr_data_niov; + memcpy(msg->lm_iov, req->lr_data_iov, + sizeof (struct iovec) * req->lr_data_niov); +} + +enum fid_lookup_flags { + F_REQUIRE_OPEN = 0x01, /* require that the file be marked OPEN */ + F_REQUIRE_DIR = 0x02, /* require that the file be marked ISDIR */ + F_REQUIRE_XATTR = 0x04, /* require that the file be marked XATTR */ + F_REQUIRE_AUTH = 0x08, /* require that the fid be marked AUTH */ + F_FORBID_OPEN = 0x10, /* forbid that the file be marked OPEN */ + F_FORBID_DIR = 0x20, /* forbid that the file be marked ISDIR */ + F_FORBID_XATTR = 0x40, /* forbid that the file be marked XATTR */ + F_ALLOW_AUTH = 0x80, /* allow that the fid be marked AUTH */ +}; + +/* + * Look up a fid. 
It must correspond to a valid file, else we return + * the given errno (some "not a valid fid" calls must return EIO and + * some must return EINVAL and qemu returns ENOENT in other cases and + * so on, so we just provide a general "return this error number"). + * + * Callers may also set constraints: fid must be (or not be) open, + * must be (or not be) a directory, must be (or not be) an xattr. + * + * Only one op has a fid that *must* be an auth fid. Most ops forbid + * auth fids So instead of FORBID we have ALLOW here and the default + * is FORBID. + */ +static inline int +fid_lookup(struct l9p_connection *conn, uint32_t fid, int err, int flags, + struct l9p_fid **afile) +{ + struct l9p_fid *file; + + file = ht_find(&conn->lc_files, fid); + if (file == NULL) + return (err); + + /* + * As soon as we go multithreaded / async, this + * assert has to become "return EINVAL" or "return err". + * + * We may also need a way to mark a fid as + * "in async op" (valid for some purposes, but cannot be + * used elsewhere until async op is completed or aborted). + * + * For now, this serves for bug-detecting. + */ + assert(l9p_fid_isvalid(file)); + + /* + * Note that we're inline expanded and flags is constant, + * so unnecessary tests just drop out entirely. + */ + if ((flags & F_REQUIRE_OPEN) && !l9p_fid_isopen(file)) + return (EINVAL); + if ((flags & F_FORBID_OPEN) && l9p_fid_isopen(file)) + return (EINVAL); + if ((flags & F_REQUIRE_DIR) && !l9p_fid_isdir(file)) + return (ENOTDIR); + if ((flags & F_FORBID_DIR) && l9p_fid_isdir(file)) + return (EISDIR); + if ((flags & F_REQUIRE_XATTR) && !l9p_fid_isxattr(file)) + return (EINVAL); + if ((flags & F_FORBID_XATTR) && l9p_fid_isxattr(file)) + return (EINVAL); + if (l9p_fid_isauth(file)) { + if ((flags & (F_REQUIRE_AUTH | F_ALLOW_AUTH)) == 0) + return (EINVAL); + } else if (flags & F_REQUIRE_AUTH) + return (EINVAL); + *afile = file; + return (0); +} + +/* + * Append variable-size stat object and adjust io count. 
+ * Returns 0 if the entire stat object was packed, -1 if not. + * A fully packed object updates the request's io count. + * + * Caller must use their own private l9p_message object since + * a partially packed object will leave the message object in + * a useless state. + * + * Frees the stat object. + */ +int +l9p_pack_stat(struct l9p_message *msg, struct l9p_request *req, + struct l9p_stat *st) +{ + struct l9p_connection *conn = req->lr_conn; + uint16_t size = l9p_sizeof_stat(st, conn->lc_version); + int ret = 0; + + assert(msg->lm_mode == L9P_PACK); + + if (req->lr_resp.io.count + size > req->lr_req.io.count || + l9p_pustat(msg, st, conn->lc_version) < 0) + ret = -1; + else + req->lr_resp.io.count += size; + l9p_freestat(st); + return (ret); +} + +static int +l9p_dispatch_tversion(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_server *server = conn->lc_server; + enum l9p_version remote_version = L9P_INVALID_VERSION; + size_t i; + const char *remote_version_name; + + for (i = 0; i < N(l9p_versions); i++) { + if (strcmp(req->lr_req.version.version, + l9p_versions[i].name) == 0) { + remote_version = (enum l9p_version)i; + break; + } + } + + if (remote_version == L9P_INVALID_VERSION) { + L9P_LOG(L9P_ERROR, "unsupported remote version: %s", + req->lr_req.version.version); + return (ENOSYS); + } + + remote_version_name = l9p_versions[remote_version].name; + L9P_LOG(L9P_INFO, "remote version: %s", remote_version_name); + L9P_LOG(L9P_INFO, "local version: %s", + l9p_versions[server->ls_max_version].name); + + conn->lc_version = MIN(remote_version, server->ls_max_version); + conn->lc_msize = MIN(req->lr_req.version.msize, conn->lc_msize); + conn->lc_max_io_size = conn->lc_msize - 24; + req->lr_resp.version.version = strdup(remote_version_name); + req->lr_resp.version.msize = conn->lc_msize; + return (0); +} + +static int +l9p_dispatch_tattach(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct 
l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * We still don't have Tauth yet, but let's code this part + * anyway. + * + * Look up the auth fid first since if it fails we can just + * return immediately. + */ + if (req->lr_req.tattach.afid != L9P_NOFID) { + error = fid_lookup(conn, req->lr_req.tattach.afid, EINVAL, + F_REQUIRE_AUTH, &req->lr_fid2); + if (error) + return (error); + } else + req->lr_fid2 = NULL; + + fid = l9p_connection_alloc_fid(conn, req->lr_req.hdr.fid); + if (fid == NULL) + return (EINVAL); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + + /* For backend convenience, set NONUNAME on 9P2000. */ + if (conn->lc_version == L9P_2000) + req->lr_req.tattach.n_uname = L9P_NONUNAME; + error = be->attach(be->softc, req); + + /* + * On success, fid becomes valid; on failure, disconnect. + * It certainly *should* be a directory here... + */ + if (error == 0) { + l9p_fid_setvalid(fid); + if (req->lr_resp.rattach.qid.type & L9P_QTDIR) + l9p_fid_setdir(fid); + } else + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tclunk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Note that clunk is the only way to dispose of an auth fid. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_ALLOW_AUTH, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + l9p_fid_unsetvalid(fid); + + /* + * If it's an xattr fid there must, by definition, be an + * xattrclunk. The xattrclunk function can only be NULL if + * xattrwalk and xattrcreate are NULL or always return error. + * + * Q: do we want to allow async xattrclunk in case of very + * large xattr create? This will make things difficult, + * so probably not. 
+ */ + if (l9p_fid_isxattr(fid)) + error = be->xattrclunk(be->softc, fid); + else + error = be->clunk(be->softc, fid); + + /* fid is now gone regardless of any error return */ + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + uint32_t dmperm; + int error; + + /* Incoming fid must represent a directory that has not been opened. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + dmperm = req->lr_req.tcreate.perm; +#define MKDIR_OR_SIMILAR \ + (L9P_DMDIR | L9P_DMSYMLINK | L9P_DMNAMEDPIPE | L9P_DMSOCKET | L9P_DMDEVICE) + + /* + * TODO: + * - check new file name + * - break out different kinds of create (file vs mkdir etc) + * - add async file-create (leaves req->lr_fid in limbo) + * + * A successful file-create changes the fid into an open file. + */ + error = be->create(be->softc, req); + if (error == 0 && (dmperm & MKDIR_OR_SIMILAR) == 0) { + l9p_fid_unsetdir(req->lr_fid); + l9p_fid_setopen(req->lr_fid); + } + + return (error); +} + +static int +l9p_dispatch_topen(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - add async open (leaves req->lr_fid in limbo) + */ + error = be->open(be->softc, req); + if (error == 0) + l9p_fid_setopen(req->lr_fid); + return (error); +} + +static int +l9p_dispatch_tread(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Xattr fids are not open, so we need our own tests. 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust so that writing messages (packing data) starts + * right after the count field in the response. + * + * size[4] + Rread[1] + tag[2] + count[4] = 11 + */ + l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 11); + + /* + * If it's an xattr fid there must, by definition, be an + * xattrread. The xattrread function can only be NULL if + * xattrwalk and xattrcreate are NULL or always return error. + * + * TODO: + * separate out directory-read + * allow async read + */ + be = conn->lc_server->ls_backend; + fid = req->lr_fid; + if (l9p_fid_isxattr(fid)) { + error = be->xattrread(be->softc, req); + } else if (l9p_fid_isopen(fid)) { + error = be->read(be->softc, req); + } else { + error = EINVAL; + } + + return (error); +} + +static int +l9p_dispatch_tremove(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * ?? Should we allow Tremove on auth fids? If so, do + * we pretend it is just a Tclunk? + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + l9p_fid_unsetvalid(fid); + + error = be->remove(be->softc, fid); + /* fid is now gone regardless of any error return */ + l9p_connection_remove_fid(conn, fid); + return (error); +} + +static int +l9p_dispatch_tstat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Allow Tstat on auth fid? Seems harmless enough... 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_ALLOW_AUTH, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + req->lr_fid = fid; + error = be->stat(be->softc, req); + + if (error == 0) { + if (l9p_fid_isauth(fid)) + req->lr_resp.rstat.stat.qid.type |= L9P_QTAUTH; + + /* should we check req->lr_resp.rstat.qid.type L9P_QTDIR bit? */ + if (req->lr_resp.rstat.stat.qid.type &= L9P_QTDIR) + l9p_fid_setdir(fid); + else + l9p_fid_unsetdir(fid); + } + + return (error); +} + +static int +l9p_dispatch_twalk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid, *newfid; + uint16_t n; + int error; + + /* Can forbid XATTR, but cannot require DIR. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &fid); + if (error) + return (error); + + if (req->lr_req.twalk.hdr.fid != req->lr_req.twalk.newfid) { + newfid = l9p_connection_alloc_fid(conn, + req->lr_req.twalk.newfid); + if (newfid == NULL) + return (EINVAL); + } else + newfid = fid; + + be = conn->lc_server->ls_backend; + req->lr_fid = fid; + req->lr_newfid = newfid; + error = be->walk(be->softc, req); + + /* + * If newfid == fid, then fid itself has (potentially) changed, + * but is still valid. Otherwise set newfid valid on + * success, and destroy it on error. + */ + if (newfid != fid) { + if (error == 0) + l9p_fid_setvalid(newfid); + else + l9p_connection_remove_fid(conn, newfid); + } + + /* + * If we walked any name elements, the last (n-1'th) qid + * has the type (dir vs file) for the new fid. Otherwise + * the type of newfid is the same as fid. Of course, if + * n==0 and fid==newfid, fid is already set up correctly + * as the whole thing was a big no-op, but it's safe to + * copy its dir bit to itself. 
+ */ + if (error == 0) { + n = req->lr_resp.rwalk.nwqid; + if (n > 0) { + if (req->lr_resp.rwalk.wqid[n - 1].type & L9P_QTDIR) + l9p_fid_setdir(newfid); + } else { + if (l9p_fid_isdir(fid)) + l9p_fid_setdir(newfid); + } + } + return (error); +} + +static int +l9p_dispatch_twrite(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* Cannot require open due to xattr write, but can forbid dir. */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_DIR, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust to point to the data to be written (a la + * l9p_dispatch_tread, but we're pointing into the request + * buffer rather than the response): + * + * size[4] + Twrite[1] + tag[2] + fid[4] + offset[8] + count[4] = 23 + */ + l9p_seek_iov(req->lr_req_msg.lm_iov, req->lr_req_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 23); + + /* + * Unlike read, write and xattrwrite are optional (for R/O fs). + * + * TODO: + * allow async write + */ + be = conn->lc_server->ls_backend; + fid = req->lr_fid; + if (l9p_fid_isxattr(fid)) { + error = be->xattrwrite != NULL ? + be->xattrwrite(be->softc, req) : ENOSYS; + } else if (l9p_fid_isopen(fid)) { + error = be->write != NULL ? + be->write(be->softc, req) : ENOSYS; + } else { + error = EINVAL; + } + + return (error); +} + +static int +l9p_dispatch_twstat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + error = be->wstat != NULL ? be->wstat(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tstatfs(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Should we allow statfs on auth fids? 
*/ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, 0, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + error = be->statfs(be->softc, req); + return (error); +} + +static int +l9p_dispatch_tlopen(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_OPEN | F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - add async open (leaves req->lr_fid in limbo) + */ + error = be->lopen != NULL ? be->lopen(be->softc, req) : ENOSYS; + if (error == 0) + l9p_fid_setopen(req->lr_fid); + return (error); +} + +static int +l9p_dispatch_tlcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + * - add async create (leaves req->lr_fid in limbo) + */ + error = be->lcreate != NULL ? be->lcreate(be->softc, req) : ENOSYS; + if (error == 0) { + l9p_fid_unsetdir(req->lr_fid); + l9p_fid_setopen(req->lr_fid); + } + return (error); +} + +static int +l9p_dispatch_tsymlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* This doesn't affect the containing dir; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + */ + error = be->symlink != NULL ? 
be->symlink(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tmknod(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* This doesn't affect the containing dir; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name + */ + error = be->mknod != NULL ? be->mknod(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_trename(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Rename directory or file (including symlink etc). */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + /* Doesn't affect new dir fid; maybe allow OPEN? */ + error = fid_lookup(conn, req->lr_req.trename.dfid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: + * - check new file name (trename.name) + */ + error = be->rename != NULL ? be->rename(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_treadlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* + * The underlying readlink will fail unless it's a symlink, + * and the back end has to check, but we might as well forbid + * directories and open files here since it's cheap. + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->readlink != NULL ? 
be->readlink(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tgetattr(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->getattr != NULL ? be->getattr(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tsetattr(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->setattr != NULL ? be->setattr(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_txattrwalk(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid, *newfid; + int error; + + /* + * Not sure if we care if file-or-dir is open or not. + * However, the fid argument should always be a file or + * dir and the newfid argument must be supplied, must + * be different, and always becomes a new xattr, + * so this is not very much like Twalk. + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_XATTR, &fid); + if (error) + return (error); + + newfid = l9p_connection_alloc_fid(conn, req->lr_req.txattrwalk.newfid); + if (newfid == NULL) + return (EINVAL); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + req->lr_newfid = newfid; + error = be->xattrwalk != NULL ? be->xattrwalk(be->softc, req) : ENOSYS; + + /* + * Success/fail is similar to Twalk, except that we need + * to set the xattr type bit in the new fid. It's also + * much simpler since newfid is always a new fid. 
+ */ + if (error == 0) { + l9p_fid_setvalid(newfid); + l9p_fid_setxattr(newfid); + } else { + l9p_connection_remove_fid(conn, newfid); + } + return (error); +} + +static int +l9p_dispatch_txattrcreate(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + struct l9p_fid *fid; + int error; + + /* + * Forbid incoming open fid since it's going to become an + * xattr fid instead. If it turns out we need to allow + * it, fs code will need to handle this. + * + * Curiously, qemu 9pfs uses ENOENT for a bad txattrwalk + * fid, but EINVAL for txattrcreate (so we do too). + */ + error = fid_lookup(conn, req->lr_req.hdr.fid, EINVAL, + F_FORBID_XATTR | F_FORBID_OPEN, &fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + req->lr_fid = fid; + error = be->xattrcreate != NULL ? be->xattrcreate(be->softc, req) : + ENOSYS; + + /* + * On success, fid has changed from a regular (file or dir) + * fid to an xattr fid. + */ + if (error == 0) { + l9p_fid_unsetdir(fid); + l9p_fid_setxattr(fid); + } + return (error); +} + +static int +l9p_dispatch_treaddir(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + /* + * Adjust so that writing messages (packing data) starts + * right after the count field in the response. + * + * size[4] + Rreaddir[1] + tag[2] + count[4] = 11 + */ + l9p_seek_iov(req->lr_resp_msg.lm_iov, req->lr_resp_msg.lm_niov, + req->lr_data_iov, &req->lr_data_niov, 11); + + be = conn->lc_server->ls_backend; + + error = be->readdir != NULL ? 
be->readdir(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tfsync(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->fsync != NULL ? be->fsync(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tlock(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* Forbid directories? */ + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: multiple client handling; perhaps async locking. + */ + error = be->lock != NULL ? be->lock(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tgetlock(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* + * TODO: multiple client handling; perhaps async locking. + */ + error = be->getlock != NULL ? be->getlock(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tlink(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + /* + * Note, dfid goes into fid2 in current scheme. + * + * Allow open dir? Target dir fid is not modified... 
+ */ + error = fid_lookup(conn, req->lr_req.tlink.dfid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2); + if (error) + return (error); + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_FORBID_DIR | F_FORBID_XATTR, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + error = be->link != NULL ? be->link(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tmkdir(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + /* Slashes embedded in the name are not allowed */ + if (strchr(req->lr_req.tlcreate.name, '/') != NULL) + return (EINVAL); + + be = conn->lc_server->ls_backend; + error = be->mkdir != NULL ? be->mkdir(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_trenameat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + error = fid_lookup(conn, req->lr_req.trenameat.newdirfid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid2); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* TODO: check old and new names */ + error = be->renameat != NULL ? be->renameat(be->softc, req) : ENOSYS; + return (error); +} + +static int +l9p_dispatch_tunlinkat(struct l9p_request *req) +{ + struct l9p_connection *conn = req->lr_conn; + struct l9p_backend *be; + int error; + + error = fid_lookup(conn, req->lr_req.hdr.fid, ENOENT, + F_REQUIRE_DIR | F_FORBID_OPEN, &req->lr_fid); + if (error) + return (error); + + be = conn->lc_server->ls_backend; + + /* TODO: check dir-or-file name */ + error = be->unlinkat != NULL ? 
be->unlinkat(be->softc, req) : ENOSYS; + return (error); +} diff --git a/usr/src/lib/lib9p/common/rfuncs.c b/usr/src/lib/lib9p/common/rfuncs.c new file mode 100644 index 0000000000..f80e8c1541 --- /dev/null +++ b/usr/src/lib/lib9p/common/rfuncs.c @@ -0,0 +1,320 @@ +/* + * Copyright 2016 Chris Torek <chris.torek@gmail.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * Reentrant basename: essentially a clone of the BSD basename_r
 * function, i.e. POSIX basename() with the result placed in a
 * caller-supplied buffer.
 *
 * In BSD basename_r, the buffer must be at least MAXPATHLEN bytes
 * long.  Here the size of the buffer is passed as an argument instead.
 *
 * A temporary buffer is unavoidable in general: basename("foo/bar") is
 * "bar", but basename("foo/bar/") is also "bar" -- the result never
 * carries a trailing slash.
 *
 * Returns <buf>, or NULL (with errno set to ENAMETOOLONG) if the length
 * of the basename equals or exceeds <bufsize>.
 *
 * If <buf> is NULL the result is allocated with malloc() to fit the
 * basename (much like strdup()); <bufsize> is then ignored (pass 0) and
 * NULL is returned if the allocation fails.
 */
char *
r_basename(const char *path, char *buf, size_t bufsize)
{
	const char *first, *last;
	size_t n;

	if (path == NULL || path[0] == '\0') {
		/*
		 * NULL or empty means ".".  Forgiving, but it matches
		 * libc basename_r() and keeps the scan below safe.
		 */
		first = ".";
		n = 1;
	} else {
		/*
		 * Step back over trailing slashes without leaving the
		 * string.  Afterwards either *last is not a slash, or
		 * last == path and the whole path was "/", "//", ...
		 */
		for (last = path + strlen(path) - 1;
		    *last == '/' && last > path; last--)
			continue;

		if (*last == '/') {
			first = "/";
			n = 1;
		} else {
			/*
			 * *last is the final byte of a non-empty
			 * component.  Move first back until it sits at
			 * the start of the path or just after a slash;
			 * the basename is [first, last] inclusive.
			 */
			for (first = last;
			    first > path && first[-1] != '/'; first--)
				continue;
			n = (size_t)(last - first + 1);
		}
	}

	if (buf != NULL) {
		if (n >= bufsize) {
			errno = ENAMETOOLONG;
			return (NULL);
		}
	} else {
		buf = malloc(n + 1);
		if (buf == NULL)
			return (NULL);
	}

	memcpy(buf, first, n);
	buf[n] = '\0';
	return (buf);
}
+ * + * As a special but useful case, if you supply NULL for the <buf> + * argument, we allocate the buffer dynamically to match the + * dirname, i.e., the result is basically strdup()ed for you. + * In this case <bufsize> is ignored (recommended: pass 0 here). + */ +char * +r_dirname(const char *path, char *buf, size_t bufsize) +{ + const char *endp, *dirpart; + size_t len; + + /* + * NULL or empty path means ".". This is perhaps overly + * forgiving but matches libc dirname(), and avoids breaking + * the code below. + */ + if (path == NULL || *path == '\0') { + dirpart = "."; + len = 1; + } else { + /* + * Back up over any trailing slashes, then back up + * one path name, then back up over more slashes. + * In all cases, stop as soon as endp==path so + * that we do not back out of the buffer entirely. + * + * The first loop takes care of trailing slashes + * in names like "/foo/bar//" (where the dirname + * part is to be "/foo"), the second strips out + * the non-dir-name part, and the third leaves us + * pointing to the end of the directory component. + * + * If the entire name is of the form "/foo" or + * "//foo" (or "/foo/", etc, but we already + * handled trailing slashes), we end up pointing + * to the leading "/", which is what we want; but + * if it is of the form "foo" (or "foo/", etc) we + * point to a non-slash. So, if (and only if) + * endp==path AND *endp is not '/', the dirname is + * ".", but in all cases, the LENGTH of the + * dirname is (endp-path+1). 
+ */ + endp = path + strlen(path) - 1; + while (endp > path && *endp == '/') + endp--; + while (endp > path && *endp != '/') + endp--; + while (endp > path && *endp == '/') + endp--; + + len = (size_t)(endp - path + 1); + if (endp == path && *endp != '/') + dirpart = "."; + else + dirpart = path; + } + if (buf == NULL) { + buf = malloc(len + 1); + if (buf == NULL) + return (NULL); + } else { + if (len >= bufsize) { + errno = ENAMETOOLONG; + return (NULL); + } + } + memcpy(buf, dirpart, len); + buf[len] = '\0'; + return (buf); +} + +static void +r_pginit(struct r_pgdata *pg) +{ + + /* Note: init to half size since the first thing we do is double it */ + pg->r_pgbufsize = 1 << 9; + pg->r_pgbuf = NULL; /* note that realloc(NULL) == malloc */ +} + +static int +r_pgexpand(struct r_pgdata *pg) +{ + size_t nsize; + + nsize = pg->r_pgbufsize << 1; + if (nsize >= (1 << 20) || + (pg->r_pgbuf = reallocf(pg->r_pgbuf, nsize)) == NULL) + return (ENOMEM); + return (0); +} + +void +r_pgfree(struct r_pgdata *pg) +{ + + free(pg->r_pgbuf); +} + +struct passwd * +r_getpwuid(uid_t uid, struct r_pgdata *pg) +{ + struct passwd *result = NULL; + int error; + + r_pginit(pg); + do { + error = r_pgexpand(pg); + if (error == 0) + error = getpwuid_r(uid, &pg->r_pgun.un_pw, + pg->r_pgbuf, pg->r_pgbufsize, &result); + } while (error == ERANGE); + + return (error ? NULL : result); +} + +struct group * +r_getgrgid(gid_t gid, struct r_pgdata *pg) +{ + struct group *result = NULL; + int error; + + r_pginit(pg); + do { + error = r_pgexpand(pg); + if (error == 0) + error = getgrgid_r(gid, &pg->r_pgun.un_gr, + pg->r_pgbuf, pg->r_pgbufsize, &result); + } while (error == ERANGE); + + return (error ? 
NULL : result); +} + +#if defined(WITH_CASPER) +struct passwd * +r_cap_getpwuid(cap_channel_t *cap, uid_t uid, struct r_pgdata *pg) +{ + struct passwd *result = NULL; + int error; + + r_pginit(pg); + do { + error = r_pgexpand(pg); + if (error == 0) + error = cap_getpwuid_r(cap, uid, &pg->r_pgun.un_pw, + pg->r_pgbuf, pg->r_pgbufsize, &result); + } while (error == ERANGE); + + return (error ? NULL : result); +} + +struct group * +r_cap_getgrgid(cap_channel_t *cap, gid_t gid, struct r_pgdata *pg) +{ + struct group *result = NULL; + int error; + + r_pginit(pg); + do { + error = r_pgexpand(pg); + if (error == 0) + error = cap_getgrgid_r(cap, gid, &pg->r_pgun.un_gr, + pg->r_pgbuf, pg->r_pgbufsize, &result); + } while (error == ERANGE); + + return (error ? NULL : result); +} +#endif diff --git a/usr/src/lib/lib9p/common/rfuncs.h b/usr/src/lib/lib9p/common/rfuncs.h new file mode 100644 index 0000000000..5946f2e2b7 --- /dev/null +++ b/usr/src/lib/lib9p/common/rfuncs.h @@ -0,0 +1,83 @@ +/* + * Copyright 2016 Chris Torek <chris.torek@gmail.com> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_RFUNCS_H +#define LIB9P_RFUNCS_H + +#if defined(__illumos__) && !defined(_POSIX_PTHREAD_SEMANTICS) +#define _POSIX_PTHREAD_SEMANTICS 1 +#endif + +#include <grp.h> +#include <pwd.h> +#include <string.h> + +#if defined(WITH_CASPER) +#include <libcasper.h> +#endif + +/* + * Reentrant, optionally-malloc-ing versions of + * basename() and dirname(). + */ +char *r_basename(const char *, char *, size_t); +char *r_dirname(const char *, char *, size_t); + +/* + * Yuck: getpwuid, getgrgid are not thread-safe, and the + * POSIX replacements (getpwuid_r, getgrgid_r) are horrible. + * This is to allow us to loop over the get.*_r calls with ever + * increasing buffers until they succeed or get unreasonable + * (same idea as the libc code for the non-reentrant versions, + * although prettier). + * + * The getpwuid/getgrgid functions auto-init one of these, + * but the caller must call r_pgfree() when done with the + * return values. + * + * If we need more later, we may have to expose the init function. 
+ */ +struct r_pgdata { + char *r_pgbuf; + size_t r_pgbufsize; + union { + struct passwd un_pw; + struct group un_gr; + } r_pgun; +}; + +/* void r_pginit(struct r_pgdata *); */ +void r_pgfree(struct r_pgdata *); +struct passwd *r_getpwuid(uid_t, struct r_pgdata *); +struct group *r_getgrgid(gid_t, struct r_pgdata *); + +#if defined(WITH_CASPER) +struct passwd *r_cap_getpwuid(cap_channel_t *, uid_t, struct r_pgdata *); +struct group *r_cap_getgrgid(cap_channel_t *, gid_t, struct r_pgdata *); +#endif + +#endif /* LIB9P_RFUNCS_H */ diff --git a/usr/src/lib/lib9p/common/sbuf/sbuf.c b/usr/src/lib/lib9p/common/sbuf/sbuf.c new file mode 100644 index 0000000000..55e0f88650 --- /dev/null +++ b/usr/src/lib/lib9p/common/sbuf/sbuf.c @@ -0,0 +1,65 @@ +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Minimal libsbuf wrapper around libcustr for illumos. + */ + +#include <stdlib.h> +#include "sbuf.h" + +struct sbuf * +sbuf_new_auto() +{ + struct sbuf *s; + + s = malloc(sizeof(struct sbuf)); + if (s == NULL) + return (s); + if (custr_alloc(&s->s_custr) != 0) { + free(s); + return (NULL); + } + return (s); +} + +int +sbuf_printf(struct sbuf *s, const char *fmt, ...) +{ + int ret; + va_list ap; + + va_start(ap, fmt); + ret = custr_append_vprintf(s->s_custr, fmt, ap); + va_end(ap); + + return (ret); +} + +void +sbuf_delete(struct sbuf *s) +{ + custr_free(s->s_custr); + free(s); +} diff --git a/usr/src/lib/lib9p/common/sbuf/sbuf.h b/usr/src/lib/lib9p/common/sbuf/sbuf.h new file mode 100644 index 0000000000..5b17b3113e --- /dev/null +++ b/usr/src/lib/lib9p/common/sbuf/sbuf.h @@ -0,0 +1,51 @@ +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Minimal libsbuf wrapper around libcustr for illumos. + */ + +#ifndef LIB9P_SBUF_H +#define LIB9P_SBUF_H + +#include <stdarg.h> +#include <libcustr.h> + +struct sbuf +{ + custr_t *s_custr; +}; + +struct sbuf *sbuf_new_auto(void); +char *sbuf_data(struct sbuf *s); +int sbuf_printf(struct sbuf *s, const char *fmt, ...); +void sbuf_delete(struct sbuf *s); + +#define sbuf_cat(s, str) custr_append((s)->s_custr, (str)) +#define sbuf_vprintf(s, fmt, args) \ + custr_append_vprintf((s)->s_custr, (fmt), (args)) +#define sbuf_data(s) custr_cstr((s)->s_custr) +#define sbuf_finish(s) + +#endif /* LIB9P_SBUF_H */ diff --git a/usr/src/lib/lib9p/common/threadpool.c b/usr/src/lib/lib9p/common/threadpool.c new file mode 100644 index 0000000000..a29f2315c5 --- /dev/null +++ b/usr/src/lib/lib9p/common/threadpool.c @@ -0,0 +1,469 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Copyright 2020 Joyent, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <errno.h> +#include <stdlib.h> +#include <pthread.h> +#if defined(__FreeBSD__) +#include <pthread_np.h> +#endif +#include <sys/queue.h> +#include "lib9p.h" +#include "threadpool.h" + +static void l9p_threadpool_rflush(struct l9p_threadpool *tp, + struct l9p_request *req); + +static void * +l9p_responder(void *arg) +{ + struct l9p_threadpool *tp; + struct l9p_worker *worker = arg; + struct l9p_request *req; + + tp = worker->ltw_tp; + for (;;) { + /* get next reply to send */ + + if (pthread_mutex_lock(&tp->ltp_mtx) != 0) + break; + while (STAILQ_EMPTY(&tp->ltp_replyq) && !worker->ltw_exiting) { + (void) pthread_cond_wait(&tp->ltp_reply_cv, + &tp->ltp_mtx); + } + if (worker->ltw_exiting) { + (void) pthread_mutex_unlock(&tp->ltp_mtx); + break; + } + + /* off reply queue */ + req = STAILQ_FIRST(&tp->ltp_replyq); + STAILQ_REMOVE_HEAD(&tp->ltp_replyq, lr_worklink); + + /* request is now in final glide path, can't be Tflush-ed */ + req->lr_workstate = L9P_WS_REPLYING; + + /* any flushers waiting for this request can go now */ + if (req->lr_flushstate != L9P_FLUSH_NONE) + l9p_threadpool_rflush(tp, req); + + if (pthread_mutex_unlock(&tp->ltp_mtx) != 0) + break; + + /* send response */ + l9p_respond(req, false, true); + } + return (NULL); +} + +static void * +l9p_worker(void *arg) +{ + struct l9p_threadpool *tp; + struct l9p_worker *worker = arg; + struct l9p_request *req; + + tp = worker->ltw_tp; + if (pthread_mutex_lock(&tp->ltp_mtx) != 0) + return (NULL); + for (;;) { + while (STAILQ_EMPTY(&tp->ltp_workq) && !worker->ltw_exiting) { + (void) pthread_cond_wait(&tp->ltp_work_cv, + &tp->ltp_mtx); + } + if (worker->ltw_exiting) + break; + + /* off work queue; now work-in-progress, by us */ + req = STAILQ_FIRST(&tp->ltp_workq); + STAILQ_REMOVE_HEAD(&tp->ltp_workq, lr_worklink); + req->lr_workstate = L9P_WS_INPROGRESS; + req->lr_worker = worker; + (void) pthread_mutex_unlock(&tp->ltp_mtx); + + /* actually try the request */ + req->lr_error = 
l9p_dispatch_request(req); + + /* move to responder queue, updating work-state */ + if (pthread_mutex_lock(&tp->ltp_mtx) != 0) + return (NULL); + req->lr_workstate = L9P_WS_RESPQUEUED; + req->lr_worker = NULL; + STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink); + + /* signal the responder */ + (void) pthread_cond_signal(&tp->ltp_reply_cv); + } + (void) pthread_mutex_unlock(&tp->ltp_mtx); + return (NULL); +} + +/* + * Just before finally replying to a request that got touched by + * a Tflush request, we enqueue its flushers (requests of type + * Tflush, which are now on the flushee's lr_flushq) onto the + * response queue. + */ +static void +l9p_threadpool_rflush(struct l9p_threadpool *tp, struct l9p_request *req) +{ + struct l9p_request *flusher; + + /* + * https://swtch.com/plan9port/man/man9/flush.html says: + * + * "Should multiple Tflushes be received for a pending + * request, they must be answered in order. A Rflush for + * any of the multiple Tflushes implies an answer for all + * previous ones. Therefore, should a server receive a + * request and then multiple flushes for that request, it + * need respond only to the last flush." This means + * we could march through the queue of flushers here, + * marking all but the last one as "to be dropped" rather + * than "to be replied-to". + * + * However, we'll leave that for later, if ever -- it + * should be harmless to respond to each, in order. + */ + STAILQ_FOREACH(flusher, &req->lr_flushq, lr_flushlink) { + flusher->lr_workstate = L9P_WS_RESPQUEUED; +#ifdef notdef + if (not the last) { + flusher->lr_flushstate = L9P_FLUSH_NOT_RUN; + /* or, flusher->lr_drop = true ? 
*/ + } +#endif + STAILQ_INSERT_TAIL(&tp->ltp_replyq, flusher, lr_worklink); + } +} + +int +l9p_threadpool_init(struct l9p_threadpool *tp, int size) +{ + struct l9p_worker *worker; +#if defined(__FreeBSD__) + char threadname[16]; +#endif + int error; + int i, nworkers, nresponders; + + if (size <= 0) + return (EINVAL); +#ifdef __illumos__ + pthread_mutexattr_t attr; + + if ((error = pthread_mutexattr_init(&attr)) != 0) + return (error); + if ((error = pthread_mutexattr_settype(&attr, + PTHREAD_MUTEX_ERRORCHECK)) != 0) { + return (error); + } + error = pthread_mutex_init(&tp->ltp_mtx, &attr); +#else + error = pthread_mutex_init(&tp->ltp_mtx, NULL); +#endif + if (error) + return (error); + error = pthread_cond_init(&tp->ltp_work_cv, NULL); + if (error) + goto fail_work_cv; + error = pthread_cond_init(&tp->ltp_reply_cv, NULL); + if (error) + goto fail_reply_cv; + + STAILQ_INIT(&tp->ltp_workq); + STAILQ_INIT(&tp->ltp_replyq); + LIST_INIT(&tp->ltp_workers); + + nresponders = 0; + nworkers = 0; + for (i = 0; i <= size; i++) { + worker = calloc(1, sizeof(struct l9p_worker)); +#ifdef __illumos__ + if (worker == NULL) + break; +#endif + worker->ltw_tp = tp; + worker->ltw_responder = i == 0; + error = pthread_create(&worker->ltw_thread, NULL, + worker->ltw_responder ? 
l9p_responder : l9p_worker, + (void *)worker); + if (error) { + free(worker); + break; + } + if (worker->ltw_responder) + nresponders++; + else + nworkers++; + +#if defined(__FreeBSD__) + if (worker->ltw_responder) { + pthread_set_name_np(worker->ltw_thread, "9p-responder"); + } else { + sprintf(threadname, "9p-worker:%d", i - 1); + pthread_set_name_np(worker->ltw_thread, threadname); + } +#elif defined(__illumos__) + if (worker->ltw_responder) { + (void) pthread_setname_np(worker->ltw_thread, + "9p-responder"); + } else { + char threadname[PTHREAD_MAX_NAMELEN_NP]; + + (void) snprintf(threadname, sizeof (threadname), + "9p-worker:%d", i - 1); + (void) pthread_setname_np(worker->ltw_thread, + threadname); + } +#endif + + LIST_INSERT_HEAD(&tp->ltp_workers, worker, ltw_link); + } + if (nresponders == 0 || nworkers == 0) { + /* need the one responder, and at least one worker */ + l9p_threadpool_shutdown(tp); + return (error); + } + return (0); + + /* + * We could avoid these labels by having multiple destroy + * paths (one for each error case), or by having booleans + * for which variables were initialized. Neither is very + * appealing... + */ +fail_reply_cv: + (void) pthread_cond_destroy(&tp->ltp_work_cv); +fail_work_cv: + (void) pthread_mutex_destroy(&tp->ltp_mtx); + + return (error); +} + +/* + * Run a request, usually by queueing it. + */ +void +l9p_threadpool_run(struct l9p_threadpool *tp, struct l9p_request *req) +{ + + /* + * Flush requests must be handled specially, since they + * can cancel / kill off regular requests. (But we can + * run them through the regular dispatch mechanism.) 
+ */ + if (req->lr_req.hdr.type == L9P_TFLUSH) { + /* not on a work queue yet so we can touch state */ + req->lr_workstate = L9P_WS_IMMEDIATE; + (void) l9p_dispatch_request(req); + } else { + if (pthread_mutex_lock(&tp->ltp_mtx) != 0) + return; + req->lr_workstate = L9P_WS_NOTSTARTED; + STAILQ_INSERT_TAIL(&tp->ltp_workq, req, lr_worklink); + (void) pthread_cond_signal(&tp->ltp_work_cv); + (void) pthread_mutex_unlock(&tp->ltp_mtx); + } +} + +/* + * Run a Tflush request. Called via l9p_dispatch_request() since + * it has some debug code in it, but not called from worker thread. + */ +int +l9p_threadpool_tflush(struct l9p_request *req) +{ + struct l9p_connection *conn; + struct l9p_threadpool *tp; + struct l9p_request *flushee; + uint16_t oldtag; + enum l9p_flushstate nstate = L9P_FLUSH_NONE; + int err; + + /* + * Find what we're supposed to flush (the flushee, as it were). + */ + req->lr_error = 0; /* Tflush always succeeds */ + conn = req->lr_conn; + tp = &conn->lc_tp; + oldtag = req->lr_req.tflush.oldtag; + if ((err = ht_wrlock(&conn->lc_requests)) != 0) + return (err); + flushee = ht_find_locked(&conn->lc_requests, oldtag); + if (flushee == NULL) { + /* + * Nothing to flush! The old request must have + * been done and gone already. Just queue this + * Tflush for a success reply. + */ + (void) ht_unlock(&conn->lc_requests); + if ((err = pthread_mutex_lock(&tp->ltp_mtx)) != 0) + return (err); + goto done; + } + + /* + * Found the original request. We'll need to inspect its + * work-state to figure out what to do. + */ + if ((err = pthread_mutex_lock(&tp->ltp_mtx)) != 0) { + (void) ht_unlock(&conn->lc_requests); + return (err); + } + (void) ht_unlock(&conn->lc_requests); + + switch (flushee->lr_workstate) { + + case L9P_WS_NOTSTARTED: + /* + * Flushee is on work queue, but not yet being + * handled by a worker. 
+ * + * The documentation -- see + * http://ericvh.github.io/9p-rfc/rfc9p2000.html + * https://swtch.com/plan9port/man/man9/flush.html + * -- says that "the server should answer the + * flush message immediately". However, Linux + * sends flush requests for operations that + * must finish, such as Tclunk, and it's not + * possible to *answer* the flush request until + * it has been handled (if necessary) or aborted + * (if allowed). + * + * We therefore now just the original request + * and let the request-handler do whatever is + * appropriate. NOTE: we could have a table of + * "requests that can be aborted without being + * run" vs "requests that must be run to be + * aborted", but for now that seems like an + * unnecessary complication. + */ + nstate = L9P_FLUSH_REQUESTED_PRE_START; + break; + + case L9P_WS_IMMEDIATE: + /* + * This state only applies to Tflush requests, and + * flushing a Tflush is illegal. But we'll do nothing + * special here, which will make us act like a flush + * request for the flushee that arrived too late to + * do anything about the flushee. + */ + nstate = L9P_FLUSH_REQUESTED_POST_START; + break; + + case L9P_WS_INPROGRESS: + /* + * Worker thread flushee->lr_worker is working on it. + * Kick it to get it out of blocking system calls. + * (This requires that it carefully set up some + * signal handlers, and may be FreeBSD-dependent, + * it probably cannot be handled this way on MacOS.) + */ +#ifdef notyet + pthread_kill(...); +#endif + nstate = L9P_FLUSH_REQUESTED_POST_START; + break; + + case L9P_WS_RESPQUEUED: + /* + * The flushee is already in the response queue. + * We'll just mark it as having had some flush + * action applied. + */ + nstate = L9P_FLUSH_TOOLATE; + break; + + case L9P_WS_REPLYING: + /* + * Although we found the flushee, it's too late to + * make us depend on it: it's already heading out + * the door as a reply. + * + * We don't want to do anything to the flushee. 
+ * Instead, we want to work the same way as if + * we had never found the tag. + */ + goto done; + } + + /* + * Now add us to the list of Tflush-es that are waiting + * for the flushee (creating the list if needed, i.e., if + * this is the first Tflush for the flushee). We (req) + * will get queued for reply later, when the responder + * processes the flushee and calls l9p_threadpool_rflush(). + */ + if (flushee->lr_flushstate == L9P_FLUSH_NONE) + STAILQ_INIT(&flushee->lr_flushq); + flushee->lr_flushstate = nstate; + STAILQ_INSERT_TAIL(&flushee->lr_flushq, req, lr_flushlink); + + (void) pthread_mutex_unlock(&tp->ltp_mtx); + + return (0); + +done: + /* + * This immediate op is ready to be replied-to now, so just + * stick it onto the reply queue. + */ + req->lr_workstate = L9P_WS_RESPQUEUED; + STAILQ_INSERT_TAIL(&tp->ltp_replyq, req, lr_worklink); + (void) pthread_mutex_unlock(&tp->ltp_mtx); + (void) pthread_cond_signal(&tp->ltp_reply_cv); + return (0); +} + +int +l9p_threadpool_shutdown(struct l9p_threadpool *tp) +{ + struct l9p_worker *worker, *tmp; + + LIST_FOREACH_SAFE(worker, &tp->ltp_workers, ltw_link, tmp) { + if (pthread_mutex_lock(&tp->ltp_mtx) != 0) + continue; + worker->ltw_exiting = true; + if (worker->ltw_responder) + (void) pthread_cond_signal(&tp->ltp_reply_cv); + else + (void) pthread_cond_broadcast(&tp->ltp_work_cv); + (void) pthread_mutex_unlock(&tp->ltp_mtx); + (void) pthread_join(worker->ltw_thread, NULL); + LIST_REMOVE(worker, ltw_link); + free(worker); + } + (void) pthread_cond_destroy(&tp->ltp_reply_cv); + (void) pthread_cond_destroy(&tp->ltp_work_cv); + (void) pthread_mutex_destroy(&tp->ltp_mtx); + + return (0); +} diff --git a/usr/src/lib/lib9p/common/threadpool.h b/usr/src/lib/lib9p/common/threadpool.h new file mode 100644 index 0000000000..2855c1c545 --- /dev/null +++ b/usr/src/lib/lib9p/common/threadpool.h @@ -0,0 +1,118 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in 
source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef LIB9P_THREADPOOL_H +#define LIB9P_THREADPOOL_H + +#include <stdbool.h> +#include <pthread.h> +#include <sys/queue.h> +#include "lib9p.h" + +STAILQ_HEAD(l9p_request_queue, l9p_request); + +/* + * Most of the workers in the threadpool run requests. + * + * One distinguished worker delivers responses from the + * response queue. The reason this worker exists is to + * guarantee response order, so that flush responses go + * after their flushed requests. 
+ */ +struct l9p_threadpool { + struct l9p_connection * ltp_conn; /* the connection */ + struct l9p_request_queue ltp_workq; /* requests awaiting a worker */ + struct l9p_request_queue ltp_replyq; /* requests that are done */ + pthread_mutex_t ltp_mtx; /* locks queues and cond vars */ + pthread_cond_t ltp_work_cv; /* to signal regular workers */ + pthread_cond_t ltp_reply_cv; /* to signal reply-worker */ + LIST_HEAD(, l9p_worker) ltp_workers; /* list of all workers */ +}; + +/* + * All workers, including the responder, use this as their + * control structure. (The only thing that distinguishes the + * responder is that it runs different code and waits on the + * reply_cv.) + */ +struct l9p_worker { + struct l9p_threadpool * ltw_tp; + pthread_t ltw_thread; + bool ltw_exiting; + bool ltw_responder; + LIST_ENTRY(l9p_worker) ltw_link; +}; + +/* + * Each request has a "work state" telling where the request is, + * in terms of workers working on it. That is, this tells us + * which threadpool queue, if any, the request is in now or would + * go in, or what's happening with it. + */ +enum l9p_workstate { + L9P_WS_NOTSTARTED, /* not yet started */ + L9P_WS_IMMEDIATE, /* Tflush being done sans worker */ + L9P_WS_INPROGRESS, /* worker is working on it */ + L9P_WS_RESPQUEUED, /* worker is done, response queued */ + L9P_WS_REPLYING, /* responder is in final reply path */ +}; + +/* + * Each request has a "flush state", initally NONE meaning no + * Tflush affected the request. + * + * If a Tflush comes in before we ever assign a work thread, + * the flush state goes to FLUSH_REQUESTED_PRE_START. + * + * If a Tflush comes in after we assign a work thread, the + * flush state goes to FLUSH_REQUESTED_POST_START. The flush + * request may be too late: the request might finish anyway. + * Or it might be soon enough to abort. 
In all cases, though, the + * operation requesting the flush (the "flusher") must wait for + * the other request (the "flushee") to go through the respond + * path. The respond routine gets to decide whether to send a + * normal response, send an error, or drop the request + * entirely. + * + * There's one especially annoying case: what if a Tflush comes in + * *while* we're sending a response? In this case it's too late: + * the flush just waits for the fully-composed response. + */ +enum l9p_flushstate { + L9P_FLUSH_NONE = 0, /* must be zero */ + L9P_FLUSH_REQUESTED_PRE_START, /* not even started before flush */ + L9P_FLUSH_REQUESTED_POST_START, /* started, then someone said flush */ + L9P_FLUSH_TOOLATE /* too late, already responding */ +}; + +void l9p_threadpool_flushee_done(struct l9p_request *); +int l9p_threadpool_init(struct l9p_threadpool *, int); +void l9p_threadpool_run(struct l9p_threadpool *, struct l9p_request *); +int l9p_threadpool_shutdown(struct l9p_threadpool *); +int l9p_threadpool_tflush(struct l9p_request *); + +#endif /* LIB9P_THREADPOOL_H */ diff --git a/usr/src/lib/lib9p/common/transport/socket.c b/usr/src/lib/lib9p/common/transport/socket.c new file mode 100644 index 0000000000..214a1c8d70 --- /dev/null +++ b/usr/src/lib/lib9p/common/transport/socket.c @@ -0,0 +1,593 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Copyright 2021 Joyent, Inc. + */ + +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <assert.h> +#include <sys/types.h> +#ifdef __APPLE__ +# include "../apple_endian.h" +#elif __illumos__ +# include <sys/param.h> +# include <port.h> +# include "../illumos_endian.h" +#else +# include <sys/endian.h> +#endif +#include <sys/socket.h> +#ifndef __illumos__ +# include <sys/event.h> +#endif +#include <sys/uio.h> +#include <netdb.h> +#include "../lib9p.h" +#include "../lib9p_impl.h" +#include "../log.h" +#include "socket.h" + +struct l9p_socket_softc +{ + struct l9p_connection *ls_conn; + struct sockaddr ls_sockaddr; + socklen_t ls_socklen; + pthread_t ls_thread; + int ls_fd; +}; + +#ifdef __FreeBSD__ +struct event_svr { + struct kevent *ev_kev; + struct kevent *ev_event; + int ev_kq; +}; +#elif __illumos__ +struct event_svr { + port_event_t *ev_pe; + int ev_port; +}; +#else +#error "No event server defined" +#endif + +static int l9p_init_event_svr(struct event_svr *, uint_t); +static uint_t l9p_get_server_addrs(const char *, const char *, + struct addrinfo **); +static uint_t l9p_bind_addrs(struct event_svr *, struct addrinfo *, uint_t, + int **); +static int 
l9p_event_get(struct l9p_server *, struct event_svr *, uint_t, + void (*cb)(struct l9p_server *, int)); +static int l9p_socket_readmsg(struct l9p_socket_softc *, void **, size_t *); +static int l9p_socket_get_response_buffer(struct l9p_request *, + struct iovec *, size_t *, void *); +static int l9p_socket_send_response(struct l9p_request *, const struct iovec *, + const size_t, const size_t, void *); +static void l9p_socket_drop_response(struct l9p_request *, const struct iovec *, + size_t, void *); +static void *l9p_socket_thread(void *); +static ssize_t xread(int, void *, size_t); +static ssize_t xwrite(int, void *, size_t); + +int +l9p_start_server(struct l9p_server *server, const char *host, const char *port) +{ + struct addrinfo *res = NULL; + int *sockets = NULL; + uint_t naddrs = 0; + uint_t nsockets = 0; + uint_t i; + struct event_svr esvr; + + naddrs = l9p_get_server_addrs(host, port, &res); + if (naddrs == 0) + return (-1); + + if (l9p_init_event_svr(&esvr, naddrs) != 0) { + freeaddrinfo(res); + return (-1); + } + + nsockets = l9p_bind_addrs(&esvr, res, naddrs, &sockets); + + /* + * We don't need res, after this, so free it and NULL it to prevent + * any possible use after free. 
/*
 * Resolve host/port into a list of stream-socket addresses.
 *
 * On success the list is stored in *resp (to be released by the caller
 * with freeaddrinfo()) and the number of entries in it is returned.
 * On failure 0 is returned and *resp is NULL.
 */
static uint_t
l9p_get_server_addrs(const char *host, const char *port, struct addrinfo **resp)
{
	struct addrinfo *res, hints;
	uint_t naddrs;
	int rc;

	*resp = NULL;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	rc = getaddrinfo(host, port, &hints, resp);
	/*
	 * getaddrinfo() reports failure with any non-zero value; the
	 * EAI_* codes are negative on some platforms, so testing for
	 * "> 0" would let those failures through.
	 */
	if (rc != 0) {
		L9P_LOG(L9P_ERROR, "getaddrinfo(): %s", gai_strerror(rc));
		*resp = NULL;
		return (0);
	}

	naddrs = 0;
	for (res = *resp; res != NULL; res = res->ai_next)
		naddrs++;

	if (naddrs == 0) {
		/* either string may legitimately be NULL; don't hand that to %s */
		L9P_LOG(L9P_ERROR, "no addresses found for %s:%s",
		    host != NULL ? host : "(null)",
		    port != NULL ? port : "(null)");
	}

	return (naddrs);
}
L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno)); + return (-1); + } + + svr->ev_port = port_create(); + if (svr->ev_port == -1) { + L9P_LOG(L9P_ERROR, "port_create(): %s", strerror(errno)); + return (-1); + } + + return (0); +} +#else +#error "No event server defined" +#endif + +static uint_t +l9p_bind_addrs(struct event_svr *svr, struct addrinfo *addrs, uint_t naddrs, + int **socketsp) +{ + struct addrinfo *addr; + uint_t i, j; + + *socketsp = calloc(naddrs, sizeof(int)); + if (*socketsp == NULL) { + L9P_LOG(L9P_ERROR, "calloc(): %s", strerror(errno)); + return (0); + } + + for (i = 0, addr = addrs; addr != NULL; addr = addr->ai_next) { + int s; + int val = 1; + + s = socket(addr->ai_family, addr->ai_socktype, + addr->ai_protocol); + if (s == -1) { + L9P_LOG(L9P_ERROR, "socket(): %s", strerror(errno)); + continue; + } + + if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &val, + sizeof(val)) < 0) { + L9P_LOG(L9P_ERROR, "setsockopt(): %s", strerror(errno)); + close(s); + continue; + } + + if (bind(s, addr->ai_addr, addr->ai_addrlen) < 0) { + L9P_LOG(L9P_ERROR, "bind(): %s", strerror(errno)); + close(s); + continue; + } + + if (listen(s, 10) < 0) { + L9P_LOG(L9P_ERROR, "listen(): %s", strerror(errno)); + close(s); + continue; + } + +#ifdef __FreeBSD__ + EV_SET(&svr->ev_kev[i], s, EVFILT_READ, EV_ADD | EV_ENABLE, 0, + 0, 0); +#elif __illumos__ + if (port_associate(svr->ev_port, PORT_SOURCE_FD, s, + POLLIN|POLLHUP, NULL) < 0) { + L9P_LOG(L9P_ERROR, "port_associate(%d): %s", s, + strerror(errno)); + close(s); + continue; + } +#else +#error "Port me" +#endif + + *socketsp[i++] = s; + } + + if (i < 1) { + free(*socketsp); + *socketsp = NULL; + return (0); + } + + for (j = i; j < naddrs; j++) + *socketsp[j++] = -1; + +#ifdef __FreeBSD__ + if (kevent(svr->ev_kq, svr->ev_kev, i, NULL, 0, NULL) < 0) { + L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno)); + + for (j = 0; j < i; j++) + close(j); + + free(*socketsp); + *socketsp = NULL; + + return (0); + } +#endif + + return 
(i); +} + +#ifdef __FreeBSD__ +static int +l9p_event_get(struct l9p_server *l9svr, struct event_svr *esvr, uint_t nsockets, + void (*cb)(struct l9p_server *, int)) +{ + int i, evs; + + evs = kevent(esvr->ev_kq, NULL, 0, esvr->ev_event, nsockets, NULL); + if (evs < 0) { + if (errno == EINTR) + return (0); + L9P_LOG(L9P_ERROR, "kevent(): %s", strerror(errno)); + return (-1); + } + + for (i = 0; i < evs; i++) + cb(l9svr, (int)sevr->ev_event[i].ident); + + return (0); +} +#elif __illumos__ +static int +l9p_event_get(struct l9p_server *l9svr, struct event_svr *esvr, uint_t nsockets, + void (*cb)(struct l9p_server *, int)) +{ + uint_t evs = 1; + int i; + + if (port_getn(esvr->ev_port, esvr->ev_pe, nsockets, &evs, NULL) < 0) { + if (errno == EINTR) + return (0); + L9P_LOG(L9P_ERROR, "port_getn(): %s", strerror(errno)); + return (-1); + } + + for (i = 0; i < evs; i++) { + if (esvr->ev_pe[i].portev_source != PORT_SOURCE_FD) + continue; + + cb(l9svr, (int)esvr->ev_pe[i].portev_object); + } + + return (0); +} +#else +#error "Port me" +#endif + +void +l9p_socket_accept(struct l9p_server *server, int svr_fd) +{ + struct l9p_socket_softc *sc; + struct l9p_connection *conn; + char host[NI_MAXHOST + 1]; + char serv[NI_MAXSERV + 1]; + struct sockaddr client_addr; + socklen_t client_addr_len = sizeof(client_addr); + int conn_fd, err; + + conn_fd = accept(svr_fd, &client_addr, &client_addr_len); + if (conn_fd < 0) { + L9P_LOG(L9P_WARNING, "accept(): %s", strerror(errno)); + return; + } + + err = getnameinfo(&client_addr, client_addr_len, host, NI_MAXHOST, + serv, NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV); + + if (err != 0) { + L9P_LOG(L9P_WARNING, "cannot look up client name: %s", + gai_strerror(err)); + } else { + L9P_LOG(L9P_INFO, "new connection from %s:%s", host, serv); + } + + if (l9p_connection_init(server, &conn) != 0) { + L9P_LOG(L9P_ERROR, "cannot create new connection"); + return; + } + + sc = l9p_calloc(1, sizeof(*sc)); + sc->ls_conn = conn; + sc->ls_fd = conn_fd; + + 
/* + * Fill in transport handler functions and aux argument. + */ + conn->lc_lt.lt_aux = sc; + conn->lc_lt.lt_get_response_buffer = l9p_socket_get_response_buffer; + conn->lc_lt.lt_send_response = l9p_socket_send_response; + conn->lc_lt.lt_drop_response = l9p_socket_drop_response; + + err = pthread_create(&sc->ls_thread, NULL, l9p_socket_thread, sc); + if (err) { + L9P_LOG(L9P_ERROR, + "pthread_create (for connection from %s:%s): error %s", + host, serv, strerror(err)); + l9p_connection_close(sc->ls_conn); + free(sc); + } +} + +static void * +l9p_socket_thread(void *arg) +{ + struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg; + struct iovec iov; + void *buf; + size_t length; + + for (;;) { + if (l9p_socket_readmsg(sc, &buf, &length) != 0) + break; + + iov.iov_base = buf; + iov.iov_len = length; + l9p_connection_recv(sc->ls_conn, &iov, 1, NULL); + free(buf); + } + + L9P_LOG(L9P_INFO, "connection closed"); + l9p_connection_close(sc->ls_conn); + free(sc); + return (NULL); +} + +static int +l9p_socket_readmsg(struct l9p_socket_softc *sc, void **buf, size_t *size) +{ + uint32_t msize; + size_t toread; + ssize_t ret; + void *buffer; + int fd = sc->ls_fd; + + assert(fd > 0); + + buffer = l9p_malloc(sizeof(uint32_t)); + + ret = xread(fd, buffer, sizeof(uint32_t)); + if (ret < 0) { + L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno)); + return (-1); + } + + if (ret != sizeof(uint32_t)) { + if (ret == 0) { + L9P_LOG(L9P_DEBUG, "%p: EOF", (void *)sc->ls_conn); + } else { + L9P_LOG(L9P_ERROR, + "short read: %zd bytes of %zd expected", + ret, sizeof(uint32_t)); + } + return (-1); + } + + msize = le32toh(*(uint32_t *)buffer); + toread = msize - sizeof(uint32_t); + buffer = l9p_realloc(buffer, msize); + + ret = xread(fd, (char *)buffer + sizeof(uint32_t), toread); + if (ret < 0) { + L9P_LOG(L9P_ERROR, "read(): %s", strerror(errno)); + return (-1); + } + + if (ret != (ssize_t)toread) { + L9P_LOG(L9P_ERROR, "short read: %zd bytes of %zd expected", + ret, toread); + return 
(-1); + } + + *size = msize; + *buf = buffer; + L9P_LOG(L9P_INFO, "%p: read complete message, buf=%p size=%d", + (void *)sc->ls_conn, buffer, msize); + + return (0); +} + +static int +l9p_socket_get_response_buffer(struct l9p_request *req, struct iovec *iov, + size_t *niovp, void *arg __unused) +{ + size_t size = req->lr_conn->lc_msize; + void *buf; + + buf = l9p_malloc(size); + iov[0].iov_base = buf; + iov[0].iov_len = size; + + *niovp = 1; + return (0); +} + +static int +l9p_socket_send_response(struct l9p_request *req __unused, + const struct iovec *iov, const size_t niov __unused, const size_t iolen, + void *arg) +{ + struct l9p_socket_softc *sc = (struct l9p_socket_softc *)arg; + + assert(sc->ls_fd >= 0); + + L9P_LOG(L9P_DEBUG, "%p: sending reply, buf=%p, size=%d", arg, + iov[0].iov_base, iolen); + + if (xwrite(sc->ls_fd, iov[0].iov_base, iolen) != (int)iolen) { + L9P_LOG(L9P_ERROR, "short write: %s", strerror(errno)); + return (-1); + } + + free(iov[0].iov_base); + return (0); +} + +static void +l9p_socket_drop_response(struct l9p_request *req __unused, + const struct iovec *iov, size_t niov __unused, void *arg) +{ + + L9P_LOG(L9P_DEBUG, "%p: drop buf=%p", arg, iov[0].iov_base); + free(iov[0].iov_base); +} + +static ssize_t +xread(int fd, void *buf, size_t count) +{ + size_t done = 0; + ssize_t ret; + + while (done < count) { + ret = read(fd, (char *)buf + done, count - done); + if (ret < 0) { + if (errno == EINTR) + continue; + + return (-1); + } + + if (ret == 0) + return ((ssize_t)done); + + done += (size_t)ret; + } + + return ((ssize_t)done); +} + +static ssize_t +xwrite(int fd, void *buf, size_t count) +{ + size_t done = 0; + ssize_t ret; + + while (done < count) { + ret = write(fd, (char *)buf + done, count - done); + if (ret < 0) { + if (errno == EINTR) + continue; + + return (-1); + } + + if (ret == 0) + return ((ssize_t)done); + + done += (size_t)ret; + } + + return ((ssize_t)done); +} diff --git a/usr/src/lib/lib9p/common/transport/socket.h 
b/usr/src/lib/lib9p/common/transport/socket.h new file mode 100644 index 0000000000..df950ffb7d --- /dev/null +++ b/usr/src/lib/lib9p/common/transport/socket.h @@ -0,0 +1,39 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef LIB9P_SOCKET_H +#define LIB9P_SOCKET_H + +#include <sys/types.h> +#include <sys/socket.h> +#include "../lib9p.h" + +int l9p_start_server(struct l9p_server *server, const char *host, + const char *port); +void l9p_socket_accept(struct l9p_server *server, int serv_fd); + +#endif /* LIB9P_SOCKET_H */ diff --git a/usr/src/lib/lib9p/common/utils.c b/usr/src/lib/lib9p/common/utils.c new file mode 100644 index 0000000000..10c9683c0a --- /dev/null +++ b/usr/src/lib/lib9p/common/utils.c @@ -0,0 +1,1363 @@ +/* + * Copyright 2016 Jakub Klama <jceel@FreeBSD.org> + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <inttypes.h> +#include <limits.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/uio.h> +#if defined(__FreeBSD__) +#include <sys/sbuf.h> +#else +#include "sbuf/sbuf.h" +#endif +#include "lib9p.h" +#include "fcall.h" +#include "linux_errno.h" + +#ifdef __illumos__ +#include <sys/sysmacros.h> +#include <grp.h> +#endif + +#ifdef __APPLE__ + #define GETGROUPS_GROUP_TYPE_IS_INT +#endif + +#define N(ary) (sizeof(ary) / sizeof(*ary)) + +/* See l9p_describe_bits() below. */ +struct descbits { + uint64_t db_mask; /* mask value */ + uint64_t db_match; /* match value */ + const char *db_name; /* name for matched value */ +}; + + +static bool l9p_describe_bits(const char *, uint64_t, const char *, + const struct descbits *, struct sbuf *); +static void l9p_describe_fid(const char *, uint32_t, struct sbuf *); +static void l9p_describe_mode(const char *, uint32_t, struct sbuf *); +static void l9p_describe_name(const char *, char *, struct sbuf *); +static void l9p_describe_perm(const char *, uint32_t, struct sbuf *); +static void l9p_describe_lperm(const char *, uint32_t, struct sbuf *); +static void l9p_describe_qid(const char *, struct l9p_qid *, struct sbuf *); +static void l9p_describe_l9stat(const char *, struct l9p_stat *, + enum l9p_version, struct sbuf *); +static void l9p_describe_statfs(const char *, struct l9p_statfs *, + struct sbuf *); +static void l9p_describe_time(struct sbuf *, const char *, uint64_t, uint64_t); +static void l9p_describe_readdir(struct sbuf *, struct l9p_f_io *); +static void l9p_describe_size(const char *, uint64_t, struct sbuf *); +static void l9p_describe_ugid(const char *, uint32_t, struct sbuf *); +static void l9p_describe_getattr_mask(uint64_t, struct sbuf *); +static void l9p_describe_unlinkat_flags(const char *, uint32_t, struct sbuf *); +static const char 
*lookup_linux_errno(uint32_t, char *, size_t); + +/* + * Using indexed initializers, we can have these occur in any order. + * Using adjacent-string concatenation ("T" #name, "R" #name), we + * get both Tfoo and Rfoo strings with one copy of the name. + * Alas, there is no stupid cpp trick to lowercase-ify, so we + * have to write each name twice. In which case we might as well + * make the second one a string in the first place and not bother + * with the stringizing. + * + * This table should have entries for each enum value in fcall.h. + */ +#define X(NAME, name) [L9P_T##NAME - L9P__FIRST] = "T" name, \ + [L9P_R##NAME - L9P__FIRST] = "R" name +static const char *ftype_names[] = { + X(VERSION, "version"), + X(AUTH, "auth"), + X(ATTACH, "attach"), + X(ERROR, "error"), + X(LERROR, "lerror"), + X(FLUSH, "flush"), + X(WALK, "walk"), + X(OPEN, "open"), + X(CREATE, "create"), + X(READ, "read"), + X(WRITE, "write"), + X(CLUNK, "clunk"), + X(REMOVE, "remove"), + X(STAT, "stat"), + X(WSTAT, "wstat"), + X(STATFS, "statfs"), + X(LOPEN, "lopen"), + X(LCREATE, "lcreate"), + X(SYMLINK, "symlink"), + X(MKNOD, "mknod"), + X(RENAME, "rename"), + X(READLINK, "readlink"), + X(GETATTR, "getattr"), + X(SETATTR, "setattr"), + X(XATTRWALK, "xattrwalk"), + X(XATTRCREATE, "xattrcreate"), + X(READDIR, "readdir"), + X(FSYNC, "fsync"), + X(LOCK, "lock"), + X(GETLOCK, "getlock"), + X(LINK, "link"), + X(MKDIR, "mkdir"), + X(RENAMEAT, "renameat"), + X(UNLINKAT, "unlinkat"), +}; +#undef X + +void +l9p_seek_iov(const struct iovec *iov1, size_t niov1, struct iovec *iov2, + size_t *niov2, size_t seek) +{ + size_t remainder = 0; + size_t left = seek; + size_t i, j; + + assert(niov1 <= L9P_MAX_IOV); + + for (i = 0; i < niov1; i++) { + size_t toseek = MIN(left, iov1[i].iov_len); + left -= toseek; + + if (toseek == iov1[i].iov_len) + continue; + + if (left == 0) { + remainder = toseek; + break; + } + } + + for (j = i; j < niov1; j++) { + iov2[j - i].iov_base = (char *)iov1[j].iov_base + remainder; + 
iov2[j - i].iov_len = iov1[j].iov_len - remainder; + remainder = 0; + } + + *niov2 = j - i; +} + +size_t +l9p_truncate_iov(struct iovec *iov, size_t niov, size_t length) +{ + size_t i, done = 0; + + for (i = 0; i < niov; i++) { + size_t toseek = MIN(length - done, iov[i].iov_len); + done += toseek; + + if (toseek < iov[i].iov_len) { + iov[i].iov_len = toseek; + return (i + 1); + } + } + + return (niov); +} + +/* + * This wrapper for getgrouplist() that calloc'ed memory, and + * papers over FreeBSD vs Mac differences in the getgrouplist() + * argument types. + * + * Note that this function guarantees that *either*: + * return value != NULL and *angroups has been set + * or: return value == NULL and *angroups is 0 + */ +gid_t * +l9p_getgrlist(const char *name, gid_t basegid, int *angroups) +{ +#ifdef GETGROUPS_GROUP_TYPE_IS_INT + int i, *int_groups; +#endif + gid_t *groups; + int ngroups; + + /* + * Todo, perhaps: while getgrouplist() returns -1, expand. + * For now just use NGROUPS_MAX. + */ + ngroups = NGROUPS_MAX; + groups = calloc((size_t)ngroups, sizeof(*groups)); +#ifdef GETGROUPS_GROUP_TYPE_IS_INT + int_groups = groups ? calloc((size_t)ngroups, sizeof(*int_groups)) : + NULL; + if (int_groups == NULL) { + free(groups); + groups = NULL; + } +#endif + if (groups == NULL) { + *angroups = 0; + return (NULL); + } +#ifdef GETGROUPS_GROUP_TYPE_IS_INT + if (getgrouplist(name, (int)basegid, int_groups, &ngroups) < 0) { + free(groups); + free(int_groups); + return (NULL); + } + for (i = 0; i < ngroups; i++) + groups[i] = (gid_t)int_groups[i]; + free(int_groups); +#else + if (getgrouplist(name, basegid, groups, &ngroups) < 0) { + free(groups); + return (NULL); + } +#endif + *angroups = ngroups; + return (groups); +} + +/* + * For the various debug describe ops: decode bits in a bit-field-y + * value. For example, we might produce: + * value=0x3c[FOO,BAR,QUUX,?0x20] + * when FOO is bit 0x10, BAR is 0x08, and QUUX is 0x04 (as defined + * by the table). 
This leaves 0x20 (bit 5) as a mystery, while bits + * 4, 3, and 2 were decoded. (Bits 0 and 1 were 0 on input hence + * were not attempted here.) + * + * For general use we take a uint64_t <value>. The bit description + * table <db> is an array of {mask, match, str} values ending with + * {0, 0, NULL}. + * + * If <str> is non-NULL we'll print it and the mask as well (if + * str is NULL we'll print neither). The mask is always printed in + * hex at the moment. See undec description too. + * + * For convenience, you can use a mask-and-match value, e.g., to + * decode a 2-bit field in bits 0 and 1 you can mask against 3 and + * match the values 0, 1, 2, and 3. To handle this, make sure that + * all masks-with-same-match are sequential. + * + * If there are any nonzero undecoded bits, print them after + * all the decode-able bits have been handled. + * + * The <oc> argument defines the open and close bracket characters, + * typically "[]", that surround the entire string. If NULL, no + * brackets are added, else oc[0] goes in the front and oc[1] at + * the end, after printing any <str><value> part. + * + * Returns true if it printed anything (other than the implied + * str-and-value, that is). + */ +static bool +l9p_describe_bits(const char *str, uint64_t value, const char *oc, + const struct descbits *db, struct sbuf *sb) +{ + const char *sep; + char bracketbuf[2] = ""; + bool printed = false; + + if (str != NULL) + sbuf_printf(sb, "%s0x%" PRIx64, str, value); + + if (oc != NULL) + bracketbuf[0] = oc[0]; + sep = bracketbuf; + for (; db->db_name != NULL; db++) { + if ((value & db->db_mask) == db->db_match) { + sbuf_printf(sb, "%s%s", sep, db->db_name); + sep = ","; + printed = true; + + /* + * Clear the field, and make sure we + * won't match a zero-valued field with + * this same mask. 
+ */ + value &= ~db->db_mask; + while (db[1].db_mask == db->db_mask && + db[1].db_name != NULL) + db++; + } + } + if (value != 0) { + sbuf_printf(sb, "%s?0x%" PRIx64, sep, value); + printed = true; + } + if (printed && oc != NULL) { + bracketbuf[0] = oc[1]; + sbuf_cat(sb, bracketbuf); + } + return (printed); +} + +/* + * Show file ID. + */ +static void +l9p_describe_fid(const char *str, uint32_t fid, struct sbuf *sb) +{ + + sbuf_printf(sb, "%s%" PRIu32, str, fid); +} + +/* + * Show user or group ID. + */ +static void +l9p_describe_ugid(const char *str, uint32_t ugid, struct sbuf *sb) +{ + + sbuf_printf(sb, "%s%" PRIu32, str, ugid); +} + +/* + * Show file mode (O_RDWR, O_RDONLY, etc). The argument is + * an l9p_omode, not a Linux flags mode. Linux flags are + * decoded with l9p_describe_lflags. + */ +static void +l9p_describe_mode(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_OACCMODE, L9P_OREAD, "OREAD" }, + { L9P_OACCMODE, L9P_OWRITE, "OWRITE" }, + { L9P_OACCMODE, L9P_ORDWR, "ORDWR" }, + { L9P_OACCMODE, L9P_OEXEC, "OEXEC" }, + + { L9P_OCEXEC, L9P_OCEXEC, "OCEXEC" }, + { L9P_ODIRECT, L9P_ODIRECT, "ODIRECT" }, + { L9P_ORCLOSE, L9P_ORCLOSE, "ORCLOSE" }, + { L9P_OTRUNC, L9P_OTRUNC, "OTRUNC" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(str, mode, "[]", bits, sb); +} + +/* + * Show Linux mode/flags. 
+ */ +static void +l9p_describe_lflags(const char *str, uint32_t flags, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_OACCMODE, L9P_OREAD, "O_READ" }, + { L9P_OACCMODE, L9P_OWRITE, "O_WRITE" }, + { L9P_OACCMODE, L9P_ORDWR, "O_RDWR" }, + { L9P_OACCMODE, L9P_OEXEC, "O_EXEC" }, + + { L9P_L_O_APPEND, L9P_L_O_APPEND, "O_APPEND" }, + { L9P_L_O_CLOEXEC, L9P_L_O_CLOEXEC, "O_CLOEXEC" }, + { L9P_L_O_CREAT, L9P_L_O_CREAT, "O_CREAT" }, + { L9P_L_O_DIRECT, L9P_L_O_DIRECT, "O_DIRECT" }, + { L9P_L_O_DIRECTORY, L9P_L_O_DIRECTORY, "O_DIRECTORY" }, + { L9P_L_O_DSYNC, L9P_L_O_DSYNC, "O_DSYNC" }, + { L9P_L_O_EXCL, L9P_L_O_EXCL, "O_EXCL" }, + { L9P_L_O_FASYNC, L9P_L_O_FASYNC, "O_FASYNC" }, + { L9P_L_O_LARGEFILE, L9P_L_O_LARGEFILE, "O_LARGEFILE" }, + { L9P_L_O_NOATIME, L9P_L_O_NOATIME, "O_NOATIME" }, + { L9P_L_O_NOCTTY, L9P_L_O_NOCTTY, "O_NOCTTY" }, + { L9P_L_O_NOFOLLOW, L9P_L_O_NOFOLLOW, "O_NOFOLLOW" }, + { L9P_L_O_NONBLOCK, L9P_L_O_NONBLOCK, "O_NONBLOCK" }, + { L9P_L_O_PATH, L9P_L_O_PATH, "O_PATH" }, + { L9P_L_O_SYNC, L9P_L_O_SYNC, "O_SYNC" }, + { L9P_L_O_TMPFILE, L9P_L_O_TMPFILE, "O_TMPFILE" }, + { L9P_L_O_TMPFILE, L9P_L_O_TMPFILE, "O_TMPFILE" }, + { L9P_L_O_TRUNC, L9P_L_O_TRUNC, "O_TRUNC" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(str, flags, "[]", bits, sb); +} + +/* + * Show file name or other similar, potentially-very-long string. + * Actual strings get quotes, a NULL name (if it occurs) gets + * <null> (no quotes), so you can tell the difference. 
+ */ +static void +l9p_describe_name(const char *str, char *name, struct sbuf *sb) +{ + size_t len; + + if (name == NULL) { + sbuf_printf(sb, "%s<null>", str); + return; + } + + len = strlen(name); + + if (len > 32) + sbuf_printf(sb, "%s\"%.*s...\"", str, 32 - 3, name); + else + sbuf_printf(sb, "%s\"%.*s\"", str, (int)len, name); +} + +#define STRMODE_SIZE 12 + +#ifdef __illumos__ +static void +strmode(mode_t mode, char *bp) +{ + char *const sbp = bp; + + /* + * The single caller does not pass in the file type as part of 'mode', + * and ignores the first character in the returned buffer anyway. + */ + *bp++ = '?'; + +#define ONE(_cmp, _ch) ((mode & (_cmp)) != 0) ? (_ch) : '-' + *bp++ = ONE(S_IRUSR, 'r'); + *bp++ = ONE(S_IWUSR, 'w'); + switch (mode & (S_ISUID|S_IXUSR)) { + case S_ISUID|S_IXUSR: + *bp++ = 's'; + break; + case S_ISUID: + *bp++ = 'S'; + break; + case S_IXUSR: + *bp++ = 'x'; + break; + case 0: + *bp++ = '-'; + } + + *bp++ = ONE(S_IRGRP, 'r'); + *bp++ = ONE(S_IWGRP, 'w'); + switch (mode & (S_ISGID|S_IXGRP|S_IFREG)) { + case S_ISGID|S_IXGRP: + *bp++ = 's'; + break; + case S_ISGID|S_IFREG: + *bp++ = 'L'; + break; + case S_ISGID: + *bp++ = 'S'; + break; + case S_IXGRP: + *bp++ = 'x'; + break; + default: + *bp++ = '-'; + } + + *bp++ = ONE(S_IROTH, 'r'); + *bp++ = ONE(S_IWOTH, 'w'); + switch (mode & (S_ISVTX|S_IXOTH)) { + case S_ISVTX|S_IXOTH: + *bp++ = 't'; + break; + case S_ISVTX: + *bp++ = 'T'; + break; + case S_IXOTH: + *bp++ = 'x'; + break; + default: + *bp++ = '-'; + } + + *bp++ = ' '; + *bp = '\0'; + + assert(bp - sbp <= STRMODE_SIZE); +#undef ONE +} +#endif /* __illumos__ */ + +/* + * Show permissions (rwx etc). Prints the value in hex only if + * the rwx bits do not cover the entire value. 
+ */ +static void +l9p_describe_perm(const char *str, uint32_t mode, struct sbuf *sb) +{ + char pbuf[STRMODE_SIZE]; + + strmode(mode & 0777, pbuf); + if ((mode & ~(uint32_t)0777) != 0) + sbuf_printf(sb, "%s0x%" PRIx32 "<%.9s>", str, mode, pbuf + 1); + else + sbuf_printf(sb, "%s<%.9s>", str, pbuf + 1); +} + +/* + * Show "extended" permissions: regular permissions, but also the + * various DM* extension bits from 9P2000.u. + */ +static void +l9p_describe_ext_perm(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9P_DMDIR, L9P_DMDIR, "DMDIR" }, + { L9P_DMAPPEND, L9P_DMAPPEND, "DMAPPEND" }, + { L9P_DMEXCL, L9P_DMEXCL, "DMEXCL" }, + { L9P_DMMOUNT, L9P_DMMOUNT, "DMMOUNT" }, + { L9P_DMAUTH, L9P_DMAUTH, "DMAUTH" }, + { L9P_DMTMP, L9P_DMTMP, "DMTMP" }, + { L9P_DMSYMLINK, L9P_DMSYMLINK, "DMSYMLINK" }, + { L9P_DMDEVICE, L9P_DMDEVICE, "DMDEVICE" }, + { L9P_DMNAMEDPIPE, L9P_DMNAMEDPIPE, "DMNAMEDPIPE" }, + { L9P_DMSOCKET, L9P_DMSOCKET, "DMSOCKET" }, + { L9P_DMSETUID, L9P_DMSETUID, "DMSETUID" }, + { L9P_DMSETGID, L9P_DMSETGID, "DMSETGID" }, + { 0, 0, NULL } + }; + bool need_sep; + + sbuf_printf(sb, "%s[", str); + need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, + bits, sb); + l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb); + sbuf_cat(sb, "]"); +} + +/* + * Show Linux-specific permissions: regular permissions, but also + * the S_IFMT field. 
+ */ +static void +l9p_describe_lperm(const char *str, uint32_t mode, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { S_IFMT, S_IFIFO, "S_IFIFO" }, + { S_IFMT, S_IFCHR, "S_IFCHR" }, + { S_IFMT, S_IFDIR, "S_IFDIR" }, + { S_IFMT, S_IFBLK, "S_IFBLK" }, + { S_IFMT, S_IFREG, "S_IFREG" }, + { S_IFMT, S_IFLNK, "S_IFLNK" }, + { S_IFMT, S_IFSOCK, "S_IFSOCK" }, +#ifdef __illumos__ + { S_IFMT, S_IFDOOR, "S_IFDOOR" }, + { S_IFMT, S_IFPORT, "S_IFPORT" }, +#endif + { 0, 0, NULL } + }; + bool need_sep; + + sbuf_printf(sb, "%s[", str); + need_sep = l9p_describe_bits(NULL, mode & ~(uint32_t)0777, NULL, + bits, sb); + l9p_describe_perm(need_sep ? "," : "", mode & 0777, sb); + sbuf_cat(sb, "]"); +} + +/* + * Show qid (<type, version, path> tuple). + */ +static void +l9p_describe_qid(const char *str, struct l9p_qid *qid, struct sbuf *sb) +{ + static const struct descbits bits[] = { + /* + * NB: L9P_QTFILE is 0, i.e., is implied by no + * other bits being set. We get this produced + * when we mask against 0xff and compare for + * L9P_QTFILE, but we must do it first so that + * we mask against the original (not-adjusted) + * value. + */ + { 0xff, L9P_QTFILE, "FILE" }, + { L9P_QTDIR, L9P_QTDIR, "DIR" }, + { L9P_QTAPPEND, L9P_QTAPPEND, "APPEND" }, + { L9P_QTEXCL, L9P_QTEXCL, "EXCL" }, + { L9P_QTMOUNT, L9P_QTMOUNT, "MOUNT" }, + { L9P_QTAUTH, L9P_QTAUTH, "AUTH" }, + { L9P_QTTMP, L9P_QTTMP, "TMP" }, + { L9P_QTSYMLINK, L9P_QTSYMLINK, "SYMLINK" }, + { 0, 0, NULL } + }; + + assert(qid != NULL); + + sbuf_cat(sb, str); + (void) l9p_describe_bits("<", qid->type, "[]", bits, sb); + sbuf_printf(sb, ",%" PRIu32 ",0x%016" PRIx64 ">", + qid->version, qid->path); +} + +/* + * Show size. + */ +static void +l9p_describe_size(const char *str, uint64_t size, struct sbuf *sb) +{ + + sbuf_printf(sb, "%s%" PRIu64, str, size); +} + +/* + * Show l9stat (including 9P2000.u extensions if appropriate). 
+ */ +static void +l9p_describe_l9stat(const char *str, struct l9p_stat *st, + enum l9p_version version, struct sbuf *sb) +{ + bool dotu = version >= L9P_2000U; + + assert(st != NULL); + + sbuf_printf(sb, "%stype=0x%04" PRIx32 " dev=0x%08" PRIx32, str, + st->type, st->dev); + l9p_describe_qid(" qid=", &st->qid, sb); + l9p_describe_ext_perm(" mode=", st->mode, sb); + if (st->atime != (uint32_t)-1) + sbuf_printf(sb, " atime=%" PRIu32, st->atime); + if (st->mtime != (uint32_t)-1) + sbuf_printf(sb, " mtime=%" PRIu32, st->mtime); + if (st->length != (uint64_t)-1) + sbuf_printf(sb, " length=%" PRIu64, st->length); + l9p_describe_name(" name=", st->name, sb); + /* + * It's pretty common to have NULL name+gid+muid. They're + * just noise if NULL *and* dot-u; decode only if non-null + * or not-dot-u. + */ + if (st->uid != NULL || !dotu) + l9p_describe_name(" uid=", st->uid, sb); + if (st->gid != NULL || !dotu) + l9p_describe_name(" gid=", st->gid, sb); + if (st->muid != NULL || !dotu) + l9p_describe_name(" muid=", st->muid, sb); + if (dotu) { + if (st->extension != NULL) + l9p_describe_name(" extension=", st->extension, sb); + sbuf_printf(sb, + " n_uid=%" PRIu32 " n_gid=%" PRIu32 " n_muid=%" PRIu32, + st->n_uid, st->n_gid, st->n_muid); + } +} + +static void +l9p_describe_statfs(const char *str, struct l9p_statfs *st, struct sbuf *sb) +{ + + assert(st != NULL); + + sbuf_printf(sb, "%stype=0x%04lx bsize=%lu blocks=%" PRIu64 + " bfree=%" PRIu64 " bavail=%" PRIu64 " files=%" PRIu64 + " ffree=%" PRIu64 " fsid=0x%" PRIx64 " namelen=%" PRIu32 ">", + str, (u_long)st->type, (u_long)st->bsize, st->blocks, + st->bfree, st->bavail, st->files, + st->ffree, st->fsid, st->namelen); +} + +/* + * Decode a <seconds,nsec> timestamp. + * + * Perhaps should use asctime_r. For now, raw values. 
+ */ +static void +l9p_describe_time(struct sbuf *sb, const char *s, uint64_t sec, uint64_t nsec) +{ + + sbuf_cat(sb, s); + if (nsec > 999999999) + sbuf_printf(sb, "%" PRIu64 ".<invalid nsec %" PRIu64 ">)", + sec, nsec); + else + sbuf_printf(sb, "%" PRIu64 ".%09" PRIu64, sec, nsec); +} + +/* + * Decode readdir data (.L format, variable length names). + */ +static void +l9p_describe_readdir(struct sbuf *sb, struct l9p_f_io *io) +{ + uint32_t count; +#ifdef notyet + int i; + struct l9p_message msg; + struct l9p_dirent de; +#endif + + if ((count = io->count) == 0) { + sbuf_printf(sb, " EOF (count=0)"); + return; + } + + /* + * Can't do this yet because we do not have the original + * req. + */ +#ifdef notyet + sbuf_printf(sb, " count=%" PRIu32 " [", count); + + l9p_init_msg(&msg, req, L9P_UNPACK); + for (i = 0; msg.lm_size < count; i++) { + if (l9p_pudirent(&msg, &de) < 0) { + sbuf_printf(sb, " bad count"); + break; + } + + sbuf_printf(sb, i ? ", " : " "); + l9p_describe_qid(" qid=", &de.qid, sb); + sbuf_printf(sb, " offset=%" PRIu64 " type=%d", + de.offset, de.type); + l9p_describe_name(" name=", de.name); + free(de.name); + } + sbuf_printf(sb, "]=%d dir entries", i); +#else /* notyet */ + sbuf_printf(sb, " count=%" PRIu32, count); +#endif +} + +/* + * Decode Tgetattr request_mask field. + */ +static void +l9p_describe_getattr_mask(uint64_t request_mask, struct sbuf *sb) +{ + static const struct descbits bits[] = { + /* + * Note: ALL and BASIC must occur first and second. + * This is a little dirty: it depends on the way the + * describe_bits code clears the values. If we + * match ALL, we clear all those bits and do not + * match BASIC; if we match BASIC, we clear all + * those bits and do not match individual bits. Thus + * if we have BASIC but not all the additional bits, + * we'll see, e.g., [BASIC,BTIME,GEN]; if we have + * all the additional bits too, we'll see [ALL]. 
+ * + * Since <undec> is true below, we'll also spot any + * bits added to the protocol since we made this table. + */ + { L9PL_GETATTR_ALL, L9PL_GETATTR_ALL, "ALL" }, + { L9PL_GETATTR_BASIC, L9PL_GETATTR_BASIC, "BASIC" }, + + /* individual bits in BASIC */ + { L9PL_GETATTR_MODE, L9PL_GETATTR_MODE, "MODE" }, + { L9PL_GETATTR_NLINK, L9PL_GETATTR_NLINK, "NLINK" }, + { L9PL_GETATTR_UID, L9PL_GETATTR_UID, "UID" }, + { L9PL_GETATTR_GID, L9PL_GETATTR_GID, "GID" }, + { L9PL_GETATTR_RDEV, L9PL_GETATTR_RDEV, "RDEV" }, + { L9PL_GETATTR_ATIME, L9PL_GETATTR_ATIME, "ATIME" }, + { L9PL_GETATTR_MTIME, L9PL_GETATTR_MTIME, "MTIME" }, + { L9PL_GETATTR_CTIME, L9PL_GETATTR_CTIME, "CTIME" }, + { L9PL_GETATTR_INO, L9PL_GETATTR_INO, "INO" }, + { L9PL_GETATTR_SIZE, L9PL_GETATTR_SIZE, "SIZE" }, + { L9PL_GETATTR_BLOCKS, L9PL_GETATTR_BLOCKS, "BLOCKS" }, + + /* additional bits in ALL */ + { L9PL_GETATTR_BTIME, L9PL_GETATTR_BTIME, "BTIME" }, + { L9PL_GETATTR_GEN, L9PL_GETATTR_GEN, "GEN" }, + { L9PL_GETATTR_DATA_VERSION, L9PL_GETATTR_DATA_VERSION, + "DATA_VERSION" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(" request_mask=", request_mask, "[]", bits, + sb); +} + +/* + * Decode Tunlinkat flags. + */ +static void +l9p_describe_unlinkat_flags(const char *str, uint32_t flags, struct sbuf *sb) +{ + static const struct descbits bits[] = { + { L9PL_AT_REMOVEDIR, L9PL_AT_REMOVEDIR, "AT_REMOVEDIR" }, + { 0, 0, NULL } + }; + + (void) l9p_describe_bits(str, flags, "[]", bits, sb); +} + +static const char * +lookup_linux_errno(uint32_t linux_errno, char *buf, size_t len) +{ + /* + * Error numbers in the "base" range (1..ERANGE) are common + * across BSD, MacOS, Linux, and Plan 9. + * + * Error numbers outside that range require translation. 
+ */ + const char *const table[] = { +#define X0(name) [name] = name ## _STR +#define X(name) [name] = name ## _STR + X(LINUX_EAGAIN), + X(LINUX_EDEADLK), + X(LINUX_ENAMETOOLONG), + X(LINUX_ENOLCK), + X(LINUX_ENOSYS), + X(LINUX_ENOTEMPTY), + X(LINUX_ELOOP), + X(LINUX_ENOMSG), + X(LINUX_EIDRM), + X(LINUX_ECHRNG), + X(LINUX_EL2NSYNC), + X(LINUX_EL3HLT), + X(LINUX_EL3RST), + X(LINUX_ELNRNG), + X(LINUX_EUNATCH), + X(LINUX_ENOCSI), + X(LINUX_EL2HLT), + X(LINUX_EBADE), + X(LINUX_EBADR), + X(LINUX_EXFULL), + X(LINUX_ENOANO), + X(LINUX_EBADRQC), + X(LINUX_EBADSLT), + X(LINUX_EBFONT), + X(LINUX_ENOSTR), + X(LINUX_ENODATA), + X(LINUX_ETIME), + X(LINUX_ENOSR), + X(LINUX_ENONET), + X(LINUX_ENOPKG), + X(LINUX_EREMOTE), + X(LINUX_ENOLINK), + X(LINUX_EADV), + X(LINUX_ESRMNT), + X(LINUX_ECOMM), + X(LINUX_EPROTO), + X(LINUX_EMULTIHOP), + X(LINUX_EDOTDOT), + X(LINUX_EBADMSG), + X(LINUX_EOVERFLOW), + X(LINUX_ENOTUNIQ), + X(LINUX_EBADFD), + X(LINUX_EREMCHG), + X(LINUX_ELIBACC), + X(LINUX_ELIBBAD), + X(LINUX_ELIBSCN), + X(LINUX_ELIBMAX), + X(LINUX_ELIBEXEC), + X(LINUX_EILSEQ), + X(LINUX_ERESTART), + X(LINUX_ESTRPIPE), + X(LINUX_EUSERS), + X(LINUX_ENOTSOCK), + X(LINUX_EDESTADDRREQ), + X(LINUX_EMSGSIZE), + X(LINUX_EPROTOTYPE), + X(LINUX_ENOPROTOOPT), + X(LINUX_EPROTONOSUPPORT), + X(LINUX_ESOCKTNOSUPPORT), + X(LINUX_EOPNOTSUPP), + X(LINUX_EPFNOSUPPORT), + X(LINUX_EAFNOSUPPORT), + X(LINUX_EADDRINUSE), + X(LINUX_EADDRNOTAVAIL), + X(LINUX_ENETDOWN), + X(LINUX_ENETUNREACH), + X(LINUX_ENETRESET), + X(LINUX_ECONNABORTED), + X(LINUX_ECONNRESET), + X(LINUX_ENOBUFS), + X(LINUX_EISCONN), + X(LINUX_ENOTCONN), + X(LINUX_ESHUTDOWN), + X(LINUX_ETOOMANYREFS), + X(LINUX_ETIMEDOUT), + X(LINUX_ECONNREFUSED), + X(LINUX_EHOSTDOWN), + X(LINUX_EHOSTUNREACH), + X(LINUX_EALREADY), + X(LINUX_EINPROGRESS), + X(LINUX_ESTALE), + X(LINUX_EUCLEAN), + X(LINUX_ENOTNAM), + X(LINUX_ENAVAIL), + X(LINUX_EISNAM), + X(LINUX_EREMOTEIO), + X(LINUX_EDQUOT), + X(LINUX_ENOMEDIUM), + X(LINUX_EMEDIUMTYPE), + X(LINUX_ECANCELED), + 
X(LINUX_ENOKEY), + X(LINUX_EKEYEXPIRED), + X(LINUX_EKEYREVOKED), + X(LINUX_EKEYREJECTED), + X(LINUX_EOWNERDEAD), + X(LINUX_ENOTRECOVERABLE), + X(LINUX_ERFKILL), + X(LINUX_EHWPOISON), +#undef X0 +#undef X + }; + if ((size_t)linux_errno < N(table) && table[linux_errno] != NULL) + return (table[linux_errno]); + if (linux_errno <= ERANGE) + return (strerror((int)linux_errno)); + (void) snprintf(buf, len, "Unknown error %d", linux_errno); + return (buf); +} + +void +l9p_describe_fcall(union l9p_fcall *fcall, enum l9p_version version, + struct sbuf *sb) +{ + uint64_t mask; + uint8_t type; + int i; + + assert(fcall != NULL); + assert(sb != NULL); + assert(version <= L9P_2000L); + + type = fcall->hdr.type; + + if (type < L9P__FIRST || type >= L9P__LAST_PLUS_1 || + ftype_names[type - L9P__FIRST] == NULL) { + const char *rr; + + /* + * Can't say for sure that this distinction -- + * an even number is a request, an odd one is + * a response -- will be maintained forever, + * but it's good enough for now. + */ + rr = (type & 1) != 0 ? 
"response" : "request"; + sbuf_printf(sb, "<unknown %s %d> tag=%d", rr, type, + fcall->hdr.tag); + } else { + sbuf_printf(sb, "%s tag=%d", ftype_names[type - L9P__FIRST], + fcall->hdr.tag); + } + + switch (type) { + case L9P_TVERSION: + case L9P_RVERSION: + sbuf_printf(sb, " version=\"%s\" msize=%d", fcall->version.version, + fcall->version.msize); + return; + + case L9P_TAUTH: + l9p_describe_fid(" afid=", fcall->hdr.fid, sb); + sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"", + fcall->tauth.uname, fcall->tauth.aname); + return; + + case L9P_TATTACH: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_fid(" afid=", fcall->tattach.afid, sb); + sbuf_printf(sb, " uname=\"%s\" aname=\"%s\"", + fcall->tattach.uname, fcall->tattach.aname); + if (version >= L9P_2000U) + sbuf_printf(sb, " n_uname=%d", fcall->tattach.n_uname); + return; + + case L9P_RATTACH: + l9p_describe_qid(" ", &fcall->rattach.qid, sb); + return; + + case L9P_RERROR: + sbuf_printf(sb, " ename=\"%s\" errnum=%d", fcall->error.ename, + fcall->error.errnum); + return; + + case L9P_RLERROR: { + char unknown[50]; + + sbuf_printf(sb, " errnum=%d (%s)", fcall->error.errnum, + lookup_linux_errno(fcall->error.errnum, + unknown, sizeof(unknown))); + return; + } + + case L9P_TFLUSH: + sbuf_printf(sb, " oldtag=%d", fcall->tflush.oldtag); + return; + + case L9P_RFLUSH: + return; + + case L9P_TWALK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_fid(" newfid=", fcall->twalk.newfid, sb); + if (fcall->twalk.nwname) { + sbuf_cat(sb, " wname=\""); + for (i = 0; i < fcall->twalk.nwname; i++) + sbuf_printf(sb, "%s%s", i == 0 ? "" : "/", + fcall->twalk.wname[i]); + sbuf_cat(sb, "\""); + } + return; + + case L9P_RWALK: + sbuf_printf(sb, " wqid=["); + for (i = 0; i < fcall->rwalk.nwqid; i++) + l9p_describe_qid(i == 0 ? 
"" : ",", + &fcall->rwalk.wqid[i], sb); + sbuf_cat(sb, "]"); + return; + + case L9P_TOPEN: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_mode(" mode=", fcall->tcreate.mode, sb); + return; + + case L9P_ROPEN: + l9p_describe_qid(" qid=", &fcall->ropen.qid, sb); + sbuf_printf(sb, " iounit=%d", fcall->ropen.iounit); + return; + + case L9P_TCREATE: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tcreate.name, sb); + l9p_describe_ext_perm(" perm=", fcall->tcreate.perm, sb); + l9p_describe_mode(" mode=", fcall->tcreate.mode, sb); + if (version >= L9P_2000U && fcall->tcreate.extension != NULL) + l9p_describe_name(" extension=", + fcall->tcreate.extension, sb); + return; + + case L9P_RCREATE: + l9p_describe_qid(" qid=", &fcall->rcreate.qid, sb); + sbuf_printf(sb, " iounit=%d", fcall->rcreate.iounit); + return; + + case L9P_TREAD: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32, + fcall->io.offset, fcall->io.count); + return; + + case L9P_RREAD: + case L9P_RWRITE: + sbuf_printf(sb, " count=%" PRIu32, fcall->io.count); + return; + + case L9P_TWRITE: + case L9P_TREADDIR: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + sbuf_printf(sb, " offset=%" PRIu64 " count=%" PRIu32, + fcall->io.offset, fcall->io.count); + return; + + case L9P_TCLUNK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case L9P_RCLUNK: + return; + + case L9P_TREMOVE: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case L9P_RREMOVE: + return; + + case L9P_TSTAT: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case L9P_RSTAT: + l9p_describe_l9stat(" ", &fcall->rstat.stat, version, sb); + return; + + case L9P_TWSTAT: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_l9stat(" ", &fcall->twstat.stat, version, sb); + return; + + case L9P_RWSTAT: + return; + + case L9P_TSTATFS: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case 
L9P_RSTATFS: + l9p_describe_statfs(" ", &fcall->rstatfs.statfs, sb); + return; + + case L9P_TLOPEN: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb); + return; + + case L9P_RLOPEN: + l9p_describe_qid(" qid=", &fcall->rlopen.qid, sb); + sbuf_printf(sb, " iounit=%d", fcall->rlopen.iounit); + return; + + case L9P_TLCREATE: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tlcreate.name, sb); + /* confusing: "flags" is open-mode, "mode" is permissions */ + l9p_describe_lflags(" flags=", fcall->tlcreate.flags, sb); + /* TLCREATE mode/permissions have S_IFREG (0x8000) set */ + l9p_describe_lperm(" mode=", fcall->tlcreate.mode, sb); + l9p_describe_ugid(" gid=", fcall->tlcreate.gid, sb); + return; + + case L9P_RLCREATE: + l9p_describe_qid(" qid=", &fcall->rlcreate.qid, sb); + sbuf_printf(sb, " iounit=%d", fcall->rlcreate.iounit); + return; + + case L9P_TSYMLINK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tsymlink.name, sb); + l9p_describe_name(" symtgt=", fcall->tsymlink.symtgt, sb); + l9p_describe_ugid(" gid=", fcall->tsymlink.gid, sb); + return; + + case L9P_RSYMLINK: + l9p_describe_qid(" qid=", &fcall->ropen.qid, sb); + return; + + case L9P_TMKNOD: + l9p_describe_fid(" dfid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tmknod.name, sb); + /* + * TMKNOD mode/permissions have S_IFBLK/S_IFCHR/S_IFIFO + * bits. The major and minor values are only meaningful + * for S_IFBLK and S_IFCHR, but just decode always here. 
+ */ + l9p_describe_lperm(" mode=", fcall->tmknod.mode, sb); + sbuf_printf(sb, " major=%u minor=%u", + fcall->tmknod.major, fcall->tmknod.minor); + l9p_describe_ugid(" gid=", fcall->tmknod.gid, sb); + return; + + case L9P_RMKNOD: + l9p_describe_qid(" qid=", &fcall->rmknod.qid, sb); + return; + + case L9P_TRENAME: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_fid(" dfid=", fcall->trename.dfid, sb); + l9p_describe_name(" name=", fcall->trename.name, sb); + return; + + case L9P_RRENAME: + return; + + case L9P_TREADLINK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case L9P_RREADLINK: + l9p_describe_name(" target=", fcall->rreadlink.target, sb); + return; + + case L9P_TGETATTR: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_getattr_mask(fcall->tgetattr.request_mask, sb); + return; + + case L9P_RGETATTR: + /* Don't need to decode bits: they're implied by the output */ + mask = fcall->rgetattr.valid; + sbuf_printf(sb, " valid=0x%016" PRIx64, mask); + l9p_describe_qid(" qid=", &fcall->rgetattr.qid, sb); + if (mask & L9PL_GETATTR_MODE) + l9p_describe_lperm(" mode=", fcall->rgetattr.mode, sb); + if (mask & L9PL_GETATTR_UID) + l9p_describe_ugid(" uid=", fcall->rgetattr.uid, sb); + if (mask & L9PL_GETATTR_GID) + l9p_describe_ugid(" gid=", fcall->rgetattr.gid, sb); + if (mask & L9PL_GETATTR_NLINK) + sbuf_printf(sb, " nlink=%" PRIu64, + fcall->rgetattr.nlink); + if (mask & L9PL_GETATTR_RDEV) + sbuf_printf(sb, " rdev=0x%" PRIx64, + fcall->rgetattr.rdev); + if (mask & L9PL_GETATTR_SIZE) + l9p_describe_size(" size=", fcall->rgetattr.size, sb); + if (mask & L9PL_GETATTR_BLOCKS) + sbuf_printf(sb, " blksize=%" PRIu64 " blocks=%" PRIu64, + fcall->rgetattr.blksize, fcall->rgetattr.blocks); + if (mask & L9PL_GETATTR_ATIME) + l9p_describe_time(sb, " atime=", + fcall->rgetattr.atime_sec, + fcall->rgetattr.atime_nsec); + if (mask & L9PL_GETATTR_MTIME) + l9p_describe_time(sb, " mtime=", + fcall->rgetattr.mtime_sec, + 
fcall->rgetattr.mtime_nsec); + if (mask & L9PL_GETATTR_CTIME) + l9p_describe_time(sb, " ctime=", + fcall->rgetattr.ctime_sec, + fcall->rgetattr.ctime_nsec); + if (mask & L9PL_GETATTR_BTIME) + l9p_describe_time(sb, " btime=", + fcall->rgetattr.btime_sec, + fcall->rgetattr.btime_nsec); + if (mask & L9PL_GETATTR_GEN) + sbuf_printf(sb, " gen=0x%" PRIx64, fcall->rgetattr.gen); + if (mask & L9PL_GETATTR_DATA_VERSION) + sbuf_printf(sb, " data_version=0x%" PRIx64, + fcall->rgetattr.data_version); + return; + + case L9P_TSETATTR: + /* As with RGETATTR, we'll imply decode via output. */ + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + mask = fcall->tsetattr.valid; + /* NB: tsetattr valid mask is only 32 bits, hence %08x */ + sbuf_printf(sb, " valid=0x%08" PRIx64, mask); + if (mask & L9PL_SETATTR_MODE) + l9p_describe_lperm(" mode=", fcall->tsetattr.mode, sb); + if (mask & L9PL_SETATTR_UID) + l9p_describe_ugid(" uid=", fcall->tsetattr.uid, sb); + if (mask & L9PL_SETATTR_GID) + l9p_describe_ugid(" uid=", fcall->tsetattr.gid, sb); + if (mask & L9PL_SETATTR_SIZE) + l9p_describe_size(" size=", fcall->tsetattr.size, sb); + if (mask & L9PL_SETATTR_ATIME) { + if (mask & L9PL_SETATTR_ATIME_SET) + l9p_describe_time(sb, " atime=", + fcall->tsetattr.atime_sec, + fcall->tsetattr.atime_nsec); + else + sbuf_cat(sb, " atime=now"); + } + if (mask & L9PL_SETATTR_MTIME) { + if (mask & L9PL_SETATTR_MTIME_SET) + l9p_describe_time(sb, " mtime=", + fcall->tsetattr.mtime_sec, + fcall->tsetattr.mtime_nsec); + else + sbuf_cat(sb, " mtime=now"); + } + if (mask & L9PL_SETATTR_CTIME) + sbuf_cat(sb, " ctime=now"); + return; + + case L9P_RSETATTR: + return; + + case L9P_TXATTRWALK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_fid(" newfid=", fcall->txattrwalk.newfid, sb); + l9p_describe_name(" name=", fcall->txattrwalk.name, sb); + return; + + case L9P_RXATTRWALK: + l9p_describe_size(" size=", fcall->rxattrwalk.size, sb); + return; + + case L9P_TXATTRCREATE: + l9p_describe_fid(" fid=", 
fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->txattrcreate.name, sb); + l9p_describe_size(" size=", fcall->txattrcreate.attr_size, sb); + sbuf_printf(sb, " flags=%" PRIu32, fcall->txattrcreate.flags); + return; + + case L9P_RXATTRCREATE: + return; + + case L9P_RREADDIR: + l9p_describe_readdir(sb, &fcall->io); + return; + + case L9P_TFSYNC: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + return; + + case L9P_RFSYNC: + return; + + case L9P_TLOCK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + /* decode better later */ + sbuf_printf(sb, " type=%d flags=0x%" PRIx32 + " start=%" PRIu64 " length=%" PRIu64 + " proc_id=0x%" PRIx32 " client_id=\"%s\"", + fcall->tlock.type, fcall->tlock.flags, + fcall->tlock.start, fcall->tlock.length, + fcall->tlock.proc_id, fcall->tlock.client_id); + return; + + case L9P_RLOCK: + sbuf_printf(sb, " status=%d", fcall->rlock.status); + return; + + case L9P_TGETLOCK: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + /* FALLTHROUGH */ + + case L9P_RGETLOCK: + /* decode better later */ + sbuf_printf(sb, " type=%d " + " start=%" PRIu64 " length=%" PRIu64 + " proc_id=0x%" PRIx32 " client_id=\"%s\"", + fcall->getlock.type, + fcall->getlock.start, fcall->getlock.length, + fcall->getlock.proc_id, fcall->getlock.client_id); + return; + + case L9P_TLINK: + l9p_describe_fid(" dfid=", fcall->tlink.dfid, sb); + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tlink.name, sb); + return; + + case L9P_RLINK: + return; + + case L9P_TMKDIR: + l9p_describe_fid(" fid=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tmkdir.name, sb); + /* TMKDIR mode/permissions have S_IFDIR set */ + l9p_describe_lperm(" mode=", fcall->tmkdir.mode, sb); + l9p_describe_ugid(" gid=", fcall->tmkdir.gid, sb); + return; + + case L9P_RMKDIR: + l9p_describe_qid(" qid=", &fcall->rmkdir.qid, sb); + return; + + case L9P_TRENAMEAT: + l9p_describe_fid(" olddirfid=", fcall->hdr.fid, sb); + l9p_describe_name(" oldname=", 
fcall->trenameat.oldname, + sb); + l9p_describe_fid(" newdirfid=", fcall->trenameat.newdirfid, sb); + l9p_describe_name(" newname=", fcall->trenameat.newname, + sb); + return; + + case L9P_RRENAMEAT: + return; + + case L9P_TUNLINKAT: + l9p_describe_fid(" dirfd=", fcall->hdr.fid, sb); + l9p_describe_name(" name=", fcall->tunlinkat.name, sb); + l9p_describe_unlinkat_flags(" flags=", + fcall->tunlinkat.flags, sb); + return; + + case L9P_RUNLINKAT: + return; + + default: + sbuf_printf(sb, " <missing case in %s()>", __func__); + } +} diff --git a/usr/src/lib/lib9p/mapfile-vers b/usr/src/lib/lib9p/mapfile-vers new file mode 100644 index 0000000000..9bf38cc847 --- /dev/null +++ b/usr/src/lib/lib9p/mapfile-vers @@ -0,0 +1,58 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2021 OmniOS Community Edition (OmniOSce) Association. +# + +# +# MAPFILE HEADER START +# +# WARNING: STOP NOW. DO NOT MODIFY THIS FILE. +# Object versioning must comply with the rules detailed in +# +# usr/src/lib/README.mapfiles +# +# You should not be making modifications here until you've read the most current +# copy of that file. If you need help, contact a gatekeeper for guidance. 
+# +# MAPFILE HEADER END +# + +$mapfile_version 2 + +SYMBOL_VERSION ILLUMOSprivate { + global: + l9p_backend_fs_init; + l9p_connection_alloc_fid; + l9p_connection_close; + l9p_connection_free; + l9p_connection_init; + l9p_connection_recv; + l9p_connection_remove_fid; + l9p_describe_fcall; + l9p_dispatch_request; + l9p_freefcall; + l9p_freestat; + l9p_getgrlist; + l9p_init_msg; + l9p_pack_stat; + l9p_pudirent; + l9p_pufcall; + l9p_pustat; + l9p_respond; + l9p_seek_iov; + l9p_server_init; + l9p_sizeof_stat; + l9p_truncate_iov; + local: + *; +}; diff --git a/usr/src/man/man1m/bhyve.1m b/usr/src/man/man1m/bhyve.1m index a6c4637538..cab588665e 100644 --- a/usr/src/man/man1m/bhyve.1m +++ b/usr/src/man/man1m/bhyve.1m @@ -24,7 +24,7 @@ .\" .\" Portions Copyright 2021 OmniOS Community Edition (OmniOSce) Association. .\" -.Dd March 18, 2021 +.Dd April 20, 2021 .Dt BHYVE 1M .Os .Sh NAME @@ -263,6 +263,8 @@ Accelerated Virtio network interface. Legacy Virtio network interface. .It Li virtio-blk Virtio block storage interface. +.It Li virtio-9p +Virtio 9p (VirtFS) interface. .It Li virtio-rnd Virtio random number generator interface. .It Li virtio-console @@ -390,6 +392,24 @@ Disable emulation of guest trim requests via requests. .El .Pp +9P devices: +.Bl -tag -width 10n +.It Xo +.Sm off +.Cm sharename Sy = Pa /path/to/share +.Op Cm \&, Ar 9p-device-options +.Sm on +.Xc +.El +.Pp +The +.Ar 9p-device-options +are: +.Bl -tag -width 10n +.It Cm ro +Expose the share in read-only mode. +.El +.Pp TTY devices: .Bl -tag -width 10n .It Cm stdio diff --git a/usr/src/man/man4/bhyve_config.4 b/usr/src/man/man4/bhyve_config.4 index 23e1e33c5a..668b363115 100644 --- a/usr/src/man/man4/bhyve_config.4 +++ b/usr/src/man/man4/bhyve_config.4 @@ -25,7 +25,7 @@ .\" .\" Portions Copyright 2021 OmniOS Community Edition (OmniOSce) Association. .\" -.Dd May 6, 2021 +.Dd May 7, 2021 .Dt BHYVE_CONFIG 4 .Os .Sh NAME @@ -217,6 +217,8 @@ NVM Express (NVMe) controller. PCI pass-through device. 
.It Li uart PCI 16550 serial device. +.It Li virtio-9p +VirtIO 9p (VirtFS) interface. .It Li virtio-blk VirtIO block storage interface. .It Li virtio-console @@ -474,6 +476,17 @@ where .Ar N is the device number. .El +.Ss VirtIO 9p Settings +Each VirtIO 9p device exposes a single filesystem from a host path. +.Bl -column "sharename" "Format" "Default" +.It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description +.It Va sharename Ta string Ta Ta +The share name exposed to the guest. +.It Va path Ta path Ta Ta +The path of a directory on the host to export to the guest. +.It Va ro Ta bool Ta false Ta +If true, the guest filesystem is read-only. +.El .Ss VirtIO Console Device Settings Each VirtIO Console device contains one or more console ports. Each port stores its settings in a node named diff --git a/usr/src/pkg/manifests/system-library-bhyve.mf b/usr/src/pkg/manifests/system-library-bhyve.mf index f425c83034..c29a0ab1cf 100644 --- a/usr/src/pkg/manifests/system-library-bhyve.mf +++ b/usr/src/pkg/manifests/system-library-bhyve.mf @@ -14,7 +14,7 @@ # # -# Copyright 2019 OmniOS Community Edition (OmniOSce) Association. +# Copyright 2021 OmniOS Community Edition (OmniOSce) Association. # set name=pkg.fmri value=pkg:/system/library/bhyve@$(PKGVERS) @@ -30,8 +30,10 @@ dir path=usr/lib group=bin dir path=usr/lib/$(ARCH64) group=bin file path=lib/$(ARCH64)/libvmm.so.1 file path=lib/$(ARCH64)/libvmmapi.so.1 +file path=usr/lib/$(ARCH64)/lib9p.so.1 file path=usr/lib/$(ARCH64)/libppt.so.1 file path=usr/lib/libppt.so.1 license lic_CDDL license=lic_CDDL +license usr/src/lib/lib9p/COPYRIGHT license=usr/src/lib/lib9p/COPYRIGHT license usr/src/lib/libvmmapi/THIRDPARTYLICENSE \ license=usr/src/lib/libvmmapi/THIRDPARTYLICENSE |