diff options
Diffstat (limited to 'usr/src/cmd/pcieadm/pcieadm.c')
-rw-r--r-- | usr/src/cmd/pcieadm/pcieadm.c | 624 |
1 files changed, 624 insertions, 0 deletions
diff --git a/usr/src/cmd/pcieadm/pcieadm.c b/usr/src/cmd/pcieadm/pcieadm.c new file mode 100644 index 0000000000..edcad6e4d8 --- /dev/null +++ b/usr/src/cmd/pcieadm/pcieadm.c @@ -0,0 +1,624 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2021 Oxide Computer Company + */ + +/* + * PCIe shenanigans + * + * Currently this implements several different views at seeing into PCIe devices + * and is designed to (hopefully) replace pcitool and be a vector for new system + * functionality such as dealing with multicast filtering, ACS, etc. + * + * While most subcommands have their own implementations, there are a couple of + * things that are worth bearing in mind: + * + * 1) Where possible, prefer the use of libofmt. In particular, having good, + * parsable output is important. New subcommands should strive to meet that. + * + * 2) Because we're often processing binary data (and it's good hygiene), + * subcommands should make sure to drop privileges as early as they can by + * calling pcieadm_init_privs(). More on privileges below. + * + * Privilege Management + * -------------------- + * + * In an attempt to minimize privilege exposure, but to allow subcommands + * flexibility when required (e.g. show-cfgspace needs full privs to read from + * the kernel), we have two privilege sets that we maintain. One which is the + * minimial privs, which basically is a set that has stripped everything. This + * is 'pia_priv_min'. The second is one that allows a subcommand to add in + * privileges that it requires which will be left in the permitted set. These + * are in 'pia_priv_eff'. It's important to know that this set is always + * intersected with what the user actually has, so this is not meant to be a way + * for a caller to get more privileges than they already have. + * + * A subcommand is expected to call pcieadm_init_privs() once they have + * processed enough arguments that they can set an upper bound on privileges. + * It's worth noting that a subcommand will be executed in an already minimial + * environment; however, we will have already set up a libdevinfo handle for + * them, which should make the need to do much more not so bad. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <err.h> +#include <libdevinfo.h> +#include <strings.h> +#include <sys/stat.h> +#include <sys/pci_tools.h> +#include <sys/pci.h> +#include <sys/types.h> +#include <fcntl.h> +#include <sys/debug.h> +#include <upanic.h> +#include <libgen.h> + +#include "pcieadm.h" + +pcieadm_t pcieadm; +const char *pcieadm_progname; + +void +pcieadm_init_privs(pcieadm_t *pcip) +{ + static const char *msg = "attempted to re-initialize privileges"; + if (pcip->pia_priv_init == NULL) { + upanic(msg, strlen(msg)); + } + + priv_intersect(pcip->pia_priv_init, pcip->pia_priv_eff); + + if (setppriv(PRIV_SET, PRIV_PERMITTED, pcieadm.pia_priv_eff) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + if (setppriv(PRIV_SET, PRIV_LIMIT, pcieadm.pia_priv_eff) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + priv_freeset(pcip->pia_priv_init); + pcip->pia_priv_init = NULL; +} + +void +pcieadm_indent(void) +{ + pcieadm.pia_indent += 2; +} + +void +pcieadm_deindent(void) +{ + VERIFY3U(pcieadm.pia_indent, >, 0); + pcieadm.pia_indent -= 2; +} + +void +pcieadm_print(const char *fmt, ...) +{ + va_list ap; + + if (pcieadm.pia_indent > 0) { + (void) printf("%*s", pcieadm.pia_indent, ""); + } + + va_start(ap, fmt); + (void) vprintf(fmt, ap); + va_end(ap); +} + +void +pcieadm_ofmt_errx(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + verrx(EXIT_FAILURE, fmt, ap); +} + +boolean_t +pcieadm_di_node_is_pci(di_node_t node) +{ + const char *name; + + name = di_node_name(node); + return (strncmp("pci", name, 3) == 0); +} + +static int +pcieadm_di_walk_cb(di_node_t node, void *arg) +{ + pcieadm_di_walk_t *walk = arg; + + if (!pcieadm_di_node_is_pci(node)) { + return (DI_WALK_CONTINUE); + } + + /* + * We create synthetic nodes for the root of PCIe tree basically + * functions as all the resources available for one or more bridges. + * When we encounter that top-level node skip it. + */ + if (strcmp("pci", di_node_name(node)) == 0) { + return (DI_WALK_CONTINUE); + } + + return (walk->pdw_func(node, walk->pdw_arg)); +} + +void +pcieadm_di_walk(pcieadm_t *pcip, pcieadm_di_walk_t *arg) +{ + (void) di_walk_node(pcip->pia_root, DI_WALK_CLDFIRST, arg, + pcieadm_di_walk_cb); +} + +/* + * Attempt to find the nexus that corresponds to this device. To do this, we + * walk up and walk the minors until we find a "reg" minor. + */ +void +pcieadm_find_nexus(pcieadm_t *pia) +{ + di_node_t cur; + + for (cur = di_parent_node(pia->pia_devi); cur != DI_NODE_NIL; + cur = di_parent_node(cur)) { + di_minor_t minor = DI_MINOR_NIL; + + while ((minor = di_minor_next(cur, minor)) != DI_MINOR_NIL) { + if (di_minor_spectype(minor) == S_IFCHR && + strcmp(di_minor_name(minor), "reg") == 0) { + pia->pia_nexus = cur; + return; + } + } + } +} + +static int +pcieadm_find_dip_cb(di_node_t node, void *arg) +{ + char *path = NULL, *driver; + char dinst[128], bdf[128], altbdf[128]; + int inst, nprop, *regs; + pcieadm_t *pia = arg; + + path = di_devfs_path(node); + if (path == NULL) { + err(EXIT_FAILURE, "failed to construct devfs path for node: " + "%s (%s)", di_node_name(node)); + } + + driver = di_driver_name(node); + inst = di_instance(node); + if (driver != NULL && inst != -1) { + (void) snprintf(dinst, sizeof (dinst), "%s%d", driver, inst); + } + + nprop = di_prop_lookup_ints(DDI_DEV_T_ANY, node, "reg", ®s); + if (nprop <= 0) { + errx(EXIT_FAILURE, "failed to lookup regs array for %s", + path); + } + (void) snprintf(bdf, sizeof (bdf), "%x/%x/%x", PCI_REG_BUS_G(regs[0]), + PCI_REG_DEV_G(regs[0]), PCI_REG_FUNC_G(regs[0])); + (void) snprintf(bdf, sizeof (bdf), "%02x/%02x/%02x", + PCI_REG_BUS_G(regs[0]), PCI_REG_DEV_G(regs[0]), + PCI_REG_FUNC_G(regs[0])); + + if (strcmp(pia->pia_devstr, path) == 0 || + strcmp(pia->pia_devstr, bdf) == 0 || + strcmp(pia->pia_devstr, altbdf) == 0 || + (driver != NULL && inst != -1 && + strcmp(pia->pia_devstr, dinst) == 0)) { + if (pia->pia_devi != DI_NODE_NIL) { + errx(EXIT_FAILURE, "device name matched two device " + "nodes: %s and %s", di_node_name(pia->pia_devi), + di_node_name(node)); + } + + pia->pia_devi = node; + } + + if (path != NULL) { + di_devfs_path_free(path); + } + + return (DI_WALK_CONTINUE); +} + +void +pcieadm_find_dip(pcieadm_t *pcip, const char *device) +{ + pcieadm_di_walk_t walk; + + /* + * If someone specifies /devices, just skip over it. + */ + pcip->pia_devstr = device; + if (strncmp("/devices", device, strlen("/devices")) == 0) { + pcip->pia_devstr += strlen("/devices"); + } + + pcip->pia_devi = DI_NODE_NIL; + walk.pdw_arg = pcip; + walk.pdw_func = pcieadm_find_dip_cb; + pcieadm_di_walk(pcip, &walk); + + if (pcip->pia_devi == DI_NODE_NIL) { + errx(EXIT_FAILURE, "failed to find device node %s", device); + } + + pcip->pia_nexus = DI_NODE_NIL; + pcieadm_find_nexus(pcip); + if (pcip->pia_nexus == DI_NODE_NIL) { + errx(EXIT_FAILURE, "failed to find nexus for %s", device); + } +} + +typedef struct pcieadm_cfgspace_file { + int pcfi_fd; +} pcieadm_cfgspace_file_t; + +static boolean_t +pcieadm_read_cfgspace_file(uint32_t off, uint8_t len, void *buf, void *arg) +{ + uint32_t bufoff = 0; + pcieadm_cfgspace_file_t *pcfi = arg; + + while (len > 0) { + ssize_t ret = pread(pcfi->pcfi_fd, buf + bufoff, len, off); + if (ret < 0) { + err(EXIT_FAILURE, "failed to read %u bytes at %" + PRIu32, len, off); + } else if (ret == 0) { + warnx("hit unexpected EOF reading cfgspace from file " + "at offest %" PRIu32 ", still wanted to read %u " + "bytes", off, len); + return (B_FALSE); + } else { + len -= ret; + off += ret; + bufoff += ret; + } + + } + + return (B_TRUE); +} + +void +pcieadm_init_cfgspace_file(pcieadm_t *pcip, const char *path, + pcieadm_cfgspace_f *funcp, void **arg) +{ + int fd; + struct stat st; + pcieadm_cfgspace_file_t *pcfi; + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) { + err(EXIT_FAILURE, "failed to raise privileges"); + } + + if ((fd = open(path, O_RDONLY)) < 0) { + err(EXIT_FAILURE, "failed to open input file %s", path); + } + + if (fstat(fd, &st) != 0) { + err(EXIT_FAILURE, "failed to get stat information for %s", + path); + } + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + if (S_ISDIR(st.st_mode)) { + errx(EXIT_FAILURE, "input file %s is a directory, unable " + "to read data", path); + } + + if (S_ISLNK(st.st_mode)) { + errx(EXIT_FAILURE, "input file %s is a symbolic link, unable " + "to read data", path); + } + + if (S_ISDOOR(st.st_mode)) { + errx(EXIT_FAILURE, "input file %s is a door, unable " + "to read data", path); + } + + if (S_ISPORT(st.st_mode)) { + errx(EXIT_FAILURE, "input file %s is an event port, unable " + "to read data", path); + } + + /* + * Assume if we were given a FIFO, character/block device, socket, or + * something else that it's probably fine. + */ + pcfi = calloc(1, sizeof (*pcfi)); + if (pcfi == NULL) { + err(EXIT_FAILURE, "failed to allocate memory for reading " + "cfgspace data from a file"); + } + + pcfi->pcfi_fd = fd; + *arg = pcfi; + *funcp = pcieadm_read_cfgspace_file; +} + +void +pcieadm_fini_cfgspace_file(void *arg) +{ + pcieadm_cfgspace_file_t *pcfi = arg; + VERIFY0(close(pcfi->pcfi_fd)); + free(pcfi); +} + +typedef struct pcieadm_cfgspace_kernel { + pcieadm_t *pck_pci; + int pck_fd; + uint8_t pck_bus; + uint8_t pck_dev; + uint8_t pck_func; +} pcieadm_cfgspace_kernel_t; + +static boolean_t +pcieadm_read_cfgspace_kernel(uint32_t off, uint8_t len, void *buf, void *arg) +{ + pcieadm_cfgspace_kernel_t *pck = arg; + pcieadm_t *pcip = pck->pck_pci; + pcitool_reg_t pci_reg; + + bzero(&pci_reg, sizeof (pci_reg)); + pci_reg.user_version = PCITOOL_VERSION; + pci_reg.bus_no = pck->pck_bus; + pci_reg.dev_no = pck->pck_dev; + pci_reg.func_no = pck->pck_func; + pci_reg.barnum = 0; + pci_reg.offset = off; + pci_reg.acc_attr = PCITOOL_ACC_ATTR_ENDN_LTL; + + switch (len) { + case 1: + pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_1; + break; + case 2: + pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_2; + break; + case 4: + pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_4; + break; + case 8: + pci_reg.acc_attr += PCITOOL_ACC_ATTR_SIZE_8; + break; + default: + errx(EXIT_FAILURE, "asked to read invalid size from kernel: %u", + len); + } + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) { + err(EXIT_FAILURE, "failed to raise privileges"); + } + + if (ioctl(pck->pck_fd, PCITOOL_DEVICE_GET_REG, &pci_reg) != 0) { + err(EXIT_FAILURE, "failed to read device offset 0x%x", off); + } + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + switch (len) { + case 1: + *(uint8_t *)buf = (uint8_t)pci_reg.data; + break; + case 2: + *(uint16_t *)buf = (uint16_t)pci_reg.data; + break; + case 4: + *(uint32_t *)buf = (uint32_t)pci_reg.data; + break; + case 8: + *(uint64_t *)buf = (uint64_t)pci_reg.data; + break; + } + + return (B_TRUE); +} + +void +pcieadm_init_cfgspace_kernel(pcieadm_t *pcip, pcieadm_cfgspace_f *funcp, + void **arg) +{ + char *nexus_base; + char nexus_reg[PATH_MAX]; + int fd, nregs, *regs; + pcieadm_cfgspace_kernel_t *pck; + + if ((nexus_base = di_devfs_path(pcip->pia_nexus)) == NULL) { + err(EXIT_FAILURE, "failed to get path to nexus node"); + } + + if (snprintf(nexus_reg, sizeof (nexus_reg), "/devices%s:reg", + nexus_base) >= sizeof (nexus_reg)) { + errx(EXIT_FAILURE, "failed to construct nexus path, path " + "overflow"); + } + free(nexus_base); + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_eff) != 0) { + err(EXIT_FAILURE, "failed to raise privileges"); + } + + if ((fd = open(nexus_reg, O_RDONLY)) < 0) { + err(EXIT_FAILURE, "failed to open %s", nexus_reg); + } + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcip->pia_priv_min) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + nregs = di_prop_lookup_ints(DDI_DEV_T_ANY, pcip->pia_devi, "reg", + ®s); + if (nregs <= 0) { + errx(EXIT_FAILURE, "failed to lookup regs array for %s", + pcip->pia_devstr); + } + + pck = calloc(1, sizeof (pcieadm_cfgspace_kernel_t)); + if (pck == NULL) { + err(EXIT_FAILURE, "failed to allocate memory for reading " + "kernel cfgspace data"); + } + + pck->pck_pci = pcip; + pck->pck_fd = fd; + pck->pck_bus = PCI_REG_BUS_G(regs[0]); + pck->pck_dev = PCI_REG_DEV_G(regs[0]); + pck->pck_func = PCI_REG_FUNC_G(regs[0]); + + *funcp = pcieadm_read_cfgspace_kernel; + *arg = pck; +} + +void +pcieadm_fini_cfgspace_kernel(void *arg) +{ + pcieadm_cfgspace_kernel_t *pck = arg; + + VERIFY0(close(pck->pck_fd)); + free(pck); +} + +static const pcieadm_cmdtab_t pcieadm_cmds[] = { + { "save-cfgspace", pcieadm_save_cfgspace, pcieadm_save_cfgspace_usage }, + { "show-cfgspace", pcieadm_show_cfgspace, pcieadm_show_cfgspace_usage }, + { "show-devs", pcieadm_show_devs, pcieadm_show_devs_usage }, + { NULL } +}; + +static void +pcieadm_usage(const char *format, ...) +{ + uint_t cmd; + + if (format != NULL) { + va_list ap; + + va_start(ap, format); + vwarnx(format, ap); + va_end(ap); + } + + (void) fprintf(stderr, "usage: %s <subcommand> <args> ...\n\n", + pcieadm_progname); + + for (cmd = 0; pcieadm_cmds[cmd].pct_name != NULL; cmd++) { + if (pcieadm_cmds[cmd].pct_use != NULL) { + pcieadm_cmds[cmd].pct_use(stderr); + } + } +} + +int +main(int argc, char *argv[]) +{ + uint_t cmd; + + pcieadm_progname = basename(argv[0]); + + if (argc < 2) { + pcieadm_usage("missing required sub-command"); + exit(EXIT_USAGE); + } + + for (cmd = 0; pcieadm_cmds[cmd].pct_name != NULL; cmd++) { + if (strcmp(pcieadm_cmds[cmd].pct_name, argv[1]) == 0) { + break; + } + } + + if (pcieadm_cmds[cmd].pct_name == NULL) { + pcieadm_usage("unknown sub-command: %s", argv[1]); + exit(EXIT_USAGE); + } + argc -= 2; + argv += 2; + optind = 0; + pcieadm.pia_cmdtab = &pcieadm_cmds[cmd]; + + /* + * Set up common things that all of pcieadm needs before dispatching to + * a specific sub-command. + */ + pcieadm.pia_pcidb = pcidb_open(PCIDB_VERSION); + if (pcieadm.pia_pcidb == NULL) { + err(EXIT_FAILURE, "failed to open PCI ID database"); + } + + pcieadm.pia_root = di_init("/", DINFOCPYALL); + if (pcieadm.pia_root == DI_NODE_NIL) { + err(EXIT_FAILURE, "failed to initialize devinfo tree"); + } + + /* + * Set up privileges now that we have already opened our core libraries. + * We first set up the minimum actual privilege set that we use while + * running. We next set up a second privilege set that has additional + * privileges that are intersected with the users actual privileges and + * are appended to by the underlying command backends. + */ + if ((pcieadm.pia_priv_init = priv_allocset()) == NULL) { + err(EXIT_FAILURE, "failed to allocate privilege set"); + } + + if (getppriv(PRIV_EFFECTIVE, pcieadm.pia_priv_init) != 0) { + err(EXIT_FAILURE, "failed to get current privileges"); + } + + if ((pcieadm.pia_priv_min = priv_allocset()) == NULL) { + err(EXIT_FAILURE, "failed to allocate privilege set"); + } + + if ((pcieadm.pia_priv_eff = priv_allocset()) == NULL) { + err(EXIT_FAILURE, "failed to allocate privilege set"); + } + + /* + * Note, PRIV_FILE_READ is not removed from the basic set so that way we + * can still open libraries that are required due to lazy loading. + */ + priv_basicset(pcieadm.pia_priv_min); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_FILE_LINK_ANY)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_INFO)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_SESSION)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_FORK)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_NET_ACCESS)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_FILE_WRITE)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_EXEC)); + VERIFY0(priv_delset(pcieadm.pia_priv_min, PRIV_PROC_EXEC)); + + priv_copyset(pcieadm.pia_priv_min, pcieadm.pia_priv_eff); + priv_intersect(pcieadm.pia_priv_init, pcieadm.pia_priv_eff); + + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pcieadm.pia_priv_min) != 0) { + err(EXIT_FAILURE, "failed to reduce privileges"); + } + + return (pcieadm.pia_cmdtab->pct_func(&pcieadm, argc, argv)); +} |