/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright 2017 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 * The Regents of the University of California * All Rights Reserved * * University Acknowledgment- Portions of this document are derived from * software developed by the University of California, Berkeley, and its * contributors. */ /* LINTLIBRARY */ /* PROTOLIB1 */ /* * lx-brand NFS lockd (NLM) server. * * This code is derived from the native lockd. The original history starts * from: * copied from usr/src/cmd/fs.d/nfs/nfsd/nfsd.c and then s:NFS:NLM: applied * * On Linux 'lockd' is implemented entirely inside the kernel, whereas our * native lockd support is a combination of user-level and kernel code. * Unfortunately, the native lockd is unable to run in lx for several reasons: * - tightly bound to SMF * - interacts with various native libnsl configuration files * - expects to register with a native rpcbind using /dev/ticlts * Thus, this code is derived from the native lockd, but modified to address * these issues and run inside an lx-branded zone. Because the Linux lockd * lives entirely in the kernel, our lockd must be started automatically if * it is needed. This is done by the NFS mount code when it determines that * a lockd is not already running. The kernel code ensures that there is only a * single instance of the lockd running, in the case that there is a race with * two NFS mounts occuring in parallel. * * lockd is required for both NFSv3 and v4 locking. Although v4 locking is * part of the v4 protocol, the kernel support to allow NFS locking is enabled * by the lockd when it starts. For v3, there must be a lockd registered with * rpcbind or the server side will fail the lock. This is because the server * side expects to make callbacks to the client. We must successfully register * with the Linux rpcbind, otherwise the NFS syscall to enable the kernel side * of locking will fail. For the v3 case, the user-level Linux mount helper cmd * already checks for the presence of rpcbind. It fails if rpcbind is not * running and the mount does not include the "nolock" option. For v4 the use * of rpcbind appears unnecessary, since locking is built-in to the protocol, * but it still required by our kernel NFS locking code. * * As with the native lockd, the kernel locking code makes upcalls to our lockd * over /dev/ticotsord, so that device must be present inside an lx zone. * * Because this process tries to mimic the Linux kernel lockd, there is no * stdin/out/err and we block all signals, unless we're running in debug mode. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nfs_tbind.h" #include "thrpool.h" /* Option defaults. See nfssys.h */ struct lm_svc_args lmargs = { .version = LM_SVC_CUR_VERS, /* fd, n_fmly, n_proto, n_rdev (below) */ .n_v4_only = 0, .timout = 5 * 60, .grace = 90, /* How long to wait for clients to re-establish locks. */ .retransmittimeout = 5 }; int max_servers = 256; #define RET_OK 0 /* return code for no error */ #define RET_ERR 33 /* return code for error(s) */ #define SYSLOG_BLEN 256 int nlmsvc(int fd, struct netbuf addrmask, struct netconfig *nconf); static int nlmsvcpool(int max_servers); static void usage(void); static struct timeval tottimeout = { 60, 0 }; static struct timeval rpcbtime = { 15, 0 }; static const char nullstring[] = "\000"; boolean_t have_rpcbind = B_FALSE; extern int _nfssys(int, void *); extern void nlm_do_one(char *, int (*)(int, struct netbuf, struct netconfig *)); extern int nlm_bind_to_provider(char *, struct netbuf **, struct netconfig **); /* * We want to bind to these TLI providers, and in this order, * because the kernel NLM needs the loopback first for its * initialization. (It uses it to talk to statd.) */ static NETSELDECL(defaultproviders)[] = { "/dev/ticotsord", "/dev/tcp", "/dev/udp", "/dev/tcp6", "/dev/udp6", NULL }; /* * The following are all globals used by routines in nfs_tbind.c. */ size_t end_listen_fds; /* used by conn_close_oldest() */ size_t num_fds = 0; /* used by multiple routines */ int listen_backlog = 32; /* used by bind_to_{provider,proto}() */ /* used by cots_listen_event() */ int max_conns_allowed = -1; /* used by cots_listen_event() */ int (*Mysvc)(int, struct netbuf, struct netconfig *) = nlmsvc; boolean_t debug = B_FALSE; #define LX_PMAP_VERS 4 /* * Set a mapping between program, version and address. * Calls the lx-zone's rpcbind service to do the mapping. */ boolean_t lx_rpcb_set(const rpcvers_t version, const struct netconfig *nconf, const struct netbuf *address) { CLIENT *client; bool_t rslt = FALSE; RPCB parms; char uidbuf[32]; client = clnt_create_timed("localhost", PMAPPROG, LX_PMAP_VERS, "datagram_v", &rpcbtime); if (client == NULL) return (B_FALSE); parms.r_addr = taddr2uaddr((struct netconfig *)nconf, (struct netbuf *)address); /* convert to universal */ if (!parms.r_addr) { rpc_createerr.cf_stat = RPC_N2AXLATEFAILURE; return (B_FALSE); } parms.r_prog = NLM_PROG; parms.r_vers = version; parms.r_netid = nconf->nc_netid; /* * Though uid is not being used directly, we still send it for * completeness. For non-unix platforms, perhaps some other * string or an empty string can be sent. */ (void) sprintf(uidbuf, "%d", (int)geteuid()); parms.r_owner = uidbuf; CLNT_CALL(client, RPCBPROC_SET, (xdrproc_t)xdr_rpcb, (char *)&parms, (xdrproc_t)xdr_bool, (char *)&rslt, tottimeout); CLNT_DESTROY(client); free(parms.r_addr); return (B_TRUE); } /* * Remove the mapping between program, version and netbuf address. * Calls the rpcbind service to do the un-mapping. */ void lx_rpcb_unset(const rpcvers_t version, char *nc_netid) { CLIENT *client; bool_t rslt; RPCB parms; char uidbuf[32]; if (!have_rpcbind) return; client = clnt_create_timed("localhost", PMAPPROG, LX_PMAP_VERS, "datagram_v", &rpcbtime); if (client == NULL) return; parms.r_prog = NLM_PROG; parms.r_vers = version; parms.r_netid = nc_netid; parms.r_addr = (char *)&nullstring[0]; (void) sprintf(uidbuf, "%d", (int)geteuid()); parms.r_owner = uidbuf; CLNT_CALL(client, RPCBPROC_UNSET, (xdrproc_t)xdr_rpcb, (char *)&parms, (xdrproc_t)xdr_bool, (char *)&rslt, tottimeout); CLNT_DESTROY(client); } static void lx_nlm_unreg(char *provider) { struct netconfig *retnconf; int vers; if (nlm_bind_to_provider(provider, NULL, &retnconf) == -1) return; /* Unregister all versions of the program. */ for (vers = NLM_VERS; vers <= NLM4_VERS; vers++) { lx_rpcb_unset(vers, retnconf->nc_netid); } } void lx_syslog(char *fmt, ...) { int fd, l; struct sockaddr_un snd_addr; char buf[SYSLOG_BLEN], fb[SYSLOG_BLEN], *bp, *fp, *ep; va_list ap; /* First we replace %m in fmt string with error string into fb. */ ep = fb + sizeof (fb); fb[SYSLOG_BLEN - 1] = '\0'; for (bp = fb, fp = fmt; bp < ep && (*bp = *fp) != '\0'; bp++, fp++) { if (*fp == '%' && *(fp + 1) == 'm') { (void) strlcpy(bp, strerror(errno), ep - bp); bp += strlen(bp); fp += 2; } } va_start(ap, fmt); (void) snprintf(buf, sizeof (buf), " rpc.lockd[%d]: ", getpid()); l = strlen(buf); bp = &buf[l]; (void) vsnprintf(bp, sizeof (buf) - l, fb, ap); va_end(ap); if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) return; bzero(&snd_addr, sizeof (snd_addr)); strcpy(snd_addr.sun_path, "/dev/log"); snd_addr.sun_family = AF_LOCAL; l = strlen(snd_addr.sun_path) + sizeof (snd_addr.sun_family); if (connect(fd, (struct sockaddr *)&snd_addr, l) == 0) { l = strlen(buf); (void) send(fd, buf, l, 0); } close(fd); } /* When debugging, ensure cleanup */ static void sigint_handler(void) { NETSELPDECL(providerp); lx_syslog("Stopping"); /* unregister from rpcbind */ for (providerp = defaultproviders; *providerp != NULL; providerp++) { char *provider = *providerp; lx_nlm_unreg(provider); } (void) _nfssys(KILL_LOCKMGR, NULL); exit(0); } int main(int ac, char *av[]) { NETSELPDECL(providerp); int i, c, val; if (geteuid() != 0) { exit(1); } /* Initializations that require more privileges than we need to run. */ if (__init_daemon_priv(PU_RESETGROUPS | PU_CLEARLIMITSET, 1, 1, PRIV_SYS_NFS, NULL) == -1) { exit(1); } (void) enable_extended_FILE_stdio(-1, -1); while ((c = getopt(ac, av, "c:dg:l:t:")) != EOF) switch (c) { case 'c': /* max_connections */ if ((val = atoi(optarg)) <= 0) goto badval; max_conns_allowed = val; break; case 'd': /* debug */ debug = B_TRUE; break; case 'g': /* grace_period */ if ((val = atoi(optarg)) <= 0) goto badval; lmargs.grace = val; break; case 'l': /* listen_backlog */ if ((val = atoi(optarg)) <= 0) goto badval; listen_backlog = val; break; case 't': /* retrans_timeout */ if ((val = atoi(optarg)) <= 0) goto badval; lmargs.retransmittimeout = val; break; badval: if (debug) { fprintf(stderr, "Invalid -%c option value", c); } /* FALLTHROUGH */ default: if (debug) { usage(); } exit(1); } /* If there is one more argument, it is the number of servers. */ if (optind < ac) { val = atoi(av[optind]); if (val <= 0) { if (debug) { fprintf(stderr, "Invalid max_servers argument"); usage(); } exit(1); } max_servers = val; optind++; } /* If there are two or more arguments, then this is a usage error. */ if (optind != ac) { if (debug) { usage(); } exit(1); } if (debug) { printf("lx_lockd: debug=%d, conn_idle_timout=%d, " "grace_period=%d, listen_backlog=%d, " "max_connections=%d, max_servers=%d, " "retrans_timeout=%d\n", debug, lmargs.timout, lmargs.grace, listen_backlog, max_conns_allowed, max_servers, lmargs.retransmittimeout); } /* Set current dir to server root */ if (chdir("/") < 0) { lx_syslog("chdir: %m"); exit(1); } if (!debug) { /* Block all signals if not debugging. */ sigset_t set; (void) sigfillset(&set); (void) sigprocmask(SIG_BLOCK, &set, NULL); (void) setsid(); } else { struct sigaction act; act.sa_handler = sigint_handler; act.sa_flags = 0; (void) sigaction(SIGINT, &act, NULL); } lx_syslog("Starting"); /* Unregister any previous versions. */ for (i = NLM_VERS; i < NLM4_VERS; i++) { svc_unreg(NLM_PROG, i); } /* Set up kernel RPC thread pool for the NLM server. */ if (nlmsvcpool(max_servers)) { lx_syslog("Can't set up kernel NLM service: %m. Exiting"); exit(1); } /* Set up blocked thread to do LWP creation on behalf of the kernel. */ if (svcwait(NLM_SVCPOOL_ID)) { lx_syslog("Can't set up NLM pool creator: %m. Exiting"); exit(1); } for (providerp = defaultproviders; *providerp != NULL; providerp++) { char *provider = *providerp; nlm_do_one(provider, nlmsvc); } if (num_fds == 0) { lx_syslog("Could not start NLM service for any protocol. " "Exiting"); exit(1); } end_listen_fds = num_fds; /* * lockd is up and running as far as we are concerned. * * Get rid of unneeded privileges. */ __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL); /* Poll for non-data control events on the transport descriptors. */ poll_for_action(); /* If we get here, something failed in poll_for_action(). */ return (1); } static int nlmsvcpool(int maxservers) { struct svcpool_args npa; npa.id = NLM_SVCPOOL_ID; npa.maxthreads = maxservers; npa.redline = 0; npa.qsize = 0; npa.timeout = 0; npa.stksize = 0; npa.max_same_xprt = 0; return (_nfssys(SVCPOOL_CREATE, &npa)); } static int ncfmly_to_lmfmly(const char *ncfmly) { if (0 == strcmp(ncfmly, NC_INET)) return (LM_INET); if (0 == strcmp(ncfmly, NC_INET6)) return (LM_INET6); if (0 == strcmp(ncfmly, NC_LOOPBACK)) return (LM_LOOPBACK); return (-1); } static int nctype_to_lmprot(uint_t semantics) { switch (semantics) { case NC_TPI_CLTS: return (LM_UDP); case NC_TPI_COTS_ORD: return (LM_TCP); } return (-1); } static dev_t ncdev_to_rdev(const char *ncdev) { struct stat st; if (stat(ncdev, &st) < 0) return (NODEV); return (st.st_rdev); } /* * Establish NLM service thread. */ int nlmsvc(int fd, struct netbuf addrmask, struct netconfig *nconf) { struct lm_svc_args lma; lma = lmargs; /* init by struct copy */ /* * The kernel code needs to reconstruct a complete * knetconfig from n_fmly, n_proto. We use these * two fields to convey the family and semantics. */ lma.fd = fd; lma.n_fmly = ncfmly_to_lmfmly(nconf->nc_protofmly); lma.n_proto = nctype_to_lmprot(nconf->nc_semantics); lma.n_rdev = ncdev_to_rdev(nconf->nc_device); if (!have_rpcbind) { /* * Inform the kernel NLM code to run without rpcbind and * rpc.statd. */ lma.n_v4_only = -1; } return (_nfssys(LM_SVC, &lma)); } static void usage(void) { (void) fprintf(stderr, "usage: lx_lockd [options] [max_servers]\n"); (void) fprintf(stderr, "\t-c max_connections\n"); (void) fprintf(stderr, "\t-d enable debugging\n"); (void) fprintf(stderr, "\t-g grace_period\n"); (void) fprintf(stderr, "\t-l listen_backlog\n"); (void) fprintf(stderr, "\t-t retransmit_timeout\n"); }