From e7df7762bfed06e996cc80f583cbee2d8ed81d69 Mon Sep 17 00:00:00 2001 From: Cody Peter Mello Date: Thu, 3 Sep 2015 22:04:07 +0000 Subject: 6324 Add an `ndp' tool for manipulating the neighbors table Reviewed by: Robert Mustacchi Reviewed by: Alex Wilson Reviewed by: Dan McDonald Approved by: Richard Lowe --- usr/src/cmd/cmd-inet/usr.sbin/Makefile | 10 +- usr/src/cmd/cmd-inet/usr.sbin/ndp.c | 1111 +++++++++++++++++++++++++ usr/src/man/man1m/Makefile | 1 + usr/src/man/man1m/arp.1m | 9 +- usr/src/man/man1m/in.ndpd.1m | 17 +- usr/src/man/man1m/ndp.1m | 215 +++++ usr/src/man/man1m/netstat.1m | 2 +- usr/src/man/man7p/Makefile | 4 + usr/src/man/man7p/arp.7p | 57 +- usr/src/man/man7p/if_tcp.7p | 6 +- usr/src/man/man7p/ndp.7p | 345 ++++++++ usr/src/man/man7p/route.7p | 8 +- usr/src/pkg/manifests/SUNWcs.man1m.inc | 1 + usr/src/pkg/manifests/SUNWcs.mf | 1 + usr/src/pkg/manifests/system-kernel.man7p.inc | 2 + usr/src/uts/common/inet/ip/ip_ndp.c | 2 + usr/src/uts/common/net/if.h | 4 +- 17 files changed, 1719 insertions(+), 76 deletions(-) create mode 100644 usr/src/cmd/cmd-inet/usr.sbin/ndp.c create mode 100644 usr/src/man/man1m/ndp.1m create mode 100644 usr/src/man/man7p/ndp.7p (limited to 'usr/src') diff --git a/usr/src/cmd/cmd-inet/usr.sbin/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/Makefile index 3c8672d154..972b9e497f 100644 --- a/usr/src/cmd/cmd-inet/usr.sbin/Makefile +++ b/usr/src/cmd/cmd-inet/usr.sbin/Makefile @@ -29,7 +29,7 @@ SYNCPROG= syncinit syncloop syncstat PROG= 6to4relay arp if_mpadm \ in.comsat in.fingerd in.rarpd in.rexecd in.rlogind \ in.rshd in.rwhod in.telnetd in.tftpd ipaddrsel \ - ndd $(SYNCPROG) wanbootutil + ndd ndp $(SYNCPROG) wanbootutil MANIFEST= rarp.xml telnet.xml comsat.xml finger.xml \ login.xml shell.xml rexec.xml socket-filter-kssl.xml @@ -43,10 +43,10 @@ AUDITPROG= in.rexecd in.rlogind in.rshd in.telnetd PAMPROG= in.rexecd in.rlogind in.rshd in.telnetd SOCKETPROG= 6to4relay arp hostconfig if_mpadm in.comsat \ in.fingerd in.rarpd in.rexecd 
in.rlogind in.rshd \ - in.rwhod in.telnetd in.tftpd ipaddrsel route + in.rwhod in.telnetd in.tftpd ipaddrsel ndp route NSLPROG= 6to4relay arp hostconfig in.comsat in.rarpd \ in.rexecd in.rlogind in.rshd in.rwhod in.telnetd \ - in.tftpd ipaddrsel route + in.tftpd ipaddrsel ndp route CMDPROG= in.telnetd K5PROGS= in.telnetd in.rlogind in.rshd TSNETPROG= route @@ -74,7 +74,7 @@ MSGSUBDIRS= bootconfchk ifconfig ilbadm in.routed in.talkd \ # Eventually this hack should go away, and all in PROG should be # lint-clean. LINTCLEAN= 6to4relay arp in.rlogind in.rshd in.telnetd in.tftpd \ - ipaddrsel route \ + ipaddrsel ndp route \ in.rarpd if_mpadm $(SYNCPROG) # Likewise, as subdirs get lint-clean, add them here. Once # they're all clean, replace the dependency of the lint target @@ -269,5 +269,7 @@ lint: $(LINTSUBDIRS) $(LINT.c) syncstat.c $(LDLIBS) -ldlpi $(LINT.c) -erroff=E_NAME_USED_NOT_DEF2 in.rarpd.c $(LDLIBS) \ -lsocket -lnsl + $(LINT.c) ndp.c $(LDLIBS) \ + -lsocket -lnsl $(LINT.c) in.tftpd.c ../usr.bin/tftp/tftpsubs.c $(LDLIBS) \ -lsocket -lnsl diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ndp.c b/usr/src/cmd/cmd-inet/usr.sbin/ndp.c new file mode 100644 index 0000000000..23b940c686 --- /dev/null +++ b/usr/src/cmd/cmd-inet/usr.sbin/ndp.c @@ -0,0 +1,1111 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. All rights reserved. 
+ */ + +/* + * ndp - display and manipulate Neighbor Cache Entries from NDP + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct sockaddr_in6 sin6_t; + +#define BUF_SIZE 2048 +typedef struct rtmsg_pkt { + struct rt_msghdr m_rtm; + char m_space[BUF_SIZE]; +} rtmsg_pkt_t; + +enum ndp_action { + NDP_A_DEFAULT, + NDP_A_GET, /* Show a single NDP entry */ + NDP_A_GET_ALL, /* Show NDP entries */ + NDP_A_GET_FOREVER, /* Repeatedly show entries */ + NDP_A_DELETE, /* Delete an NDP entry */ + NDP_A_SET_NCE, /* Set NDP entry */ + NDP_A_SET_FILE /* Read in & set NDP entries */ +}; + +typedef int (ndp_addr_f)(int, struct lifreq *, void *); +typedef void (ndp_void_f)(void); + +static void ndp_usage(const char *, ...); +static void ndp_fatal(const char *, ...); +static void ndp_badflag(enum ndp_action); +static void ndp_missingarg(char); + +static void ndp_run_in_child(ndp_void_f *); +static void ndp_do_run(void); +static void ndp_setup_handler(sigset_t *); +static void ndp_start_timer(time_t period); +static void ndp_run_periodically(time_t, ndp_void_f *); + +static int ndp_salen(const struct sockaddr *sa); +static int ndp_extract_sockaddrs(struct rt_msghdr *, struct sockaddr **, + struct sockaddr **, struct sockaddr **, struct sockaddr **, + struct sockaddr_dl **); +static int ndp_rtmsg_get(int, rtmsg_pkt_t *, struct sockaddr *); +static int ndp_find_interface(int, struct sockaddr *, char *, int); + +static int ndp_initialize_lifreq(int, struct lifreq *, struct sockaddr *); +static int ndp_host_enumerate(char *, ndp_addr_f *, void *); + +static int ndp_display(struct lifreq *); +static int ndp_display_missing(struct lifreq *); +static void ndp_lifr2ip(struct lifreq *, char *, int); + +static int ndp_get(int, struct lifreq *, void *); +static void ndp_get_all(void); +static int 
ndp_delete(int, struct lifreq *, void *); +static int ndp_set(int, struct lifreq *, void *); +static int ndp_set_nce(char *, char *, char *[], int); +static int ndp_set_file(char *); + +static char *ndp_iface = NULL; +static char *netstat_path = "/usr/bin/netstat"; +static pid_t ndp_pid; +static boolean_t ndp_noresolve = B_FALSE; /* Don't lookup addresses */ +static boolean_t ndp_run = B_TRUE; + +#define MAX_ATTEMPTS 5 +#define MAX_OPTS 5 +#define WORDSEPS " \t\r\n" + +/* + * Macros borrowed from route(1M) for working with PF_ROUTE messages + */ +#define RT_ADVANCE(x, n) ((x) += ndp_salen(n)) +#define RT_NEXTADDR(cp, w, u) \ + l = ndp_salen(u); \ + (void) memmove(cp, u, l); \ + cp += l; + +/* + * Print an error to stderr and then exit non-zero. + */ +static void +ndp_fatal(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + vwarnx(format, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +/* + * Print out the command usage to stderr, along with any reason why it's being + * printed, and then exit non-zero. + */ +static void +ndp_usage(const char *reason, ...) 
+{ + va_list ap; + const char *ndp_progname = getprogname(); + + if (reason != NULL) { + va_start(ap, reason); + (void) fprintf(stderr, "%s: ", ndp_progname); + (void) vfprintf(stderr, reason, ap); + (void) fprintf(stderr, "\n"); + va_end(ap); + } + + (void) fprintf(stderr, + "Usage: %s [-n] [-i iface] hostname\n" + " %s [-n] [-i iface] -s nodeaddr etheraddr [temp] [proxy]\n" + " %s [-n] [-i iface] -d nodeaddr\n" + " %s [-n] [-i iface] -f filename\n" + " %s [-n] -a\n" + " %s [-n] -A period\n", + ndp_progname, ndp_progname, ndp_progname, + ndp_progname, ndp_progname, ndp_progname); + exit(EXIT_FAILURE); +} + +static void +ndp_badflag(enum ndp_action action) +{ + switch (action) { + case NDP_A_DEFAULT: + case NDP_A_GET: + ndp_usage("Already going to print an entry, " + "but extra -%c given", optopt); + break; + case NDP_A_GET_ALL: + ndp_usage("Already going to print all entries (-a), " + "but extra -%c given", optopt); + break; + case NDP_A_GET_FOREVER: + ndp_usage("Already going to repeatedly print all entries (-A), " + "but extra -%c given", optopt); + break; + case NDP_A_DELETE: + ndp_usage("Already going to delete an entry (-d), " + "but extra -%c given", optopt); + break; + case NDP_A_SET_NCE: + ndp_usage("Already going to set an entry (-s), " + "but extra -%c given", optopt); + break; + case NDP_A_SET_FILE: + ndp_usage("Already going to set from file (-f), " + "but extra -%c given", optopt); + break; + } +} + +static void +ndp_missingarg(char flag) +{ + switch (flag) { + case 'A': + ndp_usage("Missing time period after -%c", flag); + break; + case 'd': + ndp_usage("Missing node name after -%c", flag); + break; + case 'f': + ndp_usage("Missing filename after -%c", flag); + break; + case 's': + ndp_usage("Missing node name after -%c", flag); + break; + case 'i': + ndp_usage("Missing interface name after -%c", flag); + break; + default: + ndp_usage("Missing option argument after -%c", flag); + break; + } +} + +/* + * Run a function that's going to exec in a child 
process, and don't return
+ * until it exits. If the child exits non-zero, or is terminated by a signal,
+ * the parent reports it and exits through ndp_fatal().
+ */
+static void
+ndp_run_in_child(ndp_void_f *func)
+{
+	pid_t child_pid;
+	int childstat = 0, status = 0;
+
+	child_pid = fork();
+	if (child_pid == (pid_t)-1) {
+		ndp_fatal("Unable to fork: %s", strerror(errno));
+	} else if (child_pid == (pid_t)0) {
+		/* If func() comes back at all, treat that as a failure. */
+		func();
+		exit(EXIT_FAILURE);
+	}
+
+	/* Retry the wait if it is interrupted by a signal. */
+	while (waitpid(child_pid, &childstat, 0) == -1) {
+		if (errno == EINTR)
+			continue;
+
+		ndp_fatal("Failed to wait on child: %s", strerror(errno));
+	}
+
+	/*
+	 * WEXITSTATUS() is only meaningful when the child exited normally;
+	 * a child killed by a signal must not be mistaken for success.
+	 */
+	if (WIFEXITED(childstat)) {
+		status = WEXITSTATUS(childstat);
+		if (status != 0) {
+			ndp_fatal("Child process exited with %d", status);
+		}
+	} else if (WIFSIGNALED(childstat)) {
+		ndp_fatal("Child process was killed by signal %d",
+		    WTERMSIG(childstat));
+	} else {
+		ndp_fatal("Child process terminated abnormally");
+	}
+}
+
+/*
+ * SIGALRM handler to schedule a run.
+ */
+static void
+ndp_do_run(void)
+{
+	ndp_run = B_TRUE;
+}
+
+
+/*
+ * Prepare signal masks, and install the SIGALRM handler. Return old signal
+ * masks through the first argument.
+ */
+static void
+ndp_setup_handler(sigset_t *oset)
+{
+	struct sigaction sa;
+
+	/*
+	 * Mask off SIGALRM so we only trigger the handler when we're ready
+	 * using sigsuspend(3C), in case the child process takes longer to
+	 * run than the alarm interval.
+	 */
+
+	/* With a NULL new set this only fetches the current mask into oset. */
+	if (sigprocmask(0, NULL, oset) != 0) {
+		ndp_fatal("Unable to set signal mask: %s", strerror(errno));
+	}
+
+	if (sighold(SIGALRM) != 0) {
+		ndp_fatal("Unable to add SIGALRM to signal mask: %s",
+		    strerror(errno));
+	}
+
+	sa.sa_flags = 0;
+	sa.sa_handler = ndp_do_run;
+
+	if (sigemptyset(&sa.sa_mask) != 0) {
+		ndp_fatal("Unable to prepare empty signal set: %s",
+		    strerror(errno));
+	}
+
+	if (sigaction(SIGALRM, &sa, NULL) != 0) {
+		ndp_fatal("Unable to install timer handler: %s",
+		    strerror(errno));
+	}
+}
+
+/*
+ * Start the printing timer.
+ */ +static void +ndp_start_timer(time_t period) +{ + timer_t timer; + struct itimerspec interval; + interval.it_value.tv_sec = interval.it_interval.tv_sec = period; + interval.it_value.tv_nsec = interval.it_interval.tv_nsec = 0; + + if (timer_create(CLOCK_REALTIME, NULL, &timer) != 0) { + ndp_fatal("Unable to create timer: %s", strerror(errno)); + } + + if (timer_settime(timer, 0, &interval, NULL) != 0) { + ndp_fatal("Unable to set time on timer: %s", strerror(errno)); + } +} + + +/* + * Run a given function forever periodically in a child process. + */ +static void +ndp_run_periodically(time_t period, ndp_void_f *func) +{ + sigset_t oset; + + ndp_setup_handler(&oset); + ndp_start_timer(period); + + do { + if (ndp_run) { + ndp_run = B_FALSE; + ndp_run_in_child(func); + } + (void) sigsuspend(&oset); + } while (errno == EINTR); + + /* + * Only an EFAULT should get us here. Abort so we get a core dump. + */ + warnx("Failure while waiting on timer: %s", strerror(errno)); + abort(); +} + +/* + * Given an address, return its size. + */ +static int +ndp_salen(const struct sockaddr *sa) +{ + switch (sa->sa_family) { + case AF_INET: + return (sizeof (struct sockaddr_in)); + case AF_LINK: + return (sizeof (struct sockaddr_dl)); + case AF_INET6: + return (sizeof (struct sockaddr_in6)); + default: + warnx("Unrecognized sockaddr with address family %d!", + sa->sa_family); + abort(); + } + /*NOTREACHED*/ +} + +/* + * Extract all socket addresses from a routing message, and return them + * through the pointers given as arguments to ndp_extract_sockaddrs. None + * of the pointers should be null. 
+ */ +static int +ndp_extract_sockaddrs(struct rt_msghdr *rtm, struct sockaddr **dst, + struct sockaddr **gate, struct sockaddr **mask, struct sockaddr **src, + struct sockaddr_dl **ifp) +{ + struct sockaddr *sa; + char *cp; + int i; + + if (rtm->rtm_version != RTM_VERSION) { + warnx("Routing message version %d not understood", + rtm->rtm_version); + return (-1); + } + + if (rtm->rtm_errno != 0) { + warnx("Routing message couldn't be processed: %s", + strerror(rtm->rtm_errno)); + return (-1); + } + + cp = ((char *)(rtm + 1)); + if (rtm->rtm_addrs != 0) { + for (i = 1; i != 0; i <<= 1) { + if ((i & rtm->rtm_addrs) == 0) + continue; + + /*LINTED*/ + sa = (struct sockaddr *)cp; + switch (i) { + case RTA_DST: + *dst = sa; + break; + case RTA_GATEWAY: + *gate = sa; + break; + case RTA_NETMASK: + *mask = sa; + break; + case RTA_IFP: + if (sa->sa_family == AF_LINK && + ((struct sockaddr_dl *)sa)->sdl_nlen != 0) + *ifp = (struct sockaddr_dl *)sa; + break; + case RTA_SRC: + *src = sa; + break; + } + RT_ADVANCE(cp, sa); + } + } + + return (0); +} + +/* + * Given an IPv6 address, use routing information to look up + * the destination and interface it would pass through. 
+ */ +static int +ndp_rtmsg_get(int fd, rtmsg_pkt_t *msg, struct sockaddr *sin6p) +{ + static int seq = 0; + struct sockaddr_dl sdl; + int mlen, l; + char ipaddr[INET6_ADDRSTRLEN]; + char *cp = msg->m_space; + struct rt_msghdr *m_rtm = &msg->m_rtm; + + bzero(msg, sizeof (rtmsg_pkt_t)); + bzero(&sdl, sizeof (struct sockaddr_dl)); + + m_rtm->rtm_type = RTM_GET; + m_rtm->rtm_version = RTM_VERSION; + m_rtm->rtm_seq = ++seq; + m_rtm->rtm_addrs = RTA_DST | RTA_IFP; + m_rtm->rtm_msglen = sizeof (rtmsg_pkt_t); + + /* Place the address we're looking up after the header */ + RT_NEXTADDR(cp, RTA_DST, sin6p); + + /* Load an empty link-level address, so we get an interface back */ + sdl.sdl_family = AF_LINK; + RT_NEXTADDR(cp, RTA_IFP, (struct sockaddr *)&sdl); + + m_rtm->rtm_msglen = cp - (char *)msg; + + if ((mlen = write(fd, (char *)msg, m_rtm->rtm_msglen)) < 0) { + if (errno == ESRCH) { + /*LINTED*/ + if (inet_ntop(AF_INET6, &((sin6_t *)sin6p)->sin6_addr, + ipaddr, sizeof (ipaddr)) == NULL) { + (void) snprintf(ipaddr, sizeof (ipaddr), + "(failed to format IP)"); + }; + warnx("An appropriate interface for the address %s " + "is not in the routing table; use -i to force an " + "interface", ipaddr); + return (-1); + } else { + warnx("Failed to send routing message: %s", + strerror(errno)); + return (-1); + } + } else if (mlen < (int)m_rtm->rtm_msglen) { + warnx("Failed to write all bytes to routing socket"); + return (-1); + } + + /* + * Keep reading routing messages until we find the response to the one + * we just sent. Note that we depend on the sequence number being unique + * to the running program. 
+ */ + do { + mlen = read(fd, (char *)msg, sizeof (rtmsg_pkt_t)); + } while (mlen > 0 && + (m_rtm->rtm_seq != seq || m_rtm->rtm_pid != ndp_pid)); + if (mlen < 0) { + warnx("Failed to read from routing socket: %s", + strerror(errno)); + return (-1); + } + + return (0); +} + +/* + * Find the interface that the IPv6 address would be routed through, and store + * the name of the interface in the buffer passed in. + */ +static int +ndp_find_interface(int fd, struct sockaddr *sin6p, char *buf, int buflen) +{ + struct sockaddr *dst = NULL, *gate = NULL, *mask = NULL, *src = NULL; + struct sockaddr_dl *ifp = NULL; + rtmsg_pkt_t msg; + + if (ndp_rtmsg_get(fd, &msg, sin6p) != 0) { + return (-1); + } + + if (ndp_extract_sockaddrs(&msg.m_rtm, &dst, &gate, + &mask, &src, &ifp) != 0) { + return (-1); + } + + if (ifp == NULL) { + warnx("Unable to find appropriate interface for address"); + return (-1); + } else { + if (ifp->sdl_nlen >= buflen) { + warnx("The interface name \"%.*s\" is too big for the " + "available buffer", ifp->sdl_nlen, ifp->sdl_data); + return (-1); + } else { + (void) snprintf(buf, buflen, "%.*s", ifp->sdl_nlen, + ifp->sdl_data); + } + } + + return (0); +} + +/* + * Zero out a lifreq struct for a SIOCLIF*ND ioctl, set the address, and fetch + * the appropriate interface using the given routing socket. 
+ */
+static int
+ndp_initialize_lifreq(int route, struct lifreq *lifrp, struct sockaddr *sap)
+{
+	struct sockaddr_storage *lnr_addr;
+	/* LINTED E_BAD_PTR_CAST_ALIGN */
+	struct sockaddr_in6 *sin6p = (sin6_t *)sap;
+	char *lifr_name = lifrp->lifr_name;
+
+	bzero(lifrp, sizeof (struct lifreq));
+	lnr_addr = &lifrp->lifr_nd.lnr_addr;
+
+	/*
+	 * Pick the interface name, in order of preference: the one forced
+	 * with -i, the one named by the address's scope ID, or whatever the
+	 * routing socket says the address would go out of.
+	 */
+	if (ndp_iface != NULL) {
+		/* Refuse a name that would be silently truncated. */
+		if (strlcpy(lifr_name, ndp_iface, LIFNAMSIZ) >= LIFNAMSIZ) {
+			warnx("Interface name \"%s\" is too long", ndp_iface);
+			return (-1);
+		}
+	} else if (sin6p->sin6_scope_id != 0) {
+		int zone_id = sin6p->sin6_scope_id;
+		if (if_indextoname(zone_id, lifr_name) == NULL) {
+			warnx("Invalid zone identifier: %d", zone_id);
+			return (-1);
+		}
+	} else if (IN6_IS_ADDR_LINKSCOPE(&sin6p->sin6_addr)) {
+		warnx("Link-scope addresses should specify an interface with "
+		    "a zone ID, or with -i.");
+		return (-1);
+	} else {
+		if (ndp_find_interface(route, sap, lifr_name, LIFNAMSIZ) != 0)
+			return (-1);
+	}
+
+	/*
+	 * Copy only as much as an IPv6 socket address holds: sap is used as a
+	 * sockaddr_in6 throughout, and may not be backed by a full
+	 * sockaddr_storage. The rest of lnr_addr was zeroed by bzero() above.
+	 */
+	(void) memcpy(lnr_addr, sap, sizeof (sin6_t));
+
+	return (0);
+}
+
+/*
+ * Take a host identifier, find the corresponding IPv6 addresses and then pass
+ * them to the specified function, along with any desired data.
+ */
+static int
+ndp_host_enumerate(char *host, ndp_addr_f *addr_func, void *data)
+{
+	struct lifreq lifr;
+	struct addrinfo hints, *serverinfo, *p;
+	int err, attempts = 0;
+	int inet6, route;
+
+	bzero(&hints, sizeof (struct addrinfo));
+	hints.ai_family = AF_INET6;
+	hints.ai_protocol = IPPROTO_IPV6;
+
+	/* Retry a bounded number of times if the resolver says EAI_AGAIN. */
+	while (attempts < MAX_ATTEMPTS) {
+		err = getaddrinfo(host, NULL, &hints, &serverinfo);
+
+		if (err == 0) {
+			break;
+		} else if (err == EAI_AGAIN) {
+			attempts++;
+		} else {
+			warnx("Unable to lookup %s: %s", host,
+			    gai_strerror(err));
+			return (-1);
+		}
+	}
+
+	if (attempts == MAX_ATTEMPTS) {
+		warnx("Failed multiple times to lookup %s", host);
+		return (-1);
+	}
+
+	inet6 = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (inet6 < 0) {
+		warnx("Failed to open IPv6 socket: %s", strerror(errno));
+		err = -1;
+	}
+
+	route = socket(PF_ROUTE, SOCK_RAW, 0);
+	if (route < 0) {
+		warnx("Failed to open routing socket: %s", strerror(errno));
+		err = -1;
+	}
+
+	/*
+	 * Apply addr_func to every address; a failure on one address is
+	 * remembered in err but does not stop the remaining ones.
+	 */
+	if (err == 0) {
+		for (p = serverinfo; p != NULL; p = p->ai_next) {
+			if (ndp_initialize_lifreq(route, &lifr, p->ai_addr)
+			    != 0) {
+				err = -1;
+				continue;
+			}
+
+			if (addr_func(inet6, &lifr, data) != 0) {
+				err = -1;
+				continue;
+			}
+		}
+	}
+
+	/* Only close descriptors that were actually opened. */
+	if (route >= 0 && close(route) != 0) {
+		warnx("Failed to close routing socket: %s", strerror(errno));
+		err = -1;
+	}
+
+	if (inet6 >= 0 && close(inet6) != 0) {
+		warnx("Failed to close IPv6 socket: %s", strerror(errno));
+		err = -1;
+	}
+
+	/* Clean up linked list */
+	freeaddrinfo(serverinfo);
+
+	return (err);
+}
+
+/*
+ * Print one neighbor entry: its IPv6 address, host name (numeric when
+ * ndp_noresolve is set), link-layer address and flag keywords.
+ */
+static int
+ndp_display(struct lifreq *lifrp)
+{
+	struct sockaddr_in6 *lnr_addr;
+	char ipaddr[INET6_ADDRSTRLEN];
+	char *lladdr = NULL;
+	char hostname[NI_MAXHOST];
+	int flags, gni_flags;
+
+	lnr_addr = (struct sockaddr_in6 *)&lifrp->lifr_nd.lnr_addr;
+	flags = lifrp->lifr_nd.lnr_flags;
+
+	if (inet_ntop(AF_INET6, &lnr_addr->sin6_addr, ipaddr,
+	    sizeof (ipaddr)) == NULL) {
+		warnx("Couldn't convert IPv6 address to string: %s",
+		    strerror(errno));
+		return (-1);
+	}
+
+	/* The returned string is released with free() on every path below. */
+	if ((lladdr = _link_ntoa((uchar_t *)lifrp->lifr_nd.lnr_hdw_addr,
+	    NULL, lifrp->lifr_nd.lnr_hdw_len, IFT_ETHER)) == NULL) {
+		warnx("Couldn't convert link-layer address to string: %s",
+		    strerror(errno));
+		return (-1);
+	}
+
+	gni_flags = ndp_noresolve ? NI_NUMERICHOST : 0;
+
+	if (getnameinfo((struct sockaddr *)lnr_addr, sizeof (sin6_t), hostname,
+	    sizeof (hostname), NULL, 0, gni_flags) != 0) {
+		warnx("Unable to lookup hostname for %s", ipaddr);
+		free(lladdr);
+		return (-1);
+	}
+
+	(void) printf("%s (%s) at %s", ipaddr, hostname, lladdr);
+
+	if (flags & NDF_ISROUTER_ON) {
+		(void) printf(" router");
+	}
+
+	if (flags & NDF_ANYCAST_ON) {
+		(void) printf(" any");
+	}
+
+	if (!(flags & NDF_STATIC)) {
+		(void) printf(" temp");
+	}
+
+	if (flags & NDF_PROXY_ON) {
+		(void) printf(" proxy");
+	}
+
+	(void) printf("\n");
+
+	free(lladdr);
+	return (0);
+}
+
+/*
+ * Print a "no entry" line for an address that has no neighbor entry.
+ */
+static int
+ndp_display_missing(struct lifreq *lifrp)
+{
+	struct sockaddr_in6 *lnr_addr;
+	char ipaddr[INET6_ADDRSTRLEN];
+	char hostname[NI_MAXHOST];
+	int flags = ndp_noresolve ? NI_NUMERICHOST : 0;
+	lnr_addr = (struct sockaddr_in6 *)&lifrp->lifr_nd.lnr_addr;
+
+	if (inet_ntop(AF_INET6, &lnr_addr->sin6_addr, ipaddr,
+	    sizeof (ipaddr)) == NULL) {
+		warnx("Couldn't convert IPv6 address to string: %s",
+		    strerror(errno));
+		return (-1);
+	}
+
+	if (getnameinfo((struct sockaddr *)lnr_addr, sizeof (sin6_t), hostname,
+	    sizeof (hostname), NULL, 0, flags) != 0) {
+		warnx("Unable to lookup hostname for %s", ipaddr);
+		return (-1);
+	}
+
+	(void) printf("%s (%s) -- no entry\n", ipaddr, hostname);
+	return (0);
+}
+
+/*
+ * Format the IPv6 address held in a lifreq into the given buffer, falling
+ * back to a placeholder string if it cannot be formatted.
+ */
+static void
+ndp_lifr2ip(struct lifreq *lifrp, char *ipaddr, int buflen)
+{
+	sin6_t *lnr_addr = (sin6_t *)&lifrp->lifr_nd.lnr_addr;
+	if (inet_ntop(AF_INET6, &lnr_addr->sin6_addr, ipaddr,
+	    buflen) == NULL) {
+		(void) snprintf(ipaddr, buflen, "(failed to format IP)");
+	}
+}
+
+/*
+ * Perform a SIOCLIFGETND and print out information about it
+ */
+/*ARGSUSED*/
+static int
+ndp_get(int fd, struct lifreq *lifrp, void *unused)
+{
+	char ipaddr[INET6_ADDRSTRLEN];
+	if (ioctl(fd, SIOCLIFGETND, lifrp) < 0) {
+		/* Save errno before any library call can overwrite it. */
+		int saved_errno = errno;
+
+		if (saved_errno == ESRCH) {
+			return (ndp_display_missing(lifrp));
+		} else {
+			ndp_lifr2ip(lifrp, ipaddr, sizeof (ipaddr));
+			warnx("Couldn't lookup %s: %s",
+			    ipaddr, strerror(saved_errno));
+			return (-1);
+		}
+	}
+
+	return (ndp_display(lifrp));
+}
+
+/*
+ * Print out all NDP entries
+ */
+static void
+ndp_get_all(void)
+{
+	/* execl() only returns on failure. */
+	(void) execl(netstat_path, "netstat",
+	    (ndp_noresolve ? "-np" : "-p"),
+	    "-f", "inet6", (char *)0);
+	ndp_fatal("Couldn't exec %s: %s", netstat_path, strerror(errno));
+}
+
+/*
+ * Perform a SIOCLIFDELND ioctl
+ */
+/*ARGSUSED*/
+static int
+ndp_delete(int fd, struct lifreq *lifrp, void *unused)
+{
+	char ipaddr[INET6_ADDRSTRLEN];
+
+	if (ioctl(fd, SIOCLIFDELND, lifrp) < 0) {
+		/* Save errno before ndp_lifr2ip() can overwrite it. */
+		int saved_errno = errno;
+
+		ndp_lifr2ip(lifrp, ipaddr, sizeof (ipaddr));
+		if (saved_errno == ESRCH) {
+			warnx("No entry for %s", ipaddr);
+			return (-1);
+		} else if (saved_errno == EPERM) {
+			warnx("Permission denied, "
+			    "could not delete entry for %s", ipaddr);
+			return (-1);
+		} else {
+			warnx("Couldn't delete mapping for %s: %s",
+			    ipaddr, strerror(saved_errno));
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Perform a SIOCLIFSETND ioctl using properties from the example structure.
+ */
+static int
+ndp_set(int fd, struct lifreq *lifrp, void *data)
+{
+	char ipaddr[INET6_ADDRSTRLEN];
+	const lif_nd_req_t *nd_attrs = data;
+
+	/* Copy the caller's template into the request; the address stays. */
+	(void) memcpy(lifrp->lifr_nd.lnr_hdw_addr, nd_attrs->lnr_hdw_addr,
+	    ND_MAX_HDW_LEN);
+	lifrp->lifr_nd.lnr_hdw_len = nd_attrs->lnr_hdw_len;
+	lifrp->lifr_nd.lnr_flags = nd_attrs->lnr_flags;
+
+	lifrp->lifr_nd.lnr_state_create = nd_attrs->lnr_state_create;
+	lifrp->lifr_nd.lnr_state_same_lla = nd_attrs->lnr_state_same_lla;
+	lifrp->lifr_nd.lnr_state_diff_lla = nd_attrs->lnr_state_diff_lla;
+
+	if (ioctl(fd, SIOCLIFSETND, lifrp) < 0) {
+		/* Save errno before ndp_lifr2ip() can overwrite it. */
+		int saved_errno = errno;
+
+		ndp_lifr2ip(lifrp, ipaddr, sizeof (ipaddr));
+		if (saved_errno == EPERM) {
+			warnx("Permission denied, "
+			    "could not set entry for %s", ipaddr);
+			return (-1);
+		} else {
+			warnx("Failed to set mapping for %s: %s",
+			    ipaddr, strerror(saved_errno));
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Given a host identifier, a link-layer address and possible options,
+ * add/update the NDP mappings.
+ */
+static int
+ndp_set_nce(char *host, char *lladdr, char *opts[], int optlen)
+{
+	lif_nd_req_t nd_attrs;
+	uchar_t *ea;
+	char *opt;
+	int i;
+	boolean_t temp = B_FALSE;
+	boolean_t any = B_FALSE;
+	boolean_t router = B_FALSE;
+
+	bzero(&nd_attrs, sizeof (lif_nd_req_t));
+
+	/* Parse the link-layer address; its length lands in lnr_hdw_len. */
+	ea = _link_aton(lladdr, &nd_attrs.lnr_hdw_len);
+
+	if (ea == NULL) {
+		warnx("Unable to parse link-layer address \"%s\"", lladdr);
+		return (-1);
+	}
+
+	if (nd_attrs.lnr_hdw_len > sizeof (nd_attrs.lnr_hdw_addr)) {
+		/* warnx() supplies the newline itself. */
+		warnx("The size of the link-layer address is "
+		    "too large to set");
+		free(ea);
+		return (-1);
+	}
+
+	(void) memcpy(nd_attrs.lnr_hdw_addr, ea, nd_attrs.lnr_hdw_len);
+
+	free(ea);
+
+	nd_attrs.lnr_state_create = ND_REACHABLE;
+	nd_attrs.lnr_state_same_lla = ND_UNCHANGED;
+	nd_attrs.lnr_state_diff_lla = ND_STALE;
+
+	/* Collect the keywords first; flags are derived from them below. */
+	for (i = 0; i < optlen; i++) {
+		opt = opts[i];
+		if (strcmp(opt, "temp") == 0) {
+			temp = B_TRUE;
+		} else if (strcmp(opt, "any") == 0) {
+			any = B_TRUE;
+		} else if (strcmp(opt, "router") == 0) {
+			router = B_TRUE;
+		} else if (strcmp(opt, "proxy") == 0) {
+			warnx("NDP proxying is currently not supported");
+			return (-1);
+		} else {
+			warnx("Unrecognized option \"%s\"", opt);
+			return (-1);
+		}
+	}
+
+	/* Entries are static unless "temp" was given. */
+	if (!temp) {
+		nd_attrs.lnr_flags |= NDF_STATIC;
+	}
+
+	if (any) {
+		nd_attrs.lnr_flags |= NDF_ANYCAST_ON;
+	} else {
+		nd_attrs.lnr_flags |= NDF_ANYCAST_OFF;
+	}
+
+	/* "router" must turn the router flag on, not off. */
+	if (router) {
+		nd_attrs.lnr_flags |= NDF_ISROUTER_ON;
+	} else {
+		nd_attrs.lnr_flags |= NDF_ISROUTER_OFF;
+	}
+
+	return (ndp_host_enumerate(host, ndp_set, &nd_attrs));
+}
+
+/*
+ * Read in a file and set the mappings from each line.
+ */
+static int
+ndp_set_file(char *filename)
+{
+	char *line = NULL, *lasts = NULL, *curr;
+	char *host, *lladdr;
+	char *opts[MAX_OPTS];
+	int optlen = 0, lineno = 0;
+	int read_errno = 0;
+	size_t cap = 0;
+	boolean_t failed_line = B_FALSE;
+	FILE *stream = fopen(filename, "r");
+
+	if (stream == NULL) {
+		ndp_fatal("Error while opening file %s: %s",
+		    filename, strerror(errno));
+	}
+
+	for (;;) {
+		/*
+		 * Capture errno from getline() itself. Setting an entry
+		 * below may fail and leave errno set, which must not be
+		 * reported later as a problem reading the file.
+		 */
+		errno = 0;
+		if (getline(&line, &cap, stream) == -1) {
+			read_errno = errno;
+			break;
+		}
+		lineno++;
+
+		/* Lines starting with '#' are comments. */
+		if (line[0] == '#')
+			continue;
+
+		host = strtok_r(line, WORDSEPS, &lasts);
+		if (host == NULL) {
+			warnx("Line %d incomplete, skipping: "
+			    "missing host identifier", lineno);
+			failed_line = B_TRUE;
+			continue;
+		}
+
+		lladdr = strtok_r(NULL, WORDSEPS, &lasts);
+		if (lladdr == NULL) {
+			warnx("Line %d incomplete, skipping: "
+			    "missing link-layer address", lineno);
+			failed_line = B_TRUE;
+			continue;
+		}
+
+		/* At most MAX_OPTS trailing keywords are collected. */
+		for (optlen = 0; optlen < MAX_OPTS; optlen++) {
+			curr = strtok_r(NULL, WORDSEPS, &lasts);
+			if (curr == NULL)
+				break;
+			opts[optlen] = curr;
+		}
+
+		if (ndp_set_nce(host, lladdr, opts, optlen) != 0) {
+			failed_line = B_TRUE;
+			continue;
+		}
+	}
+
+	free(line);
+
+	if (read_errno != 0 || ferror(stream)) {
+		ndp_fatal("Error while reading from file %s: %s", filename,
+		    strerror(read_errno));
+	}
+
+	if (fclose(stream) != 0) {
+		ndp_fatal("Error close file %s: %s", filename, strerror(errno));
+	}
+
+	return (failed_line ? -1 : 0);
+}
+
+/*
+ * Parse the command line, pick exactly one action, and carry it out.
+ * Returns 0 on success and 1 if the chosen action reported a failure.
+ */
+int
+main(int argc, char *argv[])
+{
+	char *flagarg = NULL, *lladdr = NULL;
+	char **opts = NULL;
+	char *endptr;
+	int c, argsleft, optlen = 0, err = 0;
+	long long period;
+	enum ndp_action action = NDP_A_DEFAULT;
+
+	setprogname(basename(argv[0]));
+
+	if (argc < 2) {
+		ndp_usage("No arguments given.");
+	}
+
+	while ((c = getopt(argc, argv, ":naA:d:f:i:s:")) != -1) {
+		switch (c) {
+		case 'n':
+			ndp_noresolve = B_TRUE;
+			break;
+		case 'i':
+			ndp_iface = optarg;
+			break;
+		case 's':
+			if (action != NDP_A_DEFAULT)
+				ndp_badflag(action);
+			action = NDP_A_SET_NCE;
+			flagarg = optarg;
+
+			if ((argc - optind) < 1) {
+				ndp_usage("Missing link-layer address after "
+				    "the node address, \"%s\"", flagarg);
+			}
+			lladdr = argv[optind++];
+
+			/*
+			 * Grab any following keywords up to the next flag
+			 */
+			opts = argv + optind;
+			while ((argc - optind) > 0) {
+				if (argv[optind][0] == '-')
+					ndp_usage("Encountered \"%s\" after "
+					    "flag parsing is done",
+					    argv[optind]);
+				optind++;
+				optlen++;
+			}
+			break;
+		case 'a':
+			if (action != NDP_A_DEFAULT)
+				ndp_badflag(action);
+			action = NDP_A_GET_ALL;
+			break;
+		case 'A':
+			if (action != NDP_A_DEFAULT)
+				ndp_badflag(action);
+			action = NDP_A_GET_FOREVER;
+			flagarg = optarg;
+			break;
+		case 'd':
+			if (action != NDP_A_DEFAULT)
+				ndp_badflag(action);
+			action = NDP_A_DELETE;
+			flagarg = optarg;
+			break;
+		case 'f':
+			if (action != NDP_A_DEFAULT)
+				ndp_badflag(action);
+			action = NDP_A_SET_FILE;
+			flagarg = optarg;
+			break;
+		case ':':
+			ndp_missingarg(optopt);
+			break;
+		case '?':
+			ndp_usage("Unrecognized flag \"-%c\"", optopt);
+			/*NOTREACHED*/
+			break;
+		default:
+			ndp_usage(NULL);
+		}
+	}
+
+	argsleft = argc - optind;
+	ndp_pid = getpid();
+
+	if (action != NDP_A_DEFAULT && argsleft != 0) {
+		ndp_usage("Extra arguments leftover after parsing flags");
+	}
+
+	switch (action) {
+	case NDP_A_DEFAULT:
+	case NDP_A_GET:
+		if (argsleft < 1) {
+			ndp_usage("Missing hostname argument");
+		} else if (argsleft > 1) {
+			ndp_usage("Multiple arguments given without any flags");
+		}
+		err = ndp_host_enumerate(argv[optind], ndp_get, NULL);
+		break;
+	case NDP_A_GET_ALL:
+		ndp_get_all();
+		/*NOTREACHED*/
+		break;
+	case NDP_A_GET_FOREVER:
+		errno = 0;
+		period = strtoll(flagarg, &endptr, 10);
+		/*
+		 * Zero is rejected along with negative and non-numeric
+		 * values: a zero it_value would disarm the timer rather than
+		 * fire it, so the display would never repeat. An empty
+		 * argument also parses as zero.
+		 */
+		if ((endptr[0] != '\0') || (period <= 0)) {
+			ndp_usage("Given period should be a positive integer,"
+			    " not \"%s\"", flagarg);
+		}
+		if (period > 86400) {
+			ndp_usage("Given period should be shorter than a day;"
+			    " given \"%s\" seconds", flagarg);
+		}
+		ndp_run_periodically(period, ndp_get_all);
+		/*NOTREACHED*/
+		break;
+	case NDP_A_DELETE:
+		err = ndp_host_enumerate(flagarg, ndp_delete, NULL);
+		break;
+	case NDP_A_SET_NCE:
+		err = ndp_set_nce(flagarg, lladdr, opts, optlen);
+		break;
+	case NDP_A_SET_FILE:
+		err = ndp_set_file(flagarg);
+		break;
+	}
+
+	return (err == 0 ? 0 : 1);
+}
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile index 8dbf7331b1..c5f7db2cbc 100644 --- a/usr/src/man/man1m/Makefile +++ b/usr/src/man/man1m/Makefile @@ -335,6 +335,7 @@ _MANFILES= 6to4relay.1m \ ncheck.1m \ ncheck_ufs.1m \ ndd.1m \ + ndp.1m \ ndmpadm.1m \ ndmpd.1m \ ndmpstat.1m \ diff --git a/usr/src/man/man1m/arp.1m b/usr/src/man/man1m/arp.1m index 11fa0164f0..ca4e20bdc1 100644 --- a/usr/src/man/man1m/arp.1m +++ b/usr/src/man/man1m/arp.1m @@ -4,7 +4,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH ARP 1M "Jan 5, 2009" +.TH ARP 1M "Sep 02, 2015" .SH NAME arp \- address resolution display and control .SH SYNOPSIS @@ -35,7 +35,6 @@ arp \- address resolution display and control .fi .SH DESCRIPTION -.sp .LP The \fBarp\fR program displays and modifies the Internet-to-MAC address translation tables used by the address resolution protocol (see \fBarp\fR(7P)). @@ -48,9 +47,8 @@ dot notation. .LP Options that modify the ARP translation tables (\fB-d\fR, \fB-f\fR, and \fB-s\fR) can be used only when the invoked command is granted the -\fBsys_net_config\fR privilege. See \fBprivileges\fR(5). +\fBPRIV_SYS_NET_CONFIG\fR privilege. See \fBprivileges\fR(5). .SH OPTIONS -.sp .ne 2 .na \fB\fB-a\fR\fR @@ -244,7 +242,6 @@ kernel and thus cannot be changed. .RE .SH SEE ALSO -.sp .LP -\fBifconfig\fR(1M), \fBnetstat\fR(1M), \fBattributes\fR(5), +\fBifconfig\fR(1M), \fBnetstat\fR(1M), \fBndp\fR(1M), \fBattributes\fR(5), \fBprivileges\fR(5), \fBarp\fR(7P) diff --git a/usr/src/man/man1m/in.ndpd.1m b/usr/src/man/man1m/in.ndpd.1m index 5693425a89..2ce2643405 100644 --- a/usr/src/man/man1m/in.ndpd.1m +++ b/usr/src/man/man1m/in.ndpd.1m @@ -3,7 +3,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH IN.NDPD 1M "Jan 10, 2007" +.TH IN.NDPD 1M "Sep 02, 2015" .SH NAME in.ndpd \- daemon for IPv6 autoconfiguration .SH SYNOPSIS @@ -13,7 +13,6 @@ in.ndpd \- daemon for IPv6 autoconfiguration .fi .SH DESCRIPTION -.sp .LP \fBin.ndpd\fR provides both the host and router autoconfiguration components of Neighbor Discovery for IPv6 and Stateless and Stateful Address @@ -151,7 +150,6 @@ can only be used for communication with nodes on the same link. .LP For information on how to enable IPv6 address autoconfiguration, see \fI\fR .SH OPTIONS -.sp .LP Supported options and equivalent SMF service properties are listed below. SMF service properties are set using a command of the form: @@ -223,7 +221,6 @@ controlling terminal. As such, this option cannot be run under the SMF. .RE .SH FILES -.sp .ne 2 .na \fB\fB/etc/inet/ndpd.conf\fR\fR @@ -235,14 +232,10 @@ the hosts. .RE .SH SEE ALSO -.sp .LP -\fBdhcpagent\fR(1M), \fBifconfig\fR(1M), \fBrouteadm\fR(1M), \fBsvcadm\fR(1M), -\fBndpd.conf\fR(4), \fBattributes\fR(5), \fBicmp6\fR(7P),\fBip6\fR(7P), -\fBattributes\fR(5), \fBsmf\fR(5) -.sp -.LP -\fI\fR +\fBdhcpagent\fR(1M), \fBifconfig\fR(1M), \fBndp\fR(1M), \fBrouteadm\fR(1M), +\fBsvcadm\fR(1M), \fBndpd.conf\fR(4), \fBattributes\fR(5), \fBsmf\fR(5), +\fBicmp6\fR(7P), \fBip6\fR(7P), \fBndp\fR(7P) .sp .LP Narten, T., Nordmark, E., Simpson, W.\fIRFC 2461, Neighbor Discovery for IP @@ -256,12 +249,10 @@ Autoconfiguration\fR. The Internet Society. December 1998. Narten, T., and Draves, R. RFC 3041, Privacy Extensions for Stateless Address Autoconfiguration in IPv6. The Internet Society. January 2001. .SH DIAGNOSTICS -.sp .LP Receipt of a \fBSIGHUP\fR signal will make \fBin.ndpd\fR restart and reread \fB/etc/inet/ndpd.conf\fR. 
.SH NOTES -.sp .LP The \fBin.ndpd\fR daemon service is managed by the service management facility, \fBsmf\fR(5), under the service identifier: diff --git a/usr/src/man/man1m/ndp.1m b/usr/src/man/man1m/ndp.1m new file mode 100644 index 0000000000..6301181f5f --- /dev/null +++ b/usr/src/man/man1m/ndp.1m @@ -0,0 +1,215 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright (c) 2015, Joyent, Inc. All rights reserved. +.\" +.Dd Sep 02, 2015 +.Dt NDP 1M +.Os +.Sh NAME +.Nm ndp +.Nd manipulate IPv6 Neighbor Discovery Protocol mappings +.Sh SYNOPSIS +.Nm +.Op Fl n +.Fl a +.Nm +.Op Fl n +.Fl A Ar period +.Nm +.Op Fl n +.Op Fl i Ar iface +.Ar hostname +.Nm +.Op Fl n +.Op Fl i Ar iface +.Fl d Ar hostname +.Nm +.Op Fl n +.Op Fl i Ar iface +.Fl f Ar filename +.Nm +.Op Fl n +.Op Fl i Ar iface +.Fl s Ar hostname lladdr +.Op Cm temp +.Op Cm any +.Op Cm router +.Sh DESCRIPTION +The +.Nm +command displays and modifies the IPv6-to-MAC address translation +tables used by the Neighbor Discovery Protocol +.Po see Xr ndp 7P Pc . +.Pp +Given just a hostname, +.Nm +will display the current entry. Note that when getting, setting or deleting, +if a hostname refers to multiple IPv6 addresses, the operation will apply to +all of them. +.Pp +The NDP translation tables can be modified with +.Fl d , +.Fl s , +or +.Fl f . +These flags can only be used when +.Nm +is given the +.Sy PRIV_SYS_NET_CONFIG +privilege. See +.Xr privileges 5 +for further information. 
+.Pp +Note that NDP entries for IPMP (IP Network Multipathing) data and +test addresses are managed by the kernel and therefore cannot be +modified or deleted. +.Sh OPTIONS +.Bl -tag -width 6m +.It Fl a +Display all NDP entries. Entries can be one of several types: +.Bl -tag -offset indent -width 7n +.It Sy dynamic +This is a normal NDP mapping and will eventually expire. This is the most +common type of mapping for non-local addresses that will be displayed. +.It Sy local +The IPv6 address is local to the machine. +.It Sy other +The mapping is a multicast or broadcast address, or the system is acting +as proxy for the address. +.It Sy static +The mapping is static and will not be removed from the machine over time. +.El +.Pp +Entries also exist in one of the following states: +.Bl -tag -offset indent -width 12m +.It Sy INCOMPLETE +Address resolution is in progress +.It Sy REACHABLE +This address has recently been reachable +.It Sy STALE +This address may be unreachable +.It Sy DELAY +Waiting to send out reachability probes +.It Sy PROBE +Sending out probes for the address +.It Sy UNREACHABLE +The address is unreachable, and will be deleted +.It Sy UNKNOWN +The state of the entry is unknown +.El +.Pp +Using the +.Fl a +flag is equivalent to: +.sp +.Dl # netstat -p -f inet6 +.sp +If +.Fl n +is passed to +.Nm , +then it will be passed along to +.Nm netstat . +.It Fl A +Display all NDP entries every +.Ar period +seconds. +.It Fl d +Delete NDP mappings for the host called +.Ar hostname . +.It Fl f +Read in the lines from +.Ar filename +and use each one to set a mapping. The syntax of each line is the +same as the arguments to +.Fl s . +Lines beginning with `#' will be ignored. +.It Fl i +By default, +.Nm +will use the routing table to determine the appropriate interface to place the +mapping on. This flag allows forcing a specific interface +.Ar iface . +This argument will be ignored when using the +.Fl a +or +.Fl A +flags. 
+.It Fl n +Disable the default translation of numeric IP addresses to host names when +printing. +.It Fl s +Add or update an NDP mapping, and set the desired properties for the entry. The +list of flags should be the full set of flags desired on the entry, i.e., not +listing a flag will remove it if it already exists. The following flags can be +used: +.Bl -tag -offset indent -width Ds +.It Cm temp +The entry should be temporary and eventually expire like a normal NDP +entry. By default, all entries created with the +.Nm +command are static, and will not be deleted. To make a static entry temporary, +it should be deleted and recreated with the +.Cm temp +flag. +.It Cm any +The address should be treated like an anycast address. This will prevent the +system from sending Neighbor Advertisements with the Override flag. +.It Cm router +The address should be treated like a router address. This causes the system to +send Neighbor Advertisements with the Router flag. +.El +.El +.Sh EXAMPLES +.Ss Example 1: Setting a mapping +To create a temporary mapping for an address: +.Bd -literal -offset indent +# ndp -s fd00::1 90:b8:d0:4b:c7:3b temp +.Ed +.Ss Example 2: Getting a mapping +An entry can be fetched via its IP address: +.Bd -literal -offset indent +# ndp fd00::1 +fd00::1 (fd00::1) at 90:b8:d0:4b:c7:3b temp +.Ed +.sp +Or via a name that resolves to that address: +.Bd -literal -offset indent +# echo fd00::1 epicac >> /etc/hosts +# echo fd00::2 epicac >> /etc/hosts +# ndp epicac +fd00::1 (epicac) at 90:b8:d0:4b:c7:3b temp +fd00::2 (epicac) -- no entry +.Ed +.Ss Example 3: Deleting a mapping +An entry can be removed with the +.Fl d +flag: +.Bd -literal -offset indent +# ndp -d fd00::1 +.Ed +.sp +Attempting to remove it again will print an error message: +.Bd -literal -offset indent +# ndp -d fd00::1 +ndp: No entry for fd00::1 +.Ed +.Sh STABILITY +The command line options for this command are stable, but the output format +and diagnostic messages are not.
+.Sh SEE ALSO +.Xr arp 1M , +.Xr ifconfig 1M , +.Xr netstat 1M , +.Xr attributes 5 , +.Xr privileges 5 , +.Xr ndp 7P diff --git a/usr/src/man/man1m/netstat.1m b/usr/src/man/man1m/netstat.1m index f9d02d18e0..b1f6d0e22f 100644 --- a/usr/src/man/man1m/netstat.1m +++ b/usr/src/man/man1m/netstat.1m @@ -1028,7 +1028,7 @@ future lease. .SH SEE ALSO .LP \fBarp\fR(1M), \fBdhcpinfo\fR(1), \fBdhcpagent\fR(1M), \fBifconfig\fR(1M), -\fBiostat\fR(1M), \fBkstat\fR(1M), \fBmibiisa\fR(1M), \fBsavecore\fR(1M), +\fBiostat\fR(1M), \fBkstat\fR(1M), \fBmibiisa\fR(1M), \fBndp\fR(1M), \fBsavecore\fR(1M), \fBvmstat\fR(1M), \fBhosts\fR(4), \fBinet_type\fR(4), \fBnetworks\fR(4), \fBprotocols\fR(4), \fBservices\fR(4), \fBattributes\fR(5), \fBdhcp\fR(5), \fBkstat\fR(7D), \fBinet\fR(7P), \fBinet6\fR(7P) diff --git a/usr/src/man/man7p/Makefile b/usr/src/man/man7p/Makefile index bbcf5ecf1d..13cb58770d 100644 --- a/usr/src/man/man7p/Makefile +++ b/usr/src/man/man7p/Makefile @@ -30,6 +30,7 @@ MANFILES= arp.7p \ ipsec.7p \ ipsecah.7p \ ipsecesp.7p \ + ndp.7p \ pf_key.7p \ rarp.7p \ route.7p \ @@ -45,6 +46,7 @@ MANLINKS= AH.7p \ ESP.7p \ ICMP.7p \ IP.7p \ + NDP.7p \ RARP.7p \ SCTP.7p \ TCP.7p \ @@ -63,6 +65,8 @@ AH.7p := LINKSRC = ipsecah.7p ESP.7p := LINKSRC = ipsecesp.7p +NDP.7p := LINKSRC = ndp.7p + RARP.7p := LINKSRC = rarp.7p SCTP.7p := LINKSRC = sctp.7p diff --git a/usr/src/man/man7p/arp.7p b/usr/src/man/man7p/arp.7p index 5489ce3aca..d7055fcb0a 100644 --- a/usr/src/man/man7p/arp.7p +++ b/usr/src/man/man7p/arp.7p @@ -4,40 +4,25 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. 
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH ARP 7P "Feb 5, 2009" +.TH ARP 7P "Sep 02, 2015" .SH NAME arp, ARP \- Address Resolution Protocol .SH SYNOPSIS .LP .nf \fB#include \fR -.fi - -.LP -.nf \fB#include \fR -.fi - -.LP -.nf \fB#include \fR -.fi - -.LP -.nf \fB#include \fR .fi - .LP .nf -\fBs = socket(AF_INET, SOCK_DGRAM, 0);\fR +\fBs = socket(PF_INET, SOCK_DGRAM, 0);\fR .fi - .LP .nf \fBd = open ("/dev/arp", \fIoflag\fR);\fR .fi - .SH DESCRIPTION .LP ARP is a protocol used to map dynamically between Internet Protocol (IP) and @@ -64,8 +49,8 @@ interface. .LP To facilitate communications with systems that do not use ARP, ioctl() requests are provided to enter and delete entries in the IP-to-link -address tables. Ioctls that change the table contents require sys_net_config -privilege. See \fBprivileges\fR(5). +address tables. Ioctls that change the table contents require the +\fBPRIV_SYS_NET_CONFIG\fR privilege. See \fBprivileges\fR(5). .sp .in +2 .nf @@ -109,10 +94,8 @@ Each \fBioctl()\fR request takes the same structure as an argument. entry, and \fBSIOCD[X]ARP\fR deletes an \fBARP\fR entry. These \fBioctl()\fR requests may be applied to any Internet family socket descriptor\fIs\fR, or to a descriptor for the \fBARP\fR device. Note that \fBSIOCS[X]ARP\fR and -\fBSIOCD[X]ARP\fR require a privileged user, while \fBSIOCG[X]ARP\fR -.sp -.LP -does not. +\fBSIOCD[X]ARP\fR require the user to have the \fBPRIV_SYS_NET_CONFIG\fR +privilege, while \fBSIOCG[X]ARP\fR does not. .sp .LP The \fBarpreq\fR structure contains @@ -155,8 +138,8 @@ struct xarpreq { .sp .LP The address family for the [x]arp_pa sockaddr must be \fBAF_INET\fR. 
The -\fBATF_COM\fR flag bits ([x]arp_flags) cannot be altered. -\fBATF_USETRAILERS\fR is not implemented on Solaris and is retained for +\fBATF_COM\fR flag bits ([x]arp_flags) cannot be altered. \fBATF_USETRAILERS\fR +is not implemented by the operating system and is retained for compatibility only. \fBATF_PERM\fR makes the entry permanent (disables aging) if the \fBioctl()\fR request succeeds. \fBATF_PUBL\fR specifies that the system should respond to ARP requests for the indicated protocol address coming from @@ -206,11 +189,11 @@ interface). On return from the kernel on a \fBSIOCGXARP\fR ioctl, the kernel fills in the name of the interface (excluding terminating NULL) and its hardware address, one after another, in the \fBsdl_data/LLADDR\fR area; if the two are larger -than can be held in the 244 byte \fBsdl_data[\fR] area, an \fBENOSPC\fR error +than can be held in the 244 byte \fBsdl_data[\fR] area, an \fBEINVAL\fR error is returned. Assuming it fits, the kernel will also set \fBsdl_alen\fR with the -length of hardware address, \fBsdl_nlen\fR with the length of name of the +length of the hardware address, \fBsdl_nlen\fR with the length of the name of the interface (excluding terminating NULL), \fBsdl_type\fR with an IFT_* value to -indicate the type of the media, \fBsdl_slen\fR with 0, sdl_family with +indicate the type of the media, \fBsdl_slen\fR with 0, \fBsdl_family\fR with \fBAF_LINK\fR and \fBsdl_index\fR (which if not 0) with system given index for the interface. The information returned is very similar to that returned via routing sockets on an \fBRTM_IFINFO\fR message. @@ -239,7 +222,7 @@ corresponding IPMP IP interface. .TP .ie t \(bu .el o -roxy ARP mappings for an IPMP group are automatically managed by the kernel. +Proxy ARP mappings for an IPMP group are automatically managed by the kernel. 
Specifically, if the hardware address in a \fBSIOCSARP\fR or \fBSIOCSXARP\fR request matches the hardware address of an IP interface in an IPMP group and the IP address is not local to the system, the kernel regards this as a IPMP @@ -247,18 +230,6 @@ Proxy ARP entry. This IPMP Proxy ARP entry will have its hardware address automatically adjusted in order to keep the IP address reachable (provided the IPMP group has not entirely failed). .RE -.br -.in +2 -\(em -.in -2 -.br -.in +2 -\(em -.in -2 -.br -.in +2 -\(emP -.in -2 .sp .LP \fBARP\fR performs duplicate address detection for local addresses. When a @@ -285,7 +256,7 @@ passing it the value \fBNHF_ARP\fR. Through this interface, two packet events are supported: .sp .LP -Physical in - ARP packets received via a network inter face +Physical in - ARP packets received via a network interface .sp .LP Physical out - ARP packets to be sent out via a network interface @@ -321,7 +292,7 @@ Identifier indicating the outbound interface for packets received with the .ad .sp .6 .RS 4n -Pointer to the start of the ARP header (not the ethernet header). +Pointer to the start of the ARP header (not the Ethernet header). .RE .sp diff --git a/usr/src/man/man7p/if_tcp.7p b/usr/src/man/man7p/if_tcp.7p index 6f811bd12c..acba34c8ae 100644 --- a/usr/src/man/man7p/if_tcp.7p +++ b/usr/src/man/man7p/if_tcp.7p @@ -4,7 +4,7 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with .\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH IF_TCP 7P "Sep 14, 2009" +.TH IF_TCP 7P "Sep 02, 2015" .SH NAME if_tcp, if \- general properties of Internet Protocol network interfaces .SH DESCRIPTION @@ -60,7 +60,7 @@ struct lifreq { int lifru_metric; uint_t lifru_mtu; int lif_muxid[2]; /* mux id's for arp & ip */ - struct lif_nd_req lifru_nd_req; + struct lif_nd_req lifru_nd_req; /* SIOCLIF*ND */ struct lif_ifinfo_req lifru_ifinfo_req; zoneid_t lifru_zone; /* SIOC[GS]LIFZONE */ } lifr_lifru; @@ -869,4 +869,4 @@ specified by \fBlifr_ppa\fR plumbed. .LP \fBifconfig\fR(1M), \fBin.routed\fR(1M), \fBioctl\fR(2), \fBsockaddr\fR(3SOCKET), \fBstreamio\fR(7I), \fBarp\fR(7P), \fBdlpi\fR(7P), -\fBip\fR(7P), \fBip6\fR(7P) +\fBip\fR(7P), \fBip6\fR(7P), \fBndp\fR(7P) diff --git a/usr/src/man/man7p/ndp.7p b/usr/src/man/man7p/ndp.7p new file mode 100644 index 0000000000..ef57b1b3d6 --- /dev/null +++ b/usr/src/man/man7p/ndp.7p @@ -0,0 +1,345 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright (c) 2015, Joyent, Inc. All rights reserved. 
+.\" +.Dd Sep 02, 2015 +.Dt NDP 7P +.Os +.Sh NAME +.Nm ndp , +.Nm NDP +.Nd Neighbor Discovery Protocol +.Sh SYNOPSIS +.In sys/socket.h +.In sys/sockio.h +.In netinet/in.h +.In net/if.h +.Bd -literal +s = socket(PF_INET6, SOCK_DGRAM, 0); + +struct lifreq lifr; +ioctl(s, SIOCLIFGETND, &lifr); +ioctl(s, SIOCLIFSETND, &lifr); +ioctl(s, SIOCLIFDELND, &lifr); +.Ed +.Sh DESCRIPTION +The Neighbor Discovery Protocol (NDP) is a protocol used to distribute and request +information about neighboring IPv6 systems on the local network, much like +.Xr ARP 7P +for IPv4. NDP is also responsible for spreading information about the network +gateway and how hosts should configure themselves +.Pq see Xr in.ndpd 1M for more on how this happens . +.Sh APPLICATION PROGRAMMING INTERFACE +The operating system provides several ioctls to help manipulate the mappings +obtained through NDP. They are +.Sy SIOCLIFGETND , +.Sy SIOCLIFSETND , +and +.Sy SIOCLIFDELND , +for getting, setting, and deleting respectively. Each of these ioctls takes a +.Vt struct lifreq +.Pq see Xr if 7P for details , +where the +.Fa lifr_lifru +field is of type +.Vt struct lif_nd_req : +.Bd -literal -offset 2m +typedef struct lif_nd_req { + struct sockaddr_storage lnr_addr; + uint8_t lnr_state_create; + uint8_t lnr_state_same_lla; + uint8_t lnr_state_diff_lla; + int lnr_hdw_len; + int lnr_flags; + int lnr_pad0; + char lnr_hdw_addr[ND_MAX_HDW_LEN]; +} lif_nd_req_t; +.Ed +.Pp +The +.Fa lnr_addr +field should be filled in with an IPv6 address +.Pq see Xr sockaddr_in6 3SOCKET , +and the +.Fa lnr_hdw_addr +is the link-layer address of length +.Fa lnr_hdw_len . 
+.Pp +State flags for +.Fa lnr_state_create , +.Fa lnr_state_same_lla , +and +.Fa lnr_state_diff_lla +can be set to one of the following values: +.Bl -tag -offset indent -width 16m +.It Sy ND_UNCHANGED +For ioctls that don't modify state +.It Sy ND_INCOMPLETE +Address resolution is currently in progress +.It Sy ND_REACHABLE +The link-layer address has recently been reachable +.It Sy ND_STALE +The link-layer address may be unreachable, and the system shouldn't do anything +.It Sy ND_DELAY +This entry hasn't yet started sending Neighbor Solicitations +.It Sy ND_PROBE +The operating system is currently sending out Neighbor Solicitations for the address +.It Sy ND_UNREACHABLE +The link-layer address is unreachable, and this entry is going to be deleted. +.El +.sp +When creating a new entry, the only valid values for +.Fa lnr_state_create +are +.Sy ND_REACHABLE +and +.Sy ND_STALE . +Any other value will return +.Sy EINVAL . +The +.Fa lnr_state_same_lla +and +.Fa lnr_state_diff_lla +fields are reserved for future use and can be safely set to +.Sy ND_UNCHANGED +and +.Sy ND_STALE +respectively. +.Pp +Flags that can be placed in +.Fa lnr_flags +are: +.Bl -tag -offset indent -width 16m +.It Sy NDF_ISROUTER_ON +Mark this entry as being a router. This will cause Neighbor Advertisements for +this address to be sent with the R-bit (Router). +.It Sy NDF_ISROUTER_OFF +If this entry was flagged as being a router, remove the flag. +.It Sy NDF_ANYCAST_ON +Mark this entry as being for an anycast address. This prevents sending Neighbor +Advertisements with the O-bit (Override). +.It Sy NDF_ANYCAST_OFF +If this entry was flagged as an anycast address, remove the flag. +.It Sy NDF_STATIC +Prevent this entry from being deleted by the system. +.El +.sp +When using +.Sy SIOCLIFGETND , +these flags represent the current state of the corresponding Neighbor Cache +Entry. When using +.Sy SIOCLIFSETND , +these flags represent what changes should be applied to the underlying entry. 
+.Pp +The only fields that need to be set for the +.Sy SIOCLIFGETND +or +.Sy SIOCLIFDELND +ioctls are +.Fa lifr_name +and +.Fa lnr_addr . +All other fields should be zeroed out. After successfully getting an entry, the +other fields will be filled in. When using +.Sy SIOCLIFSETND , +all fields should be set to an appropriate value, as described above, with the +exception of +.Fa lnr_pad0 , +which is unused and only exists for padding purposes. +.Pp +After performing the ioctl, the following errors may be returned through the +global +.Sy errno +variable: +.Bl -tag -offset indent -width 16m +.It Sy EAFNOSUPPORT +A non-IPv6 socket was used to perform the ioctl. +.It Sy EINVAL +The request contents were bad. This could be because conflicting flags were +used, the specified interface wasn't logical unit zero, or another reason. +.It Sy ENOMEM +The system ran out of memory for internal data structures. +.It Sy ENXIO +The specified interface does not exist. +.It Sy EPERM +The caller does not have permission to modify the Neighbor Cache Entries +associated with this interface. They may be lacking the +.Sy PRIV_SYS_NET_CONFIG +privilege +.Po see Xr privileges 5 Pc , +or the interface is managed by IPMP (IP Network Multipathing). +.It Sy ESRCH +There is no entry matching the specified address. +.El +.Sh EXAMPLES +The following examples demonstrate how to get and set NDP mappings using the +provided ioctls. They can be compiled by using a C compiler and linking against +the sockets library. +.Ss Example 1: Getting a mapping +.Bd -literal -offset indent +$ gcc -Wall -lsocket -o get get.c +$ cat get.c +/* + * Example of getting a mapping for a node name. 
+ */ +#include <strings.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <unistd.h> +#include <netdb.h> +#include <net/if.h> + +int get(char *host) { + struct lifreq lifr; + struct addrinfo hints, *serverinfo, *p; + int err, s; + + bzero(&hints, sizeof (struct addrinfo)); + hints.ai_family = PF_INET6; + hints.ai_protocol = IPPROTO_IPV6; + + if ((err = getaddrinfo(host, NULL, &hints, &serverinfo)) != 0) { + (void) fprintf(stderr, "Unable to lookup %s: %s\\n", host, + gai_strerror(err)); + return (1); + } + + s = socket(AF_INET6, SOCK_DGRAM, 0); + if (s < 0) { + perror("Failed to open IPv6 socket"); + return (1); + } + + for (p = serverinfo; p != NULL; p = p->ai_next) { + /* Zero out structure */ + bzero(&lifr, sizeof (struct lifreq)); + (void) strlcpy(lifr.lifr_name, "net0", + sizeof (lifr.lifr_name)); + (void) memcpy(&lifr.lifr_nd.lnr_addr, p->ai_addr, + sizeof (struct sockaddr_storage)); + + /* Get mapping */ + if (ioctl(s, SIOCLIFGETND, &lifr) < 0) { + perror("Unable to get NDP mapping"); + continue; + } + + /* + * lifr.lifr_nd.lnr_hdw_addr now contains the MAC address, + * and can be used as desired. + */ + } + + /* + * Clean up linked list. + */ + freeaddrinfo(serverinfo); + return (0); +} + +int main(int argc, char *argv[]) { + if (argc < 2) + exit(1); + return (get(argv[1])); +} +.Ed +.sp +Deleting a mapping would work similarly, except that instead of using +.Sy SIOCLIFGETND , +you would instead use the +.Sy SIOCLIFDELND +ioctl. +.Ss Example 2: Adding a mapping +.Bd -literal -offset indent +$ gcc -Wall -lsocket -o set set.c +$ cat set.c +/* + * Example of setting a mapping to an all-zero Ethernet address.
+ */ +#include <strings.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <unistd.h> +#include <netdb.h> +#include <net/if.h> + +int set(char *host) { + struct lifreq lifr; + struct addrinfo hints, *serverinfo, *p; + int err, s; + + bzero(&hints, sizeof (struct addrinfo)); + hints.ai_family = PF_INET6; + hints.ai_protocol = IPPROTO_IPV6; + + if ((err = getaddrinfo(host, NULL, &hints, &serverinfo)) != 0) { + (void) fprintf(stderr, "Unable to lookup %s: %s\\n", host, + gai_strerror(err)); + return (1); + } + + s = socket(AF_INET6, SOCK_DGRAM, 0); + if (s < 0) { + perror("Failed to open IPv6 socket"); + return (1); + } + + for (p = serverinfo; p != NULL; p = p->ai_next) { + /* Zero out structure */ + bzero(&lifr, sizeof (struct lifreq)); + (void) strlcpy(lifr.lifr_name, "net0", + sizeof (lifr.lifr_name)); + (void) memcpy(&lifr.lifr_nd.lnr_addr, p->ai_addr, + sizeof (struct sockaddr_storage)); + + lifr.lifr_nd.lnr_state_create = ND_REACHABLE; + lifr.lifr_nd.lnr_flags = NDF_STATIC; + + /* Set mapping */ + if (ioctl(s, SIOCLIFSETND, &lifr) < 0) { + perror("Unable to set NDP mapping"); + continue; + } + } + + /* + * Clean up linked list. + */ + freeaddrinfo(serverinfo); + return (0); +} + +int main(int argc, char *argv[]) { + if (argc < 2) + exit(1); + return (set(argv[1])); +} +.Ed +.Sh SEE ALSO +.Xr in.ndpd 1M , +.Xr ndp 1M , +.Xr ifconfig 1M , +.Xr sockaddr_in6 3SOCKET , +.Xr privileges 5 +.Rs +.%A Narten, T. +.%A Nordmark, E. +.%A Simpson, W. +.%A Soliman, H.
+.%R Neighbor Discovery for IP version 6 +.%T RFC 4861 +.%D September 2007 +.Re diff --git a/usr/src/man/man7p/route.7p b/usr/src/man/man7p/route.7p index ba193fef11..bbd74fd740 100644 --- a/usr/src/man/man7p/route.7p +++ b/usr/src/man/man7p/route.7p @@ -7,7 +7,7 @@ .\" OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER .\" IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" Portions Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. -.TH ROUTE 7P "Feb 5, 2009" +.TH ROUTE 7P "Sep 02, 2015" .SH NAME route \- kernel packet forwarding database .SH SYNOPSIS @@ -252,8 +252,8 @@ struct rt_msghdr { int rtm_use; /* from rtentry */ uint_t rtm_inits; /* which values we are initializing */ -struct rt_metrics rtm_rmx; /* metrics themselves */ - }; + struct rt_metrics rtm_rmx; /* metrics themselves */ +}; .fi .in -2 @@ -274,7 +274,7 @@ struct rt_metrics { uint32_t rmx_rtt; /* estimated round trip time */ uint32_t rmx_rttvar; /* estimated rtt variance */ uint32_t rmx_pksent; /* packets sent using this route */ - }; +}; /* Flags include the values */ diff --git a/usr/src/pkg/manifests/SUNWcs.man1m.inc b/usr/src/pkg/manifests/SUNWcs.man1m.inc index 6437bb2bf9..c8d0a1c3fb 100644 --- a/usr/src/pkg/manifests/SUNWcs.man1m.inc +++ b/usr/src/pkg/manifests/SUNWcs.man1m.inc @@ -166,6 +166,7 @@ file path=usr/share/man/man1m/mvdir.1m file path=usr/share/man/man1m/ncheck.1m file path=usr/share/man/man1m/ncheck_ufs.1m file path=usr/share/man/man1m/ndd.1m +file path=usr/share/man/man1m/ndp.1m file path=usr/share/man/man1m/netstat.1m file path=usr/share/man/man1m/netstrategy.1m file 
path=usr/share/man/man1m/newfs.1m diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 908111a784..85285ae7cd 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -1348,6 +1348,7 @@ file path=usr/sbin/mountall group=sys mode=0555 file path=usr/sbin/msgid mode=0555 file path=usr/sbin/mvdir mode=0555 file path=usr/sbin/ndd mode=0555 +file path=usr/sbin/ndp mode=0555 file path=usr/sbin/nlsadmin group=adm mode=0755 file path=usr/sbin/nscd mode=0555 file path=usr/sbin/nwamadm mode=0555 diff --git a/usr/src/pkg/manifests/system-kernel.man7p.inc b/usr/src/pkg/manifests/system-kernel.man7p.inc index 5d9471375d..77260928e8 100644 --- a/usr/src/pkg/manifests/system-kernel.man7p.inc +++ b/usr/src/pkg/manifests/system-kernel.man7p.inc @@ -26,6 +26,7 @@ file path=usr/share/man/man7p/ip6.7p file path=usr/share/man/man7p/ipsec.7p file path=usr/share/man/man7p/ipsecah.7p file path=usr/share/man/man7p/ipsecesp.7p +file path=usr/share/man/man7p/ndp.7p file path=usr/share/man/man7p/pf_key.7p file path=usr/share/man/man7p/route.7p file path=usr/share/man/man7p/routing.7p @@ -37,6 +38,7 @@ link path=usr/share/man/man7p/ARP.7p target=arp.7p link path=usr/share/man/man7p/ESP.7p target=ipsecesp.7p link path=usr/share/man/man7p/ICMP.7p target=icmp.7p link path=usr/share/man/man7p/IP.7p target=ip.7p +link path=usr/share/man/man7p/NDP.7p target=ndp.7p link path=usr/share/man/man7p/SCTP.7p target=sctp.7p link path=usr/share/man/man7p/TCP.7p target=tcp.7p link path=usr/share/man/man7p/UDP.7p target=udp.7p diff --git a/usr/src/uts/common/inet/ip/ip_ndp.c b/usr/src/uts/common/inet/ip/ip_ndp.c index c6dee0247a..2cb91f8c41 100644 --- a/usr/src/uts/common/inet/ip/ip_ndp.c +++ b/usr/src/uts/common/inet/ip/ip_ndp.c @@ -1097,6 +1097,8 @@ ndp_query(ill_t *ill, struct lif_nd_req *lnr) lnr->lnr_flags = NDF_ISROUTER_ON; if (ncec->ncec_flags & NCE_F_ANYCAST) lnr->lnr_flags |= NDF_ANYCAST_ON; + if (ncec->ncec_flags & NCE_F_STATIC) + 
lnr->lnr_flags |= NDF_STATIC; ncec_refrele(ncec); return (0); } diff --git a/usr/src/uts/common/net/if.h b/usr/src/uts/common/net/if.h index 81f34c5659..b184fcdda9 100644 --- a/usr/src/uts/common/net/if.h +++ b/usr/src/uts/common/net/if.h @@ -279,7 +279,7 @@ struct ifaddr { * The entry already exists and the link-layer address is the same * The entry already exists and the link-layer address differs * - * Use ND_UNCHANGED and ND_ISROUTER_UNCHANGED to not change any state. + * Use ND_UNCHANGED to not change any state. */ #define ND_MAX_HDW_LEN 64 typedef struct lif_nd_req { @@ -380,7 +380,7 @@ struct lifreq { int lifru_metric; uint_t lifru_mtu; int lif_muxid[2]; /* mux id's for arp and ip */ - struct lif_nd_req lifru_nd_req; + struct lif_nd_req lifru_nd_req; /* SIOCLIF*ND */ struct lif_ifinfo_req lifru_ifinfo_req; char lifru_groupname[LIFGRNAMSIZ]; /* SIOC[GS]LIFGROUPNAME */ char lifru_binding[LIFNAMSIZ]; /* SIOCGLIFBINDING */ -- cgit v1.2.3 From 7509ca605713ac7f244b0e812b1712dd25f04da1 Mon Sep 17 00:00:00 2001 From: Josef 'Jeff' Sipek Date: Fri, 16 Oct 2015 01:19:30 -0400 Subject: 6027 EOL zulu (XVR-4000) Reviewed by: Garrett D'Amore Reviewed by: Peter Tribble Reviewed by: Richard Lowe Approved by: Dan McDonald --- usr/src/pkg/manifests/system-kernel-platform.mf | 2 - usr/src/uts/sun4u/Makefile.files | 1 - usr/src/uts/sun4u/Makefile.sun4u | 1 - usr/src/uts/sun4u/daktari/os/daktari.c | 4 +- usr/src/uts/sun4u/io/zuluvm.c | 1495 ----------------------- usr/src/uts/sun4u/ml/zulu_asm.s | 325 ----- usr/src/uts/sun4u/ml/zulu_hat_asm.s | 314 ----- usr/src/uts/sun4u/sys/zulu_hat.h | 214 ---- usr/src/uts/sun4u/sys/zulumod.h | 262 ---- usr/src/uts/sun4u/sys/zuluvm.h | 121 -- usr/src/uts/sun4u/vm/zulu_hat.c | 1469 ---------------------- usr/src/uts/sun4u/zuluvm/Makefile | 133 -- usr/src/uts/sun4u/zuluvm/zuluvm_offsets.in | 77 -- 13 files changed, 3 insertions(+), 4415 deletions(-) delete mode 100644 usr/src/uts/sun4u/io/zuluvm.c delete mode 100644 
usr/src/uts/sun4u/ml/zulu_asm.s delete mode 100644 usr/src/uts/sun4u/ml/zulu_hat_asm.s delete mode 100644 usr/src/uts/sun4u/sys/zulu_hat.h delete mode 100644 usr/src/uts/sun4u/sys/zulumod.h delete mode 100644 usr/src/uts/sun4u/sys/zuluvm.h delete mode 100644 usr/src/uts/sun4u/vm/zulu_hat.c delete mode 100644 usr/src/uts/sun4u/zuluvm/Makefile delete mode 100644 usr/src/uts/sun4u/zuluvm/zuluvm_offsets.in (limited to 'usr/src') diff --git a/usr/src/pkg/manifests/system-kernel-platform.mf b/usr/src/pkg/manifests/system-kernel-platform.mf index 17394600c4..ccaa252a95 100644 --- a/usr/src/pkg/manifests/system-kernel-platform.mf +++ b/usr/src/pkg/manifests/system-kernel-platform.mf @@ -1081,8 +1081,6 @@ $(sparc_ONLY)file path=platform/sun4u/kernel/misc/$(ARCH64)/sbd group=sys \ mode=0755 $(sparc_ONLY)file path=platform/sun4u/kernel/misc/$(ARCH64)/vis group=sys \ mode=0755 -$(sparc_ONLY)file path=platform/sun4u/kernel/misc/$(ARCH64)/zuluvm group=sys \ - mode=0755 $(sparc_ONLY)file path=platform/sun4u/kernel/strmod/$(ARCH64)/kb group=sys \ mode=0755 $(sparc_ONLY)file path=platform/sun4u/kernel/tod/$(ARCH64)/todblade group=sys \ diff --git a/usr/src/uts/sun4u/Makefile.files b/usr/src/uts/sun4u/Makefile.files index 96021facfb..ece838c2be 100644 --- a/usr/src/uts/sun4u/Makefile.files +++ b/usr/src/uts/sun4u/Makefile.files @@ -135,7 +135,6 @@ TRAPSTAT_OBJS += trapstat.o I2BSC_OBJS += i2bsc.o GPTWOCFG_OBJS += gptwocfg.o GPTWO_CPU_OBJS += gptwo_cpu.o -ZULUVM_OBJS += zuluvm.o zulu_asm.o zulu_hat.o zulu_hat_asm.o JBUSPPM_OBJS += jbusppm.o RMC_COMM_OBJS += rmc_comm.o rmc_comm_crctab.o rmc_comm_dp.o rmc_comm_drvintf.o diff --git a/usr/src/uts/sun4u/Makefile.sun4u b/usr/src/uts/sun4u/Makefile.sun4u index 86ccfa622b..4a68d62163 100644 --- a/usr/src/uts/sun4u/Makefile.sun4u +++ b/usr/src/uts/sun4u/Makefile.sun4u @@ -428,7 +428,6 @@ MISC_KMODS += obpsym bootdev vis cpr platmod md5 sha1 i2c_svc MISC_KMODS += sbd MISC_KMODS += opl_cfg -MISC_KMODS += zuluvm MISC_KMODS += gptwo_cpu gptwocfg 
MISC_KMODS += pcie diff --git a/usr/src/uts/sun4u/daktari/os/daktari.c b/usr/src/uts/sun4u/daktari/os/daktari.c index ecfe08b4c2..f522c7b26b 100644 --- a/usr/src/uts/sun4u/daktari/os/daktari.c +++ b/usr/src/uts/sun4u/daktari/os/daktari.c @@ -575,7 +575,9 @@ plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp) } /* - * The zuluvm module requires a dmv interrupt for each installed zulu board. + * The zuluvm module required a dmv interrupt for each installed + * Zulu/XVR-4000 board. The following has not been updated during the + * removal of zuluvm and therefore it may be suboptimal. */ void plat_dmv_params(uint_t *hwint, uint_t *swint) diff --git a/usr/src/uts/sun4u/io/zuluvm.c b/usr/src/uts/sun4u/io/zuluvm.c deleted file mode 100644 index f1184f4580..0000000000 --- a/usr/src/uts/sun4u/io/zuluvm.c +++ /dev/null @@ -1,1495 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * zuluvm module - * - * Provides services required by the XVR-4000 graphics accelerator (zulu) - * that are not provided by the ddi. See PSARC 2002/231. 
- * - * Zulu has 2 dma engines with built in MMUs. zuluvm provides TLB miss - * interrupt support obtaining virtual to physical address translations - * using the XHAT interface PSARC/2003/517. - * - * The module has 3 components. This file, sun4u/vm/zulu_hat.c, and the - * assembly language routines in sun4u/ml/zulu_asm.s and - * sun4u/ml/zulu_hat_asm.s. - * - * The interrupt handler is a data bearing mondo interrupt handled at TL=1 - * If no translation is found in the zulu hat's tsb, or if the tsb is locked by - * C code, the handler posts a soft interrupt which wakes up a parked - * thread belonging to zuludaemon(1M). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ZULUVM_GET_PAGE(val) \ - (caddr_t)((uintptr_t)(val) & PAGEMASK) -#define ZULUVM_GET_AS curthread->t_procp->p_as - -#define ZULUVM_LOCK mutex_enter(&(zdev->dev_lck)) -#define ZULUVM_UNLOCK mutex_exit(&(zdev->dev_lck)) - -#define ZULUVM_SET_STATE(_z, b, c) \ - atomic_cas_32((uint32_t *)&((_z)->zvm.state), c, b) -#define ZULUVM_GET_STATE(_z) \ - (_z)->zvm.state -#define ZULUVM_SET_IDLE(_z) \ - (_z)->zvm.state = ZULUVM_STATE_IDLE; - -#define ZULUVM_INO_MASK ((1<agentid) << INO_SIZE) | \ - (ZULUVM_INO_MASK & (_n)) - -static void zuluvm_stop(zuluvm_state_t *, int, char *); -static zuluvm_proc_t *zuluvm_find_proc(zuluvm_state_t *, struct as *); -static int zuluvm_proc_release(zuluvm_state_t *zdev, zuluvm_proc_t *proc); -static int zuluvm_get_intr_props(zuluvm_state_t *zdev, dev_info_t *devi); -static int zuluvm_driver_attach(zuluvm_state_t *); -static int zuluvm_driver_detach(zuluvm_state_t *); -static void zuluvm_retarget_intr(void *arg); -static void zuluvm_do_retarget(zuluvm_state_t *zdev); - -extern const unsigned int _mmu_pageshift; - -extern int zuluvm_base_pgsize; -static int 
zuluvm_pagesizes[ZULUM_MAX_PG_SIZES + 1]; - -int zuluvm_fast_tlb = 1; - -zuluvm_state_t *zuluvm_devtab[ZULUVM_MAX_DEV]; -kmutex_t zuluvm_lck; - -#ifdef DEBUG -int zuluvm_debug_state = 0; -#endif - -unsigned long zuluvm_ctx_locked = 0; - -/* - * Module linkage information for the kernel. - */ -extern struct mod_ops mod_miscops; - -static struct modlmisc modlmisc = { - &mod_miscops, - "sun4u support " ZULUVM_MOD_VERSION -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modlmisc, - NULL -}; - -int -_init(void) -{ - zuluvm_base_pgsize = (_mmu_pageshift - 13) / 3; - if (zulu_hat_init() != 0) { - return (ZULUVM_ERROR); - } - mutex_init(&zuluvm_lck, NULL, MUTEX_DEFAULT, NULL); - return (mod_install(&modlinkage)); -} - -int -_fini(void) -{ - mutex_destroy(&zuluvm_lck); - (void) zulu_hat_destroy(); - return (mod_remove(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -/* - * currently the kernel driver makes the following assumptions: - * - there is only one TLB miss per zulu device handled at - * any given time - * ==> we only need local data storage per device, not per DMA - * ==> a page fault will block the DMA engine until the fault - * is resolved - * ==> a pagefault will not trigger a zulu DMA context switch - * - * If we want to implement asynnchronous zulu page fault, then we - * need to keep track of outstanding faults while zulu DMA runs - * in a different context. 
- */ -static int -zuluvm_write_tte(zuluvm_state_t *zdev, void *arg, caddr_t addr, - int t_pfn, int t_perm, int t_size, uint64_t tag, - int tlbtype, int *size) -{ - int error; - - (void) addr; - - ZULUVM_STATS_MISS(zdev, t_size); - - if (tag == 0) { /* not coming from preload */ - int state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_WRITE_TTE, - ZULUVM_STATE_INTR_PENDING); - if (state != ZULUVM_STATE_INTR_PENDING) { - zuluvm_stop(zdev, state, "zuluvm_write_tte"); - return (ZULUVM_MISS_CANCELED); - } - } - - if (!(tlbtype & ZULUVM_ITLB_FLAG) && - t_size != zuluvm_base_pgsize && - t_size != ZULU_TTE4M) { - t_size = zuluvm_base_pgsize; - TNF_PROBE_2(zuluvm_write_tte_new_pfn, "zuluvm", /* */, - tnf_opaque, t_pfn, t_pfn, tnf_int, pagesize, t_size); - } - TNF_PROBE_1(zuluvm_write_tte, "zuluvm", /* */, - tnf_opaque, t_pfn, t_pfn); - /* - * if the caller is zuluvm_preload, then we need to pass - * back the page size so it can add the right offset. - */ - if (size) - *size = t_size; - - error = zulud_write_tte(zdev, arg, t_size, tag, t_pfn, - t_perm, tlbtype); - - return (error); -} - -static void -zuluvm_stop(zuluvm_state_t *zdev, int state, char *tag) -{ - int ostate = state; - while (state != ZULUVM_STATE_STOPPED) { - state = ZULUVM_SET_STATE(zdev, - ZULUVM_STATE_STOPPED, state); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_stop(%s): (loop) state %d\n", - tag, state); -#endif - } - TNF_PROBE_2(zuluvm_stop, "zuluvm", /* */, - tnf_string, tag, tag, - tnf_int, state, ostate); - ZULUVM_STATS_CANCEL(zdev); -} - -/* - * Executed with the context of the parked zulu deamon thread, - * uses zulu_hat_load to resolve the miss. - * The tte is loaded and miss done called by the function zuluvm_load_tte - * which is called from zulu_hat - * - * This function is synchronized with the zuluvm_as_free. - * zuluvm_as_free will block until miss servicing is complete. 
- * - * There is a race condition between as_free and the zulu tlb miss - * soft interrupt: - * - queue zulu interrupt - * - process dies, as_free runs - * - interrupt gets scheduled and runs as_fault on the - * already freed as. - * This is solved by keeping track of current zulu dma processes - * and invalidating them in zuluvm_as_free. - */ -uint_t -zuluvm_tlb_handler(caddr_t data) -{ - zuluvm_state_t *zdev = (zuluvm_state_t *)data; - int error; - int flag = 0; - int wait = 0; - zuluvm_proc_t *proc = NULL; - struct zulu_hat *zhat = NULL; - caddr_t addr; - int tlbtype; - void *arg; - int state, newstate; - - TNF_PROBE_1(zuluvm_tlb_handler_lwp, "zuluvm", /* */, - tnf_opaque, lwp, ttolwp(curthread)); - - ZULUVM_LOCK; - error = ZULUVM_GET_TLB_ERRCODE(zdev); - addr = (caddr_t)ZULUVM_GET_TLB_ADDR(zdev); - tlbtype = ZULUVM_GET_TLB_TYPE(zdev); - arg = zdev->zvm.arg; - - /* - * select the correct dma engine and remember the - * the as_free synchronization flags. - */ - switch (tlbtype) { - case ZULUVM_ITLB1: - case ZULUVM_DMA1: - proc = zdev->zvm.proc1; - flag |= ZULUVM_DO_INTR1; - wait |= ZULUVM_WAIT_INTR1; - break; - case ZULUVM_ITLB2: - case ZULUVM_DMA2: - proc = zdev->zvm.proc2; - flag |= ZULUVM_DO_INTR2; - wait |= ZULUVM_WAIT_INTR2; - break; - } - - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_INTR_PENDING, - ZULUVM_STATE_INTR_QUEUED); - newstate = ZULUVM_GET_STATE(zdev); - - TNF_PROBE_2(zuluvm_tlb_handler_state, "zuluvm", /* */, - tnf_int, oldstate, state, - tnf_int, newstate, newstate); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_tlb_handler: state %d\n", state); -#endif - if (state != ZULUVM_STATE_INTR_PENDING && - state != ZULUVM_STATE_INTR_QUEUED) { - ZULUVM_UNLOCK; - - zuluvm_stop(zdev, state, "softintr1"); - zulud_tlb_done(zdev, arg, tlbtype, ZULUVM_MISS_CANCELED); - return (1); - } - - /* - * block the as_free callback in case it comes in - */ - zdev->intr_flags |= flag; - ZULUVM_UNLOCK; - - mutex_enter(&zdev->proc_lck); - /* - * check 
if this as is still valid - */ - if (proc == NULL || proc->valid == 0 || proc->zhat == NULL) { - mutex_exit(&zdev->proc_lck); - /* - * we are on our way out, wake up the as_free - * callback if it is waiting for us - */ - ZULUVM_LOCK; - zdev->intr_flags &= ~flag; - if (zdev->intr_flags | wait) - cv_broadcast(&zdev->intr_wait); - ZULUVM_UNLOCK; - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_IDLE, - ZULUVM_STATE_INTR_PENDING); - if (state != ZULUVM_STATE_INTR_PENDING) { - zuluvm_stop(zdev, state, "softintr3"); - } - zulud_tlb_done(zdev, arg, tlbtype, ZULUVM_NO_HAT); - return (1); - } - zhat = proc->zhat; - mutex_exit(&zdev->proc_lck); - - TNF_PROBE_1(zuluvm_tlb_handler, "zuluvm", /* */, - tnf_opaque, addr, addr); - - switch (error) { - case ZULUVM_CTX_LOCKED: - /* - * trap handler found that zulu_hat had the lock bit set - * rather than block in the fast trap handler, it punts - * in this rare instance - */ - ++zuluvm_ctx_locked; - TNF_PROBE_1(zuluvm_ctx_locked, "zuluvm", /* CSTYLED */, - tnf_ulong, zuluvm_ctx_locked, zuluvm_ctx_locked); - - /*FALLTHROUGH*/ - - case ZULUVM_TTE_DELAY: - /* - * fast tlb handler was skipped, see zuluvm_fast_tlb flag - */ - /*FALLTHROUGH*/ - - case ZULUVM_NO_TTE: - /* - * no TSB entry and TTE in the hash - */ - mutex_enter(&zdev->load_lck); - zdev->in_intr = 1; - error = zulu_hat_load(zhat, addr, - (tlbtype == ZULUVM_DMA2) ? 
S_WRITE : S_READ, NULL); - zdev->in_intr = 0; - mutex_exit(&zdev->load_lck); - if (error) { - - error = ZULUVM_NO_MAP; - } else { - error = ZULUVM_SUCCESS; - TNF_PROBE_1(zuluvm_tlb_handler_done, "zuluvm", /* */, - tnf_int, error, error); - return (1); - } - - default: - /* - * error case, fall through and tell zulu driver to abort DMA - */ - break; - } - - if (error != ZULUVM_MISS_CANCELED) { - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_IDLE, - ZULUVM_STATE_WRITE_TTE); - newstate = ZULUVM_GET_STATE(zdev); - TNF_PROBE_2(zuluvm_tlb_handler_state_done, "zuluvm", /* */, - tnf_int, oldstate, state, - tnf_int, newstate, newstate); - if (state != ZULUVM_STATE_WRITE_TTE) { - zuluvm_stop(zdev, state, "softintr4"); - } - } - /* - * synchronize with as_free callback - * It will set the wait flag, in that case we send - * a wake up. - */ - ZULUVM_LOCK; - zdev->intr_flags &= ~flag; - if (zdev->intr_flags | wait) - cv_broadcast(&zdev->intr_wait); - ZULUVM_UNLOCK; - - TNF_PROBE_1(zuluvm_tlb_handler_done, "zuluvm", /* */, - tnf_int, error, error); - - zulud_tlb_done(zdev, arg, tlbtype, error); - - return (1); -} - - -void -zuluvm_load_tte(struct zulu_hat *zhat, caddr_t addr, uint64_t pfn, - int perm, int size) -{ - zuluvm_state_t *zdev = zhat->zdev; - int tlbtype = ZULUVM_GET_TLB_TYPE(zdev); - - ASSERT(MUTEX_HELD(&zdev->load_lck)); - ASSERT(pfn != 0); - - if (zdev->in_intr) { - int error; - int flag = 0; - int wait = 0; - - error = zuluvm_write_tte(zdev, zdev->zvm.arg, addr, pfn, - perm, size, 0, tlbtype, NULL); - - if (error != ZULUVM_MISS_CANCELED) { - int state, newstate; - - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_IDLE, - ZULUVM_STATE_WRITE_TTE); - newstate = ZULUVM_GET_STATE(zdev); - TNF_PROBE_2(zuluvm_tlb_handler_state_done, "zuluvm", - /* */, tnf_int, oldstate, state, - tnf_int, newstate, newstate); - if (state != ZULUVM_STATE_WRITE_TTE) { - zuluvm_stop(zdev, state, "softintr4"); - } - } - /* - * synchronize with as_free callback - * It will set the wait flag, in that 
case we send - * a wake up. - */ - switch (tlbtype) { - case ZULUVM_ITLB1: - case ZULUVM_DMA1: - flag = ZULUVM_DO_INTR1; - wait = ZULUVM_WAIT_INTR1; - break; - case ZULUVM_ITLB2: - case ZULUVM_DMA2: - flag = ZULUVM_DO_INTR2; - wait = ZULUVM_WAIT_INTR2; - break; - } - - ZULUVM_LOCK; - zdev->intr_flags &= ~flag; - if (zdev->intr_flags | wait) - cv_broadcast(&zdev->intr_wait); - ZULUVM_UNLOCK; - - zulud_tlb_done(zdev, zdev->zvm.arg, tlbtype, error); - } else { - (void) zuluvm_write_tte(zdev, zdev->zvm.arg, addr, pfn, - perm, size, (uint64_t)addr | - zhat->zulu_ctx, tlbtype, NULL); - } -} - - - - -/* - * This function provides the faulting thread for zulu page faults - * It is call from the device driver in response to an ioctl issued - * by a zuludaemon thread. - * It sits in cv_wait_sig until it gets woken up by a signal or - * zulu tlb miss soft interrupt. - */ -int -zuluvm_park(zuluvm_info_t devp) -{ - int rval; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - mutex_enter(&zdev->park_lck); - zdev->parking = 1; - for (;;) { - rval = cv_wait_sig(&zdev->park_cv, &zdev->park_lck); - if (rval == 0) - break; - rval = zuluvm_tlb_handler(devp); - } - zdev->parking = 0; - mutex_exit(&zdev->park_lck); - return (rval); -} - -/* - * zulu soft interrupt handler, just triggers the parked zulu fault - * thread - */ -/*ARGSUSED*/ -uint_t -zuluvm_softintr(caddr_t devp, caddr_t arg2) -{ - int tlbtype; - void *arg; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - mutex_enter(&zdev->park_lck); - if (zdev->parking) { - cv_signal(&zdev->park_cv); - mutex_exit(&zdev->park_lck); - TNF_PROBE_1(zuluvm_fast_intr, "zuluvm", /* */, - tnf_opaque, devp, devp); - } else { - mutex_exit(&zdev->park_lck); - cmn_err(CE_NOTE, "zuluvm: no page fault thread\n"); - ZULUVM_LOCK; - tlbtype = ZULUVM_GET_TLB_TYPE(zdev); - arg = zdev->zvm.arg; - ZULUVM_UNLOCK; - TNF_PROBE_0(zuluvm_fast_intr, "zuluvm", /* */); - zuluvm_stop(zdev, ZULUVM_STATE_INTR_QUEUED, "fast_intr"); - zulud_tlb_done(zdev, arg, tlbtype, 
ZULUVM_NO_TTE); - } - return (1); -} - -/* ***** public interface for process mapping events (hat layer) ***** */ - -/* - * If the page size matches the Zulu page sizes then just pass - * it thru. If not then emulate the page demap with demaps of - * smaller page size. - */ -/* ARGSUSED */ -void -zuluvm_demap_page(void *arg, struct hat *hat_ptr, short ctx, - caddr_t vaddr, uint_t size) -{ - void *ddarg; - zuluvm_state_t *zdev = (zuluvm_state_t *)arg; - - if (arg == NULL) - return; - - ZULUVM_STATS_DEMAP_PAGE(zdev); - - ddarg = zdev->zvm.arg; - - TNF_PROBE_3(zuluvm_demap_page, "zuluvm", /* */, - tnf_opaque, addr, vaddr, - tnf_int, size, size, - tnf_int, ctx, ctx); - - if (ddarg != NULL) { - if (size != zuluvm_base_pgsize && - size != ZULU_TTE4M) { - int i; - int cnt = size - zuluvm_base_pgsize; - cnt = ZULU_HAT_SZ_SHIFT(cnt); - for (i = 0; i < cnt; i++) { - uintptr_t addr = (uintptr_t)vaddr | - i << ZULU_HAT_BP_SHIFT; - zulud_demap_page(zdev, ddarg, - (caddr_t)addr, ctx); - } - } else { - zulud_demap_page(zdev, ddarg, vaddr, ctx); - } - TNF_PROBE_0(zuluvm_demap_page_done, "zuluvm", /* */); - } else { - TNF_PROBE_0(zuluvm_demap_page_null_ddarg, "zuluvm", /* */); - } -} - -/* - * An entire context has gone away, just pass it thru - */ -void -zuluvm_demap_ctx(void *arg, short ctx) -{ - void *ddarg; - zuluvm_state_t *zdev = (zuluvm_state_t *)arg; - - if (arg == NULL) - return; - - ZULUVM_STATS_DEMAP_CTX(zdev); - - TNF_PROBE_1(zuluvm_demap_ctx, "zuluvm", /* */, - tnf_int, ctx, ctx); - ddarg = zdev->zvm.arg; - - if (ddarg != NULL) - zulud_demap_ctx(zdev, ddarg, ctx); -} - -static int -zuluvm_driver_attach(zuluvm_state_t *zdev) -{ - int i; - mutex_enter(&zuluvm_lck); - for (i = 0; i < ZULUVM_MAX_DEV; i++) { - if (zuluvm_devtab[i] == NULL) { - zuluvm_devtab[i] = zdev; - ZULUVM_SET_IDLE(zdev); - break; - } - } - mutex_exit(&zuluvm_lck); - if (i >= ZULUVM_MAX_DEV) - return (ZULUVM_ERROR); - - if (zulu_hat_attach((void *)zdev) != 0) { - return (ZULUVM_ERROR); - } - - 
mutex_init(&zdev->dev_lck, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&zdev->load_lck, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&zdev->proc_lck, NULL, MUTEX_DEFAULT, NULL); - mutex_init(&zdev->park_lck, NULL, MUTEX_DEFAULT, NULL); - cv_init(&zdev->park_cv, NULL, CV_DEFAULT, NULL); - cv_init(&zdev->intr_wait, NULL, CV_DEFAULT, NULL); - zdev->parking = 0; - -#ifdef ZULUVM_STATS - zdev->zvm.cancel = 0; - zdev->zvm.pagefault = 0; - zdev->zvm.no_mapping = 0; - zdev->zvm.preload = 0; - zdev->zvm.migrate = 0; - zdev->zvm.pagesize = 0; - zdev->zvm.tlb_miss[0] = 0; - zdev->zvm.tlb_miss[1] = 0; - zdev->zvm.tlb_miss[2] = 0; - zdev->zvm.tlb_miss[3] = 0; - zdev->zvm.itlb1miss = 0; - zdev->zvm.dtlb1miss = 0; - zdev->zvm.itlb2miss = 0; - zdev->zvm.dtlb2miss = 0; -#endif - zdev->zvm.pfncnt = 0; - for (i = 0; i < 50; i++) - zdev->zvm.pfnbuf[i] = 0; - - zdev->zvm.mmu_pa = NULL; - zdev->zvm.proc1 = NULL; - zdev->zvm.proc2 = NULL; - zdev->procs = NULL; - return (ZULUVM_SUCCESS); -} - -static int -zuluvm_driver_detach(zuluvm_state_t *zdev) -{ - int i; - cv_destroy(&zdev->intr_wait); - cv_destroy(&zdev->park_cv); - mutex_destroy(&zdev->park_lck); - mutex_destroy(&zdev->proc_lck); - mutex_destroy(&zdev->dev_lck); - mutex_destroy(&zdev->load_lck); - zdev->dops = NULL; - - mutex_enter(&zuluvm_lck); - for (i = 0; i < ZULUVM_MAX_DEV; i++) { - if (zuluvm_devtab[i] == zdev) { - zuluvm_devtab[i] = NULL; - break; - } - } - mutex_exit(&zuluvm_lck); - - if (zulu_hat_detach((void *)zdev) == 0) { - return (ZULUVM_SUCCESS); - } else { - return (ZULUVM_ERROR); - } -} - -zulud_ops_t *zuluvm_dops = NULL; - -/* - * init the zulu kernel driver (variables, locks, etc) - */ -int -zuluvm_init(zulud_ops_t *ops, int **pagesizes) -{ - int error = ZULUVM_SUCCESS; - int i; - int size = zuluvm_base_pgsize; /* MMU_PAGESIZE; */ - - if (ops->version != ZULUVM_INTERFACE_VERSION) - return (ZULUVM_VERSION_MISMATCH); - - zuluvm_dops = ops; - for (i = 0; i < ZULUM_MAX_PG_SIZES && size <= ZULU_TTE4M; i++) { - 
zuluvm_pagesizes[i] = size++; - } - zuluvm_pagesizes[i] = -1; - *pagesizes = zuluvm_pagesizes; - - return (error); -} - -/* - * cleanup afterwards - */ -int -zuluvm_fini(void) -{ - zuluvm_dops = NULL; - return (ZULUVM_SUCCESS); -} - -/* - * allocate a zulu kernel driver instance for this zulu device - */ -int -zuluvm_alloc_device(dev_info_t *devi, void *arg, zuluvm_info_t *devp, - caddr_t mmu, caddr_t imr) -{ - uint64_t intr_num; - zuluvm_state_t *zdev; - int error = ZULUVM_SUCCESS; - - TNF_PROBE_3(zuluvm_alloc_device, "zuluvm", /* */, - tnf_opaque, arg, arg, - tnf_opaque, mmu, mmu, - tnf_opaque, imr, imr); - - zdev = kmem_zalloc(sizeof (zuluvm_state_t), KM_SLEEP); - zdev->dip = devi; - zdev->dops = zuluvm_dops; - error = zuluvm_driver_attach(zdev); - if (error != ZULUVM_SUCCESS) { - kmem_free(zdev, sizeof (zuluvm_state_t)); - return (ZULUVM_NO_DEV); - } - - ZULUVM_LOCK; - error = zuluvm_get_intr_props(zdev, devi); - if (error != ZULUVM_SUCCESS) { - ZULUVM_UNLOCK; - error = zuluvm_driver_detach(zdev); - if (error != ZULUVM_SUCCESS) - return (error); - kmem_free(zdev, sizeof (zuluvm_state_t)); - return (ZULUVM_NO_DEV); - } - zdev->zvm.arg = arg; - zdev->zvm.mmu_pa = (uint64_t)va_to_pa((void *)mmu); - zdev->imr = (uint64_t *)imr; - zdev->zvm.dmv_intr = dmv_add_softintr(zuluvm_dmv_tlbmiss_tl1, - (void *)zdev); - zulud_set_itlb_pc(zdev, arg, DMV_MAKE_DMV(zdev->zvm.dmv_intr, - (void *)zdev)); - zulud_set_dtlb_pc(zdev, arg, DMV_MAKE_DMV(zdev->zvm.dmv_intr, - (void *)zdev)); - intr_dist_add(zuluvm_retarget_intr, (void *)zdev); - zuluvm_do_retarget(zdev); - intr_num = add_softintr(ZULUVM_PIL, zuluvm_softintr, - (caddr_t)zdev, SOFTINT_ST); - zdev->zvm.intr_num = intr_num; - *devp = (caddr_t)zdev; - ZULUVM_UNLOCK; - TNF_PROBE_1(zuluvm_alloc_device_done, "zuluvm", /* */, - tnf_opaque, devp, *devp); - return (ZULUVM_SUCCESS); -} - -/* - * free a zulu kernel driver instance - */ -int -zuluvm_free_device(zuluvm_info_t devp) -{ - int error; - zuluvm_state_t *zdev = 
(zuluvm_state_t *)devp; - - TNF_PROBE_1(zuluvm_free_device, "zuluvm", /* */, - tnf_opaque, zdev, zdev); - - if (zdev == NULL) - return (ZULUVM_NO_DEV); - ZULUVM_LOCK; - if (zdev->zvm.arg == NULL) { - ZULUVM_UNLOCK; - TNF_PROBE_1(zuluvm_free_device_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_NO_DEV); - return (ZULUVM_NO_DEV); - } - (void) dmv_rem_intr(zdev->zvm.dmv_intr); - (void) rem_softintr(zdev->zvm.intr_num); - intr_dist_rem(zuluvm_retarget_intr, (void *)zdev); - zdev->zvm.arg = NULL; - ZULUVM_UNLOCK; - error = zuluvm_driver_detach(zdev); - if (error != ZULUVM_SUCCESS) - return (error); - zdev->dops = NULL; - kmem_free(zdev, sizeof (zuluvm_state_t)); - - TNF_PROBE_0(zuluvm_free_device_done, "zuluvm", /* */); - return (ZULUVM_SUCCESS); -} - -/* - * find the as in the list of active zulu processes - * The caller has to hold zdev->proc_lck - */ -static zuluvm_proc_t * -zuluvm_find_proc(zuluvm_state_t *zdev, struct as *asp) -{ - zuluvm_proc_t *p; - TNF_PROBE_2(zuluvm_find_proc, "zuluvm", /* */, - tnf_opaque, zdev, zdev, - tnf_opaque, asp, asp); - for (p = zdev->procs; p != NULL; p = p->next) { - if (ZULU_HAT2AS(p->zhat) == asp) { - TNF_PROBE_1(zuluvm_find_proc_done, - "zuluvm", /* */, tnf_opaque, proc, p); - return (p); - } - } - TNF_PROBE_0(zuluvm_find_proc_fail, "zuluvm", /* */); - return (NULL); -} - -void -zuluvm_as_free(struct as *as, void *arg, uint_t events) -{ - zuluvm_proc_t *proc = (zuluvm_proc_t *)arg; - zuluvm_state_t *zdev = proc->zdev; - int wait = 0; - int flag = 0; - int valid; - - (void) events; - - TNF_PROBE_1(zuluvm_as_free, "zuluvm", /* */, - tnf_opaque, arg, arg); - - (void) as_delete_callback(as, arg); - /* - * if this entry is still valid, then we need to sync - * with zuluvm_tlb_handler rountine. 
- */ - mutex_enter(&zdev->proc_lck); - valid = proc->valid; - proc->valid = 0; - mutex_exit(&zdev->proc_lck); - - if (valid) { - ZULUVM_LOCK; - if (proc == zdev->zvm.proc1) { - flag |= ZULUVM_WAIT_INTR1; - wait |= ZULUVM_DO_INTR1; - } - if (proc == zdev->zvm.proc2) { - flag |= ZULUVM_WAIT_INTR2; - wait |= ZULUVM_DO_INTR2; - } - if (flag) { - zdev->intr_flags |= flag; - /* - * wait until the tlb miss is resloved - */ - while (zdev->intr_flags & wait) { - cv_wait(&zdev->intr_wait, &zdev->dev_lck); - } - zdev->intr_flags &= ~flag; - } - ZULUVM_UNLOCK; - } - - if (proc->zhat != NULL) { - /* - * prevent any further tlb miss processing for this hat - */ - zulu_hat_terminate(proc->zhat); - } - - /* - * decrement the ref count and do the appropriate - * if it drops to zero. - */ - mutex_enter(&zdev->proc_lck); - (void) zuluvm_proc_release(zdev, proc); - mutex_exit(&zdev->proc_lck); -} - -/* - * notify zulu vm driver about a new process going to - * use zulu DMA. Create a zulu_hat. - */ -int -zuluvm_dma_add_proc(zuluvm_info_t devp, uint64_t *cookie) -{ - zuluvm_proc_t *proc; - int refcnt; - struct as *asp = ZULUVM_GET_AS; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - - TNF_PROBE_1(zuluvm_dma_add_proc, "zuluvm", /* */, - tnf_opaque, zdev, zdev); - mutex_enter(&zdev->proc_lck); - proc = zuluvm_find_proc(zdev, asp); - if (proc == NULL) { - proc = kmem_zalloc(sizeof (zuluvm_proc_t), KM_SLEEP); - proc->zhat = zulu_hat_proc_attach(asp, zdev); - if (proc->zhat == NULL) { - mutex_exit(&zdev->proc_lck); - kmem_free(proc, sizeof (zuluvm_proc_t)); - TNF_PROBE_2(zuluvm_dma_add_proc_done, "zuluvm", /* */, - tnf_int, valid, 0, - tnf_int, error, ZULUVM_ERROR); - return (ZULUVM_ERROR); - } - proc->zdev = zdev; - proc->valid = 1; - proc->refcnt = 1; - proc->next = zdev->procs; - if (zdev->procs) - zdev->procs->prev = proc; - proc->prev = NULL; - zdev->procs = proc; - proc->refcnt++; - (void) as_add_callback(asp, zuluvm_as_free, proc, - AS_FREE_EVENT, 0, -1, KM_SLEEP); - } else { - if 
(proc->valid == 0) { - mutex_exit(&zdev->proc_lck); - TNF_PROBE_2(zuluvm_dma_add_proc_done, "zuluvm", /* */, - tnf_int, valid, 0, - tnf_int, error, ZULUVM_ERROR); - return (ZULUVM_ERROR); - } - proc->refcnt++; - } - refcnt = proc->refcnt; - mutex_exit(&zdev->proc_lck); - *cookie = (uint64_t)proc; - TNF_PROBE_2(zuluvm_dma_add_proc_done, "zuluvm", /* */, - tnf_int, refcnt, refcnt, - tnf_int, error, ZULUVM_SUCCESS); - return (ZULUVM_SUCCESS); -} - -void -zuluvm_proc_hold(zuluvm_state_t *zdev, zuluvm_proc_t *proc) -{ - mutex_enter(&zdev->proc_lck); - proc->refcnt++; - mutex_exit(&zdev->proc_lck); -} - -/* - * decrement ref count and free data if it drops to zero - */ -static int -zuluvm_proc_release(zuluvm_state_t *zdev, zuluvm_proc_t *proc) -{ - int refcnt; - ASSERT(MUTEX_HELD(&zdev->proc_lck)); - refcnt = --proc->refcnt; - TNF_PROBE_3(zuluvm_proc_release, "zuluvm", /* */, - tnf_opaque, zdev, zdev, - tnf_opaque, proc, proc, - tnf_int, refcnt, refcnt); - if (refcnt == 0) { - if (proc->next) - proc->next->prev = proc->prev; - if (proc->prev) - proc->prev->next = proc->next; - else - zdev->procs = proc->next; - kmem_free(proc, sizeof (zuluvm_proc_t)); - } - return (refcnt); -} - -/* - * this process is not longer using DMA, all entries - * have been removed from the TLB. 
- */ -int -zuluvm_dma_delete_proc(zuluvm_info_t devp, uint64_t cookie) -{ - int refcnt; - zuluvm_proc_t *proc = (zuluvm_proc_t *)cookie; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - - TNF_PROBE_2(zuluvm_dma_delete_proc, "zuluvm", /* */, - tnf_opaque, zdev, zdev, - tnf_opaque, cookie, cookie); - mutex_enter(&zdev->proc_lck); - if (proc != NULL) { - TNF_PROBE_1(zuluvm_dma_delete_proc, "zuluvm", /* */, - tnf_opaque, proc, proc); - if (proc->zhat != NULL) { - zulu_hat_proc_detach(proc->zhat); - proc->zhat = NULL; - } - refcnt = zuluvm_proc_release(zdev, proc); - } - mutex_exit(&zdev->proc_lck); - - TNF_PROBE_2(zuluvm_dma_delete_proc_done, "zuluvm", /* */, - tnf_int, refcnt, refcnt, - tnf_int, error, ZULUVM_SUCCESS); - return (ZULUVM_SUCCESS); -} - -/* - * barrier sync for device driver - * blocks until zuluvm_tlbmiss_tl1 function is done - */ -void -zuluvm_fast_tlb_wait(caddr_t devp) -{ - int state; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - int cnt = 0; - - do { - state = ZULUVM_GET_STATE(zdev); - cnt++; - } while (state == ZULUVM_STATE_TLB_PENDING); - TNF_PROBE_1(zuluvm_fast_tlb_wait, "zuluvm", /* */, - tnf_int, loop_cnt, cnt); -} - -/* - * setup DMA handling for this handle - */ -int -zuluvm_dma_alloc_ctx(zuluvm_info_t devp, int dma, short *mmuctx, - uint64_t *tsbreg) -{ - struct as *asp = ZULUVM_GET_AS; - int error = ZULUVM_NO_DEV; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - int state, newstate; - - if (asp == NULL) { - TNF_PROBE_1(zuluvm_dma_alloc_ctx_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_NO_HAT); - return (ZULUVM_NO_HAT); - } - - *tsbreg = 0; - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_IDLE, - ZULUVM_STATE_STOPPED); - newstate = ZULUVM_GET_STATE(zdev); - TNF_PROBE_4(zuluvm_dma_alloc_ctx, "zuluvm", /* */, - tnf_opaque, devp, devp, - tnf_int, dma, dma, - tnf_int, oldstate, state, - tnf_int, newstate, newstate); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_dma_alloc_ctx: state %d\n", state); -#endif - if (state 
!= ZULUVM_STATE_STOPPED && state != ZULUVM_STATE_IDLE) { - while (state != ZULUVM_STATE_IDLE) { - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_IDLE, - ZULUVM_STATE_STOPPED); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_dma_alloc_ctx: (loop)" - " state %d\n", state); -#endif - if (state != ZULUVM_STATE_IDLE) - delay(1); - } - } - - if (zdev->zvm.arg != NULL) { - struct zulu_hat *zhat; - zuluvm_proc_t *proc; - - mutex_enter(&zdev->proc_lck); - proc = zuluvm_find_proc(zdev, asp); - if (proc != NULL) { - zhat = proc->zhat; - proc->refcnt++; - } - mutex_exit(&zdev->proc_lck); - - switch (dma) { - case ZULUVM_DMA1: - ZULUVM_LOCK; - zdev->zvm.proc1 = proc; - ZULUVM_UNLOCK; - error = ZULUVM_SUCCESS; - break; - case ZULUVM_DMA2: - ZULUVM_LOCK; - zdev->zvm.proc2 = proc; - ZULUVM_UNLOCK; - error = ZULUVM_SUCCESS; - break; - default: - mutex_enter(&zdev->proc_lck); - (void) zuluvm_proc_release(zdev, proc); - mutex_exit(&zdev->proc_lck); - } - - if (error == ZULUVM_SUCCESS) { - zulu_hat_validate_ctx(zhat); - if (zhat->zulu_ctx >= 0) { - *mmuctx = zhat->zulu_ctx; - } else { - printf("invalid context value: %d\n", - zhat->zulu_ctx); - - mutex_enter(&zdev->proc_lck); - (void) zuluvm_proc_release(zdev, proc); - mutex_exit(&zdev->proc_lck); - - error = ZULUVM_ERROR; - } - } else { - error = ZULUVM_ERROR; - } - } - TNF_PROBE_1(zuluvm_dma_alloc_ctx_done, "zuluvm", /* */, - tnf_int, error, error); - return (error); -} - -/* - * preload TLB - * this will try to pre-set the zulu tlb, mainly used for dma engine 2, - * video read-back. 
- */ -int -zuluvm_dma_preload(zuluvm_info_t devp, int dma, - int num, zulud_preload_t *list) -{ - int i; - int error = ZULUVM_SUCCESS; - struct zulu_hat *zhat; - zuluvm_proc_t *proc = NULL; - - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - - TNF_PROBE_4(zuluvm_dma_preload, "zuluvm", /* */, - tnf_opaque, devp, devp, - tnf_int, dma, dma, - tnf_int, num, num, - tnf_opaque, list, list); - ZULUVM_LOCK; - switch (dma) { - case ZULUVM_DMA1: - proc = zdev->zvm.proc1; - break; - case ZULUVM_DMA2: - proc = zdev->zvm.proc2; - break; - } - - mutex_enter(&zdev->proc_lck); - if (proc == NULL || proc->valid == 0 || proc->zhat == NULL) { - mutex_exit(&zdev->proc_lck); - ZULUVM_UNLOCK; - return (ZULUVM_NO_HAT); - } - mutex_exit(&zdev->proc_lck); - - zhat = proc->zhat; - /* - * need to release this to avoid recursive enter in zuluvm_load_tte - * which gets called from zulu_hat_memload() - */ - ZULUVM_UNLOCK; - - mutex_enter(&zdev->load_lck); - for (i = 0; i < num; i++) { - int pg_size; - int res; - int first = 1; - caddr_t addr = ZULUVM_GET_PAGE(list[i].addr); - int64_t size = (int64_t)list[i].len; - while (size > 0) { - if (list[i].tlbtype & ~ZULUVM_DMA_MASK) { - error = ZULUVM_INVALID_MISS; - break; - } - res = zulu_hat_load(zhat, addr, - (list[i].tlbtype == ZULUVM_DMA2) ? 
S_WRITE : S_READ, - &pg_size); - if ((res != 0) || (pg_size < 0)) { - error = ZULUVM_NO_MAP; - break; - } - ZULUVM_STATS_PRELOAD(zdev); - TNF_PROBE_2(zuluvm_dma_preload_addr, "zuluvm", /* */, - tnf_opaque, addr, addr, - tnf_opaque, size, size); - if (first) { - first = 0; - size -= ZULU_HAT_PGDIFF(list[i].addr, - pg_size); - } else { - size -= ZULU_HAT_PGSZ(pg_size); - } - addr += ZULU_HAT_PGSZ(pg_size); - } - } - mutex_exit(&zdev->load_lck); - TNF_PROBE_1(zuluvm_dma_preload_done, "zuluvm", /* */, - tnf_int, error, error); - return (ZULUVM_SUCCESS); -} - -/* - * destroy DMA handling for this handle - */ -int -zuluvm_dma_free_ctx(zuluvm_info_t devp, int dma) -{ - int error = ZULUVM_NO_DEV; - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - int state, newstate; - - state = ZULUVM_SET_STATE(zdev, ZULUVM_STATE_STOPPED, - ZULUVM_STATE_IDLE); - newstate = ZULUVM_GET_STATE(zdev); - TNF_PROBE_4(zuluvm_dma_free_ctx, "zuluvm", /* */, - tnf_opaque, devp, devp, - tnf_int, dma, dma, - tnf_int, oldstate, state, - tnf_int, newstate, newstate); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_dma_free_ctx: state %d\n", state); -#endif - if (state != ZULUVM_STATE_IDLE && state != ZULUVM_STATE_STOPPED) { - int doit = 1; - while (doit) { - switch (state) { - case ZULUVM_STATE_CANCELED: - case ZULUVM_STATE_STOPPED: - doit = 0; - break; - case ZULUVM_STATE_IDLE: - state = ZULUVM_SET_STATE(zdev, - ZULUVM_STATE_STOPPED, - ZULUVM_STATE_IDLE); - break; - default: - state = ZULUVM_SET_STATE(zdev, - ZULUVM_STATE_CANCELED, state); - } - TNF_PROBE_1(zuluvm_dma_free_ctx, "zuluvm", /* */, - tnf_int, state, state); -#ifdef DEBUG - if (zuluvm_debug_state) - cmn_err(CE_NOTE, "zuluvm_dma_free_ctx: (loop1)" - " state %d\n", state); -#endif - } - } - TNF_PROBE_1(zuluvm_dma_free_ctx, "zuluvm", /* */, - tnf_int, state, state); - - error = ZULUVM_SUCCESS; - while (state != ZULUVM_STATE_STOPPED) { - state = ZULUVM_GET_STATE(zdev); -#ifdef DEBUG - if (zuluvm_debug_state) - 
cmn_err(CE_NOTE, "zuluvm_dma_free: (loop2) state %d\n", - state); -#endif - if (state != ZULUVM_STATE_STOPPED) - delay(1); - } - ZULUVM_LOCK; - if (zdev->zvm.arg != NULL) { - zuluvm_proc_t *proc = NULL; - switch (dma) { - case ZULUVM_DMA1: - proc = zdev->zvm.proc1; - zdev->zvm.proc1 = NULL; - break; - case ZULUVM_DMA2: - proc = zdev->zvm.proc2; - zdev->zvm.proc2 = NULL; - break; - default: - error = ZULUVM_NO_DEV; - } - ZULUVM_UNLOCK; - if (proc) { - mutex_enter(&zdev->proc_lck); - (void) zuluvm_proc_release(zdev, proc); - mutex_exit(&zdev->proc_lck); - } - } else { - ZULUVM_UNLOCK; - error = ZULUVM_NO_DEV; - } - TNF_PROBE_1(zuluvm_dma_free_ctx_done, "zuluvm", /* */, - tnf_int, error, error); - return (error); -} - -static void -zuluvm_do_retarget(zuluvm_state_t *zdev) -{ - int i, idx; - uint_t cpu; - for (i = 0; i < ZULUVM_MAX_INTR; i++) { - if (zdev->interrupts[i].ino != -1) { - cpu = intr_dist_cpuid(); - idx = zdev->interrupts[i].offset; - if (zdev->imr[idx] & ZULUVM_IMR_V_MASK) - zdev->imr[idx] = ZULUVM_IMR_V_MASK | - (cpu<imr[idx] = - cpu<dip, ino, NULL, NULL, handler, arg) - != DDI_SUCCESS) { - TNF_PROBE_1(zuluvm_add_intr_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_ERROR); - return (ZULUVM_ERROR); - } - return (ZULUVM_SUCCESS); -} - -int -zuluvm_rem_intr(zuluvm_info_t devp, int ino) -{ - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - if (devp == NULL) { - TNF_PROBE_1(zuluvm_rem_intr_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_NO_DEV); - return (ZULUVM_NO_DEV); - } - /* remove from distributin list */ - ZULUVM_LOCK; - zdev->imr[zdev->interrupts[ino].offset] &= ~ZULUVM_IMR_V_MASK; - ZULUVM_UNLOCK; - ddi_remove_intr(zdev->dip, ino, NULL); - return (ZULUVM_SUCCESS); -} - -int -zuluvm_enable_intr(zuluvm_info_t devp, int num) -{ - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - - TNF_PROBE_2(zuluvm_enable_intr, "zuluvm_intr", /* */, - tnf_opaque, devp, devp, - tnf_int, num, num); - if (devp == NULL) { - TNF_PROBE_1(zuluvm_enable_intr_done, "zuluvm", /* 
*/, - tnf_int, error, ZULUVM_NO_DEV); - return (ZULUVM_NO_DEV); - } - if (num < 0 || num > ZULUVM_IMR_MAX) { - TNF_PROBE_1(zuluvm_enable_intr_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_BAD_IDX); - return (ZULUVM_BAD_IDX); - } - ZULUVM_LOCK; - zdev->imr[num] |= ZULUVM_IMR_V_MASK; - ZULUVM_UNLOCK; - TNF_PROBE_1(zuluvm_enable_intr_done, "zuluvm_intr", /* */, - tnf_int, error, ZULUVM_SUCCESS); - return (ZULUVM_SUCCESS); -} - -int -zuluvm_disable_intr(zuluvm_info_t devp, int num) -{ - zuluvm_state_t *zdev = (zuluvm_state_t *)devp; - - TNF_PROBE_2(zuluvm_disable_intr, "zuluvm_intr", /* */, - tnf_opaque, devp, devp, - tnf_int, num, num); - if (devp == NULL) { - TNF_PROBE_1(zuluvm_disable_intr_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_NO_DEV); - return (ZULUVM_NO_DEV); - } - if (num < 0 || num > ZULUVM_IMR_MAX) { - TNF_PROBE_1(zuluvm_disable_intr_done, "zuluvm", /* */, - tnf_int, error, ZULUVM_BAD_IDX); - return (ZULUVM_BAD_IDX); - } - ZULUVM_LOCK; - zdev->imr[num] &= ~ZULUVM_IMR_V_MASK; - ZULUVM_UNLOCK; - TNF_PROBE_1(zuluvm_disable_intr_done, "zuluvm_intr", /* */, - tnf_int, error, ZULUVM_SUCCESS); - return (ZULUVM_SUCCESS); -} - -static int -zuluvm_get_intr_props(zuluvm_state_t *zdev, - dev_info_t *devi) -{ - int *intr; - int i; - uint_t nintr; - - zdev->agentid = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, - "portid", -1); - if (zdev->agentid == -1) { - cmn_err(CE_WARN, "%s%d: no portid property", - ddi_get_name(devi), - ddi_get_instance(devi)); - return (ZULUVM_ERROR); - } - - for (i = 0; i < ZULUVM_MAX_INTR; i++) { - zdev->interrupts[i].offset = 0; - zdev->interrupts[i].ino = -1; - } - - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, - "interrupts", &intr, &nintr) == DDI_PROP_SUCCESS) { - - if (nintr == 0) { - cmn_err(CE_WARN, "%s%d: no interrupts in property", - ddi_get_name(devi), - ddi_get_instance(devi)); - ddi_prop_free(intr); - return (ZULUVM_ERROR); - } - if (nintr >= ZULUVM_MAX_INTR) { - cmn_err(CE_WARN, "%s%d: to many 
interrupts (%d)", - ddi_get_name(devi), - ddi_get_instance(devi), nintr); - ddi_prop_free(intr); - return (ZULUVM_ERROR); - } - for (i = 0; i < nintr; i++) { - zdev->interrupts[i].offset = intr[i]; - zdev->interrupts[i].ino = i; - } - ddi_prop_free(intr); - } else { - cmn_err(CE_WARN, "%s%d: no interrupts property", - ddi_get_name(devi), - ddi_get_instance(devi)); - } - return (ZULUVM_SUCCESS); -} - -/* *** enf of zulu *** */ diff --git a/usr/src/uts/sun4u/ml/zulu_asm.s b/usr/src/uts/sun4u/ml/zulu_asm.s deleted file mode 100644 index eb6c7497c6..0000000000 --- a/usr/src/uts/sun4u/ml/zulu_asm.s +++ /dev/null @@ -1,325 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#if defined(lint) -#include -#include -#else /* lint */ -#include "assym.h" -#endif /* lint */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef lint -void -zuluvm_dmv_tlbmiss_tl1() -{} - -#else /* lint */ - - DGDEF(zuluvm_base_pgsize) - .word 0 - - ENTRY_NP(zuluvm_dmv_tlbmiss_tl1) - - ! 
g1 - zuluvm_state_t pointer - ! g2 - IRDR_0 - mov UIII_IRDR_1, %g3 - ldxa [%g3]ASI_INTR_RECEIVE, %g5 - stx %g5, [%g1 + ZULUVM_ASM_TLB_ADDR] - mov UIII_IRDR_6, %g3 - ldxa [%g3]ASI_INTR_RECEIVE, %g5 - stx %g5, [%g1 + ZULUVM_ASM_TLB_TYPE] - - stxa %g0, [%g0]ASI_INTR_RECEIVE_STATUS ! clear the BUSY bit - membar #Sync - - mov %g1, %g7 - - ! check the fast tlb miss flag - sethi %hi(zuluvm_fast_tlb), %g6 - lduw [%g6 + %lo(zuluvm_fast_tlb)], %g6 - brz,pn %g6, send_intr1 - mov ZULUVM_TTE_DELAY, %g1 -#if 1 - add %g7, ZULUVM_STATE, %g4 - mov ZULUVM_STATE_IDLE, %g1 - mov ZULUVM_STATE_TLB_PENDING, %g6 - casa [%g4]ASI_N, %g1, %g6 - cmp %g6, %g1 - be,pt %icc, 2f - nop - - mov ZULUVM_STATE_CANCELED, %g1 - cmp %g6, %g1 - be,pt %icc, 1f - mov ZULUVM_STATE_STOPPED, %g1 - retry -1: - st %g1, [%g4] -#ifdef ZULUVM_STATS - lduw [%g7 + ZULUVM_ST_TLBCANCEL], %g3 - add %g3, 1, %g3 - stuw %g3, [%g7 + ZULUVM_ST_TLBCANCEL] -#endif - retry - -2: - ldx [%g7 + ZULUVM_ASM_TLB_TYPE], %g4 - and %g4, ZULUVM_DMA_MASK, %g4 -#ifdef ZULUVM_STATS - cmp %g4, ZULUVM_DMA2 - be,a,pn %icc, 1f - add %g7, ZULUVM_ST_DTLB2MISS, %g1 - cmp %g4, ZULUVM_ITLB1 - be,a,pn %icc, 1f - add %g7, ZULUVM_ST_ITLB1MISS, %g1 - cmp %g4, ZULUVM_ITLB2 - be,a,pn %icc, 1f - add %g7, ZULUVM_ST_ITLB2MISS, %g1 - add %g7, ZULUVM_ST_DTLB1MISS, %g1 -1: - lduw [%g1], %g3 - add %g3, 1, %g3 - stuw %g3, [%g1] -#endif - /* - * lookup the tte in the tsb - * %g1 - vaddr[63:13], ctx[12:0] - * %g2 - our trap level - * %g3 - return address - * %g7 - zulu data pointer (needs to be preserved) - * return: - * %g1 - flags [63..58] and pfn [31..0] - * %g2 - status code if %g1 is null - * %g7 - zulu data pointer - */ - mov 1, %g2 - set zulu_hat_tsb_lookup_tl1, %g3 - jmpl %g3, %g3 - ldx [%g7 + ZULUVM_ASM_TLB_ADDR], %g1 ! vaddr(tag) - - /* - * did we find a tte ?? 
- * If not, %g2 has the error code - */ - brgez,a,pt %g1, send_intr - mov %g2, %g1 - - set zulu_tsb_hit, %g6 - ldx [%g6], %g3 - add %g3, 1, %g3 - stx %g3, [%g6] - - /* - * get flags and pfn - */ - sllx %g1, 32, %g6 - srlx %g6, 32, %g6 ! %g6 pfn - srlx %g1, 59, %g3 - and %g3, 0x7, %g2 ! %g2 page size - srlx %g3, 3, %g4 - and %g4, 1, %g4 ! %g4 write perm - mov %g6, %g1 - - /* - * check if this is a dtlb2 miss(no itlb, pgsz != 8k) - * and if the current dtlb2 pgsz != tte pgsz - */ - ldx [%g7 + ZULUVM_ASM_TLB_TYPE], %g3 - and %g3, 0x1, %g3 - brnz,pt %g3, 3f ! not 0 => itlb => handles - nop - - ! check page size, base page size is always handled by dtlb1, so we - ! only need to check against dtlb2 - sethi %hi(zuluvm_base_pgsize), %g3 - lduw [%g3 + %lo(zuluvm_base_pgsize)], %g3 - cmp %g2, %g3 - be,pt %icc, 2f - cmp %g2, ZULU_TTE4M - be,pt %icc, 2f ! TTE4M => dtlb2 => ok! - nop - -#ifdef ZULUVM_STATS - lduw [%g7 + ZULUVM_ST_PAGESIZE], %g3 - add %g3, 1, %g3 - stuw %g3, [%g7 + ZULUVM_ST_PAGESIZE] - add %g7, ZULUVM_ST_MISS, %g3 - sll %g2, 2, %g5 - add %g5, %g3, %g5 - lduw [%g5], %g3 - add %g3, 1, %g3 - stuw %g3, [%g5] -#endif - ! set tte size to ZULUVM_BASE_PGSZ - sethi %hi(zuluvm_base_pgsize), %g3 - lduw [%g3 + %lo(zuluvm_base_pgsize)], %g3 - ba,pt %icc, 3f - mov %g3, %g2 -2: - -#ifdef ZULUVM_STATS - add %g7, ZULUVM_ST_MISS, %g3 - sll %g2, 2, %g5 - add %g3, %g5, %g5 - lduw [%g5], %g3 - add %g3, 1, %g3 - stuw %g3, [%g5] -#endif - - ! we maintain data on the last pfns for the last 12 pfns that we - ! processed -3: - lduw [%g7 + ZULUVM_PFNCNT], %g5 - add %g5, 4, %g3 - cmp %g3, 48 - be,a,pn %icc, 1f - mov %g0, %g3 - -1: - stuw %g3, [%g7 + ZULUVM_PFNCNT] - sllx %g5, 3, %g5 - add %g7, ZULUVM_PFNBUF, %g3 - add %g3, %g5, %g3 - stx %g1, [%g3] - stx %g2, [%g3 + 8] - stx %g4, [%g3 + 16] - ldx [%g7 + ZULUVM_ASM_TLB_TYPE], %g5 - stx %g5, [%g3 + 24] - - ldx [%g7 + ZULUVM_ASM_TLB_TYPE], %g3 - and %g3, 0x3, %g3 ! tlbtype - ldx [%g7 + ZULUVM_ARG], %g6 - - ! write tte to zulu mmu - ! 
%g1 pfn - ! %g2 tte size - ! %g3 tlbtype - ! %g4 tte wrperm - ! %g6 zulu device driver arg - ! %g7 devtab pointer - - sllx %g1, ZULUVM_ZFB_MMU_TLB_D_PA_SHIFT, %g1 - mov 0x1, %g5 - sllx %g5, 63, %g5 ! ZFB_MMU_TLB_D_V_MASK - or %g1, %g5, %g1 - or %g1, ZULUVM_ZFB_MMU_TLB_D_C_MASK, %g1 - sllx %g2, ZULUVM_ZFB_MMU_TLB_D_SZ_SHIFT, %g2 - - brz,pt %g4, 3f ! write perm ?? - or %g2, %g1, %g1 - - or %g1, ZULUVM_ZFB_MMU_TLB_D_W_MASK, %g1 -3: - ! at this point %g1 is ready to be written to the corresponding - ! data_in register, let's see which if it was itlb or dtlb... - and %g3, ZULUVM_ITLB_FLAG, %g3 - ! assumption is that data miss - brz,pt %g3, 4f ! is more likely than instr miss - ldx [%g7 + ZULUVM_PAMMU], %g2 ! physical addr of zulu mmu regs - - ! instruction miss - mov ZULUVM_ZFB_MMU_TLB_CR_IMISS_MASK, %g5 - add %g2, ZULUVM_ITLB_DATA_IN, %g4 - !stxa %g1, [%g4]ASI_IO - ba,pt %xcc, 5f - stxa %g1, [%g4]ASI_IO - !ldxa [%g4]ASI_IO, %g4 -4: - ! data miss - mov ZULUVM_ZFB_MMU_TLB_CR_DMISS_MASK, %g5 - add %g2, ZULUVM_DTLB_DATA_IN, %g4 - stxa %g1, [%g4]ASI_IO - !ldxa [%g4]ASI_IO, %g4 -5: - add %g7, ZULUVM_STATE, %g4 - mov ZULUVM_STATE_TLB_PENDING, %g6 - mov ZULUVM_STATE_IDLE, %g1 - casa [%g4]ASI_N, %g6, %g1 - cmp %g6, %g1 - bne,a,pn %icc, stopped - mov ZULUVM_STATE_STOPPED, %g3 - - ldx [%g7 + ZULUVM_PAMMU], %g2 - add %g2, ZULUVM_TLB_CONTROL, %g2 - stxa %g5, [%g2]ASI_IO - !ldxa [%g2]ASI_IO, %g3 - retry - -send_intr: - add %g7, ZULUVM_STATE, %g4 - mov ZULUVM_STATE_INTR_QUEUED, %g5 - mov ZULUVM_STATE_TLB_PENDING, %g3 - casa [%g4]ASI_N, %g3, %g5 - cmp %g3, %g5 - be,pt %icc, deliver_intr - mov ZULUVM_STATE_STOPPED, %g3 - ba,pt %icc, stopped - nop -#endif - -send_intr1: - add %g7, ZULUVM_STATE, %g4 - mov ZULUVM_STATE_IDLE, %g3 - mov ZULUVM_STATE_INTR_QUEUED, %g5 - casa [%g4]ASI_N, %g3, %g5 - cmp %g3, %g5 - be,pt %icc, deliver_intr - mov ZULUVM_STATE_STOPPED, %g3 -stopped: - st %g3, [%g4] -#ifdef ZULUVM_STATS - lduw [%g7 + ZULUVM_ST_TLBCANCEL], %g3 - add %g3, 1, %g3 - stuw %g3, [%g7 + 
ZULUVM_ST_TLBCANCEL] -#endif - retry - -deliver_intr: - stx %g1, [%g7 + ZULUVM_ASM_TLB_ERRCODE] ! set the error field - stx %g6, [%g7 + ZULUVM_ASM_TLB_TTE] ! deliver tte in data_0 - ! %g6 is invalid if error != SUCCESS - ! setsoftint_tl1(uint64_t inum, uint64_t dummy) - set setsoftint_tl1, %g5 - jmp %g5 - ldx [%g7 + ZULUVM_INTRNUM], %g1 - - SET_SIZE(zuluvm_dmv_tlbmiss_tl1) - -#endif /* lint */ - diff --git a/usr/src/uts/sun4u/ml/zulu_hat_asm.s b/usr/src/uts/sun4u/ml/zulu_hat_asm.s deleted file mode 100644 index eba64b9368..0000000000 --- a/usr/src/uts/sun4u/ml/zulu_hat_asm.s +++ /dev/null @@ -1,314 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#if defined(lint) -#include -#include -#else /* lint */ -#include "assym.h" -#endif /* lint */ - -#include -#include -#include -#include -#include -#include - -/* - * function to look up ttes in zulu_hat TSB. - * - * zulu_hat_tsb_lookup_tl1 is called from the zuluvm dmv interrupt handler - * so we can only use the global registers. 
- * - * zulu_hat_tsb_lookup_tl0 is called from TL=0 - */ -#ifdef lint - -/* ARGSUSED */ -uint64_t -zulu_hat_tsb_lookup_tl1(caddr_t vaddr) -{ - return (0); -} - -/* ARGSUSED */ -uint64_t -zulu_hat_tsb_lookup_tl0(struct zulu_hat *zhat, caddr_t vaddr) -{ - return (0); -} - -#else /* lint */ - - /* - * %g1 - vaddr | ctx - * %g3 - return address - * Must preserve %g7 for caller - * - * returns: - * %g1 - pfn and flags - * %g2 - zuluvm error code if %g1 is null - */ - ENTRY_NP(zulu_hat_tsb_lookup_tl1) - set ZULU_CTX_MASK, %g4 - and %g1, %g4, %g4 - - ! we're at trap level 1, (TL=1) - ! if the context is already locked by another - ! thread, punt to the TL=0 code - ! it's not safe to spinloop now. - - set zulu_ctx_tab, %g6 - sllx %g4, 3, %g5 -#ifdef DEBUG - mov %g5, %g2 ! remember ctx * 8 -#endif - add %g5, %g6, %g6 - - ldx [%g6], %g4 - andcc %g4, 1, %g0 - bne,a,pn %icc, ctx_busy - mov ZULUVM_CTX_LOCKED, %g2 - - ! now do a compare and swap and make sure it's still not locked - or %g4, 1, %g5 - casxa [%g6]ASI_N, %g4, %g5 - cmp %g4, %g5 - bne,a,pn %icc, ctx_busy - mov ZULUVM_CTX_LOCKED, %g2 - - brz,a,pn %g4, zulu_hat_tsb_exit - mov %g0, %g1 - - ! we have the lock now proceed - - ! set lsb of g3 to indicate that we need to unlock the context - ! before returning - ba,pt %xcc, zulu_hat_tsb_lookup - or %g3, 1, %g3 - -ctx_busy: - mov %g0, %g1 - jmpl %g3+8, %g0 - nop - - - /* - * zulu_hat_tsb_lookup_tl0 jumps here - * - * %g1 vaddr | ctx - * %g3 return address | unlock flag (bit zero) - * %g4 has the zulu hat ptr (locked) - */ -zulu_hat_tsb_lookup: - mov %g1, %g2 - mov %g4, %g1 - - add %g1, ZULU_HAT_TSB_SZ, %g5 - lduh [%g5], %g5 ! tsb size - sub %g5, 1, %g5 - - srlx %g2, 22, %g4 ! 4m page hash - and %g5, %g4, %g4 ! hash index - sllx %g4, 4, %g4 - add %g1, ZULU_HAT_TSB, %g5 - ldx [%g5], %g5 - add %g5, %g4, %g4 ! ptr to struct zulu_tte - ldx [%g4], %g5 ! 
get the tag - - set (0x1ff << 13), %g6 - andn %g5, %g6, %g5 - andn %g2, %g6, %g6 - cmp %g5, %g6 - bne,pn %xcc, zulu_hat_tsb_try_512k - nop - - ldx [%g4 + 8], %g4 ! flags and pfn - brgez,pn %g4, zulu_hat_tsb_try_512k ! check if entry is valid - nop - - sllx %g4, 2, %g5 - srlx %g5, 61, %g5 ! tte size - cmp %g5, ZULU_TTE4M - be,pn %xcc, zulu_hat_tsb_found - nop - -zulu_hat_tsb_try_512k: - add %g1, ZULU_HAT_TSB_SZ, %g5 - lduh [%g5], %g5 ! tsb size - sub %g5, 1, %g5 - - srlx %g2, 19, %g4 ! 4m page hash - and %g5, %g4, %g4 ! hash index - sllx %g4, 4, %g4 - add %g1, ZULU_HAT_TSB, %g5 - ldx [%g5], %g5 - add %g5, %g4, %g4 ! ptr to struct zulu_tte - ldx [%g4], %g5 ! get the tag - - set (0x3f << 13), %g6 - andn %g5, %g6, %g5 - andn %g2, %g6, %g6 - cmp %g5, %g6 - bne,pn %xcc, zulu_hat_tsb_try_64k - nop - - ldx [%g4 + 8], %g4 ! flags and pfn - brgez,pn %g4, zulu_hat_tsb_try_64k ! check if entry is valid - nop - - sllx %g4, 2, %g5 - srlx %g5, 61, %g5 ! tte size - cmp %g5, ZULU_TTE512K - be,pn %xcc, zulu_hat_tsb_found - nop - -zulu_hat_tsb_try_64k: - add %g1, ZULU_HAT_TSB_SZ, %g5 - lduh [%g5], %g5 ! tsb size - sub %g5, 1, %g5 - - srlx %g2, 16, %g4 ! 4m page hash - and %g5, %g4, %g4 ! hash index - sllx %g4, 4, %g4 - add %g1, ZULU_HAT_TSB, %g5 - ldx [%g5], %g5 - add %g5, %g4, %g4 ! ptr to struct zulu_tte - ldx [%g4], %g5 ! get the tag - - set (0x7 << 13), %g6 - andn %g5, %g6, %g5 - andn %g2, %g6, %g6 - cmp %g5, %g6 - bne,pn %xcc, zulu_hat_tsb_try_8k - nop - - ldx [%g4 + 8], %g4 ! flags and pfn - brgez,pn %g4, zulu_hat_tsb_try_8k ! check if entry is valid - nop - - sllx %g4, 2, %g5 - srlx %g5, 61, %g5 ! tte size - cmp %g5, ZULU_TTE64K - be,pn %xcc, zulu_hat_tsb_found - nop - -zulu_hat_tsb_try_8k: - add %g1, ZULU_HAT_TSB_SZ, %g5 - lduh [%g5], %g5 ! tsb size - sub %g5, 1, %g5 - - srlx %g2, 13, %g4 ! calc hash - and %g5, %g4, %g4 ! hash index - sllx %g4, 4, %g4 - add %g1, ZULU_HAT_TSB, %g5 - ldx [%g5], %g5 ! tsb ptr - add %g5, %g4, %g4 ! ptr to struct tte - ldx [%g4], %g5 ! 
get the tag - cmp %g5, %g2 - bne,pn %xcc, zulu_hat_tsb_exit - mov %g0, %g1 - - ldx [%g4 + 8], %g4 ! flags and pfn - brgez,pn %g4, zulu_hat_tsb_exit ! check if entry is valid - mov %g0, %g1 - - sllx %g4, 2, %g5 - srlx %g5, 61, %g5 ! tte size - brnz,pn %g5, zulu_hat_tsb_exit - mov %g0, %g1 - -zulu_hat_tsb_found: - ! expect the tte size in %g5 - mulx %g5, 3, %g5 - mov 1, %g1 - sllx %g1, %g5, %g1 - sub %g1, 1, %g1 - andn %g4, %g1, %g4 - srlx %g2, 13, %g5 - and %g1, %g5, %g5 - or %g5, %g4, %g4 - mov %g4, %g1 - - ! now fall through to exit - -zulu_hat_tsb_exit: - ! if bit zero of %g3 is set, we're at TL=1 and need to unlock - ! the context here - andcc %g3, 1, %g0 - be,pn %xcc, after_unlock - nop - - ! clear the context unlock flag - andn %g3, 1, %g3 - - set ZULU_CTX_MASK, %g6 - and %g2, %g6, %g6 ! ctx num - - sllx %g6, 3, %g6 - set zulu_ctx_tab, %g5 - add %g6, %g5, %g5 ! %g5 = &zulu_ctx_tab[ctx_num] - ldx [%g5], %g6 - andn %g6, 1, %g6 - stx %g6, [%g5] - -after_unlock: - - ! set the status code to ZULUVM_NO_TTE in case we are running at TL=1 - ! and no tte was found. - ! - ! note: caller doesn't examine %g2 unless flags and pfn are null - jmpl %g3 + 0x8, %g0 - mov ZULUVM_NO_TTE, %g2 - - - - - SET_SIZE(zulu_hat_tsb_lookup_tl1) - - /* - * %o0 - zulu hat ptr (already locked) - * %o1 - vaddr - */ - ENTRY_NP(zulu_hat_tsb_lookup_tl0) - mov %o0, %g4 - - set zulu_hat_tsb_lookup, %g3 - - ! note bit zero of g3 is zero which tells zulu_hat_tsb_lookup - ! to not unlock tsb before returning - - jmpl %g3, %g3 - mov %o1, %g1 - - retl - mov %g1, %o0 - SET_SIZE(zulu_hat_tsb_lookup_tl0) - -#endif /* lint */ diff --git a/usr/src/uts/sun4u/sys/zulu_hat.h b/usr/src/uts/sun4u/sys/zulu_hat.h deleted file mode 100644 index 447d631944..0000000000 --- a/usr/src/uts/sun4u/sys/zulu_hat.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef __ZULU_HAT_INCL__ -#define __ZULU_HAT_INCL__ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#define ZULU_TTE8K 0 -#define ZULU_TTE64K 1 -#define ZULU_TTE512K 2 -#define ZULU_TTE4M 3 -#define ZULUM_MAX_PG_SIZES 4 - -#define ZULU_CTX_MASK 0x1fff - -#ifndef _ASM - -#include -#include -#include -#include - - -#define ZULU_HAT_BP_SHIFT 13 -#define ZULU_HAT_SZ_SHIFT(sz) ((sz) * 3) -#define ZULU_HAT_NUM_PGS(sz) (1<zulu_tte_pfn & ~ZULU_HAT_PFN_MASK(ttep->zulu_tte_size)) | \ - (((uintptr_t)vaddr >> ZULU_HAT_BP_SHIFT) & \ - ZULU_HAT_PFN_MASK(ttep->zulu_tte_size))) - -/* - * zulu_ctx_tab is an array of pointers to zulu hat structures. - * since the addresses are 8 byte aligned we use bit 0 as a lock flag. - * This will synchronize TL1 access to the tsb and the mappings. 
- */ - -#define ZULU_CTX_LOCK 0x1 - -#define ZULU_CTX_LOCK_INIT(c) zulu_ctx_tab[c] = NULL -#define ZULU_CTX_IS_FREE(c) (zulu_ctx_tab[c] == NULL) -#define ZULU_CTX_SET_HAT(c, h) zulu_ctx_tab[c] = h - -#define ZULU_CTX_GET_HAT(c) (struct zulu_hat *)((uint64_t) \ - zulu_ctx_tab[c] & ~ZULU_CTX_LOCK) - -struct zulu_tag { - uint64_t zulu_tag_page:51; /* [63:13] vpage */ -}; - -struct zulu_tte { - union { - struct zulu_tag zulu_tte_tag; - uint64_t zulu_tte_addr; - } un; - uint_t zulu_tte_valid :1; - uint_t zulu_tte_perm :1; - uint_t zulu_tte_size :3; - uint_t zulu_tte_locked :1; - uint_t zulu_tte_pfn; -}; - -/* - * zulu hat stores its list of translation in a hash table. - * TODO: size this table. 256 buckets may be too small. - */ -#define ZULU_HASH_TBL_NUM 0x100 -#define ZULU_HASH_TBL_MASK (ZULU_HASH_TBL_NUM - 1) -#define ZULU_HASH_TBL_SHIFT(_s) (ZULU_HAT_BP_SHIFT + (3 * _s)) -#define ZULU_HASH_TBL_SZ (ZULU_HASH_TBL_NUM * sizeof (struct zulu_hat_blk *)) -#define ZULU_MAP_HASH_VAL(_v, _s) (((_v) >> ZULU_HASH_TBL_SHIFT(_s)) & \ - ZULU_HASH_TBL_MASK) -#define ZULU_MAP_HASH_HEAD(_zh, _v, _s) \ - (_zh->hash_tbl[ZULU_MAP_HASH_VAL(_v, _s)]) - -/* - * - * TODO: need finalize the number of entries in the TSB - * 32K tsb entries caused us to never get a tsb miss that didn't cause - * a page fault. - * - * Reducing TSB_NUM to 512 entries caused tsb_miss > tsb_hit - * in a yoyo run. 
- */ -#define ZULU_TSB_NUM 4096 -#define ZULU_TSB_SZ (ZULU_TSB_NUM * sizeof (struct zulu_tte)) -#define ZULU_TSB_HASH(a, ts, s) (((uintptr_t)(a) >> ZULU_HAT_PGSHIFT(ts)) & \ - (s-1)) - -#define ZULU_VADDR(tag) (tag & ~ZULU_CTX_MASK) -#define ZULU_TTE_TO_PAGE(a) a.un.zulu_tte_tag.zulu_tag_page - - -struct zulu_hat_blk { - struct zulu_hat_blk *zulu_hash_next; - struct zulu_hat_blk *zulu_hash_prev; - struct zulu_shadow_blk *zulu_shadow_blk; - struct zulu_tte zulu_hat_blk_tte; -}; - -#define zulu_hat_blk_vaddr zulu_hat_blk_tte.un.zulu_tte_addr -#define zulu_hat_blk_pfn zulu_hat_blk_tte.zulu_tte_pfn -#define zulu_hat_blk_page ZULU_TTE_TO_PAGE(zulu_hat_blk_tte) -#define zulu_hat_blk_perm zulu_hat_blk_tte.zulu_tte_perm -#define zulu_hat_blk_size zulu_hat_blk_tte.zulu_tte_size -#define zulu_hat_blk_valid zulu_hat_blk_tte.zulu_tte_valid - -/* - * for fast lookups by address, len we use an avl list to shadow occupied - * 4Mb regions that have mappings. - */ -#define ZULU_SHADOW_BLK_RANGE 0x400000 -#define ZULU_SHADOW_BLK_MASK (~(ZULU_SHADOW_BLK_RANGE - 1)) - -struct zulu_shadow_blk { - avl_node_t link; /* must be at beginning of struct */ - uint64_t ivaddr; /* base address of this node */ - uint64_t ref_count; - uint64_t min_addr; - uint64_t max_addr; -}; -#define ZULU_SHADOW_BLK_LINK_OFFSET (0) - -struct zulu_hat { - struct xhat zulu_xhat; - kmutex_t lock; - avl_tree_t shadow_tree; - char magic; /* =42 to mark our data for mdb */ - unsigned in_fault : 1; - unsigned freed : 1; - unsigned map8k : 1; - unsigned map64k : 1; - unsigned map512k : 1; - unsigned map4m : 1; - short zulu_ctx; - unsigned short zulu_tsb_size; /* TODO why not a constant? 
*/ - struct zulu_hat_blk **hash_tbl; - struct zulu_tte *zulu_tsb; - struct zulu_shadow_blk *sblk_last; /* last sblk looked up */ - uint64_t fault_ivaddr_last; /* last translation loaded */ - caddr_t vaddr_max; - hrtime_t last_used; - void *zdev; -}; - -#define ZULU_HAT2AS(h) ((h)->zulu_xhat.xhat_as) - -/* - * Assembly language function for TSB lookups - */ -uint64_t zulu_hat_tsb_lookup_tl0(struct zulu_hat *zhat, caddr_t vaddr); - -/* - * zuluvm's interface to zulu_hat - */ - -int zulu_hat_load(struct zulu_hat *zhat, caddr_t vaddr, enum seg_rw rw, int *); - -int zulu_hat_init(); -int zulu_hat_destroy(); -int zulu_hat_attach(void *arg); -int zulu_hat_detach(void *arg); -struct zulu_hat *zulu_hat_proc_attach(struct as *as, void *zdev); -void zulu_hat_proc_detach(struct zulu_hat *zhat); - -void zulu_hat_validate_ctx(struct zulu_hat *zhat); -void zulu_hat_terminate(struct zulu_hat *zhat); - -#endif /* _ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* __ZULU_HAT_INCL__ */ diff --git a/usr/src/uts/sun4u/sys/zulumod.h b/usr/src/uts/sun4u/sys/zulumod.h deleted file mode 100644 index cce574a1dd..0000000000 --- a/usr/src/uts/sun4u/sys/zulumod.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _ZULUMOD_H -#define _ZULUMOD_H - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#ifndef _ASM - -#include -#include - -#define ZULUVM_VERSION_STR(a) #a -#define ZULUVM_VERSION(a) ZULUVM_VERSION_STR(a) -#define ZULUVM_MOD_VERSION \ - ZULUVM_VERSION(XHAT_PROVIDER_VERSION) "." \ - ZULUVM_VERSION(ZULUVM_INTERFACE_VERSION) - -#define ZULUDCHKFUNC(_p1, _p2, _p3) \ - ((_p1) != NULL && (_p1)->_p2 != NULL) ? \ - (_p1)->_p2 _p3 : ZULUVM_NO_SUPPORT -#define ZULUDCHKPROC(_p1, _p2, _p3) \ - if ((_p1) != NULL && (_p1)->_p2 != NULL) (_p1)->_p2 _p3 - -#define zulud_set_itlb_pc(_devp, _a, _b) \ - ZULUDCHKPROC((_devp)->dops, set_itlb_pc, (_a, _b)) -#define zulud_set_dtlb_pc(_devp, _a, _b) \ - ZULUDCHKPROC((_devp)->dops, set_dtlb_pc, (_a, _b)) -#define zulud_write_tte(_devp, _a, _b, _c, _d, _e, _f) \ - ZULUDCHKFUNC((_devp)->dops, write_tte, (_a, _b, _c, _d, _e, _f)) -#define zulud_tlb_done(_devp, _a, _b, _c) \ - ZULUDCHKPROC((_devp)->dops, tlb_done, (_a, _b, _c)) -#define zulud_demap_page(_devp, _a, _b, _c) \ - ZULUDCHKPROC((_devp)->dops, demap_page, (_a, _b, _c)) -#define zulud_demap_ctx(_devp, _a, _b) \ - ZULUDCHKPROC((_devp)->dops, demap_ctx, (_a, _b)) - -#endif - -#define ZULUVM_DATA0_IDX 0 -#define ZULUVM_DATA1_IDX 1 -#define ZULUVM_DATA2_IDX 2 -#define ZULUVM_DATA3_IDX 3 -#define ZULUVM_DATA4_IDX 4 -#define ZULUVM_DATA5_IDX 5 -#define ZULUVM_DATA6_IDX 6 -#define ZULUVM_DATA7_IDX 7 - -#define ZULUVM_IDX2FLAG(i) (1 << (7 - i)) -#define ZULUVM_DATA0_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA0_IDX) -#define ZULUVM_DATA1_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA1_IDX) -#define 
ZULUVM_DATA2_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA2_IDX) -#define ZULUVM_DATA3_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA3_IDX) -#define ZULUVM_DATA4_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA4_IDX) -#define ZULUVM_DATA5_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA5_IDX) -#define ZULUVM_DATA6_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA6_IDX) -#define ZULUVM_DATA7_FLAG ZULUVM_IDX2FLAG(ZULUVM_DATA7_IDX) - -#define ZULUVM_TLB_ADDR_IDX ZULUVM_DATA0_IDX -#define ZULUVM_TLB_TYPE_IDX ZULUVM_DATA1_IDX -#define ZULUVM_TLB_TTE_IDX ZULUVM_DATA2_IDX -#define ZULUVM_TLB_ERRCODE_IDX ZULUVM_DATA3_IDX - -#define ZULUVM_DATA_FLAGS (ZULUVM_DATA1_FLAG | \ - ZULUVM_DATA6_FLAG) - -#define ZULUVM_GET_TLB_TTE(devp) \ - (devp)->zvm.idata[ZULUVM_TLB_TTE_IDX] -#define ZULUVM_GET_TLB_ADDR(devp) \ - (devp)->zvm.idata[ZULUVM_TLB_ADDR_IDX] -#define ZULUVM_GET_TLB_TYPE(devp) (ZULUVM_DMA_MASK & \ - (devp)->zvm.idata[ZULUVM_TLB_TYPE_IDX]) -#define ZULUVM_GET_TLB_ERRCODE(devp) (int)(0xffffffff & \ - (devp)->zvm.idata[ZULUVM_TLB_ERRCODE_IDX]) - -#define ZULUVM_MAX_DEV 2 -#define ZULUVM_PIL PIL_2 -#define ZULUVM_NUM_PGSZS 4 - -#define ZULUVM_STATE_IDLE 0 -#define ZULUVM_STATE_STOPPED 1 -#define ZULUVM_STATE_CANCELED 2 -#define ZULUVM_STATE_TLB_PENDING 3 -#define ZULUVM_STATE_INTR_QUEUED 4 -#define ZULUVM_STATE_INTR_PENDING 5 -#define ZULUVM_STATE_WRITE_TTE 6 - -#ifndef _ASM - -typedef struct { - uint64_t idata[4]; /* mondo pkt copy area */ - void *arg; /* arg for device calls */ - uint64_t mmu_pa; /* phy. 
addr of MMU regs */ - struct zuluvm_proc *proc1; - struct zuluvm_proc *proc2; - volatile uint32_t state; /* state of tlb miss handling */ - uint64_t intr_num; /* our soft intr number */ - short dmv_intr; /* dmv interrupt handle */ -#ifdef ZULUVM_STATS - int cancel; - int tlb_miss[ZULUVM_NUM_PGSZS]; - int pagefault; - int no_mapping; - int preload; - int migrate; - int pagesize; - int itlb1miss; - int dtlb1miss; - int itlb2miss; - int dtlb2miss; - int demap_page; - int demap_ctx; -#endif - uint64_t pfnbuf[50]; - int pfncnt; -} zuluvm_miss_t; - -#ifdef ZULUVM_STATS -#define ZULUVM_STATS_MISS(devp, sz) (devp)->zvm.tlb_miss[sz]++ -#define ZULUVM_STATS_PAGEFAULT(devp) (devp)->zvm.pagefault++ -#define ZULUVM_STATS_NOMAP(devp) (devp)->zvm.no_mapping++ -#define ZULUVM_STATS_PRELOAD(devp) (devp)->zvm.preload++ -#define ZULUVM_STATS_MIGRATE(devp) (devp)->zvm.migrate++ -#define ZULUVM_STATS_PAGEZISE(devp) (devp)->zvm.pagesize++ -#define ZULUVM_STATS_CANCEL(devp) (devp)->zvm.cancel++ -#define ZULUVM_STATS_DEMAP_PAGE(devp) (devp)->zvm.demap_page++ -#define ZULUVM_STATS_DEMAP_CTX(devp) (devp)->zvm.demap_ctx++ -#else -#define ZULUVM_STATS_MISS(devp, sz) -#define ZULUVM_STATS_PAGEFAULT(devp) -#define ZULUVM_STATS_NOMAP(devp) -#define ZULUVM_STATS_PRELOAD(devp) -#define ZULUVM_STATS_MIGRATE(devp) -#define ZULUVM_STATS_PAGEZISE(devp) -#define ZULUVM_STATS_CANCEL(devp) -#define ZULUVM_STATS_DEMAP_PAGE(devp) -#define ZULUVM_STATS_DEMAP_CTX(devp) -#endif - -#define ZULUVM_MAX_INTR 32 - -typedef struct { - short offset; - short ino; -} zuluvm_intr_t; - -/* - * This structure contains per device data. - * It is protected by dev_lck. 
- */ -typedef struct { - zuluvm_miss_t zvm; /* tlb miss state */ - volatile uint64_t *imr; /* intr mapping regs */ - struct zuluvm_proc *procs; /* protected by proc_lck */ - dev_info_t *dip; /* device driver instance */ - zulud_ops_t *dops; /* device drv operations */ - kmutex_t load_lck; /* protects in_intr */ - kmutex_t dev_lck; /* protects this struct */ - kmutex_t proc_lck; /* protects active procs */ - kcondvar_t intr_wait; /* sync for as_free */ - int intr_flags; - int in_intr; - kmutex_t park_lck; /* page fault thread */ - kcondvar_t park_cv; - int parking; - int agentid; /* zulu's agent id */ - zuluvm_intr_t interrupts[ZULUVM_MAX_INTR]; -} zuluvm_state_t; - -#define ZULUVM_INTR_OFFSET offsetof(zuluvm_state_t, interrupts) -#define ZULUVM_INTR2INO(addr) (((zuluvm_intr_t *)(addr))->ino) -#define ZULUVM_INTR2ZDEV(addr) \ - (zuluvm_state_t *)((caddr_t)addr - (ZULUVM_INTR2INO(addr) * \ - sizeof (zuluvm_intr_t)) - ZULUVM_INTR_OFFSET) - -typedef struct zuluvm_proc { - struct zulu_hat *zhat; - zuluvm_state_t *zdev; /* back ptr to dev instance */ - unsigned short refcnt; /* keep this until ref == 0 */ - short valid; /* if valid is 0 then don't use */ - struct zuluvm_proc *next; - struct zuluvm_proc *prev; -} zuluvm_proc_t; - -#define ZULUVM_DO_INTR1 INT32_C(1) -#define ZULUVM_WAIT_INTR1 INT32_C(2) -#define ZULUVM_DO_INTR2 INT32_C(4) -#define ZULUVM_WAIT_INTR2 INT32_C(8) - -int zuluvm_change_state(uint32_t *state_pa, int new, int assume); -void zuluvm_demap_page(void *, struct hat *, short, caddr_t, uint_t); -void zuluvm_demap_ctx(void *, short); -void zuluvm_dmv_tlbmiss_tl1(void); -void zuluvm_load_tte(struct zulu_hat *zhat, caddr_t addr, uint64_t pfn, - int perm, int size); - - -#endif - -/* - * The following defines are copied from the ZFB and ZULU - * workspaces. 
We re-define them here since we can't have - * a dependency onto files outside our consolidation - */ -#define ZULUVM_IMR_V_MASK UINT64_C(0x0000000080000000) -#define ZULUVM_IMR_TARGET_SHIFT INT32_C(26) -#define ZULUVM_IMR_MAX INT32_C(0x3f) - -#define ZULUVM_ZFB_MMU_TLB_D_V_MASK 0x8000000000000000 -#define ZULUVM_ZFB_MMU_TLB_D_PA_SHIFT 0xD /* 13 bits */ -#define ZULUVM_ZFB_MMU_TLB_D_C_MASK 0x20 -#define ZULUVM_ZFB_MMU_TLB_D_SZ_SHIFT 0x3D /* 61 */ -#define ZULUVM_ZFB_MMU_TLB_D_SZ_MASK 0x6000000000000000 -#define ZULUVM_ZFB_MMU_TLB_D_W_MASK 0x2 -#define ZULUVM_ZFB_MMU_TLB_CR_IMISS_MASK 0x2 -#define ZULUVM_ZFB_MMU_TLB_CR_DMISS_MASK 0x1 -#define ZULUVM_ZFB_MMU_DTLB_PAGE_SZ_2_MASK 0xc /* DTLB2 Page size */ -#define ZULUVM_ZFB_MMU_DTLB_PAGE_SZ_2_SHIFT 2 -#define ZULUVM_DTLB_PAGE_SZ 0x8 -#define ZULUVM_ITLB_DATA_IN 0x18 -#define ZULUVM_DTLB_DATA_IN 0x28 -#define ZULUVM_TLB_CONTROL 0 -#define ZULUVM_ITLB_MISS_ICR 0x0 -#define ZULUVM_DTLB_MISS_ICR 0x8 -#define ZULUVM_DMA1_TSB_BASE 0x50 -#define ZULUVM_DMA2_TSB_BASE 0x68 - -#ifdef __cplusplus -} -#endif - -#endif /* _ZULUMOD_H */ diff --git a/usr/src/uts/sun4u/sys/zuluvm.h b/usr/src/uts/sun4u/sys/zuluvm.h deleted file mode 100644 index d36a63b9bc..0000000000 --- a/usr/src/uts/sun4u/sys/zuluvm.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef __ZULUVM_INCL__ -#define __ZULUVM_INCL__ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifdef __cplusplus -extern "C" { -#endif - -/* zulud interface */ - -#ifndef _ASM - -#include - -typedef struct { - caddr_t addr; - size_t len; - int tlbtype; -} zulud_preload_t; - -typedef struct { - int version; - int (*set_itlb_pc)(void *handle, uint64_t mondo); - int (*set_dtlb_pc)(void *handle, uint64_t mondo); - int (*set_suspendAck_pc)(void *handle, uint64_t mondo); - int (*write_tte)(void *handle, int ttesize, uint64_t tag, - pfn_t pfn, int permission, int tlbtype); - int (*tlb_done)(void *handle, int tlbtype, int status); - int (*demap_page)(void *handle, caddr_t vaddr, short ctx); - int (*demap_ctx)(void *handle, short ctx); - int (*dma_suspend_ack)(void *handle); - int (*set_tsb)(void *handle, int tlbtype, uint64_t tsbreg); -} zulud_ops_t; - -#endif - -#define ZULUVM_SUCCESS 0 -#define ZULUVM_ERROR 1 -#define ZULUVM_NO_TTE 2 -#define ZULUVM_INVALID_MISS 3 -#define ZULUVM_NO_DEV 4 -#define ZULUVM_NO_HAT 5 -#define ZULUVM_NO_MAP 6 -#define ZULUVM_VERSION_MISMATCH 7 -#define ZULUVM_TTE_DELAY 8 -#define ZULUVM_MISS_CANCELED 9 -#define ZULUVM_BAD_IDX 10 -#define ZULUVM_WATCH_POINT 11 -#define ZULUVM_NO_SUPPORT 12 -#define ZULUVM_CTX_LOCKED 13 - -#define ZULUVM_ITLB_FLAG 0x1 -#define ZULUVM_DMA_FLAG 0x2 -#define ZULUVM_DMA_MASK 0x3 - -#define ZULUVM_DMA1 0 -#define ZULUVM_DMA2 ZULUVM_DMA_FLAG -#define ZULUVM_ITLB1 ZULUVM_ITLB_FLAG -#define ZULUVM_ITLB2 (ZULUVM_ITLB_FLAG | ZULUVM_DMA_FLAG) -#define ZULUVM_INVAL 0x4 - -#ifndef _ASM - -/* zuluvm interface */ - -#define ZULUVM_INTERFACE_VERSION 1 /* inc with every intf change */ - 
-typedef void * zuluvm_info_t; -int zuluvm_init(zulud_ops_t *ops, int **pagesizes); -int zuluvm_fini(void); -int zuluvm_alloc_device(dev_info_t *devi, void *arg, zuluvm_info_t *devp, - caddr_t mmu, caddr_t imr); -int zuluvm_free_device(zuluvm_info_t devp); -int zuluvm_dma_add_proc(zuluvm_info_t devp, uint64_t *cookie); -int zuluvm_dma_delete_proc(zuluvm_info_t devp, uint64_t cookie); -int zuluvm_dma_alloc_ctx(zuluvm_info_t devp, int dma, short *ctx, - uint64_t *tsb); -int zuluvm_dma_preload(zuluvm_info_t devp, int dma, int num, - zulud_preload_t *list); -int zuluvm_dma_free_ctx(zuluvm_info_t devp, int dma); -int zuluvm_add_intr(zuluvm_info_t devp, int ino, uint_t (*handler)(caddr_t), - caddr_t arg); -int zuluvm_rem_intr(zuluvm_info_t devp, int ino); -int zuluvm_enable_intr(zuluvm_info_t devp, int num); -int zuluvm_disable_intr(zuluvm_info_t devp, int num); -int zuluvm_park(zuluvm_info_t devp); - -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* __ZULUVM_INCL__ */ diff --git a/usr/src/uts/sun4u/vm/zulu_hat.c b/usr/src/uts/sun4u/vm/zulu_hat.c deleted file mode 100644 index 5ecadc028f..0000000000 --- a/usr/src/uts/sun4u/vm/zulu_hat.c +++ /dev/null @@ -1,1469 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * This file contains the implementation of zulu_hat: an XHAT provider - * to support the MMU for the XVR-4000 graphics accelerator (code name zulu). - * - * The zulu hat is linked into the kernel misc module zuluvm. - * zuluvm provides services that the zulu device driver module requires - * that are not part of the standard ddi. See PSARC 2002/231. - * - * The zulu driver is delivered by the graphics consolidation. - * zuluvm is in ON workspace. - * - * There are two types of interfaces provided by zulu_hat - * 1. The set of functions and data structures used by zuluvm to obtain - * tte entries for the zulu MMU and to manage the association between - * user process's address spaces and zulu graphics contexts. - * - * 2. The entry points required for an XHAT provider: zulu_hat_ops - */ - -/* - * zulu_ctx_tab contains an array of pointers to the zulu_hats. - * - * During zulu graphics context switch, the zulu MMU's current context register - * is set to the index of the process's zulu hat's location in the array - * zulu_ctx_tab. - * - * This allows the TL=1 TLB miss handler to quickly find the zulu hat and - * lookup a tte in the zulu hat's TSB. - * - * To synchronize with the trap handler we use bit zero of - * the pointer as a lock bit. See the function zulu_ctx_tsb_lock_enter(). - * - * If the trap handler finds the ctx locked it doesn't wait, it - * posts a soft interrupt which is handled at TL=0. 
- */ - -#define ZULU_HAT_MAX_CTX 32 -struct zulu_hat *zulu_ctx_tab[ZULU_HAT_MAX_CTX]; - -/* - * To avoid searching through the whole zulu_ctx_tab for a free slot, - * we maintain the value of zulu_ctx_search_start. - * - * This value is a guess as to where a free slot in the context table might be. - * All slots < zulu_ctx_search_start are definitely occupied. - */ -static int zulu_ctx_search_start = 0; - - -/* - * this mutex protects the zulu_ctx_tab and zulu_ctx_search_start - */ -static kmutex_t zulu_ctx_lock; - - -uint64_t zulu_tsb_hit = 0; /* assembly code increments this */ -static uint64_t zulu_tsb_miss = 0; -static uint64_t zulu_as_fault = 0; - -/* - * The zulu device has two zulu data mmus. - * We use the base pagesize for one of them and the and 4M for the other. - */ -extern int zuluvm_base_pgsize; - - - -/* - * call zuluvm to remove translations for a page - */ -static void -zulu_hat_demap_page(struct zulu_hat *zhat, caddr_t vaddr, int size) -{ - if (zhat->zulu_ctx < 0) { - /* context has been stolen, so page is already demapped */ - return; - } - zuluvm_demap_page(zhat->zdev, NULL, zhat->zulu_ctx, vaddr, size); -} - -static void -zulu_hat_demap_ctx(void *zdev, int zulu_ctx) -{ - if (zulu_ctx < 0) { - /* context has been stolen */ - return; - } - zuluvm_demap_ctx(zdev, zulu_ctx); -} - - -/* - * steal the least recently used context slot. 
- */ -static int -zulu_hat_steal_ctx() -{ - int ctx; - hrtime_t delta = INT64_MAX; - struct zulu_hat *zhat_oldest = NULL; - - ASSERT(mutex_owned(&zulu_ctx_lock)); - - for (ctx = 0; ctx < ZULU_HAT_MAX_CTX; ctx++) { - struct zulu_hat *zhat = ZULU_CTX_GET_HAT(ctx); - - /* - * we shouldn't be here unless all slots are occupied - */ - ASSERT(zhat != NULL); - - TNF_PROBE_3(steal_ctx_loop, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, ctx, - tnf_long, last_used, zhat->last_used, - tnf_long, oldest, delta); - - if (zhat->last_used < delta) { - zhat_oldest = zhat; - delta = zhat->last_used; - } - } - - ASSERT(zhat_oldest != NULL); - - mutex_enter(&zhat_oldest->lock); - - /* Nobody should have the tsb lock bit set here */ - ASSERT(((uint64_t)zulu_ctx_tab[zhat_oldest->zulu_ctx] & ZULU_CTX_LOCK) - == 0); - - ctx = zhat_oldest->zulu_ctx; - zhat_oldest->zulu_ctx = -1; - - ZULU_CTX_SET_HAT(ctx, NULL); - - zulu_hat_demap_ctx(zhat_oldest->zdev, ctx); - - mutex_exit(&zhat_oldest->lock); - - TNF_PROBE_1(zulu_hat_steal_ctx, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, ctx); - - return (ctx); -} - -/* - * find a slot in the context table for a zulu_hat - */ -static void -zulu_hat_ctx_alloc(struct zulu_hat *zhat) -{ - int ctx; - - mutex_enter(&zulu_ctx_lock); - - for (ctx = zulu_ctx_search_start; ctx < ZULU_HAT_MAX_CTX; ctx++) { - if (ZULU_CTX_IS_FREE(ctx)) { - zulu_ctx_search_start = ctx + 1; - break; - } - } - - if (ctx == ZULU_HAT_MAX_CTX) { - /* table is full need to steal an entry */ - zulu_ctx_search_start = ZULU_HAT_MAX_CTX; - ctx = zulu_hat_steal_ctx(); - } - - mutex_enter(&zhat->lock); - - ZULU_CTX_SET_HAT(ctx, zhat); - zhat->zulu_ctx = ctx; - - mutex_exit(&zhat->lock); - - mutex_exit(&zulu_ctx_lock); - - TNF_PROBE_2(zulu_hat_ctx_alloc, "zulu_hat", /* CSTYLED */, - tnf_opaque, zhat, zhat, tnf_int, ctx, ctx); -} - -/* - * zulu_hat_validate_ctx: Called before the graphics context associated - * with a given zulu hat becomes the current zulu graphics context. 
- * Make sure that the hat has a slot in zulu_ctx_tab. - */ -void -zulu_hat_validate_ctx(struct zulu_hat *zhat) -{ - if (zhat->zulu_ctx < 0) { - zulu_hat_ctx_alloc(zhat); - } - zhat->last_used = gethrtime(); -} - - -static void -zulu_hat_ctx_free(struct zulu_hat *zhat) -{ - TNF_PROBE_1(zulu_hat_ctx_free, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, zhat->zulu_ctx); - - mutex_enter(&zulu_ctx_lock); - - mutex_enter(&zhat->lock); - if (zhat->zulu_ctx >= 0) { - ZULU_CTX_SET_HAT(zhat->zulu_ctx, NULL); - - if (zulu_ctx_search_start > zhat->zulu_ctx) { - zulu_ctx_search_start = zhat->zulu_ctx; - } - } - mutex_exit(&zhat->lock); - mutex_exit(&zulu_ctx_lock); -} - -/* - * Lock the zulu tsb for a given zulu_hat. - * - * We're just protecting against the TLB trap handler here. Other operations - * on the zulu_hat require entering the zhat's lock. - */ -static void -zulu_ctx_tsb_lock_enter(struct zulu_hat *zhat) -{ - uint64_t lck; - uint64_t *plck; - - ASSERT(mutex_owned(&zhat->lock)); - - if (zhat->zulu_ctx < 0) { - return; - } - plck = (uint64_t *)&zulu_ctx_tab[zhat->zulu_ctx]; - - for (; ; ) { - lck = *plck; - if (!(lck & ZULU_CTX_LOCK)) { - uint64_t old_lck, new_lck; - - new_lck = lck | ZULU_CTX_LOCK; - - old_lck = atomic_cas_64(plck, lck, new_lck); - - if (old_lck == lck) { - /* - * success - */ - break; - } - } - } -} - -static void -zulu_ctx_tsb_lock_exit(struct zulu_hat *zhat) -{ - uint64_t lck; - int zulu_ctx = zhat->zulu_ctx; - - if (zulu_ctx < 0) { - return; - } - lck = (uint64_t)zulu_ctx_tab[zulu_ctx]; - ASSERT(lck & ZULU_CTX_LOCK); - lck &= ~ZULU_CTX_LOCK; - zulu_ctx_tab[zulu_ctx] = (struct zulu_hat *)lck; -} - -/* - * Each zulu hat has a "shadow tree" which is a table of 4MB address regions - * for which the zhat has mappings. - * - * This table is maintained in an avl tree. - * Nodes in the tree are called shadow blocks (or sblks) - * - * This data structure allows unload operations by (address, range) to be - * much more efficent. 
- * - * We get called a lot for address ranges that have never been supplied - * to zulu. - */ - -/* - * compare the base address of two nodes in the shadow tree - */ -static int -zulu_shadow_tree_compare(const void *a, const void *b) -{ - struct zulu_shadow_blk *zba = (struct zulu_shadow_blk *)a; - struct zulu_shadow_blk *zbb = (struct zulu_shadow_blk *)b; - uint64_t addr_a = zba->ivaddr; - uint64_t addr_b = zbb->ivaddr; - - TNF_PROBE_2(zulu_shadow_tree_compare, "zulu_shadow_tree", /* CSTYLED */, - tnf_opaque, addr_a, addr_a, tnf_opaque, addr_b, addr_b); - - if (addr_a < addr_b) { - return (-1); - } else if (addr_a > addr_b) { - return (1); - } else { - return (0); - } -} - -/* - * lookup the entry in the shadow tree for a given virtual address - */ -static struct zulu_shadow_blk * -zulu_shadow_tree_lookup(struct zulu_hat *zhat, uint64_t ivaddr, - avl_index_t *where) -{ - struct zulu_shadow_blk proto; - struct zulu_shadow_blk *sblk; - - proto.ivaddr = ivaddr & ZULU_SHADOW_BLK_MASK; - - /* - * pages typically fault in in order so we cache the last shadow - * block that was referenced so we usually get to reduce calls to - * avl_find. - */ - if ((zhat->sblk_last != NULL) && - (proto.ivaddr == zhat->sblk_last->ivaddr)) { - sblk = zhat->sblk_last; - } else { - sblk = (struct zulu_shadow_blk *)avl_find(&zhat->shadow_tree, - &proto, where); - zhat->sblk_last = sblk; - } - - TNF_PROBE_2(zulu_shadow_tree_lookup, "zulu_shadow_tree", /* CSTYLED */, - tnf_opaque, ivaddr, proto.ivaddr, - tnf_opaque, where, where ? *where : ~0); - - return (sblk); -} - -/* - * insert a sblk into the shadow tree for a given zblk. - * If a sblk already exists, just increment it's refcount. 
- */ -static void -zulu_shadow_tree_insert(struct zulu_hat *zhat, struct zulu_hat_blk *zblk) -{ - avl_index_t where; - struct zulu_shadow_blk *sblk = NULL; - uint64_t ivaddr; - uint64_t end; - - ivaddr = zblk->zulu_hat_blk_vaddr & ZULU_SHADOW_BLK_MASK; - - end = zblk->zulu_hat_blk_vaddr + ZULU_HAT_PGSZ(zblk->zulu_hat_blk_size); - - sblk = zulu_shadow_tree_lookup(zhat, ivaddr, &where); - if (sblk != NULL) { - sblk->ref_count++; - - end = zblk->zulu_hat_blk_vaddr + - ZULU_HAT_PGSZ(zblk->zulu_hat_blk_size); - if (zblk->zulu_hat_blk_vaddr < sblk->min_addr) { - sblk->min_addr = zblk->zulu_hat_blk_vaddr; - } - /* - * a blk can set both the minimum and maximum when it - * is the first zblk added to a previously emptied sblk - */ - if (end > sblk->max_addr) { - sblk->max_addr = end; - } - } else { - sblk = kmem_zalloc(sizeof (*sblk), KM_SLEEP); - sblk->ref_count = 1; - sblk->ivaddr = ivaddr; - sblk->min_addr = zblk->zulu_hat_blk_vaddr; - sblk->max_addr = end; - zhat->sblk_last = sblk; - - avl_insert(&zhat->shadow_tree, sblk, where); - } - zblk->zulu_shadow_blk = sblk; - TNF_PROBE_2(zulu_shadow_tree_insert, "zulu_shadow_tree", /* CSTYLED */, - tnf_opaque, vaddr, ivaddr, - tnf_opaque, ref_count, sblk->ref_count); -} - -/* - * decrement the ref_count for the sblk that corresponds to a given zblk. - * When the ref_count goes to zero remove the sblk from the tree and free it. 
- */ - -static void -zulu_shadow_tree_delete(struct zulu_hat *zhat, struct zulu_hat_blk *zblk) -{ - struct zulu_shadow_blk *sblk; - - ASSERT(zblk->zulu_shadow_blk != NULL); - - sblk = zblk->zulu_shadow_blk; - - TNF_PROBE_2(zulu_shadow_tree_delete, "zulu_shadow_tree", /* CSTYLED */, - tnf_opaque, vaddr, sblk->ivaddr, - tnf_opaque, ref_count, sblk->ref_count-1); - - if (--sblk->ref_count == 0) { - if (zhat->sblk_last == sblk) { - zhat->sblk_last = NULL; - } - sblk->min_addr = sblk->ivaddr + ZULU_SHADOW_BLK_RANGE; - sblk->max_addr = sblk->ivaddr; - } else { - /* - * Update the high and low water marks for this sblk. - * These are estimates, because we don't know if the previous - * or next region are actually occupied, but we can tell - * whether the previous values have become invalid. - * - * In the most often applied case a segment is being - * unloaded, and the min_addr will be kept up to date as - * the zblks are deleted in order. - */ - uint64_t end = zblk->zulu_hat_blk_vaddr + - ZULU_HAT_PGSZ(zblk->zulu_hat_blk_size); - - if (zblk->zulu_hat_blk_vaddr == sblk->min_addr) { - sblk->min_addr = end; - } - if (end == sblk->max_addr) { - sblk->max_addr = zblk->zulu_hat_blk_vaddr; - } - } - - zblk->zulu_shadow_blk = NULL; -} - -static void -zulu_shadow_tree_destroy(struct zulu_hat *zhat) -{ - struct zulu_shadow_blk *sblk; - void *cookie = NULL; - - while ((sblk = (struct zulu_shadow_blk *)avl_destroy_nodes( - &zhat->shadow_tree, &cookie)) != NULL) { - TNF_PROBE_2(shadow_tree_destroy, "zulu_hat", /* CSTYLED */, - tnf_opaque, vaddr, sblk->ivaddr, - tnf_opaque, ref_count, sblk->ref_count); - kmem_free(sblk, sizeof (*sblk)); - } - avl_destroy(&zhat->shadow_tree); -} - -/* - * zulu_hat_insert_map: - * - * Add a zulu_hat_blk to the a zhat's mappings list. - * - * Several data stuctures are used - * tsb: for simple fast lookups by the trap handler - * hash table: for efficent lookups by address, range - * An shadow tree of 4MB ranges with mappings for unloading big regions. 
- */ -static void -zulu_hat_insert_map(struct zulu_hat *zhat, struct zulu_hat_blk *zblk) -{ - int tsb_hash; - - tsb_hash = ZULU_TSB_HASH(zblk->zulu_hat_blk_vaddr, - zblk->zulu_hat_blk_size, zhat->zulu_tsb_size); - - TNF_PROBE_3(zulu_hat_insert_map, "zulu_hat", /* CSTYLED */, - tnf_opaque, zblkp, zblk, - tnf_opaque, vaddr, zblk->zulu_hat_blk_vaddr, - tnf_opaque, hash, tsb_hash); - - ASSERT(tsb_hash < zhat->zulu_tsb_size); - - zulu_shadow_tree_insert(zhat, zblk); - - /* - * The hash table is an array of buckets. Each bucket is the - * head of a linked list of mappings who's address hashess to the bucket - * New entries go to the head of the list. - */ - zblk->zulu_hash_prev = NULL; - zblk->zulu_hash_next = ZULU_MAP_HASH_HEAD(zhat, - zblk->zulu_hat_blk_vaddr, zblk->zulu_hat_blk_size); - if (zblk->zulu_hash_next) { - zblk->zulu_hash_next->zulu_hash_prev = zblk; - } - ZULU_MAP_HASH_HEAD(zhat, zblk->zulu_hat_blk_vaddr, - zblk->zulu_hat_blk_size) = zblk; - - zulu_ctx_tsb_lock_enter(zhat); - zhat->zulu_tsb[tsb_hash] = zblk->zulu_hat_blk_tte; - zulu_ctx_tsb_lock_exit(zhat); -} - -/* - * remove a block from a zhat - */ -static void -zulu_hat_remove_map(struct zulu_hat *zhat, struct zulu_hat_blk *zblk) -{ - int tsb_hash = ZULU_TSB_HASH(zblk->zulu_hat_blk_vaddr, - zblk->zulu_hat_blk_size, zhat->zulu_tsb_size); - - TNF_PROBE_2(zulu_hat_remove_map, "zulu_hat", /* CSTYLED */, - tnf_opaque, vaddr, zblk->zulu_hat_blk_vaddr, - tnf_opaque, hash, tsb_hash); - - ASSERT(tsb_hash < zhat->zulu_tsb_size); - ASSERT(mutex_owned(&zhat->lock)); - - zulu_shadow_tree_delete(zhat, zblk); - - /* - * first remove zblk from hash table - */ - if (zblk->zulu_hash_prev) { - zblk->zulu_hash_prev->zulu_hash_next = zblk->zulu_hash_next; - } else { - ZULU_MAP_HASH_HEAD(zhat, zblk->zulu_hat_blk_vaddr, - zblk->zulu_hat_blk_size) = NULL; - } - if (zblk->zulu_hash_next) { - zblk->zulu_hash_next->zulu_hash_prev = zblk->zulu_hash_prev; - } - zblk->zulu_hash_next = NULL; - zblk->zulu_hash_prev = NULL; - - /* - * 
then remove the tsb entry - */ - zulu_ctx_tsb_lock_enter(zhat); - if (zhat->zulu_tsb[tsb_hash].un.zulu_tte_addr == - zblk->zulu_hat_blk_vaddr) { - zhat->zulu_tsb[tsb_hash].zulu_tte_valid = 0; - } - zulu_ctx_tsb_lock_exit(zhat); -} - -/* - * look for a mapping to a given vaddr and page size - */ -static struct zulu_hat_blk * -zulu_lookup_map_bysize(struct zulu_hat *zhat, caddr_t vaddr, int page_sz) -{ - struct zulu_hat_blk *zblkp; - uint64_t ivaddr = (uint64_t)vaddr; - int blks_checked = 0; - - ASSERT(mutex_owned(&zhat->lock)); - - for (zblkp = ZULU_MAP_HASH_HEAD(zhat, ivaddr, page_sz); zblkp != NULL; - zblkp = zblkp->zulu_hash_next) { - uint64_t size; - uint64_t iaddr; - - blks_checked++; - - size = ZULU_HAT_PGSZ(zblkp->zulu_hat_blk_size); - iaddr = ZULU_VADDR((uint64_t)zblkp->zulu_hat_blk_vaddr); - - if (iaddr <= ivaddr && (iaddr + size) > ivaddr) { - int tsb_hash; - - tsb_hash = ZULU_TSB_HASH(zblkp->zulu_hat_blk_vaddr, - zblkp->zulu_hat_blk_size, - zhat->zulu_tsb_size); - ASSERT(tsb_hash < zhat->zulu_tsb_size); - - zulu_ctx_tsb_lock_enter(zhat); - zhat->zulu_tsb[tsb_hash] = zblkp->zulu_hat_blk_tte; - zulu_ctx_tsb_lock_exit(zhat); - break; - } - - } - - TNF_PROBE_3(zulu_hat_lookup_map_bysz, "zulu_hat", /* CSTYLED */, - tnf_opaque, zblkp, zblkp, - tnf_int, blks_checked, blks_checked, - tnf_int, page_sz, page_sz); - - return (zblkp); -} - -/* - * Lookup a zblk for a given virtual address. 
- */ -static struct zulu_hat_blk * -zulu_lookup_map(struct zulu_hat *zhat, caddr_t vaddr) -{ - struct zulu_hat_blk *zblkp = NULL; - - /* - * if the hat is using 4M pages, look first for a 4M page - */ - if (zhat->map4m) { - zblkp = zulu_lookup_map_bysize(zhat, vaddr, ZULU_TTE4M); - if (zblkp != NULL) { - return (zblkp); - } - } - /* - * Otherwise look for a 8k page - * Note: if base pagesize gets increased to 64K remove this test - */ - if (zhat->map8k) { - zblkp = zulu_lookup_map_bysize(zhat, vaddr, ZULU_TTE8K); - if (zblkp != NULL) { - return (zblkp); - } - } - /* - * only if the page isn't found in the sizes that match the zulu mmus - * look for the inefficient 64K or 512K page sizes - */ - if (zhat->map64k) { - zblkp = zulu_lookup_map_bysize(zhat, vaddr, ZULU_TTE64K); - if (zblkp != NULL) { - return (zblkp); - } - } - if (zhat->map512k) { - zblkp = zulu_lookup_map_bysize(zhat, vaddr, ZULU_TTE512K); - } - - return (zblkp); -} - -/* - * zulu_hat_load: Load translation for given vaddr - */ -int -zulu_hat_load(struct zulu_hat *zhat, caddr_t vaddr, - enum seg_rw rw, int *ppg_size) -{ - faultcode_t as_err; - struct zulu_hat_blk *zblkp; - int rval; - uint64_t flags_pfn; - struct zulu_tte tte; - - TNF_PROBE_2(zulu_hat_load, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, vaddr); - - mutex_enter(&zhat->lock); - ASSERT(zhat->zulu_ctx >= 0); - /* - * lookup in our tsb first - */ - zulu_ctx_tsb_lock_enter(zhat); - flags_pfn = zulu_hat_tsb_lookup_tl0(zhat, vaddr); - zulu_ctx_tsb_lock_exit(zhat); - - if (flags_pfn) { - uint64_t *p = (uint64_t *)&tte; - - p++; /* ignore the tag */ - *p = flags_pfn; /* load the flags */ - - zuluvm_load_tte(zhat, vaddr, flags_pfn, tte.zulu_tte_perm, - tte.zulu_tte_size); - if (ppg_size != NULL) { - *ppg_size = tte.zulu_tte_size; - } - - zulu_tsb_hit++; - mutex_exit(&zhat->lock); - return (0); - } - - zulu_tsb_miss++; - - zblkp = zulu_lookup_map(zhat, vaddr); - if (zblkp) { - tte = zblkp->zulu_hat_blk_tte; - 
tte.zulu_tte_pfn = ZULU_HAT_ADJ_PFN((&tte), vaddr); - zuluvm_load_tte(zhat, vaddr, tte.zulu_tte_pfn, - tte.zulu_tte_perm, tte.zulu_tte_size); - if (ppg_size != NULL) { - *ppg_size = tte.zulu_tte_size; - } - mutex_exit(&zhat->lock); - return (0); - } - - /* - * Set a flag indicating that we're processing a fault. - * See comments in zulu_hat_unload_region. - */ - zhat->in_fault = 1; - mutex_exit(&zhat->lock); - - zulu_as_fault++; - TNF_PROBE_0(calling_as_fault, "zulu_hat", /* CSTYLED */); - - as_err = as_fault((struct hat *)zhat, zhat->zulu_xhat.xhat_as, - (caddr_t)(ZULU_VADDR((uint64_t)vaddr) & PAGEMASK), - PAGESIZE, F_INVAL, rw); - - mutex_enter(&zhat->lock); - zhat->in_fault = 0; - if (ppg_size != NULL) { - /* - * caller wants to know the page size (used by preload) - */ - zblkp = zulu_lookup_map(zhat, vaddr); - if (zblkp != NULL) { - *ppg_size = zblkp->zulu_hat_blk_size; - } else { - *ppg_size = -1; - } - } - mutex_exit(&zhat->lock); - - TNF_PROBE_1(as_fault_returned, "zulu_hat", /* CSTYLED */, - tnf_int, as_err, as_err); - - if (as_err != 0) { - printf("as_fault returned %d\n", as_err); - rval = as_err; - } else if (zhat->freed) { - rval = -1; - } else { - rval = 0; - } - - return (rval); -} - -static struct xhat * -zulu_hat_alloc(void *arg) -{ - struct zulu_hat *zhat = kmem_zalloc(sizeof (struct zulu_hat), KM_SLEEP); - - (void) arg; - - zulu_hat_ctx_alloc(zhat); - - mutex_init(&zhat->lock, NULL, MUTEX_DEFAULT, NULL); - - zhat->zulu_tsb = kmem_zalloc(ZULU_TSB_SZ, KM_SLEEP); - zhat->zulu_tsb_size = ZULU_TSB_NUM; - zhat->hash_tbl = kmem_zalloc(ZULU_HASH_TBL_SZ, KM_SLEEP); - avl_create(&zhat->shadow_tree, zulu_shadow_tree_compare, - sizeof (zhat->shadow_tree), ZULU_SHADOW_BLK_LINK_OFFSET); - /* - * The zulu hat has a few opaque data structs embedded in it. - * This tag makes finding the our data easier with a debugger. 
- */ - zhat->magic = 0x42; - - zhat->freed = 0; - TNF_PROBE_1(zulu_hat_alloc, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx); - return ((struct xhat *)zhat); -} - -static void -zulu_hat_free(struct xhat *xhat) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - - TNF_PROBE_1(zulu_hat_free, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx); - - zulu_shadow_tree_destroy(zhat); - kmem_free(zhat->hash_tbl, ZULU_HASH_TBL_SZ); - kmem_free(zhat->zulu_tsb, ZULU_TSB_SZ); - mutex_destroy(&zhat->lock); - kmem_free(xhat, sizeof (struct zulu_hat)); -} - -static void -zulu_hat_free_start(struct xhat *xhat) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - - TNF_PROBE_1(zulu_hat_free_start, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx); - (void) xhat; -} - -/* - * zulu_hat_memload: This is the callback where the vm system gives us our - * translations - */ -static void -zulu_do_hat_memload(struct xhat *xhat, caddr_t vaddr, struct page *page, - uint_t attr, uint_t flags, int use_pszc) -{ - void *blk; - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - struct zulu_hat_blk *zblk; - pfn_t pfn; - - TNF_PROBE_4(zulu_hat_memload, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, vaddr, tnf_opaque, attr, attr, - tnf_opaque, flags, flags); - - /* - * keep track of the highest address that this zhat has had - * a mapping for. - * We use this in unload to avoid searching for regions that - * we've never seen. - * - * This is particularly useful avoiding repeated searches for - * for the process's mappings to the zulu hardware. These mappings - * are explicitly unloaded at each graphics context switch.. - * - * This takes advantage of the fact that the device addresses - * are always above than the heap where most DMA data is stored. 
- */ - if (vaddr > zhat->vaddr_max) { - zhat->vaddr_max = vaddr; - } - - pfn = xhat_insert_xhatblk(page, xhat, &blk); - zblk = (struct zulu_hat_blk *)blk; - zblk->zulu_hat_blk_vaddr = (uintptr_t)vaddr; - zblk->zulu_hat_blk_pfn = (uint_t)pfn; - /* - * The perm bit is actually in the tte which gets copied to the TSB - */ - zblk->zulu_hat_blk_perm = (attr & PROT_WRITE) ? 1 : 0; - zblk->zulu_hat_blk_size = use_pszc ? page->p_szc : 0; - zblk->zulu_hat_blk_valid = 1; - - switch (zblk->zulu_hat_blk_size) { - case ZULU_TTE8K: - zhat->map8k = 1; - break; - case ZULU_TTE64K: - zhat->map64k = 1; - break; - case ZULU_TTE512K: - zhat->map512k = 1; - break; - case ZULU_TTE4M: - zhat->map4m = 1; - break; - default: - panic("zulu_hat illegal page size\n"); - } - - mutex_enter(&zhat->lock); - - zulu_hat_insert_map(zhat, zblk); - if (!zhat->freed) { - zuluvm_load_tte(zhat, vaddr, zblk->zulu_hat_blk_pfn, - zblk->zulu_hat_blk_perm, zblk->zulu_hat_blk_size); - } - zhat->fault_ivaddr_last = - ZULU_VADDR((uint64_t)zblk->zulu_hat_blk_vaddr); - - mutex_exit(&zhat->lock); -} - -static void -zulu_hat_memload(struct xhat *xhat, caddr_t vaddr, struct page *page, - uint_t attr, uint_t flags) -{ - zulu_do_hat_memload(xhat, vaddr, page, attr, flags, 0); -} - -static void -zulu_hat_devload(struct xhat *xhat, caddr_t vaddr, size_t size, pfn_t pfn, - uint_t attr, int flags) -{ - struct page *pp = page_numtopp_nolock(pfn); - (void) size; - zulu_do_hat_memload(xhat, vaddr, pp, attr, (uint_t)flags, 1); -} - -static void -zulu_hat_memload_array(struct xhat *xhat, caddr_t addr, size_t len, - struct page **gen_pps, uint_t attr, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - - TNF_PROBE_3(zulu_hat_memload_array, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx, - tnf_opaque, addr, addr, - tnf_opaque, len, len); - - for (; len > 0; len -= ZULU_HAT_PGSZ((*gen_pps)->p_szc), - gen_pps += ZULU_HAT_NUM_PGS((*gen_pps)->p_szc)) { - zulu_do_hat_memload(xhat, addr, *gen_pps, 
attr, flags, 1); - - addr += ZULU_HAT_PGSZ((*gen_pps)->p_szc); - } -} - -static void -free_zblks(struct zulu_hat_blk *free_list) -{ - struct zulu_hat_blk *zblkp; - struct zulu_hat_blk *next; - - for (zblkp = free_list; zblkp != NULL; zblkp = next) { - next = zblkp->zulu_hash_next; - (void) xhat_delete_xhatblk((struct xhat_hme_blk *)zblkp, 0); - } -} - -static void -add_to_free_list(struct zulu_hat_blk **pfree_list, struct zulu_hat_blk *zblk) -{ - zblk->zulu_hash_next = *pfree_list; - *pfree_list = zblk; -} - -static void -zulu_hat_unload_region(struct zulu_hat *zhat, uint64_t ivaddr, size_t size, - struct zulu_shadow_blk *sblk, struct zulu_hat_blk **pfree_list) -{ - uint64_t end = ivaddr + size; - int found = 0; - - TNF_PROBE_2(zulu_hat_unload_region, "zulu_hat", /* CSTYLED */, - tnf_opaque, vaddr, ivaddr, tnf_opaque, size, size); - - /* - * check address against the low and highwater marks for mappings - * in this sblk - */ - if (ivaddr < sblk->min_addr) { - ivaddr = sblk->min_addr; - TNF_PROBE_1(zulu_hat_unload_skip, "zulu_hat", /* CSTYLED */, - tnf_opaque, ivaddr, ivaddr); - } - if (end > sblk->max_addr) { - end = sblk->max_addr; - TNF_PROBE_1(zulu_hat_unload_reg_skip, "zulu_hat", /* CSTYLED */, - tnf_opaque, end, end); - } - /* - * REMIND: It's not safe to touch the sblk after we enter this loop - * because it may get deleted. 
- */ - - while (ivaddr < end) { - uint64_t iaddr; - size_t pg_sz; - struct zulu_hat_blk *zblkp; - - zblkp = zulu_lookup_map(zhat, (caddr_t)ivaddr); - if (zblkp == NULL) { - ivaddr += PAGESIZE; - continue; - } - - iaddr = ZULU_VADDR((uint64_t)zblkp->zulu_hat_blk_vaddr); - pg_sz = ZULU_HAT_PGSZ(zblkp->zulu_hat_blk_size); - - found++; - - zulu_hat_remove_map(zhat, zblkp); - /* - * skip demap page if as_free has already been entered - * zuluvm demapped the context already - */ - if (!zhat->freed) { - if ((zhat->in_fault) && - (iaddr == zhat->fault_ivaddr_last)) { - /* - * We're being called from within as_fault to - * unload the last translation we loaded. - * - * This is probably due to watchpoint handling. - * Delay the demap for a millisecond - * to allow zulu to make some progress. - */ - drv_usecwait(1000); - zhat->fault_ivaddr_last = 0; - } - zulu_hat_demap_page(zhat, (caddr_t)iaddr, - zblkp->zulu_hat_blk_size); - } - - add_to_free_list(pfree_list, zblkp); - - if ((iaddr + pg_sz) >= end) { - break; - } - - ivaddr += pg_sz; - } - TNF_PROBE_1(zulu_hat_unload_region_done, "zulu_hat", /* CSTYLED */, - tnf_opaque, found, found); -} - -static void -zulu_hat_unload(struct xhat *xhat, caddr_t vaddr, size_t size, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - uint64_t ivaddr; - uint64_t end; - int found = 0; - struct zulu_hat_blk *free_list = NULL; - - (void) flags; - - TNF_PROBE_4(zulu_hat_unload, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, vaddr, - tnf_opaque, vaddr_max, zhat->vaddr_max, - tnf_opaque, size, size); - - mutex_enter(&zhat->lock); - - /* - * The following test prevents us from searching for the user's - * mappings to the zulu device registers. Those mappings get unloaded - * every time a graphics context switch away from a given context - * occurs. - * - * Since the heap is located at smaller virtual addresses than the - * registers, this simple test avoids quite a bit of useless work. 
- */ - if (vaddr > zhat->vaddr_max) { - /* - * all existing mappings have lower addresses than vaddr - * no need to search further. - */ - mutex_exit(&zhat->lock); - return; - } - - ivaddr = (uint64_t)vaddr; - end = ivaddr + size; - - do { - struct zulu_shadow_blk *sblk; - - sblk = zulu_shadow_tree_lookup(zhat, ivaddr, NULL); - if (sblk != NULL) { - uint64_t sblk_end; - size_t region_size; - - found++; - - sblk_end = (ivaddr + ZULU_SHADOW_BLK_RANGE) & - ZULU_SHADOW_BLK_MASK; - - if (sblk_end < end) { - region_size = sblk_end - ivaddr; - } else { - region_size = end - ivaddr; - } - zulu_hat_unload_region(zhat, ivaddr, region_size, sblk, - &free_list); - - } - ivaddr += ZULU_SHADOW_BLK_RANGE; - } while (ivaddr < end); - - mutex_exit(&zhat->lock); - - free_zblks(free_list); - - TNF_PROBE_1(zulu_hat_unload_done, "zulu_hat", /* CSTYLED */, - tnf_int, found, found); -} - -static void -zulu_hat_unload_callback(struct xhat *xhat, caddr_t vaddr, size_t size, - uint_t flags, hat_callback_t *pcb) -{ - (void) size; - (void) pcb; - zulu_hat_unload(xhat, vaddr, size, flags); -} - - -/* - * unload one page - */ -static int -zulu_hat_pageunload(struct xhat *xhat, struct page *pp, uint_t flags, - void *xblk) -{ - struct zulu_hat_blk *zblk = (struct zulu_hat_blk *)xblk; - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - int do_delete; - - (void) pp; - (void) flags; - - TNF_PROBE_3(zulu_hat_pageunload, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, zblk->zulu_hat_blk_vaddr, - tnf_int, pg_size, zblk->zulu_hat_blk_size); - - mutex_enter(&zhat->lock); - if (zblk->zulu_shadow_blk != NULL) { - - do_delete = 1; - - zulu_hat_remove_map(zhat, zblk); - - /* - * now that the entry is removed from the TSB, remove the - * translation from the zulu hardware. - * - * Skip the demap if this as is in the process of being freed. - * The zuluvm as callback has demapped the whole context. 
- */ - if (!zhat->freed) { - zulu_hat_demap_page(zhat, - (caddr_t)(uintptr_t)(zblk->zulu_hat_blk_page << - ZULU_HAT_BP_SHIFT), - zblk->zulu_hat_blk_size); - } - } else { - /* - * This block has already been removed from the zulu_hat, - * it's on a free list waiting for our thread to release - * a mutex so it can be freed - */ - do_delete = 0; - - TNF_PROBE_0(zulu_hat_pageunload_skip, "zulu_hat", - /* CSTYLED */); - } - mutex_exit(&zhat->lock); - - if (do_delete) { - (void) xhat_delete_xhatblk(xblk, 1); - } - - return (0); -} - -static void -zulu_hat_swapout(struct xhat *xhat) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - struct zulu_hat_blk *zblk; - struct zulu_hat_blk *free_list = NULL; - int i; - int nblks = 0; - - TNF_PROBE_1(zulu_hat_swapout, "zulu_hat", /* CSTYLED */, - tnf_int, zulu_ctx, zhat->zulu_ctx); - - mutex_enter(&zhat->lock); - - /* - * real swapout calls are rare so we don't do anything in - * particular to optimize them. - * - * Just loop over all buckets in the hash table and free each - * zblk. - */ - for (i = 0; i < ZULU_HASH_TBL_NUM; i++) { - struct zulu_hat_blk *next; - for (zblk = zhat->hash_tbl[i]; zblk != NULL; zblk = next) { - next = zblk->zulu_hash_next; - zulu_hat_remove_map(zhat, zblk); - add_to_free_list(&free_list, zblk); - nblks++; - } - } - - /* - * remove all mappings for this context from zulu hardware. - */ - zulu_hat_demap_ctx(zhat->zdev, zhat->zulu_ctx); - - mutex_exit(&zhat->lock); - - free_zblks(free_list); - - TNF_PROBE_1(zulu_hat_swapout_done, "zulu_hat", /* CSTYLED */, - tnf_int, nblks, nblks); -} - - -static void -zulu_hat_unshare(struct xhat *xhat, caddr_t vaddr, size_t size) -{ - TNF_PROBE_0(zulu_hat_unshare, "zulu_hat", /* CSTYLED */); - - zulu_hat_unload(xhat, vaddr, size, 0); -} - -/* - * Functions to manage changes in protections for mappings. - * - * These are rarely called in normal operation so for now just unload - * the region. 
- * If the mapping is still needed, it will fault in later with the new - * attrributes. - */ -typedef enum { - ZULU_HAT_CHGATTR, - ZULU_HAT_SETATTR, - ZULU_HAT_CLRATTR -} zulu_hat_prot_op; - -static void -zulu_hat_update_attr(struct xhat *xhat, caddr_t vaddr, size_t size, - uint_t flags, zulu_hat_prot_op op) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - - TNF_PROBE_5(zulu_hat_changeprot, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, vaddr, tnf_opaque, size, size, - tnf_uint, flags, flags, tnf_int, op, op); - - zulu_hat_unload(xhat, vaddr, size, 0); -} - -static void -zulu_hat_chgprot(struct xhat *xhat, caddr_t vaddr, size_t size, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; -#ifdef DEBUG - printf("zulu_hat_chgprot: ctx: %d addr: %lx, size: %lx flags: %x\n", - zhat->zulu_ctx, (uint64_t)vaddr, size, flags); -#endif - zulu_hat_update_attr(xhat, vaddr, size, flags, ZULU_HAT_CHGATTR); -} - - -static void -zulu_hat_setattr(struct xhat *xhat, caddr_t vaddr, size_t size, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; -#ifdef DEBUG - printf("zulu_hat_setattr: ctx: %d addr: %lx, size: %lx flags: %x\n", - zhat->zulu_ctx, (uint64_t)vaddr, size, flags); -#endif - zulu_hat_update_attr(xhat, vaddr, size, flags, ZULU_HAT_SETATTR); -} - -static void -zulu_hat_clrattr(struct xhat *xhat, caddr_t vaddr, size_t size, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; -#ifdef DEBUG - printf("zulu_hat_clrattr: ctx: %d addr: %lx, size: %lx flags: %x\n", - zhat->zulu_ctx, (uint64_t)vaddr, size, flags); -#endif - zulu_hat_update_attr(xhat, vaddr, size, flags, ZULU_HAT_CLRATTR); -} - -static void -zulu_hat_chgattr(struct xhat *xhat, caddr_t vaddr, size_t size, uint_t flags) -{ - struct zulu_hat *zhat = (struct zulu_hat *)xhat; - TNF_PROBE_3(zulu_hat_chgattr, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, zhat->zulu_ctx, - tnf_opaque, vaddr, vaddr, - tnf_opaque, flags, flags); -#ifdef DEBUG - 
printf("zulu_hat_chgattr: ctx: %d addr: %lx, size: %lx flags: %x\n", - zhat->zulu_ctx, (uint64_t)vaddr, size, flags); -#endif - zulu_hat_update_attr(xhat, vaddr, size, flags, ZULU_HAT_CHGATTR); -} - - -struct xhat_ops zulu_hat_ops = { - zulu_hat_alloc, /* xhat_alloc */ - zulu_hat_free, /* xhat_free */ - zulu_hat_free_start, /* xhat_free_start */ - NULL, /* xhat_free_end */ - NULL, /* xhat_dup */ - NULL, /* xhat_swapin */ - zulu_hat_swapout, /* xhat_swapout */ - zulu_hat_memload, /* xhat_memload */ - zulu_hat_memload_array, /* xhat_memload_array */ - zulu_hat_devload, /* xhat_devload */ - zulu_hat_unload, /* xhat_unload */ - zulu_hat_unload_callback, /* xhat_unload_callback */ - zulu_hat_setattr, /* xhat_setattr */ - zulu_hat_clrattr, /* xhat_clrattr */ - zulu_hat_chgattr, /* xhat_chgattr */ - zulu_hat_unshare, /* xhat_unshare */ - zulu_hat_chgprot, /* xhat_chgprot */ - zulu_hat_pageunload, /* xhat_pageunload */ -}; - -xblk_cache_t zulu_xblk_cache = { - NULL, - NULL, - NULL, - xhat_xblkcache_reclaim -}; - -xhat_provider_t zulu_hat_provider = { - XHAT_PROVIDER_VERSION, - 0, - NULL, - NULL, - "zulu_hat_provider", - &zulu_xblk_cache, - &zulu_hat_ops, - sizeof (struct zulu_hat_blk) + sizeof (struct xhat_hme_blk) -}; - -/* - * The following functions are the entry points that zuluvm uses. - */ - -/* - * initialize this module. Called from zuluvm's _init function - */ -int -zulu_hat_init() -{ - int c; - int rval; - mutex_init(&zulu_ctx_lock, NULL, MUTEX_DEFAULT, NULL); - - for (c = 0; c < ZULU_HAT_MAX_CTX; c++) { - ZULU_CTX_LOCK_INIT(c); - } - zulu_ctx_search_start = 0; - rval = xhat_provider_register(&zulu_hat_provider); - if (rval != 0) { - mutex_destroy(&zulu_ctx_lock); - } - return (rval); -} - -/* - * un-initialize this module. 
Called from zuluvm's _fini function - */ -int -zulu_hat_destroy() -{ - if (xhat_provider_unregister(&zulu_hat_provider) != 0) { - return (-1); - } - mutex_destroy(&zulu_ctx_lock); - return (0); -} - -int -zulu_hat_attach(void *arg) -{ - (void) arg; - return (0); -} - -int -zulu_hat_detach(void *arg) -{ - (void) arg; - return (0); -} - -/* - * create a zulu hat for this address space. - */ -struct zulu_hat * -zulu_hat_proc_attach(struct as *as, void *zdev) -{ - struct zulu_hat *zhat; - int xhat_rval; - - xhat_rval = xhat_attach_xhat(&zulu_hat_provider, as, - (struct xhat **)&zhat, NULL); - if ((xhat_rval == 0) && (zhat != NULL)) { - mutex_enter(&zhat->lock); - ZULU_HAT2AS(zhat) = as; - zhat->zdev = zdev; - mutex_exit(&zhat->lock); - } - - TNF_PROBE_3(zulu_hat_proc_attach, "zulu_hat", /* CSTYLED */, - tnf_int, xhat_rval, xhat_rval, tnf_opaque, as, as, - tnf_opaque, zhat, zhat); - - return (zhat); -} - -void -zulu_hat_proc_detach(struct zulu_hat *zhat) -{ - struct as *as = ZULU_HAT2AS(zhat); - - zulu_hat_ctx_free(zhat); - - (void) xhat_detach_xhat(&zulu_hat_provider, ZULU_HAT2AS(zhat)); - - TNF_PROBE_1(zulu_hat_proc_detach, "zulu_hat", /* CSTYLED */, - tnf_opaque, as, as); -} - -/* - * zulu_hat_terminate - * - * Disables any further TLB miss processing for this hat - * Called by zuluvm's as_free callback. The primary purpose of this - * function is to cause any pending zulu DMA to abort quickly. 
- */ -void -zulu_hat_terminate(struct zulu_hat *zhat) -{ - int ctx = zhat->zulu_ctx; - - TNF_PROBE_1(zulu_hat_terminate, "zulu_hat", /* CSTYLED */, - tnf_int, ctx, ctx); - - mutex_enter(&zhat->lock); - - zhat->freed = 1; - - zulu_ctx_tsb_lock_enter(zhat); - /* - * zap the tsb - */ - bzero(zhat->zulu_tsb, ZULU_TSB_SZ); - zulu_ctx_tsb_lock_exit(zhat); - - zulu_hat_demap_ctx(zhat->zdev, zhat->zulu_ctx); - - mutex_exit(&zhat->lock); - - TNF_PROBE_0(zulu_hat_terminate_done, "zulu_hat", /* CSTYLED */); -} diff --git a/usr/src/uts/sun4u/zuluvm/Makefile b/usr/src/uts/sun4u/zuluvm/Makefile deleted file mode 100644 index 719558aa46..0000000000 --- a/usr/src/uts/sun4u/zuluvm/Makefile +++ /dev/null @@ -1,133 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -# -# This makefile drives the production of the zulunvm -# mics module -# -# sun4u implementation architecture dependent -# - -# -# Path to the base of the uts directory tree (usually /usr/src/uts). -# -UTSBASE = ../.. - -# -# Define the module and object file sets. 
-# -MODULE = zuluvm -OBJECTS = $(ZULUVM_OBJS:%=$(OBJS_DIR)/%) -LINTS = $(ZULUVM_OBJS:%.o=$(LINTS_DIR)/%.ln) -ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE) - -# -# Include common rules. -# -include $(UTSBASE)/sun4u/Makefile.sun4u - -# -# Override defaults to build a unique, local modstubs.o. -# -MODSTUBS_DIR = . -$(MODSTUBS_O) := AS_CPPFLAGS += -DZULU_MISC_MODULE - -CLEANFILES += $(MODSTUBS_O) - -# -# Define targets -# -ALL_TARGET = $(BINARY) -LINT_TARGET = $(MODULE).lint -INSTALL_TARGET = $(BINARY) $(ROOTMODULE) - -ZULUVM_OFFSETS = $(UTSBASE)/sun4u/zuluvm/zuluvm_offsets.in -ZULUVM_OFFSETS_H = $(OBJS_DIR)/zuluvm_offsets.h - -ZULUVM_STATS = -DZULUVM_STATS - -# -# We turn off tnf probes for opt builds. -# -PROBE_FLAGS_OBJ64 = -DNPROBE - -# -# lint pass one enforcement -# -CFLAGS += $(CCVERBOSE) $(ZULUVM_STATS) $(PROBE_FLAGS_$(BUILD_TYPE)) -ASFLAGS += $(ZULUVM_STATS) -LINTFLAGS += -I$(OBJS_DIR) - -# -# For now, disable these lint checks; maintainers should endeavor -# to investigate and remove these for maximum lint coverage. -# Please do not carry these forward to new Makefiles. -# -LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN -LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV - -CERRWARN += -_gcc=-Wno-uninitialized - -# -# Default build targets. -# -.KEEP_STATE: - -def: $(DEF_DEPS) - -all: $(ALL_DEPS) - -clean: $(CLEAN_DEPS) - -clobber: $(CLOBBER_DEPS) - -lint: $(LINT_DEPS) - -modlintlib: $(MODLINTLIB_DEPS) lint64 - -clean.lint: $(CLEAN_LINT_DEPS) - -install: $(INSTALL_DEPS) - -# -# Special rules for generating assym.h for inclusion in assembly files -# -#$(DSF_DIR)/$(OBJS_DIR)/assym.h: FRC -# @cd $(DSF_DIR); $(MAKE) all.targ -# -AS_INC_PATH += -I$(OBJS_DIR) - -ZULUVM_DEPS += zulu_hat_asm.o zulu_asm.o zulu_asm.ln zulu_hat_asm.ln - -CLEANFILES += $(ZULUVM_OFFSETS_H) $(ZULUVM_OFFSETS_OUT) - -$(ZULUVM_DEPS:%=$(OBJS_DIR)/%): $(ZULUVM_OFFSETS_H) - -$(ZULUVM_OFFSETS_H): $(ZULUVM_OFFSETS) - $(OFFSETS_CREATE) <$(ZULUVM_OFFSETS) >$@ - -# -# Include common targets. 
-# -include $(UTSBASE)/sun4u/Makefile.targ diff --git a/usr/src/uts/sun4u/zuluvm/zuluvm_offsets.in b/usr/src/uts/sun4u/zuluvm/zuluvm_offsets.in deleted file mode 100644 index 14470a9274..0000000000 --- a/usr/src/uts/sun4u/zuluvm/zuluvm_offsets.in +++ /dev/null @@ -1,77 +0,0 @@ -\ -\ Copyright 2005 Sun Microsystems, Inc. All rights reserved. -\ Use is subject to license terms. -\ -\ CDDL HEADER START -\ -\ The contents of this file are subject to the terms of the -\ Common Development and Distribution License, Version 1.0 only -\ (the "License"). You may not use this file except in compliance -\ with the License. -\ -\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -\ or http://www.opensolaris.org/os/licensing. -\ See the License for the specific language governing permissions -\ and limitations under the License. -\ -\ When distributing Covered Code, include this CDDL HEADER in each -\ file and include the License file at usr/src/OPENSOLARIS.LICENSE. -\ If applicable, add the following below this CDDL HEADER, with the -\ fields enclosed by brackets "[]" replaced with your own identifying -\ information: Portions Copyright [yyyy] [name of copyright owner] -\ -\ CDDL HEADER END -\ - -#pragma ident "%Z%%M% %I% %E% SMI" - -#include -#include -#include -#include - -zulu_hat - zulu_tsb ZULU_HAT_TSB - zulu_tsb_size ZULU_HAT_TSB_SZ - zulu_ctx ZULU_HAT_CTX - -#ifdef ZULUVM_STATS -zuluvm_miss_t - idata ZULUVM_IDATA - arg ZULUVM_ARG - mmu_pa ZULUVM_PAMMU - state ZULUVM_STATE - intr_num ZULUVM_INTRNUM - cancel ZULUVM_ST_TLBCANCEL - tlb_miss ZULUVM_ST_MISS - pagefault ZULUVM_ST_PGFAULT - no_mapping ZULUVM_ST_NOMAP - preload ZULUVM_ST_PRELOAD - migrate ZULUVM_ST_MIGRATE - pagesize ZULUVM_ST_PAGESIZE - itlb1miss ZULUVM_ST_ITLB1MISS - dtlb1miss ZULUVM_ST_DTLB1MISS - itlb2miss ZULUVM_ST_ITLB2MISS - dtlb2miss ZULUVM_ST_DTLB2MISS - demap_page ZULUVM_ST_DEMAP_PAGE - demap_ctx ZULUVM_ST_DEMAP_CTX - pfnbuf ZULUVM_PFNBUF - pfncnt ZULUVM_PFNCNT - -#else - 
-zuluvm_miss_t - idata ZULUVM_IDATA - arg ZULUVM_ARG - mmu_pa ZULUVM_PAMMU - state ZULUVM_STATE - intr_num ZULUVM_INTRNUM - pfnbuf ZULUVM_PFNBUF - pfncnt ZULUVM_PFNCNT -#endif - -\#define ZULUVM_OFFSET(a) (ZULUVM_IDATA + ((a) * ZULUVM_IDATA_INCR)) -\#define ZULUVM_ASM_TLB_TTE ZULUVM_OFFSET(ZULUVM_TLB_TTE_IDX) -\#define ZULUVM_ASM_TLB_ADDR ZULUVM_OFFSET(ZULUVM_TLB_ADDR_IDX) -\#define ZULUVM_ASM_TLB_TYPE ZULUVM_OFFSET(ZULUVM_TLB_TYPE_IDX) -\#define ZULUVM_ASM_TLB_ERRCODE ZULUVM_OFFSET(ZULUVM_TLB_ERRCODE_IDX) -- cgit v1.2.3 From a5eb7107f06a6e23e8e77e8d3a84c1ff90a73ac6 Mon Sep 17 00:00:00 2001 From: Bryan Cantrill Date: Sat, 14 Feb 2015 16:55:35 -0800 Subject: 5640 want epoll support Reviewed by: Jerry Jelinek Reviewed by: Robert Mustacchi Approved by: Garrett D'Amore --- usr/src/cmd/truss/codes.c | 4 +- usr/src/lib/libc/amd64/Makefile | 1 + usr/src/lib/libc/i386/Makefile.com | 1 + usr/src/lib/libc/port/mapfile-vers | 9 + usr/src/lib/libc/port/sys/epoll.c | 207 +++++++++ usr/src/lib/libc/sparc/Makefile.com | 1 + usr/src/lib/libc/sparcv9/Makefile.com | 1 + usr/src/man/man3c/Makefile | 8 + usr/src/man/man3c/epoll_create.3c | 99 ++++ usr/src/man/man3c/epoll_ctl.3c | 309 +++++++++++++ usr/src/man/man3c/epoll_wait.3c | 108 +++++ usr/src/man/man5/Makefile | 1 + usr/src/man/man5/epoll.5 | 110 +++++ usr/src/man/man7d/poll.7d | 15 - usr/src/man/man9e/chpoll.9e | 17 +- usr/src/pkg/manifests/system-header.mf | 1 + usr/src/pkg/manifests/system-kernel.man5.inc | 1 + usr/src/pkg/manifests/system-library.man3c.inc | 5 + usr/src/uts/common/fs/fifofs/fifovnops.c | 13 +- usr/src/uts/common/fs/portfs/port_vnops.c | 16 +- usr/src/uts/common/fs/proc/prvnops.c | 4 +- usr/src/uts/common/fs/sockfs/sockcommon_sops.c | 15 +- usr/src/uts/common/fs/sockfs/socknotify.c | 2 +- .../common/io/1394/targets/av1394/av1394_async.c | 13 +- usr/src/uts/common/io/devpoll.c | 510 +++++++++++++++++++-- usr/src/uts/common/io/ksocket/ksocket.c | 3 +- usr/src/uts/common/io/usb/usba/usba_ugen.c | 24 +- 
usr/src/uts/common/os/fio.c | 3 +- usr/src/uts/common/os/streamio.c | 3 +- usr/src/uts/common/sys/Makefile | 3 +- usr/src/uts/common/sys/devpoll.h | 18 +- usr/src/uts/common/sys/epoll.h | 89 ++++ usr/src/uts/common/sys/file.h | 5 + usr/src/uts/common/sys/poll.h | 13 +- usr/src/uts/common/sys/poll_impl.h | 10 +- usr/src/uts/common/syscall/poll.c | 19 +- usr/src/uts/intel/poll/Makefile | 10 + usr/src/uts/sparc/poll/Makefile | 5 + usr/src/uts/sun4v/io/vcc.c | 5 +- 39 files changed, 1568 insertions(+), 113 deletions(-) create mode 100644 usr/src/lib/libc/port/sys/epoll.c create mode 100644 usr/src/man/man3c/epoll_create.3c create mode 100644 usr/src/man/man3c/epoll_ctl.3c create mode 100644 usr/src/man/man3c/epoll_wait.3c create mode 100644 usr/src/man/man5/epoll.5 create mode 100644 usr/src/uts/common/sys/epoll.h (limited to 'usr/src') diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index 069268dc05..4e453da0c1 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -23,7 +23,7 @@ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved. 
*/ @@ -703,6 +703,8 @@ const struct ioc { /* /dev/poll ioctl() control codes */ { (uint_t)DP_POLL, "DP_POLL", NULL }, { (uint_t)DP_ISPOLLED, "DP_ISPOLLED", NULL }, + { (uint_t)DP_PPOLL, "DP_PPOLL", NULL }, + { (uint_t)DP_EPOLLCOMPAT, "DP_EPOLLCOMPAT", NULL }, /* the old /proc ioctl() control codes */ #define PIOC ('q'<<8) { (uint_t)(PIOC|1), "PIOCSTATUS", NULL }, diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 0c1421bbf2..b5e54b19fa 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -863,6 +863,7 @@ PORTSYS= \ chmod.o \ chown.o \ corectl.o \ + epoll.o \ exacctsys.o \ execl.o \ execle.o \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 9a76280c0a..d7e77502f2 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -903,6 +903,7 @@ PORTSYS= \ chmod.o \ chown.o \ corectl.o \ + epoll.o \ eventfd.o \ exacctsys.o \ execl.o \ diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 0b683fc923..017c7c31bc 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -93,6 +93,15 @@ $if _x86 && _ELF64 $add amd64 $endif +SYMBOL_VERSION ILLUMOS_0.15 { # epoll(3C) + protected: + epoll_create; + epoll_create1; + epoll_ctl; + epoll_wait; + epoll_pwait; +} ILLUMOS_0.14; + SYMBOL_VERSION ILLUMOS_0.14 { # strerror_l protected: strerror_l; diff --git a/usr/src/lib/libc/port/sys/epoll.c b/usr/src/lib/libc/port/sys/epoll.c new file mode 100644 index 0000000000..93379b583e --- /dev/null +++ b/usr/src/lib/libc/port/sys/epoll.c @@ -0,0 +1,207 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * Events that match their epoll(7) equivalents. + */ +#if EPOLLIN != POLLIN +#error value of EPOLLIN does not match value of POLLIN +#endif + +#if EPOLLPRI != POLLPRI +#error value of EPOLLPRI does not match value of POLLPRI +#endif + +#if EPOLLOUT != POLLOUT +#error value of EPOLLOUT does not match value of POLLOUT +#endif + +#if EPOLLRDNORM != POLLRDNORM +#error value of EPOLLRDNORM does not match value of POLLRDNORM +#endif + +#if EPOLLRDBAND != POLLRDBAND +#error value of EPOLLRDBAND does not match value of POLLRDBAND +#endif + +#if EPOLLERR != POLLERR +#error value of EPOLLERR does not match value of POLLERR +#endif + +#if EPOLLHUP != POLLHUP +#error value of EPOLLHUP does not match value of POLLHUP +#endif + +/* + * Events that we ignore entirely. They can be set in events, but they will + * never be returned. + */ +#define EPOLLIGNORED (EPOLLMSG | EPOLLWAKEUP) + +/* + * Events that we swizzle into other bit positions. + */ +#define EPOLLSWIZZLED \ + (EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM) + +int +epoll_create(int size) +{ + int fd; + + /* + * From the epoll_create() man page: "Since Linux 2.6.8, the size + * argument is ignored, but must be greater than zero." You keep using + * that word "ignored"... 
+ */ + if (size <= 0) { + errno = EINVAL; + return (-1); + } + + if ((fd = open("/dev/poll", O_RDWR)) == -1) + return (-1); + + if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) { + (void) close(fd); + return (-1); + } + + return (fd); +} + +int +epoll_create1(int flags) +{ + int fd, oflags = O_RDWR; + + if (flags & EPOLL_CLOEXEC) + oflags |= O_CLOEXEC; + + if ((fd = open("/dev/poll", oflags)) == -1) + return (-1); + + if (ioctl(fd, DP_EPOLLCOMPAT, 0) == -1) { + (void) close(fd); + return (-1); + } + + return (fd); +} + +int +epoll_ctl(int epfd, int op, int fd, struct epoll_event *event) +{ + dvpoll_epollfd_t epoll[2]; + uint32_t events, ev = 0; + int i = 0; + + epoll[i].dpep_pollfd.fd = fd; + + switch (op) { + case EPOLL_CTL_DEL: + ev = POLLREMOVE; + break; + + case EPOLL_CTL_MOD: + /* + * In the modify case, we pass down two events: one to + * remove the event and another to add it back. + */ + epoll[i++].dpep_pollfd.events = POLLREMOVE; + epoll[i].dpep_pollfd.fd = fd; + /* FALLTHROUGH */ + + case EPOLL_CTL_ADD: + /* + * Mask off the events that we ignore, and then swizzle the + * events for which our values differ from their epoll(7) + * equivalents. + */ + events = event->events; + ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED); + + if (events & EPOLLRDHUP) + ev |= POLLRDHUP; + + if (events & EPOLLET) + ev |= POLLET; + + if (events & EPOLLONESHOT) + ev |= POLLONESHOT; + + if (events & EPOLLWRNORM) + ev |= POLLWRNORM; + + if (events & EPOLLWRBAND) + ev |= POLLWRBAND; + + epoll[i].dpep_data = event->data.u64; + break; + + default: + errno = EOPNOTSUPP; + return (-1); + } + + epoll[i].dpep_pollfd.events = ev; + + return (write(epfd, epoll, sizeof (epoll[0]) * (i + 1)) == -1 ? 
-1 : 0); +} + +int +epoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + struct dvpoll arg; + + if (maxevents <= 0) { + errno = EINVAL; + return (-1); + } + + arg.dp_nfds = maxevents; + arg.dp_timeout = timeout; + arg.dp_fds = (pollfd_t *)events; + + return (ioctl(epfd, DP_POLL, &arg)); +} + +int +epoll_pwait(int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t *sigmask) +{ + struct dvpoll arg; + + if (maxevents <= 0) { + errno = EINVAL; + return (-1); + } + + arg.dp_nfds = maxevents; + arg.dp_timeout = timeout; + arg.dp_fds = (pollfd_t *)events; + arg.dp_setp = (sigset_t *)sigmask; + + return (ioctl(epfd, DP_PPOLL, &arg)); +} diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index 3856c5332c..dc965fe6ac 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -937,6 +937,7 @@ PORTSYS= \ chmod.o \ chown.o \ corectl.o \ + epoll.o \ eventfd.o \ exacctsys.o \ execl.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 1a65ab7680..415aaf2be2 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -882,6 +882,7 @@ PORTSYS= \ chown.o \ corectl.o \ eventfd.o \ + epoll.o \ exacctsys.o \ execl.o \ execle.o \ diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile index eeb6267192..f6cadebe95 100644 --- a/usr/src/man/man3c/Makefile +++ b/usr/src/man/man3c/Makefile @@ -110,6 +110,9 @@ MANFILES= __fbufsize.3c \ enable_extended_FILE_stdio.3c \ encrypt.3c \ end.3c \ + epoll_create.3c \ + epoll_ctl.3c \ + epoll_wait.3c \ err.3c \ euclen.3c \ eventfd.3c \ @@ -733,6 +736,8 @@ MANLINKS= FD_CLR.3c \ endusershell.3c \ endutent.3c \ endutxent.3c \ + epoll_create1.3c \ + epoll_pwait.3c \ erand48.3c \ errno.3c \ errx.3c \ @@ -1582,6 +1587,9 @@ _etext.3c := LINKSRC = end.3c edata.3c := LINKSRC = end.3c etext.3c := LINKSRC = end.3c +epoll_create1.3c := 
LINKSRC = epoll_create.3c +epoll_pwait.3c := LINKSRC = epoll_wait.3c + errx.3c := LINKSRC = err.3c verr.3c := LINKSRC = err.3c verrx.3c := LINKSRC = err.3c diff --git a/usr/src/man/man3c/epoll_create.3c b/usr/src/man/man3c/epoll_create.3c new file mode 100644 index 0000000000..6f54f638f7 --- /dev/null +++ b/usr/src/man/man3c/epoll_create.3c @@ -0,0 +1,99 @@ +'\" te +.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.TH EPOLL_CREATE 3C "Apr 17, 2014" +.SH NAME +epoll_create, epoll_create1 \- create an epoll instance +.SH SYNOPSIS + +.LP +.nf +#include <sys/epoll.h> + +\fBint\fR \fBepoll_create\fR(\fBint\fR \fIsize\fR); +.fi + +.LP +.nf +\fBint\fR \fBepoll_create1\fR(\fBint\fR \fIflags\fR); +.fi + +.SH DESCRIPTION +.LP +The \fBepoll_create()\fR and \fBepoll_create1()\fR functions both create an +\fBepoll\fR(5) instance that can be operated upon via \fBepoll_ctl\fR(3C), +\fBepoll_wait\fR(3C) and \fBepoll_pwait\fR(3C). \fBepoll\fR instances are +represented as file descriptors, and should be closed via \fBclose\fR(2). + +The only difference between the two functions is their signature; +\fBepoll_create()\fR takes a size argument that +is vestigial and is only meaningful in as much as it must be greater than +zero, while \fBepoll_create1()\fR takes a flags argument that can have +any of the following values: + +.sp +.ne 2 +.na +\fBEPOLL_CLOEXEC\fR +.ad +.RS 12n +Instance should be closed upon an +\fBexec\fR(2); see \fBopen\fR(2)'s description of \fBO_CLOEXEC\fR. +.RE + +.SH RETURN VALUES +.LP +Upon successful completion, a file descriptor for the new \fBepoll\fR instance is returned.
Otherwise, -1 is returned and errno +is set to indicate the error. +.SH ERRORS +.LP +The \fBepoll_create()\fR and \fBepoll_create1()\fR functions will fail if: +.sp +.ne 2 +.na +\fB\fBEINVAL\fR\fR +.ad +.RS 10n +Either the \fIsize\fR is less than or equal to zero (\fBepoll_create()\fR) or the \fIflags\fR +are invalid (\fBepoll_create1()\fR). +.RE + +.sp +.ne 2 +.na +\fB\fBEMFILE\fR\fR +.ad +.RS 10n +There are currently {\fBOPEN_MAX\fR} file descriptors open in the calling +process. +.RE + +.sp +.ne 2 +.na +\fB\fBENFILE\fR\fR +.ad +.RS 10n +The maximum allowable number of files is currently open in the system. +.RE + +.sp +.SH NOTES +.LP + +The \fBepoll\fR(5) facility is implemented for purposes of offering +compatibility for Linux-borne applications; native +applications should continue to prefer using event ports via the +\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C) +interfaces. See \fBepoll\fR(5) for compatibility details and restrictions. + +.SH SEE ALSO +.LP +\fBepoll_ctl\fR(3C), \fBepoll_wait\fR(3C), \fBepoll\fR(5) diff --git a/usr/src/man/man3c/epoll_ctl.3c b/usr/src/man/man3c/epoll_ctl.3c new file mode 100644 index 0000000000..19c02f2abb --- /dev/null +++ b/usr/src/man/man3c/epoll_ctl.3c @@ -0,0 +1,309 @@ +'\" te +.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL.
+.TH EPOLL_CTL 3C "Apr 17, 2014" +.SH NAME +epoll_ctl \- control an epoll instance +.SH SYNOPSIS + +.LP +.nf +#include <sys/epoll.h> + +\fBint\fR \fBepoll_ctl\fR(\fBint\fR \fIepfd\fR, \fBint\fR \fIop\fR, \fBint\fR \fIfd\fR, \fBstruct epoll_event *\fR\fIevent\fR); +.fi + +.SH DESCRIPTION +.LP +The \fBepoll_ctl()\fR function executes the operation specified by +\fIop\fR (as parameterized by \fIevent\fR) on the \fIepfd\fR epoll instance. +Valid values for \fIop\fR: + +.sp +.ne 2 +.na +\fBEPOLL_CTL_ADD\fR +.ad +.RS 12n +For the \fBepoll\fR(5) instance specified by \fIepfd\fR, +associate the file descriptor specified by \fIfd\fR with the event specified +by \fIevent\fR. +.RE + +.sp +.ne 2 +.na +\fBEPOLL_CTL_DEL\fR +.ad +.RS 12n +For the \fBepoll\fR(5) instance specified by \fIepfd\fR, +remove all event associations for the file descriptor specified by \fIfd\fR. +\fIevent\fR is ignored, and may be NULL. +.RE + +.sp +.ne 2 +.na +\fBEPOLL_CTL_MOD\fR +.ad +.RS 12n +For the \fBepoll\fR(5) instance specified by \fIepfd\fR, modify the event +association for the file descriptor specified by \fIfd\fR to be that +specified by \fIevent\fR. + +.RE + +The \fIevent\fR parameter has the following structure: + +.in +4 +.nf +typedef union epoll_data { + void *ptr; + int fd; + uint32_t u32; + uint64_t u64; +} epoll_data_t; + +struct epoll_event { + uint32_t events; + epoll_data_t data; +}; +.fi +.in -4 + +The \fIdata\fR field specifies the datum to +be associated with the event and +will be returned via \fBepoll_wait\fR(3C). +The \fIevents\fR field denotes both the desired events (when specified via +\fBepoll_ctl()\fR) and the events that have occurred (when returned via +\fBepoll_wait\fR(3C)). +In either case, the +\fIevents\fR field is a bitmask constructed by a logical \fBOR\fR operation +of any combination of the following event flags: + +.sp +.ne 2 +.na +\fBEPOLLIN\fR +.ad +.RS 14n +Data other than high priority data may be read without blocking.
For streams, +this flag is set in the returned \fIevents\fR even if the message is of +zero length. +.RE + +.sp +.ne 2 +.na +\fBEPOLLPRI\fR +.ad +.RS 14n +High priority data may be read without blocking. For streams, +this flag is set in the returned \fIevents\fR even if the message is of zero +length. +.RE + +.sp +.ne 2 +.na +\fBEPOLLOUT\fR +.ad +.RS 14n +Normal data (priority band equals 0) may be written without blocking. +.RE + +.sp +.ne 2 +.na +\fBEPOLLRDNORM\fR +.ad +.RS 14n +Normal data (priority band equals 0) may be read without blocking. For streams, +this flag is set in the returned \fIevents\fR even if the message is of +zero length. +.RE + +.sp +.ne 2 +.na +\fBEPOLLRDBAND\fR +.ad +.RS 14n +Data from a non-zero priority band may be read without blocking. For streams, +this flag is set in the returned \fIevents\fR even if the message is of +zero length. +.RE + +.sp +.ne 2 +.na +\fBEPOLLWRNORM\fR +.ad +.RS 14n +The same as \fBEPOLLOUT\fR. +.RE + +.sp +.ne 2 +.na +\fBEPOLLWRBAND\fR +.ad +.RS 14n +Priority data (priority band > 0) may be written. This event only examines +bands that have been written to at least once. +.RE + +.sp +.ne 2 +.na +\fBEPOLLMSG\fR +.ad +.RS 14n +This exists only for backwards binary and source compatibility with Linux; +it has no meaning and is ignored. +.RE + +.sp +.ne 2 +.na +\fBEPOLLERR\fR +.ad +.RS 14n +An error has occurred on the device or stream. This flag is only valid in the +returned \fIevents\fR field. +.RE + +.sp +.ne 2 +.na +\fBEPOLLHUP\fR +.ad +.RS 14n +A hangup has occurred on the stream. This event and \fBEPOLLOUT\fR are mutually +exclusive; a stream can never be writable if a hangup has occurred. However, +this event and \fBEPOLLIN\fR, \fBEPOLLRDNORM\fR, \fBEPOLLRDBAND\fR, +\fBEPOLLRDHUP\fR or +\fBEPOLLPRI\fR are not mutually exclusive.
This flag is only valid in the +\fIevents\fR field returned from \fBepoll_wait\fR(3C); it is not used +in the \fIevents\fR field specified via \fBepoll_ctl()\fR. +.RE + +.sp +.ne 2 +.na +\fBEPOLLRDHUP\fR +.ad +.RS 14n +The stream socket peer shut down the writing half of the connection and no +further data will be readable via the socket. This event is not mutually +exclusive with \fBEPOLLIN\fR. +.RE + +.sp +.ne 2 +.na +\fBEPOLLWAKEUP\fR +.ad +.RS 14n +This exists only for backwards binary and source compatibility with Linux; +it has no meaning and is ignored. +.RE + +.sp +.ne 2 +.na +\fBEPOLLONESHOT\fR +.ad +.RS 14n +Sets the specified event to be in one-shot mode, whereby the event association +with the \fBepoll\fR(5) instance specified by \fIepfd\fR is removed atomically +as the event is returned via \fBepoll_wait\fR(3C). Use of this mode allows +for resolution of some of the +races inherent in multithreaded use of \fBepoll_wait\fR(3C). +.RE + +.sp +.ne 2 +.na +\fBEPOLLET\fR +.ad +.RS 14n +Sets the specified event to be in edge-triggered mode instead of the default +mode of level-triggered. In this mode, events will be induced by +transitions on an event source rather than the state of the event source. +While perhaps superficially appealing, this mode introduces several new +potential failure modes for user-level software and should be used +with caution. +.RE + +.SH RETURN VALUES +.LP +Upon successful completion, \fBepoll_ctl()\fR returns 0. +If an error occurs, -1 is returned and errno is set to indicate +the error. + +.SH ERRORS +.LP +\fBepoll_ctl()\fR will fail if: +.sp +.ne 2 +.na +\fB\fBEBADF\fR\fR +.ad +.RS 10n +\fIepfd\fR is not a valid file descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBEFAULT\fR\fR +.ad +.RS 10n +The memory associated with \fIevent\fR was not mapped. 
+.RE + +.sp +.ne 2 +.na +\fB\fBEEXIST\fR\fR +.ad +.RS 10n +The operation specified was \fBEPOLL_CTL_ADD\fR and the specified file +descriptor is already associated with an event for the specified +\fBepoll\fR(5) instance. +.RE + +.sp +.ne 2 +.na +\fB\fBENOENT\fR\fR +.ad +.RS 10n +The operation specified was \fBEPOLL_CTL_MOD\fR or \fBEPOLL_CTL_DEL\fR and +the specified file descriptor is not associated with an event for the +specified \fBepoll\fR(5) instance. +.RE + +.sp +.SH NOTES +.LP + +The \fBepoll\fR(5) facility is implemented for purposes of offering +compatibility for Linux-borne applications; native +applications should continue to prefer using event ports via the +\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C) +interfaces. See \fBepoll\fR(5) for compatibility details and restrictions. + +.SH SEE ALSO +.LP +\fBepoll_create\fR(3C), \fBepoll_wait\fR(3C), +\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C), +\fBepoll\fR(5) diff --git a/usr/src/man/man3c/epoll_wait.3c b/usr/src/man/man3c/epoll_wait.3c new file mode 100644 index 0000000000..6ae9e0f9c4 --- /dev/null +++ b/usr/src/man/man3c/epoll_wait.3c @@ -0,0 +1,108 @@ +'\" te +.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. 
+.TH EPOLL_WAIT 3C "Apr 17, 2014" +.SH NAME +epoll_wait, epoll_pwait \- wait for epoll events +.SH SYNOPSIS + +.LP +.nf +#include <sys/epoll.h> + +\fBint\fR \fBepoll_wait\fR(\fBint\fR \fIepfd\fR, \fBstruct epoll_event *\fR\fIevents\fR, + \fBint\fR \fImaxevents\fR, \fBint\fR \fItimeout\fR); +.fi + +.LP +.nf +\fBint\fR \fBepoll_pwait\fR(\fBint\fR \fIepfd\fR, \fBstruct epoll_event *\fR\fIevents\fR, + \fBint\fR \fImaxevents\fR, \fBint\fR \fItimeout\fR, + \fBconst sigset_t *\fR\fIsigmask\fR); +.fi + +.SH DESCRIPTION +.LP +The \fBepoll_wait()\fR function waits for events on the \fBepoll\fR(5) +instance specified by \fIepfd\fR. The \fIevents\fR parameter must point to +an array of \fImaxevents\fR \fIepoll_event\fR structures to be +filled in with pending events. The \fItimeout\fR argument specifies the +number of milliseconds to wait for an event if none is pending. A +\fItimeout\fR of -1 denotes an infinite timeout. + +The \fBepoll_pwait()\fR function is similar to \fBepoll_wait()\fR, but takes an +additional \fIsigmask\fR argument that specifies the desired signal mask +when \fBepoll_pwait()\fR is blocked. It is equivalent to atomically +setting the signal mask, calling \fBepoll_wait()\fR, and restoring the +signal mask upon return, and is therefore similar to the relationship +between \fBselect\fR(3C) and \fBpselect\fR(3C). + +.SH RETURN VALUES +.LP +Upon successful completion, \fBepoll_wait()\fR and \fBepoll_pwait()\fR return +the number of events, or 0 if none was pending and \fItimeout\fR milliseconds +elapsed. If an error occurs, -1 is returned and errno is set to indicate +the error. + +.SH ERRORS +.LP +The \fBepoll_wait()\fR and \fBepoll_pwait()\fR functions will fail if: +.sp +.ne 2 +.na +\fB\fBEBADF\fR\fR +.ad +.RS 10n +\fIepfd\fR is not a valid file descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBEFAULT\fR\fR +.ad +.RS 10n +The memory associated with \fIevents\fR was not mapped or was not writable. 
+.RE + +.sp +.ne 2 +.na +\fB\fBEINTR\fR\fR +.ad +.RS 10n +A signal was received during the \fBepoll_wait()\fR or \fBepoll_pwait()\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBEINVAL\fR\fR +.ad +.RS 10n +Either \fIepfd\fR is not a valid \fBepoll\fR(5) instance or \fImaxevents\fR +is not greater than zero. +.RE + +.sp +.SH NOTES +.LP + +The \fBepoll\fR(5) facility is implemented for purposes of offering +compatibility for Linux-borne applications; native +applications should continue to prefer using event ports via the +\fBport_create\fR(3C), \fBport_associate\fR(3C) and \fBport_get\fR(3C) +interfaces. See \fBepoll\fR(5) for compatibility details and restrictions. + +.SH SEE ALSO +.LP +\fBepoll_create\fR(3C), \fBepoll_ctl\fR(3C), +\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C), +\fBpselect\fR(3C), \fBepoll\fR(5) diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile index 7c928f3473..4784603013 100644 --- a/usr/src/man/man5/Makefile +++ b/usr/src/man/man5/Makefile @@ -41,6 +41,7 @@ MANFILES= Intro.5 \ device_clean.5 \ dhcp.5 \ environ.5 \ + epoll.5 \ eqn.5 \ eqnchar.5 \ eventfd.5 \ diff --git a/usr/src/man/man5/epoll.5 b/usr/src/man/man5/epoll.5 new file mode 100644 index 0000000000..860b2bb91f --- /dev/null +++ b/usr/src/man/man5/epoll.5 @@ -0,0 +1,110 @@ +'\" te +.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. 
+.TH EPOLL 5 "Apr 17, 2014" +.SH NAME +epoll \- Linux-compatible I/O event notification facility +.SH SYNOPSIS + +.LP +.nf +#include <sys/epoll.h> +.fi + +.SH DESCRIPTION +.LP + +\fBepoll\fR is a facility for efficient event-oriented I/O that has a +similar model to \fBpoll\fR(2), but does not necessitate rescanning a +set of file descriptors to wait for an event. \fBepoll\fR is of Linux +origins, and this facility is designed to be binary-compatible with +the Linux facility, including the following interfaces: + +.RS +4 +.TP +.ie t \(bu +.el o +\fBepoll_create\fR(3C) creates an \fBepoll\fR instance, returning a file +descriptor. It contains a size argument which is meaningful only in as +much as it cannot be 0. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBepoll_create1\fR(3C) also creates an \fBepoll\fR instance, but eliminates +the meaningless size argument -- replacing it instead with a flags +argument. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBepoll_ctl\fR(3C) allows file descriptors to be added +(via \fBEPOLL_CTL_ADD\fR), deleted (via \fBEPOLL_CTL_DEL\fR) or +modified (via \fBEPOLL_CTL_MOD\fR) with respect to the \fBepoll\fR'd set +of file descriptors. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBepoll_wait\fR(3C) fetches pending events for file descriptors added +via \fBepoll_ctl\fR(3C), blocking the caller if no such events are pending. +.RE +.RS +4 +.TP +.ie t \(bu +.el o +\fBepoll_pwait\fR(3C) operates in a similar manner to \fBepoll_wait\fR(3C), but +allows the caller to specify a signal mask to be set atomically with respect +to waiting for events. +.RE + +.sp +.SH NOTES +.LP + +The \fBepoll\fR facility is implemented +for purposes of offering compatibility to and portability of Linux-borne +applications; native applications should continue to prefer using event ports +via the \fBport_create\fR(3C), +\fBport_associate\fR(3C) and \fBport_getn\fR(3C) interfaces. 
+In particular, use of \fBepoll\fR in a multithreaded environment is fraught +with peril; even when using \fBEPOLLONESHOT\fR for one-shot events, +there are race conditions with respect to \fBclose\fR(2) that are unresolvable. +(For more details, see the aborted effort in Linux to resolve this via the +proposed +\fBEPOLL_CTL_DISABLE\fR operation.) +The event port facility -- like the BSD kqueue facility that inspired it -- +is designed to deal with such issues via explicit event source dissociation. + +While a best effort has been made to mimic the Linux semantics, there +are some semantics that are too peculiar or ill-conceived to merit +accommodation. In particular, the Linux \fBepoll\fR facility will -- by +design -- continue to generate events for closed file descriptors where/when +the underlying file description remains open. For example, if one were +to \fBfork\fR(2) and subsequently close an actively \fBepoll\fR'd file +descriptor in the parent, +any events generated in the child on the implicitly duplicated file descriptor +will continue to be delivered to the parent -- despite the fact that the +parent itself no longer has any notion of the file description! +This \fBepoll\fR facility refuses to honor +these semantics; closing the \fBEPOLL_CTL_ADD\fR'd file descriptor +will always result in no further +events being generated for that event description. + +.SH SEE ALSO +.LP +\fBepoll_create\fR(3C), \fBepoll_create1\fR(3C), \fBepoll_ctl\fR(3C), +\fBepoll_wait\fR(3C), \fBepoll_pwait\fR(3C), +\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_dissociate\fR(3C), +\fBport_get\fR(3C), +\fBpselect\fR(3C) diff --git a/usr/src/man/man7d/poll.7d b/usr/src/man/man7d/poll.7d index cd3db77de9..e3d9e074aa 100644 --- a/usr/src/man/man7d/poll.7d +++ b/usr/src/man/man7d/poll.7d @@ -17,7 +17,6 @@ int n = ioctl(int fd, DP_ISPOLLED, struct pollfd* pfd);\fR .fi .SH PARAMETERS -.sp .ne 2 .na \fB\fIfd\fR \fR @@ -73,15 +72,6 @@ Pointer to \fBpollfd\fR structure. 
.SH DESCRIPTION .LP -Note - -.sp -.RS 2 -The \fB/dev/poll\fR device, associated driver and corresponding manpages may be -removed in a future Solaris release. For similar functionality in the event -ports framework, see \fBport_create\fR(3C). -.RE -.sp -.LP The \fB/dev/poll\fR driver is a special driver that enables you to monitor multiple sets of polled file descriptors. By using the \fB/dev/poll\fR driver, you can efficiently poll large numbers of file descriptors. Access to @@ -165,7 +155,6 @@ currently polled \fBevents\fR. The ioctl returns \fB0\fR if the file descriptor is not in the set. The \fBpollfd\fR structure pointed by \fIpfd\fR is not modified. The ioctl returns a \fB-1\fR if the call fails. .SH EXAMPLES -.sp .LP The following example shows how \fB/dev/poll\fR may be used. .sp @@ -293,7 +282,6 @@ The following example is part of a test program which shows how .in -2 .SH ERRORS -.sp .ne 2 .na \fB\fBEACCES\fR \fR @@ -347,7 +335,6 @@ special file does not exist. .RE .SH ATTRIBUTES -.sp .LP See \fBattributes\fR(5) for a description of the following attributes: .sp @@ -364,11 +351,9 @@ MT-Level Safe .TE .SH SEE ALSO -.sp .LP \fBopen\fR(2), \fBpoll\fR(2), \fBwrite\fR(2), \fBattributes\fR(5) .SH NOTES -.sp .LP The \fB/dev/poll\fR API is particularly beneficial to applications that poll a large number of file descriptors repeatedly. Applications will exhibit the diff --git a/usr/src/man/man9e/chpoll.9e b/usr/src/man/man9e/chpoll.9e index 27fe2a20e9..468ef7b53f 100644 --- a/usr/src/man/man9e/chpoll.9e +++ b/usr/src/man/man9e/chpoll.9e @@ -121,6 +121,17 @@ The same as \fBPOLLOUT\fR. Priority data (priority band > 0) may be written. .RE +.sp +.ne 2 +.na +\fB\fBPOLLET\fR\fR +.ad +.RS 14n +The desired event is to be edge-triggered; calls to \fBpollwakeup\fR(9F) +should not be suppressed, even if the event is pending at the time of +call to the \fBchpoll()\fR function. +.RE + .RE .sp @@ -197,7 +208,11 @@ be called with multiple events at one time. 
The \fBpollwakup()\fR can be called regardless of whether or not the \fBchpoll()\fR entry is called; it should be called every time the driver detects the pollable event. The driver must not hold any mutex across the call to \fBpollwakeup\fR(9F) that is acquired in its -\fBchpoll()\fR entry point, or a deadlock may result. +\fBchpoll()\fR entry point, or a deadlock may result. Note that if +\fBPOLLET\fR is set in the specified events, the driver must call +\fBpollwakeup\fR(9F) on subsequent events, even if events are pending at +the time of the call to \fBchpoll()\fR. + .RE .SH RETURN VALUES .LP diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 54ba88c061..b72d713cd8 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -954,6 +954,7 @@ file path=usr/include/sys/elf_amd64.h file path=usr/include/sys/elf_notes.h file path=usr/include/sys/elftypes.h file path=usr/include/sys/epm.h +file path=usr/include/sys/epoll.h file path=usr/include/sys/errno.h file path=usr/include/sys/errorq.h file path=usr/include/sys/errorq_impl.h diff --git a/usr/src/pkg/manifests/system-kernel.man5.inc b/usr/src/pkg/manifests/system-kernel.man5.inc index ab1d31cea6..090344a9c3 100644 --- a/usr/src/pkg/manifests/system-kernel.man5.inc +++ b/usr/src/pkg/manifests/system-kernel.man5.inc @@ -12,6 +12,7 @@ # Copyright 2011, Richard Lowe # Copyright 2014 Garrett D'Amore +file path=usr/share/man/man5/epoll.5 file path=usr/share/man/man5/fsattr.5 file path=usr/share/man/man5/ieee802.11.5 file path=usr/share/man/man5/ieee802.3.5 diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc index 3b67d7408b..27268505b3 100644 --- a/usr/src/pkg/manifests/system-library.man3c.inc +++ b/usr/src/pkg/manifests/system-library.man3c.inc @@ -105,6 +105,9 @@ file path=usr/share/man/man3c/ecvt.3c file path=usr/share/man/man3c/enable_extended_FILE_stdio.3c file 
path=usr/share/man/man3c/encrypt.3c file path=usr/share/man/man3c/end.3c +file path=usr/share/man/man3c/epoll_create.3c +file path=usr/share/man/man3c/epoll_ctl.3c +file path=usr/share/man/man3c/epoll_wait.3c file path=usr/share/man/man3c/err.3c file path=usr/share/man/man3c/euclen.3c file path=usr/share/man/man3c/eventfd.3c @@ -732,6 +735,8 @@ link path=usr/share/man/man3c/endspent.3c target=getspnam.3c link path=usr/share/man/man3c/endusershell.3c target=getusershell.3c link path=usr/share/man/man3c/endutent.3c target=getutent.3c link path=usr/share/man/man3c/endutxent.3c target=getutxent.3c +link path=usr/share/man/man3c/epoll_create1.3c target=epoll_create.3c +link path=usr/share/man/man3c/epoll_pwait.3c target=epoll_wait.3c link path=usr/share/man/man3c/erand48.3c target=drand48.3c link path=usr/share/man/man3c/errno.3c target=perror.3c link path=usr/share/man/man3c/errx.3c target=err.3c diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c index ac89e430c7..fee2924093 100644 --- a/usr/src/uts/common/fs/fifofs/fifovnops.c +++ b/usr/src/uts/common/fs/fifofs/fifovnops.c @@ -27,7 +27,9 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ /* * FIFOFS file system vnode operations. 
This file system @@ -1832,17 +1834,16 @@ fifo_poll(vnode_t *vp, short events, int anyyet, short *reventsp, } /* - * if we happened to get something, return + * if we happened to get something and we're not edge-triggered, return */ - - if ((*reventsp = (short)retevents) != 0) { + if ((*reventsp = (short)retevents) != 0 && !(events & POLLET)) { mutex_exit(&fnp->fn_lock->flk_lock); return (0); } /* - * If poll() has not found any events yet, set up event cell - * to wake up the poll if a requested event occurs on this + * If poll() has not found any events yet or we're edge-triggered, set + * up event cell to wake up the poll if a requested event occurs on this * pipe/fifo. */ if (!anyyet) { diff --git a/usr/src/uts/common/fs/portfs/port_vnops.c b/usr/src/uts/common/fs/portfs/port_vnops.c index b2f5088e06..ab95c0a1f8 100644 --- a/usr/src/uts/common/fs/portfs/port_vnops.c +++ b/usr/src/uts/common/fs/portfs/port_vnops.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + #include #include #include @@ -294,14 +298,10 @@ port_poll(vnode_t *vp, short events, int anyyet, short *reventsp, levents |= POLLOUT; levents &= events; *reventsp = levents; - if (levents == 0) { - if (!anyyet) { - *phpp = &pp->port_pollhd; - portq->portq_flags |= - events & POLLIN ? PORTQ_POLLIN : 0; - portq->portq_flags |= - events & POLLOUT ? PORTQ_POLLOUT : 0; - } + if ((levents == 0 && !anyyet) || (events & POLLET)) { + *phpp = &pp->port_pollhd; + portq->portq_flags |= events & POLLIN ? PORTQ_POLLIN : 0; + portq->portq_flags |= events & POLLOUT ? PORTQ_POLLOUT : 0; } mutex_exit(&portq->portq_mutex); return (0); diff --git a/usr/src/uts/common/fs/proc/prvnops.c b/usr/src/uts/common/fs/proc/prvnops.c index 411c9b8b0b..e392ce4b14 100644 --- a/usr/src/uts/common/fs/proc/prvnops.c +++ b/usr/src/uts/common/fs/proc/prvnops.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -6010,7 +6010,7 @@ prpoll(vnode_t *vp, short events, int anyyet, short *reventsp, } *reventsp = revents; - if (!anyyet && revents == 0) { + if ((!anyyet && revents == 0) || (events & POLLET)) { /* * Arrange to wake up the polling lwp when * the target process/lwp stops or terminates diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c index 0be628f329..e5bc6dc845 100644 --- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c +++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c @@ -953,6 +953,13 @@ so_poll(struct sonode *so, short events, int anyyet, short *reventsp, if (!list_is_empty(&so->so_acceptq_list)) *reventsp |= (POLLIN|POLLRDNORM) & events; + /* + * If we're looking for POLLRDHUP, indicate it if we have sent the + * last rx signal for the socket. + */ + if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG)) + *reventsp |= POLLRDHUP; + /* Data */ /* so_downcalls is null for sctp */ if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { @@ -988,14 +995,20 @@ so_poll(struct sonode *so, short events, int anyyet, short *reventsp, *reventsp |= POLLHUP; } - if (!*reventsp && !anyyet) { + if ((!*reventsp && !anyyet) || (events & POLLET)) { /* Check for read events again, but this time under lock */ if (events & (POLLIN|POLLRDNORM)) { mutex_enter(&so->so_lock); if (SO_HAVE_DATA(so) || !list_is_empty(&so->so_acceptq_list)) { + if (events & POLLET) { + so->so_pollev |= SO_POLLEV_IN; + *phpp = &so->so_poll_list; + } + mutex_exit(&so->so_lock); *reventsp |= (POLLIN|POLLRDNORM) & events; + return (0); } else { so->so_pollev |= SO_POLLEV_IN; diff --git a/usr/src/uts/common/fs/sockfs/socknotify.c b/usr/src/uts/common/fs/sockfs/socknotify.c index 3d5ba2a7e8..3f858afecc 100644 --- a/usr/src/uts/common/fs/sockfs/socknotify.c +++ 
b/usr/src/uts/common/fs/sockfs/socknotify.c @@ -377,7 +377,7 @@ i_so_notify_last_rx(struct sonode *so, int *pollev, int *sigev) so->so_state |= SS_SENTLASTREADSIG; so->so_pollev &= ~SO_POLLEV_IN; - *pollev |= POLLIN|POLLRDNORM; + *pollev |= POLLIN|POLLRDNORM|POLLRDHUP; *sigev |= SOCKETSIG_READ; return (1); diff --git a/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c b/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c index 94323582d6..4a2556177e 100644 --- a/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c +++ b/usr/src/uts/common/io/1394/targets/av1394/av1394_async.c @@ -24,7 +24,9 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ /* * av1394 asynchronous module @@ -359,9 +361,10 @@ av1394_async_poll(av1394_inst_t *avp, short events, int anyyet, short *reventsp, AV1394_TNF_ENTER(av1394_async_poll); if (events & POLLIN) { - if (av1394_peekq(rq)) { + if (av1394_peekq(rq)) *reventsp |= POLLIN; - } else if (!anyyet) { + + if ((!*reventsp && !anyyet) || (events & POLLET)) { mutex_enter(&ap->a_mutex); ap->a_pollevents |= POLLIN; *phpp = &ap->a_pollhead; @@ -438,8 +441,8 @@ av1394_async_create_minor_node(av1394_inst_t *avp) int ret; ret = ddi_create_minor_node(avp->av_dip, "async", - S_IFCHR, AV1394_ASYNC_INST2MINOR(avp->av_instance), - DDI_NT_AV_ASYNC, NULL); + S_IFCHR, AV1394_ASYNC_INST2MINOR(avp->av_instance), + DDI_NT_AV_ASYNC, NULL); if (ret != DDI_SUCCESS) { TNF_PROBE_0(av1394_async_create_minor_node_error, AV1394_TNF_ASYNC_ERROR, ""); diff --git a/usr/src/uts/common/io/devpoll.c b/usr/src/uts/common/io/devpoll.c index a3fcbbba03..7b3454f89c 100644 --- a/usr/src/uts/common/io/devpoll.c +++ b/usr/src/uts/common/io/devpoll.c @@ -25,6 +25,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
*/ #include @@ -45,6 +46,8 @@ #include #include #include +#include +#include #define RESERVED 1 @@ -237,7 +240,8 @@ dpinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) * stale entries! */ static int -dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp) +dp_pcache_poll(dp_entry_t *dpep, void *dpbuf, + pollcache_t *pcp, nfds_t nfds, int *fdcntp) { int start, ostart, end; int fdcnt, fd; @@ -247,7 +251,10 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp) boolean_t no_wrap; pollhead_t *php; polldat_t *pdp; + pollfd_t *pfdp; + epoll_event_t *epoll; int error = 0; + short mask = POLLRDHUP | POLLWRBAND; ASSERT(MUTEX_HELD(&pcp->pc_lock)); if (pcp->pc_bitmap == NULL) { @@ -257,6 +264,14 @@ dp_pcache_poll(pollfd_t *pfdp, pollcache_t *pcp, nfds_t nfds, int *fdcntp) */ return (error); } + + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + pfdp = NULL; + epoll = (epoll_event_t *)dpbuf; + } else { + pfdp = (pollfd_t *)dpbuf; + epoll = NULL; + } retry: start = ostart = pcp->pc_mapstart; end = pcp->pc_mapend; @@ -316,11 +331,32 @@ repoll: * polling a closed fd. Hope this will remind * user to do a POLLREMOVE. */ - pfdp[fdcnt].fd = fd; - pfdp[fdcnt].revents = POLLNVAL; - fdcnt++; + if (pfdp != NULL) { + pfdp[fdcnt].fd = fd; + pfdp[fdcnt].revents = POLLNVAL; + fdcnt++; + continue; + } + + /* + * In the epoll compatibility case, we actually + * perform the implicit removal to remain + * closer to the epoll semantics. 
+ */ + ASSERT(epoll != NULL); + + pdp->pd_fp = NULL; + pdp->pd_events = 0; + + if (php != NULL) { + pollhead_delete(php, pdp); + pdp->pd_php = NULL; + } + + BT_CLEAR(pcp->pc_bitmap, fd); continue; } + if (fp != pdp->pd_fp) { /* * user is polling on a cached fd which was @@ -376,9 +412,69 @@ repoll: } if (revent != 0) { - pfdp[fdcnt].fd = fd; - pfdp[fdcnt].events = pdp->pd_events; - pfdp[fdcnt].revents = revent; + if (pfdp != NULL) { + pfdp[fdcnt].fd = fd; + pfdp[fdcnt].events = pdp->pd_events; + pfdp[fdcnt].revents = revent; + } else { + epoll_event_t *ep = &epoll[fdcnt]; + + ASSERT(epoll != NULL); + ep->data.u64 = pdp->pd_epolldata; + + /* + * If any of the event bits are set for + * which poll and epoll representations + * differ, swizzle in the native epoll + * values. + */ + if (revent & mask) { + ep->events = (revent & ~mask) | + ((revent & POLLRDHUP) ? + EPOLLRDHUP : 0) | + ((revent & POLLWRBAND) ? + EPOLLWRBAND : 0); + } else { + ep->events = revent; + } + + /* + * We define POLLWRNORM to be POLLOUT, + * but epoll has separate definitions + * for them; if POLLOUT is set and the + * user has asked for EPOLLWRNORM, set + * that as well. + */ + if ((revent & POLLOUT) && + (pdp->pd_events & EPOLLWRNORM)) { + ep->events |= EPOLLWRNORM; + } + } + + /* + * If POLLET is set, clear the bit in the + * bitmap -- which effectively latches the + * edge on a pollwakeup() from the driver. + */ + if (pdp->pd_events & POLLET) + BT_CLEAR(pcp->pc_bitmap, fd); + + /* + * If POLLONESHOT is set, perform the implicit + * POLLREMOVE. + */ + if (pdp->pd_events & POLLONESHOT) { + pdp->pd_fp = NULL; + pdp->pd_events = 0; + + if (php != NULL) { + pollhead_delete(php, pdp); + pdp->pd_php = NULL; + } + + BT_CLEAR(pcp->pc_bitmap, fd); + } + fdcnt++; } else if (php != NULL) { /* @@ -392,7 +488,7 @@ repoll: * in bitmap. 
*/ if ((pdp->pd_php != NULL) && - ((pcp->pc_flag & T_POLLWAKE) == 0)) { + ((pcp->pc_flag & PC_POLLWAKE) == 0)) { BT_CLEAR(pcp->pc_bitmap, fd); } if (pdp->pd_php == NULL) { @@ -473,11 +569,15 @@ dpopen(dev_t *devp, int flag, int otyp, cred_t *credp) /* * allocate a pollcache skeleton here. Delay allocating bitmap * structures until dpwrite() time, since we don't know the - * optimal size yet. + * optimal size yet. We also delay setting the pid until either + * dpwrite() or attempt to poll on the instance, allowing parents + * to create instances of /dev/poll for their children. (In the + * epoll compatibility case, this check isn't performed to maintain + * semantic compatibility.) */ pcp = pcache_alloc(); dpep->dpe_pcache = pcp; - pcp->pc_pid = curproc->p_pid; + pcp->pc_pid = -1; *devp = makedevice(getmajor(*devp), minordev); /* clone the driver */ mutex_enter(&devpoll_lock); ASSERT(minordev < dptblsize); @@ -499,7 +599,9 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) dp_entry_t *dpep; pollcache_t *pcp; pollfd_t *pollfdp, *pfdp; - int error; + dvpoll_epollfd_t *epfdp; + uintptr_t limit; + int error, size; ssize_t uiosize; nfds_t pollfdnum; struct pollhead *php = NULL; @@ -515,11 +617,23 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) ASSERT(dpep != NULL); mutex_exit(&devpoll_lock); pcp = dpep->dpe_pcache; - if (curproc->p_pid != pcp->pc_pid) { - return (EACCES); + + if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && + curproc->p_pid != pcp->pc_pid) { + if (pcp->pc_pid != -1) + return (EACCES); + + pcp->pc_pid = curproc->p_pid; } + + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + size = sizeof (dvpoll_epollfd_t); + } else { + size = sizeof (pollfd_t); + } + uiosize = uiop->uio_resid; - pollfdnum = uiosize / sizeof (pollfd_t); + pollfdnum = uiosize / size; mutex_enter(&curproc->p_lock); if (pollfdnum > (uint_t)rctl_enforced_value( rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) { @@ -534,6 +648,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) 
* each polled fd to the cached set. */ pollfdp = kmem_alloc(uiosize, KM_SLEEP); + limit = (uintptr_t)pollfdp + (pollfdnum * size); /* * Although /dev/poll uses the write(2) interface to cache fds, it's @@ -555,9 +670,27 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) mutex_enter(&dpep->dpe_lock); dpep->dpe_writerwait++; while (dpep->dpe_refcnt != 0) { + /* + * We need to do a bit of a dance here: we need to drop + * our dpe_lock and grab the pc_lock to broadcast the pc_cv to + * kick any DP_POLL/DP_PPOLL sleepers. + */ + mutex_exit(&dpep->dpe_lock); + mutex_enter(&pcp->pc_lock); + pcp->pc_flag |= PC_WRITEWANTED; + cv_broadcast(&pcp->pc_cv); + mutex_exit(&pcp->pc_lock); + mutex_enter(&dpep->dpe_lock); + + if (dpep->dpe_refcnt == 0) + break; + if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { dpep->dpe_writerwait--; mutex_exit(&dpep->dpe_lock); + mutex_enter(&pcp->pc_lock); + pcp->pc_flag &= ~PC_WRITEWANTED; + mutex_exit(&pcp->pc_lock); kmem_free(pollfdp, uiosize); return (set_errno(EINTR)); } @@ -565,24 +698,107 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) dpep->dpe_writerwait--; dpep->dpe_flag |= DP_WRITER_PRESENT; dpep->dpe_refcnt++; + mutex_exit(&dpep->dpe_lock); mutex_enter(&pcp->pc_lock); + pcp->pc_flag &= ~PC_WRITEWANTED; + if (pcp->pc_bitmap == NULL) { pcache_create(pcp, pollfdnum); } - for (pfdp = pollfdp; pfdp < pollfdp + pollfdnum; pfdp++) { + for (pfdp = pollfdp; (uintptr_t)pfdp < limit; + pfdp = (pollfd_t *)((uintptr_t)pfdp + size)) { fd = pfdp->fd; - if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) + if ((uint_t)fd >= P_FINFO(curproc)->fi_nfiles) { + /* + * epoll semantics demand that we return EBADF if our + * specified fd is invalid. 
+ */ + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + error = EBADF; + break; + } + continue; + } + pdp = pcache_lookup_fd(pcp, fd); if (pfdp->events != POLLREMOVE) { + + fp = NULL; + if (pdp == NULL) { + /* + * If we're in epoll compatibility mode, check + * that the fd is valid before allocating + * anything for it; epoll semantics demand that + * we return EBADF if our specified fd is + * invalid. + */ + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if ((fp = getf(fd)) == NULL) { + error = EBADF; + break; + } + } + pdp = pcache_alloc_fd(0); pdp->pd_fd = fd; pdp->pd_pcache = pcp; pcache_insert_fd(pcp, pdp, pollfdnum); + } else { + /* + * epoll semantics demand that we error out if + * a file descriptor is added twice, which we + * check (imperfectly) by checking if we both + * have the file descriptor cached and the + * file pointer that corresponds to the file + * descriptor matches our cached value. If + * there is a pointer mismatch, the file + * descriptor was closed without being removed. + * The converse is clearly not true, however, + * so to narrow the window by which a spurious + * EEXIST may be returned, we also check if + * this fp has been added to an epoll control + * descriptor in the past; if it hasn't, we + * know that this is due to fp reuse -- it's + * not a true EEXIST case. (By performing this + * additional check, we limit the window of + * spurious EEXIST to situations where a single + * file descriptor is being used across two or + * more epoll control descriptors -- and even + * then, the file descriptor must be closed and + * reused in a relatively tight time span.) + */ + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (pdp->pd_fp != NULL && + (fp = getf(fd)) != NULL && + fp == pdp->pd_fp && + (fp->f_flag2 & FEPOLLED)) { + error = EEXIST; + releasef(fd); + break; + } + + /* + * We have decided that the cached + * information was stale: it either + * didn't match, or the fp had never + * actually been epoll()'d on before. 
+ * We need to now clear our pd_events + * to assure that we don't mistakenly + * operate on cached event disposition. + */ + pdp->pd_events = 0; + } } + + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + epfdp = (dvpoll_epollfd_t *)pfdp; + pdp->pd_epolldata = epfdp->dpep_data; + } + ASSERT(pdp->pd_fd == fd); ASSERT(pdp->pd_pcache == pcp); if (fd >= pcp->pc_mapsize) { @@ -593,7 +809,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) if (fd > pcp->pc_mapend) { pcp->pc_mapend = fd; } - if ((fp = getf(fd)) == NULL) { + if (fp == NULL && (fp = getf(fd)) == NULL) { /* * The fd is not valid. Since we can't pass * this error back in the write() call, set @@ -604,12 +820,21 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) pdp->pd_events |= pfdp->events; continue; } + + /* + * To (greatly) reduce EEXIST false positives, we + * denote that this fp has been epoll()'d. We do this + * regardless of epoll compatibility mode, as the flag + * is harmless if not in epoll compatibility mode. + */ + fp->f_flag2 |= FEPOLLED; + /* * Don't do VOP_POLL for an already cached fd with * same poll events. */ if ((pdp->pd_events == pfdp->events) && - (pdp->pd_fp != NULL)) { + (pdp->pd_fp == fp)) { /* * the events are already cached */ @@ -665,7 +890,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) } releasef(fd); } else { - if (pdp == NULL) { + if (pdp == NULL || pdp->pd_fp == NULL) { + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + /* + * As with the add case (above), epoll + * semantics demand that we error out + * in this case. 
+ */ + error = ENOENT; + break; + } + continue; } ASSERT(pdp->pd_fd == fd); @@ -690,6 +925,17 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) return (error); } +#define DP_SIGMASK_RESTORE(ksetp) { \ + if (ksetp != NULL) { \ + mutex_enter(&p->p_lock); \ + if (lwp->lwp_cursig == 0) { \ + t->t_hold = lwp->lwp_sigoldmask; \ + t->t_flag &= ~T_TOMASK; \ + } \ + mutex_exit(&p->p_lock); \ + } \ +} + /*ARGSUSED*/ static int dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) @@ -701,7 +947,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) int error = 0; STRUCT_DECL(dvpoll, dvpoll); - if (cmd == DP_POLL) { + if (cmd == DP_POLL || cmd == DP_PPOLL) { /* do this now, before we sleep on DP_WRITER_PRESENT */ now = gethrtime(); } @@ -713,10 +959,39 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) mutex_exit(&devpoll_lock); ASSERT(dpep != NULL); pcp = dpep->dpe_pcache; - if (curproc->p_pid != pcp->pc_pid) - return (EACCES); mutex_enter(&dpep->dpe_lock); + + if (cmd == DP_EPOLLCOMPAT) { + if (dpep->dpe_refcnt != 0) { + /* + * We can't turn on epoll compatibility while there + * are outstanding operations. + */ + mutex_exit(&dpep->dpe_lock); + return (EBUSY); + } + + /* + * epoll compatibility is a one-way street: there's no way + * to turn it off for a particular open. 
+ */ + dpep->dpe_flag |= DP_ISEPOLLCOMPAT; + mutex_exit(&dpep->dpe_lock); + + return (0); + } + + if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && + curproc->p_pid != pcp->pc_pid) { + if (pcp->pc_pid != -1) { + mutex_exit(&dpep->dpe_lock); + return (EACCES); + } + + pcp->pc_pid = curproc->p_pid; + } + while ((dpep->dpe_flag & DP_WRITER_PRESENT) || (dpep->dpe_writerwait != 0)) { if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { @@ -729,15 +1004,43 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) switch (cmd) { case DP_POLL: + case DP_PPOLL: { pollstate_t *ps; nfds_t nfds; int fdcnt = 0; + size_t size, fdsize, dpsize; hrtime_t deadline = 0; + k_sigset_t *ksetp = NULL; + k_sigset_t kset; + sigset_t set; + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + struct proc *p = ttoproc(curthread); STRUCT_INIT(dvpoll, mode); - error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), - STRUCT_SIZE(dvpoll)); + + /* + * The dp_setp member is only required/consumed for DP_PPOLL, + * which otherwise uses the same structure as DP_POLL. 
+ */ + if (cmd == DP_POLL) { + dpsize = (uintptr_t)STRUCT_FADDR(dvpoll, dp_setp) - + (uintptr_t)STRUCT_FADDR(dvpoll, dp_fds); + } else { + ASSERT(cmd == DP_PPOLL); + dpsize = STRUCT_SIZE(dvpoll); + } + + if ((mode & FKIOCTL) != 0) { + /* Kernel-internal ioctl call */ + bcopy((caddr_t)arg, STRUCT_BUF(dvpoll), dpsize); + error = 0; + } else { + error = copyin((caddr_t)arg, STRUCT_BUF(dvpoll), + dpsize); + } + if (error) { DP_REFRELE(dpep); return (EFAULT); @@ -755,6 +1058,52 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) deadline += now; } + if (cmd == DP_PPOLL) { + void *setp = STRUCT_FGETP(dvpoll, dp_setp); + + if (setp != NULL) { + if (copyin(setp, &set, sizeof (set))) { + DP_REFRELE(dpep); + return (EFAULT); + } + + sigutok(&set, &kset); + ksetp = &kset; + + mutex_enter(&p->p_lock); + schedctl_finish_sigblock(t); + lwp->lwp_sigoldmask = t->t_hold; + t->t_hold = *ksetp; + t->t_flag |= T_TOMASK; + + /* + * Like ppoll() with a non-NULL sigset, we'll + * call cv_reltimedwait_sig() just to check for + * signals. This call will return immediately + * with either 0 (signalled) or -1 (no signal). + * There are some conditions whereby we can + * get 0 from cv_reltimedwait_sig() without + * a true signal (e.g., a directed stop), so + * we restore our signal mask in the unlikely + * event that lwp_cursig is 0. + */ + if (!cv_reltimedwait_sig(&t->t_delay_cv, + &p->p_lock, 0, TR_CLOCK_TICK)) { + if (lwp->lwp_cursig == 0) { + t->t_hold = lwp->lwp_sigoldmask; + t->t_flag &= ~T_TOMASK; + } + + mutex_exit(&p->p_lock); + + DP_REFRELE(dpep); + return (EINTR); + } + + mutex_exit(&p->p_lock); + } + } + if ((nfds = STRUCT_FGET(dvpoll, dp_nfds)) == 0) { /* * We are just using DP_POLL to sleep, so @@ -762,17 +1111,29 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) * Do not check for signals if we have a zero timeout. 
*/ DP_REFRELE(dpep); - if (deadline == 0) + if (deadline == 0) { + DP_SIGMASK_RESTORE(ksetp); return (0); + } + mutex_enter(&curthread->t_delay_lock); while ((error = cv_timedwait_sig_hrtime(&curthread->t_delay_cv, &curthread->t_delay_lock, deadline)) > 0) continue; mutex_exit(&curthread->t_delay_lock); + + DP_SIGMASK_RESTORE(ksetp); + return (error == 0 ? EINTR : 0); } + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + size = nfds * (fdsize = sizeof (epoll_event_t)); + } else { + size = nfds * (fdsize = sizeof (pollfd_t)); + } + /* * XXX It would be nice not to have to alloc each time, but it * requires another per thread structure hook. This can be @@ -782,37 +1143,45 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) curthread->t_pollstate = pollstate_create(); ps = curthread->t_pollstate; } - if (ps->ps_dpbufsize < nfds) { - struct proc *p = ttoproc(curthread); + + if (ps->ps_dpbufsize < size) { /* - * The maximum size should be no large than - * current maximum open file count. + * If nfds is larger than twice the current maximum + * open file count, we'll silently clamp it. This + * only limits our exposure to allocating an + * inordinate amount of kernel memory; it doesn't + * otherwise affect the semantics. (We have this + * check at twice the maximum instead of merely the + * maximum because some applications pass an nfds that + * is only slightly larger than their limit.) 
*/ mutex_enter(&p->p_lock); - if (nfds > p->p_fno_ctl) { - mutex_exit(&p->p_lock); - DP_REFRELE(dpep); - return (EINVAL); + if ((nfds >> 1) > p->p_fno_ctl) { + nfds = p->p_fno_ctl; + size = nfds * fdsize; } mutex_exit(&p->p_lock); - kmem_free(ps->ps_dpbuf, sizeof (pollfd_t) * - ps->ps_dpbufsize); - ps->ps_dpbuf = kmem_zalloc(sizeof (pollfd_t) * - nfds, KM_SLEEP); - ps->ps_dpbufsize = nfds; + + if (ps->ps_dpbufsize < size) { + kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize); + ps->ps_dpbuf = kmem_zalloc(size, KM_SLEEP); + ps->ps_dpbufsize = size; + } } mutex_enter(&pcp->pc_lock); for (;;) { - pcp->pc_flag = 0; - error = dp_pcache_poll(ps->ps_dpbuf, pcp, nfds, &fdcnt); + pcp->pc_flag &= ~PC_POLLWAKE; + + error = dp_pcache_poll(dpep, ps->ps_dpbuf, + pcp, nfds, &fdcnt); if (fdcnt > 0 || error != 0) break; /* * A pollwake has happened since we polled cache. */ - if (pcp->pc_flag & T_POLLWAKE) + if (pcp->pc_flag & PC_POLLWAKE) continue; /* @@ -822,8 +1191,40 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) /* immediate timeout; do not check signals */ break; } - error = cv_timedwait_sig_hrtime(&pcp->pc_cv, - &pcp->pc_lock, deadline); + + if (!(pcp->pc_flag & PC_WRITEWANTED)) { + error = cv_timedwait_sig_hrtime(&pcp->pc_cv, + &pcp->pc_lock, deadline); + } else { + error = 1; + } + + if (error > 0 && (pcp->pc_flag & PC_WRITEWANTED)) { + /* + * We've been kicked off of our cv because a + * writer wants in. We're going to drop our + * reference count and then wait until the + * writer is gone -- at which point we'll + * reacquire the pc_lock and call into + * dp_pcache_poll() to get the updated state. 
+ */ + mutex_exit(&pcp->pc_lock); + + mutex_enter(&dpep->dpe_lock); + dpep->dpe_refcnt--; + cv_broadcast(&dpep->dpe_cv); + + while ((dpep->dpe_flag & DP_WRITER_PRESENT) || + (dpep->dpe_writerwait != 0)) { + error = cv_wait_sig_swap(&dpep->dpe_cv, + &dpep->dpe_lock); + } + + dpep->dpe_refcnt++; + mutex_exit(&dpep->dpe_lock); + mutex_enter(&pcp->pc_lock); + } + /* * If we were awakened by a signal or timeout * then break the loop, else poll again. @@ -837,9 +1238,11 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) } mutex_exit(&pcp->pc_lock); + DP_SIGMASK_RESTORE(ksetp); + if (error == 0 && fdcnt > 0) { - if (copyout(ps->ps_dpbuf, STRUCT_FGETP(dvpoll, - dp_fds), sizeof (pollfd_t) * fdcnt)) { + if (copyout(ps->ps_dpbuf, + STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) { DP_REFRELE(dpep); return (EFAULT); } @@ -901,10 +1304,25 @@ static int dppoll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { + minor_t minor; + dp_entry_t *dpep; + + minor = getminor(dev); + + mutex_enter(&devpoll_lock); + dpep = devpolltbl[minor]; + ASSERT(dpep != NULL); + mutex_exit(&devpoll_lock); + /* * Polling on a /dev/poll fd is not fully supported yet. */ - *reventsp = POLLERR; + if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + /* no error in epoll compat. mode */ + *reventsp = 0; + } else { + *reventsp = POLLERR; + } return (0); } diff --git a/usr/src/uts/common/io/ksocket/ksocket.c b/usr/src/uts/common/io/ksocket/ksocket.c index 49ca6f0475..8944fcbff3 100644 --- a/usr/src/uts/common/io/ksocket/ksocket.c +++ b/usr/src/uts/common/io/ksocket/ksocket.c @@ -22,6 +22,7 @@ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
*/ #include @@ -820,7 +821,7 @@ ksocket_spoll(ksocket_t ks, int timo, short events, short *revents, if (error != 0 || *revents != 0) break; - if (pcp->pc_flag & T_POLLWAKE) + if (pcp->pc_flag & PC_POLLWAKE) continue; if (timo == -1) { diff --git a/usr/src/uts/common/io/usb/usba/usba_ugen.c b/usr/src/uts/common/io/usb/usba/usba_ugen.c index cb20c24270..5852e40799 100644 --- a/usr/src/uts/common/io/usb/usba/usba_ugen.c +++ b/usr/src/uts/common/io/usb/usba/usba_ugen.c @@ -22,6 +22,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + /* * UGEN: USB Generic Driver support code * @@ -1082,7 +1086,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events, ((epp->ep_state & UGEN_EP_STATE_INTR_IN_POLLING_ON) == 0)) { *reventsp |= POLLIN; - } else if (!anyyet) { + } + + if ((!*reventsp && !anyyet) || + (events & POLLET)) { *phpp = &epp->ep_pollhead; epp->ep_state |= UGEN_EP_STATE_INTR_IN_POLL_PENDING; @@ -1101,7 +1108,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events, ((epp->ep_state & UGEN_EP_STATE_ISOC_IN_POLLING_ON) == 0)) { *reventsp |= POLLIN; - } else if (!anyyet) { + } + + if ((!*reventsp && !anyyet) || + (events & POLLET)) { *phpp = &epp->ep_pollhead; epp->ep_state |= UGEN_EP_STATE_ISOC_IN_POLL_PENDING; @@ -1115,9 +1125,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events, break; case UGEN_MINOR_DEV_STAT_NODE: - if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) { + if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) *reventsp |= POLLIN; - } else if (!anyyet) { + + if ((!*reventsp && !anyyet) || (events & POLLET)) { *phpp = &ugenp->ug_ds.dev_pollhead; ugenp->ug_ds.dev_stat |= UGEN_DEV_STATUS_POLL_PENDING; @@ -1131,9 +1142,10 @@ usb_ugen_poll(usb_ugen_hdl_t usb_ugen_hdl, dev_t dev, short events, break; } } else { - if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) { + if (ugenp->ug_ds.dev_stat & UGEN_DEV_STATUS_CHANGED) *reventsp |= 
POLLHUP|POLLIN; - } else if (!anyyet) { + + if ((!*reventsp && !anyyet) || (events & POLLET)) { *phpp = &ugenp->ug_ds.dev_pollhead; ugenp->ug_ds.dev_stat |= UGEN_DEV_STATUS_POLL_PENDING; diff --git a/usr/src/uts/common/os/fio.c b/usr/src/uts/common/os/fio.c index 6dc0d00011..98ca32332f 100644 --- a/usr/src/uts/common/os/fio.c +++ b/usr/src/uts/common/os/fio.c @@ -1209,7 +1209,8 @@ f_getfl(int fd, int *flagp) error = EBADF; else { vnode_t *vp = fp->f_vnode; - int flag = fp->f_flag | (fp->f_flag2 << 16); + int flag = fp->f_flag | + ((fp->f_flag2 & ~FEPOLLED) << 16); /* * BSD fcntl() FASYNC compatibility. diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index c6ebe8b110..18a5ded1c6 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -24,6 +24,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #include @@ -8311,7 +8312,7 @@ chkrd: } *reventsp = (short)retevents; - if (retevents) { + if (retevents && !(events & POLLET)) { if (headlocked) mutex_exit(&stp->sd_lock); return (0); diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index ee396632ad..c2bf2f0483 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -20,7 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright 2013, Joyent, Inc. All rights reserved. +# Copyright 2014, Joyent, Inc. All rights reserved. # Copyright 2013 Garrett D'Amore # Copyright 2013 Saso Kiselkov. All rights reserved. # Copyright 2015 Nexenta Systems, Inc. All rights reserved. 
@@ -215,6 +215,7 @@ CHKHDRS= \ emul64cmd.h \ emul64var.h \ epm.h \ + epoll.h \ errno.h \ errorq.h \ errorq_impl.h \ diff --git a/usr/src/uts/common/sys/devpoll.h b/usr/src/uts/common/sys/devpoll.h index 36c815c69f..4e4c76d9b0 100644 --- a/usr/src/uts/common/sys/devpoll.h +++ b/usr/src/uts/common/sys/devpoll.h @@ -24,11 +24,13 @@ * All rights reserved. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_DEVPOLL_H #define _SYS_DEVPOLL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include @@ -39,8 +41,10 @@ extern "C" { /* /dev/poll ioctl */ #define DPIOC (0xD0 << 8) -#define DP_POLL (DPIOC | 1) /* poll on fds in cached in /dev/poll */ +#define DP_POLL (DPIOC | 1) /* poll on fds cached via /dev/poll */ #define DP_ISPOLLED (DPIOC | 2) /* is this fd cached in /dev/poll */ +#define DP_PPOLL (DPIOC | 3) /* ppoll on fds cached via /dev/poll */ +#define DP_EPOLLCOMPAT (DPIOC | 4) /* turn on epoll compatibility */ #define DEVPOLLSIZE 1000 /* /dev/poll table size increment */ @@ -51,14 +55,21 @@ typedef struct dvpoll { pollfd_t *dp_fds; /* pollfd array */ nfds_t dp_nfds; /* num of pollfd's in dp_fds[] */ int dp_timeout; /* time out in milisec */ + sigset_t *dp_setp; /* sigset, if any */ } dvpoll_t; typedef struct dvpoll32 { caddr32_t dp_fds; /* pollfd array */ uint32_t dp_nfds; /* num of pollfd's in dp_fds[] */ int32_t dp_timeout; /* time out in milisec */ + caddr32_t dp_setp; /* sigset, if any */ } dvpoll32_t; +typedef struct dvpoll_epollfd { + pollfd_t dpep_pollfd; /* must be first member */ + uint64_t dpep_data; /* data payload */ +} dvpoll_epollfd_t; + #ifdef _KERNEL typedef struct dp_entry { @@ -71,6 +82,7 @@ typedef struct dp_entry { } dp_entry_t; #define DP_WRITER_PRESENT 0x1 /* a write is in progress */ +#define DP_ISEPOLLCOMPAT 0x2 /* epoll compatibility mode */ #define DP_REFRELE(dpep) { \ mutex_enter(&(dpep)->dpe_lock); \ diff --git a/usr/src/uts/common/sys/epoll.h b/usr/src/uts/common/sys/epoll.h new file mode 100644 index 
0000000000..f2e4b90ab7 --- /dev/null +++ b/usr/src/uts/common/sys/epoll.h @@ -0,0 +1,89 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_EPOLL_H +#define _SYS_EPOLL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union epoll_data { + void *ptr; + int fd; + uint32_t u32; + uint64_t u64; +} epoll_data_t; + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack(4) +#endif + +typedef struct epoll_event { + uint32_t events; /* events */ + epoll_data_t data; /* user-specified data */ +} epoll_event_t; + +#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 +#pragma pack() +#endif + +/* + * Define the EPOLL* constants in terms of their poll(2)/poll(7) equivalents. + * Note that the values match the equivalents in Linux to allow for any binary + * compatibility layers to not need to translate them. 
+ */ +#define EPOLLIN 0x0001 +#define EPOLLPRI 0x0002 +#define EPOLLOUT 0x0004 +#define EPOLLRDNORM 0x0040 +#define EPOLLRDBAND 0x0080 +#define EPOLLWRNORM 0x0100 +#define EPOLLWRBAND 0x0200 +#define EPOLLMSG 0x0400 /* not used */ +#define EPOLLERR 0x0008 +#define EPOLLHUP 0x0010 +#define EPOLLRDHUP 0x2000 + +#define EPOLLWAKEUP (1UL << 29) /* no meaning; silently ignored */ +#define EPOLLONESHOT (1UL << 30) /* translated to POLLONESHOT */ +#define EPOLLET (1UL << 31) /* translated to POLLET */ + +#define EPOLL_CTL_ADD 1 +#define EPOLL_CTL_DEL 2 +#define EPOLL_CTL_MOD 3 + +#define EPOLL_CLOEXEC 02000000 + +#if !defined(_KERNEL) + +extern int epoll_create(int size); +extern int epoll_create1(int flags); +extern int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); +extern int epoll_wait(int epfd, struct epoll_event *events, + int maxevents, int timeout); +extern int epoll_pwait(int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t *sigmask); + +#endif /* !_KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_EPOLL_H */ diff --git a/usr/src/uts/common/sys/file.h b/usr/src/uts/common/sys/file.h index 03acc088c2..1f736d3d01 100644 --- a/usr/src/uts/common/sys/file.h +++ b/usr/src/uts/common/sys/file.h @@ -119,6 +119,11 @@ typedef struct fpollinfo { #ifdef _KERNEL +/* + * This is a flag that is set on f_flag2, but is never user-visible + */ +#define FEPOLLED 0x8000 + /* * Fake flags for driver ioctl calls to inform them of the originating * process' model. See diff --git a/usr/src/uts/common/sys/poll.h b/usr/src/uts/common/sys/poll.h index 9fff78a966..efc8457a6a 100644 --- a/usr/src/uts/common/sys/poll.h +++ b/usr/src/uts/common/sys/poll.h @@ -30,6 +30,10 @@ * All rights reserved. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
+ */ + #ifndef _SYS_POLL_H #define _SYS_POLL_H @@ -59,6 +63,7 @@ typedef unsigned long nfds_t; #define POLLWRNORM POLLOUT #define POLLRDBAND 0x0080 /* out-of-band data is readable */ #define POLLWRBAND 0x0100 /* out-of-band data is writeable */ +#define POLLRDHUP 0x4000 /* read-side hangup */ #define POLLNORM POLLRDNORM @@ -70,7 +75,13 @@ typedef unsigned long nfds_t; #define POLLHUP 0x0010 /* fd has been hung up on */ #define POLLNVAL 0x0020 /* invalid pollfd entry */ -#define POLLREMOVE 0x0800 /* remove a cached poll fd from /dev/poll */ +/* + * These events will never be specified in revents, but may be specified in + * events to control /dev/poll behavior. + */ +#define POLLREMOVE 0x0800 /* remove cached /dev/poll fd */ +#define POLLONESHOT 0x1000 /* /dev/poll should one-shot this fd */ +#define POLLET 0x2000 /* edge-triggered /dev/poll fd */ #ifdef _KERNEL diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h index ede99d0df2..2e866ec4d4 100644 --- a/usr/src/uts/common/sys/poll_impl.h +++ b/usr/src/uts/common/sys/poll_impl.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
+ */ + #ifndef _SYS_POLL_IMPL_H #define _SYS_POLL_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Caching Poll Subsystem: * @@ -160,6 +162,7 @@ typedef struct polldat { int pd_nsets; /* num of xref sets, used by poll(2) */ xref_t *pd_ref; /* ptr to xref info, 1 for each set */ struct port_kevent *pd_portev; /* associated port event struct */ + uint64_t pd_epolldata; /* epoll data, if any */ } polldat_t; /* @@ -187,7 +190,8 @@ typedef struct pollcache { } pollcache_t; /* pc_flag */ -#define T_POLLWAKE 0x02 /* pollwakeup() occurred */ +#define PC_POLLWAKE 0x02 /* pollwakeup() occurred */ +#define PC_WRITEWANTED 0x04 /* writer wishes to modify the pollcache_t */ #if defined(_KERNEL) /* diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c index 7f37529941..c33156a4fc 100644 --- a/usr/src/uts/common/syscall/poll.c +++ b/usr/src/uts/common/syscall/poll.c @@ -29,6 +29,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ /* @@ -525,13 +526,13 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) } /* - * If T_POLLWAKE is set, a pollwakeup() was performed on + * If PC_POLLWAKE is set, a pollwakeup() was performed on * one of the file descriptors. This can happen only if * one of the VOP_POLL() functions dropped pcp->pc_lock. * The only current cases of this is in procfs (prpoll()) * and STREAMS (strpoll()). */ - if (pcp->pc_flag & T_POLLWAKE) + if (pcp->pc_flag & PC_POLLWAKE) continue; /* @@ -886,9 +887,9 @@ retry: } /* - * This function is called to inform a thread that - * an event being polled for has occurred. - * The pollstate lock on the thread should be held on entry. + * This function is called to inform a thread (or threads) that an event being + * polled on has occurred. The pollstate lock on the thread should be held + * on entry. 
*/ void pollnotify(pollcache_t *pcp, int fd) @@ -896,8 +897,8 @@ pollnotify(pollcache_t *pcp, int fd) ASSERT(fd < pcp->pc_mapsize); ASSERT(MUTEX_HELD(&pcp->pc_lock)); BT_SET(pcp->pc_bitmap, fd); - pcp->pc_flag |= T_POLLWAKE; - cv_signal(&pcp->pc_cv); + pcp->pc_flag |= PC_POLLWAKE; + cv_broadcast(&pcp->pc_cv); } /* @@ -2024,7 +2025,7 @@ retry: */ if ((pdp->pd_php != NULL) && (pollfdp[entry].events == pdp->pd_events) && - ((pcp->pc_flag & T_POLLWAKE) == 0)) { + ((pcp->pc_flag & PC_POLLWAKE) == 0)) { BT_CLEAR(pcp->pc_bitmap, fd); } /* @@ -2251,7 +2252,7 @@ pollstate_destroy(pollstate_t *ps) pcacheset_destroy(ps->ps_pcacheset, ps->ps_nsets); ps->ps_pcacheset = NULL; if (ps->ps_dpbuf != NULL) { - kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize * sizeof (pollfd_t)); + kmem_free(ps->ps_dpbuf, ps->ps_dpbufsize); ps->ps_dpbuf = NULL; } mutex_destroy(&ps->ps_lock); diff --git a/usr/src/uts/intel/poll/Makefile b/usr/src/uts/intel/poll/Makefile index fe16be6421..b4be5deb63 100644 --- a/usr/src/uts/intel/poll/Makefile +++ b/usr/src/uts/intel/poll/Makefile @@ -51,6 +51,16 @@ include $(UTSBASE)/intel/Makefile.intel CERRWARN += -_gcc=-Wno-uninitialized +# +# It's unfortunate that we have to disable this; however, it's lint's fault. We +# have a line which only causes a lint warning on a 64-bit build. If we suppress +# it, then the 32-bit lint build complains about it being unnecessarily +# suppressed. Therefore, the only thing it seems like we can do is disable the +# lint warning completely. +# + +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + # # Define targets # diff --git a/usr/src/uts/sparc/poll/Makefile b/usr/src/uts/sparc/poll/Makefile index f49278c1c5..c8722105ee 100644 --- a/usr/src/uts/sparc/poll/Makefile +++ b/usr/src/uts/sparc/poll/Makefile @@ -56,6 +56,11 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) CFLAGS += $(CCVERBOSE) CERRWARN += -_gcc=-Wno-uninitialized +# +# See uts/intel/poll/Makefile for why this is necessary. 
+# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN + # # Default build targets. # diff --git a/usr/src/uts/sun4v/io/vcc.c b/usr/src/uts/sun4v/io/vcc.c index feeaf03e8f..85f722e467 100644 --- a/usr/src/uts/sun4v/io/vcc.c +++ b/usr/src/uts/sun4v/io/vcc.c @@ -24,6 +24,9 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ #include #include @@ -2456,7 +2459,7 @@ vcc_chpoll(dev_t dev, short events, int anyyet, short *reventsp, *reventsp |= (events & POLLIN); } - if (((*reventsp) == 0) && (!anyyet)) { + if ((((*reventsp) == 0) && (!anyyet)) || (events & POLLET)) { *phpp = &vport->poll; if (events & POLLIN) { mutex_enter(&vport->lock); -- cgit v1.2.3 From f3bb54f387fc03cf651e19bbee54cc88ee51bb29 Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Mon, 5 Oct 2015 17:20:33 -0700 Subject: 6291 nested epoll does not mimic Linux behavior Reviewed by: Bryan Cantrill Approved by: Richard Lowe --- usr/src/uts/common/fs/fifofs/fifovnops.c | 7 +- usr/src/uts/common/fs/proc/prvnops.c | 6 +- usr/src/uts/common/io/devpoll.c | 517 ++++++++++++++++++++++++------- usr/src/uts/common/io/tty_pty.c | 6 +- usr/src/uts/common/os/streamio.c | 30 +- usr/src/uts/common/sys/devpoll.h | 5 +- usr/src/uts/common/sys/poll.h | 6 +- usr/src/uts/common/sys/poll_impl.h | 106 +++++-- usr/src/uts/common/syscall/poll.c | 298 ++++++++++++++++-- 9 files changed, 811 insertions(+), 170 deletions(-) (limited to 'usr/src') diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c index fee2924093..61edfab76c 100644 --- a/usr/src/uts/common/fs/fifofs/fifovnops.c +++ b/usr/src/uts/common/fs/fifofs/fifovnops.c @@ -28,7 +28,7 @@ */ /* - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. 
*/ /* @@ -1775,7 +1775,10 @@ fifo_poll(vnode_t *vp, short events, int anyyet, short *reventsp, fn_dest = fnp->fn_dest; fn_lock = fnp->fn_lock; - polllock(&stp->sd_pollist, &fn_lock->flk_lock); + if (polllock(&stp->sd_pollist, &fn_lock->flk_lock) != 0) { + *reventsp = POLLNVAL; + return (0); + } /* * see if FIFO/pipe open diff --git a/usr/src/uts/common/fs/proc/prvnops.c b/usr/src/uts/common/fs/proc/prvnops.c index e392ce4b14..b60f938c9b 100644 --- a/usr/src/uts/common/fs/proc/prvnops.c +++ b/usr/src/uts/common/fs/proc/prvnops.c @@ -5939,7 +5939,11 @@ prpoll(vnode_t *vp, short events, int anyyet, short *reventsp, return (0); } - lockstate = pollunlock(); /* avoid deadlock with prnotify() */ + /* avoid deadlock with prnotify() */ + if (pollunlock(&lockstate) != 0) { + *reventsp = POLLNVAL; + return (0); + } if ((error = prlock(pnp, ZNO)) != 0) { pollrelock(lockstate); diff --git a/usr/src/uts/common/io/devpoll.c b/usr/src/uts/common/io/devpoll.c index 7b3454f89c..a63e1f1a08 100644 --- a/usr/src/uts/common/io/devpoll.c +++ b/usr/src/uts/common/io/devpoll.c @@ -123,6 +123,12 @@ static struct modlinkage modlinkage = { NULL }; +static void pcachelink_assoc(pollcache_t *, pollcache_t *); +static void pcachelink_mark_stale(pollcache_t *); +static void pcachelink_purge_stale(pollcache_t *); +static void pcachelink_purge_all(pollcache_t *); + + /* * Locking Design * @@ -157,7 +163,6 @@ _init() mutex_init(&devpoll_lock, NULL, MUTEX_DEFAULT, NULL); devpoll_init = 1; if ((error = mod_install(&modlinkage)) != 0) { - mutex_destroy(&devpoll_lock); kmem_free(devpolltbl, sizeof (caddr_t) * dptblsize); devpoll_init = 0; } @@ -255,6 +260,7 @@ dp_pcache_poll(dp_entry_t *dpep, void *dpbuf, epoll_event_t *epoll; int error = 0; short mask = POLLRDHUP | POLLWRBAND; + boolean_t is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; ASSERT(MUTEX_HELD(&pcp->pc_lock)); if (pcp->pc_bitmap == NULL) { @@ -265,7 +271,7 @@ dp_pcache_poll(dp_entry_t *dpep, void *dpbuf, return (error); } - if 
(dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { pfdp = NULL; epoll = (epoll_event_t *)dpbuf; } else { @@ -331,7 +337,7 @@ repoll: * polling a closed fd. Hope this will remind * user to do a POLLREMOVE. */ - if (pfdp != NULL) { + if (!is_epoll && pfdp != NULL) { pfdp[fdcnt].fd = fd; pfdp[fdcnt].revents = POLLNVAL; fdcnt++; @@ -343,18 +349,18 @@ repoll: * perform the implicit removal to remain * closer to the epoll semantics. */ - ASSERT(epoll != NULL); + if (is_epoll) { + pdp->pd_fp = NULL; + pdp->pd_events = 0; - pdp->pd_fp = NULL; - pdp->pd_events = 0; + if (php != NULL) { + pollhead_delete(php, pdp); + pdp->pd_php = NULL; + } - if (php != NULL) { - pollhead_delete(php, pdp); - pdp->pd_php = NULL; + BT_CLEAR(pcp->pc_bitmap, fd); + continue; } - - BT_CLEAR(pcp->pc_bitmap, fd); - continue; } if (fp != pdp->pd_fp) { @@ -394,6 +400,7 @@ repoll: if (error != 0) { break; } + /* * layered devices (e.g. console driver) * may change the vnode and thus the pollhead @@ -416,7 +423,7 @@ repoll: pfdp[fdcnt].fd = fd; pfdp[fdcnt].events = pdp->pd_events; pfdp[fdcnt].revents = revent; - } else { + } else if (epoll != NULL) { epoll_event_t *ep = &epoll[fdcnt]; ASSERT(epoll != NULL); @@ -449,6 +456,35 @@ repoll: (pdp->pd_events & EPOLLWRNORM)) { ep->events |= EPOLLWRNORM; } + } else { + pollstate_t *ps = + curthread->t_pollstate; + /* + * The devpoll handle itself is being + * polled. Notify the caller of any + * readable event(s), leaving as much + * state as possible untouched. + */ + VERIFY(fdcnt == 0); + VERIFY(ps != NULL); + + /* + * If a call to pollunlock() fails + * during VOP_POLL, skip over the fd + * and continue polling. + * + * Otherwise, report that there is an + * event pending. 
+ */ + if ((ps->ps_flags & POLLSTATE_ULFAIL) + != 0) { + ps->ps_flags &= + ~POLLSTATE_ULFAIL; + continue; + } else { + fdcnt++; + break; + } } /* @@ -608,6 +644,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) polldat_t *pdp; int fd; file_t *fp; + boolean_t is_epoll, fds_added = B_FALSE; minor = getminor(dev); @@ -616,22 +653,21 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) dpep = devpolltbl[minor]; ASSERT(dpep != NULL); mutex_exit(&devpoll_lock); + + mutex_enter(&dpep->dpe_lock); pcp = dpep->dpe_pcache; + is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; + size = (is_epoll) ? sizeof (dvpoll_epollfd_t) : sizeof (pollfd_t); + mutex_exit(&dpep->dpe_lock); - if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && - curproc->p_pid != pcp->pc_pid) { - if (pcp->pc_pid != -1) + if (!is_epoll && curproc->p_pid != pcp->pc_pid) { + if (pcp->pc_pid != -1) { return (EACCES); + } pcp->pc_pid = curproc->p_pid; } - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { - size = sizeof (dvpoll_epollfd_t); - } else { - size = sizeof (pollfd_t); - } - uiosize = uiop->uio_resid; pollfdnum = uiosize / size; mutex_enter(&curproc->p_lock); @@ -640,7 +676,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc, RCA_SAFE); mutex_exit(&curproc->p_lock); - return (set_errno(EINVAL)); + return (EINVAL); } mutex_exit(&curproc->p_lock); /* @@ -665,44 +701,44 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) /* * We are about to enter the core portion of dpwrite(). Make sure this * write has exclusive access in this portion of the code, i.e., no - * other writers in this code and no other readers in dpioctl. + * other writers in this code. + * + * Waiting for all readers to drop their references to the dpe is + * unnecessary since the pollcache itself is protected by pc_lock. 
*/ mutex_enter(&dpep->dpe_lock); dpep->dpe_writerwait++; - while (dpep->dpe_refcnt != 0) { - /* - * We need to do a bit of a dance here: we need to drop - * our dpe_lock and grab the pc_lock to broadcast the pc_cv to - * kick any DP_POLL/DP_PPOLL sleepers. - */ - mutex_exit(&dpep->dpe_lock); - mutex_enter(&pcp->pc_lock); - pcp->pc_flag |= PC_WRITEWANTED; - cv_broadcast(&pcp->pc_cv); - mutex_exit(&pcp->pc_lock); - mutex_enter(&dpep->dpe_lock); - - if (dpep->dpe_refcnt == 0) - break; + while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0) { + ASSERT(dpep->dpe_refcnt != 0); if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { dpep->dpe_writerwait--; mutex_exit(&dpep->dpe_lock); - mutex_enter(&pcp->pc_lock); - pcp->pc_flag &= ~PC_WRITEWANTED; - mutex_exit(&pcp->pc_lock); kmem_free(pollfdp, uiosize); - return (set_errno(EINTR)); + return (EINTR); } } dpep->dpe_writerwait--; dpep->dpe_flag |= DP_WRITER_PRESENT; dpep->dpe_refcnt++; + if (!is_epoll && (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0) { + /* + * The epoll compat mode was enabled while we were waiting to + * establish write access. It is not safe to continue since + * state was prepared for non-epoll operation. + */ + error = EBUSY; + goto bypass; + } mutex_exit(&dpep->dpe_lock); - mutex_enter(&pcp->pc_lock); - pcp->pc_flag &= ~PC_WRITEWANTED; + /* + * Since the dpwrite() may recursively walk an added /dev/poll handle, + * pollstate_enter() deadlock and loop detection must be used. + */ + (void) pollstate_create(); + VERIFY(pollstate_enter(pcp) == PSE_SUCCESS); if (pcp->pc_bitmap == NULL) { pcache_create(pcp, pollfdnum); @@ -715,7 +751,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) * epoll semantics demand that we return EBADF if our * specified fd is invalid. */ - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { error = EBADF; break; } @@ -736,7 +772,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) * we return EBADF if our specified fd is * invalid. 
*/ - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { if ((fp = getf(fd)) == NULL) { error = EBADF; break; @@ -771,7 +807,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) * then, the file descriptor must be closed and * reused in a relatively tight time span.) */ - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { if (pdp->pd_fp != NULL && (fp = getf(fd)) != NULL && fp == pdp->pd_fp && @@ -794,7 +830,7 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) } } - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { epfdp = (dvpoll_epollfd_t *)pfdp; pdp->pd_epolldata = epfdp->dpep_data; } @@ -886,12 +922,12 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) pdp->pd_php = php; } } - } + fds_added = B_TRUE; releasef(fd); } else { if (pdp == NULL || pdp->pd_fp == NULL) { - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { /* * As with the add case (above), epoll * semantics demand that we error out @@ -914,10 +950,19 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp) BT_CLEAR(pcp->pc_bitmap, fd); } } - mutex_exit(&pcp->pc_lock); + /* + * Any fds added to a recursive-capable pollcache could themselves be + * /dev/poll handles. To ensure that proper event propagation occurs, + * parent pollcaches are woken so that they can create any needed + * pollcache links.
+ */ + if (fds_added) { + pcache_wake_parents(pcp); + } + pollstate_exit(pcp); mutex_enter(&dpep->dpe_lock); +bypass: dpep->dpe_flag &= ~DP_WRITER_PRESENT; - ASSERT(dpep->dpe_refcnt == 1); dpep->dpe_refcnt--; cv_broadcast(&dpep->dpe_cv); mutex_exit(&dpep->dpe_lock); @@ -945,6 +990,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) pollcache_t *pcp; hrtime_t now; int error = 0; + boolean_t is_epoll; STRUCT_DECL(dvpoll, dvpoll); if (cmd == DP_POLL || cmd == DP_PPOLL) { @@ -961,6 +1007,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) pcp = dpep->dpe_pcache; mutex_enter(&dpep->dpe_lock); + is_epoll = (dpep->dpe_flag & DP_ISEPOLLCOMPAT) != 0; if (cmd == DP_EPOLLCOMPAT) { if (dpep->dpe_refcnt != 0) { @@ -982,8 +1029,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) return (0); } - if (!(dpep->dpe_flag & DP_ISEPOLLCOMPAT) && - curproc->p_pid != pcp->pc_pid) { + if (!is_epoll && curproc->p_pid != pcp->pc_pid) { if (pcp->pc_pid != -1) { mutex_exit(&dpep->dpe_lock); return (EACCES); @@ -992,7 +1038,8 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) pcp->pc_pid = curproc->p_pid; } - while ((dpep->dpe_flag & DP_WRITER_PRESENT) || + /* Wait until all writers have cleared the handle before continuing */ + while ((dpep->dpe_flag & DP_WRITER_PRESENT) != 0 || (dpep->dpe_writerwait != 0)) { if (!cv_wait_sig_swap(&dpep->dpe_cv, &dpep->dpe_lock)) { mutex_exit(&dpep->dpe_lock); @@ -1128,7 +1175,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) return (error == 0 ? EINTR : 0); } - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { + if (is_epoll) { size = nfds * (fdsize = sizeof (epoll_event_t)); } else { size = nfds * (fdsize = sizeof (pollfd_t)); @@ -1139,10 +1186,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) * requires another per thread structure hook. 
This can be * implemented later if data suggests that it's necessary. */ - if ((ps = curthread->t_pollstate) == NULL) { - curthread->t_pollstate = pollstate_create(); - ps = curthread->t_pollstate; - } + ps = pollstate_create(); if (ps->ps_dpbufsize < size) { /* @@ -1169,15 +1213,25 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) } } - mutex_enter(&pcp->pc_lock); + VERIFY(pollstate_enter(pcp) == PSE_SUCCESS); for (;;) { pcp->pc_flag &= ~PC_POLLWAKE; + /* + * Mark all child pcachelinks as stale. + * Those which are still part of the tree will be + * marked as valid during the poll. + */ + pcachelink_mark_stale(pcp); + error = dp_pcache_poll(dpep, ps->ps_dpbuf, pcp, nfds, &fdcnt); if (fdcnt > 0 || error != 0) break; + /* Purge still-stale child pcachelinks */ + pcachelink_purge_stale(pcp); + /* * A pollwake has happened since we polled cache. */ @@ -1192,42 +1246,12 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) break; } - if (!(pcp->pc_flag & PC_WRITEWANTED)) { - error = cv_timedwait_sig_hrtime(&pcp->pc_cv, - &pcp->pc_lock, deadline); - } else { - error = 1; - } - - if (error > 0 && (pcp->pc_flag & PC_WRITEWANTED)) { - /* - * We've been kicked off of our cv because a - * writer wants in. We're going to drop our - * reference count and then wait until the - * writer is gone -- at which point we'll - * reacquire the pc_lock and call into - * dp_pcache_poll() to get the updated state. - */ - mutex_exit(&pcp->pc_lock); - - mutex_enter(&dpep->dpe_lock); - dpep->dpe_refcnt--; - cv_broadcast(&dpep->dpe_cv); - - while ((dpep->dpe_flag & DP_WRITER_PRESENT) || - (dpep->dpe_writerwait != 0)) { - error = cv_wait_sig_swap(&dpep->dpe_cv, - &dpep->dpe_lock); - } - - dpep->dpe_refcnt++; - mutex_exit(&dpep->dpe_lock); - mutex_enter(&pcp->pc_lock); - } + error = cv_timedwait_sig_hrtime(&pcp->pc_cv, + &pcp->pc_lock, deadline); /* - * If we were awakened by a signal or timeout - * then break the loop, else poll again. 
+ * If we were awakened by a signal or timeout then + * break the loop, else poll again. */ if (error <= 0) { error = (error == 0) ? EINTR : 0; @@ -1236,7 +1260,7 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) error = 0; } } - mutex_exit(&pcp->pc_lock); + pollstate_exit(pcp); DP_SIGMASK_RESTORE(ksetp); @@ -1299,6 +1323,66 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) return (error); } +/* + * Overview of Recursive Polling + * + * It is possible for /dev/poll to poll for events on file descriptors which + * themselves are /dev/poll handles. Pending events in the child handle are + * represented as readable data via the POLLIN flag. To limit surface area, + * this recursion is presently allowed on only /dev/poll handles which have + * been placed in epoll mode via the DP_EPOLLCOMPAT ioctl. Recursion depth is + * limited to 5 in order to be consistent with Linux epoll. + * + * Extending dppoll() for VOP_POLL: + * + * The recursive /dev/poll implementation begins by extending dppoll() to + * report when resources contained in the pollcache have relevant event state. + * At the highest level, it means calling dp_pcache_poll() so it indicates if + * fd events are present without consuming them or altering the pollcache + * bitmap. This ensures that a subsequent DP_POLL operation on the bitmap will + * yield the initiating event. Additionally, the VOP_POLL should return in + * such a way that dp_pcache_poll() does not clear the parent bitmap entry + * which corresponds to the child /dev/poll fd. This means that child + * pollcaches will be checked during every poll which facilitates wake-up + * behavior detailed below. + * + * Pollcache Links and Wake Events: + * + * Recursive /dev/poll avoids complicated pollcache locking constraints during + * pollwakeup events by eschewing the traditional pollhead mechanism in favor + * of a different approach. 
For each pollcache at the root of a recursive + * /dev/poll "tree", pcachelink_t structures are established to all child + * /dev/poll pollcaches. During pollnotify() in a child pollcache, the + * linked list of pcachelink_t entries is walked, where those marked as valid + * incur a cv_broadcast to their parent pollcache. Most notably, these + * pcachelink_t cv wakeups are performed without acquiring pc_lock on the + * parent pollcache (which would require careful deadlock avoidance). This + * still allows the woken poll on the parent to discover the pertinent events + * due to the fact that bitmap entires for the child pollcache are always + * maintained by the dppoll() logic above. + * + * Depth Limiting and Loop Prevention: + * + * As each pollcache is encountered (either via DP_POLL or dppoll()), depth and + * loop constraints are enforced via pollstate_enter(). The pollcache_t + * pointer is compared against any existing entries in ps_pc_stack and is added + * to the end if no match (and therefore loop) is found. Once poll operations + * for a given pollcache_t are complete, pollstate_exit() clears the pointer + * from the list. The pollstate_enter() and pollstate_exit() functions are + * responsible for acquiring and releasing pc_lock, respectively. + * + * Deadlock Safety: + * + * Descending through a tree of recursive /dev/poll handles involves the tricky + * business of sequentially entering multiple pollcache locks. This tree + * topology cannot define a lock acquisition order in such a way that it is + * immune to deadlocks between threads. The pollstate_enter() and + * pollstate_exit() functions provide an interface for recursive /dev/poll + * operations to safely lock pollcaches while failing gracefully in the face of + * deadlocking topologies. (See pollstate_contend() for more detail about how + * deadlocks are detected and resolved.) 
+ */ + /*ARGSUSED*/ static int dppoll(dev_t dev, short events, int anyyet, short *reventsp, @@ -1306,24 +1390,63 @@ dppoll(dev_t dev, short events, int anyyet, short *reventsp, { minor_t minor; dp_entry_t *dpep; + pollcache_t *pcp; + int res, rc = 0; minor = getminor(dev); - mutex_enter(&devpoll_lock); + ASSERT(minor < dptblsize); dpep = devpolltbl[minor]; ASSERT(dpep != NULL); mutex_exit(&devpoll_lock); - /* - * Polling on a /dev/poll fd is not fully supported yet. - */ - if (dpep->dpe_flag & DP_ISEPOLLCOMPAT) { - /* no error in epoll compat. mode */ - *reventsp = 0; - } else { + mutex_enter(&dpep->dpe_lock); + if ((dpep->dpe_flag & DP_ISEPOLLCOMPAT) == 0) { + /* Poll recursion is not yet supported for non-epoll handles */ *reventsp = POLLERR; + mutex_exit(&dpep->dpe_lock); + return (0); + } else { + dpep->dpe_refcnt++; + pcp = dpep->dpe_pcache; + mutex_exit(&dpep->dpe_lock); } - return (0); + + res = pollstate_enter(pcp); + if (res == PSE_SUCCESS) { + nfds_t nfds = 1; + int fdcnt = 0; + pollstate_t *ps = curthread->t_pollstate; + + rc = dp_pcache_poll(dpep, NULL, pcp, nfds, &fdcnt); + if (rc == 0) { + *reventsp = (fdcnt > 0) ? POLLIN : 0; + } + pcachelink_assoc(pcp, ps->ps_pc_stack[0]); + pollstate_exit(pcp); + } else { + switch (res) { + case PSE_FAIL_DEPTH: + rc = EINVAL; + break; + case PSE_FAIL_LOOP: + case PSE_FAIL_DEADLOCK: + rc = ELOOP; + break; + default: + /* + * If anything else has gone awry, such as being polled + * from an unexpected context, fall back to the + * recursion-intolerant response. 
+ */ + *reventsp = POLLERR; + rc = 0; + break; + } + } + + DP_REFRELE(dpep); + return (rc); } /* @@ -1376,8 +1499,190 @@ dpclose(dev_t dev, int flag, int otyp, cred_t *credp) while (pcp->pc_busy > 0) cv_wait(&pcp->pc_busy_cv, &pcp->pc_no_exit); mutex_exit(&pcp->pc_no_exit); + + /* Clean up any pollcache links created via recursive /dev/poll */ + if (pcp->pc_parents != NULL || pcp->pc_children != NULL) { + /* + * Because of the locking rules for pcachelink manipulation, + * acquiring pc_lock is required for this step. + */ + mutex_enter(&pcp->pc_lock); + pcachelink_purge_all(pcp); + mutex_exit(&pcp->pc_lock); + } + pcache_destroy(pcp); ASSERT(dpep->dpe_refcnt == 0); kmem_free(dpep, sizeof (dp_entry_t)); return (0); } + +static void +pcachelink_locked_rele(pcachelink_t *pl) +{ + ASSERT(MUTEX_HELD(&pl->pcl_lock)); + VERIFY(pl->pcl_refcnt >= 1); + + pl->pcl_refcnt--; + if (pl->pcl_refcnt == 0) { + VERIFY(pl->pcl_state == PCL_INVALID); + ASSERT(pl->pcl_parent_pc == NULL); + ASSERT(pl->pcl_child_pc == NULL); + ASSERT(pl->pcl_parent_next == NULL); + ASSERT(pl->pcl_child_next == NULL); + + pl->pcl_state = PCL_FREE; + mutex_destroy(&pl->pcl_lock); + kmem_free(pl, sizeof (pcachelink_t)); + } else { + mutex_exit(&pl->pcl_lock); + } +} + +/* + * Associate parent and child pollcaches via a pcachelink_t. If an existing + * link (stale or valid) between the two is found, it will be reused. If a + * suitable link is not found for reuse, a new one will be allocated. + */ +static void +pcachelink_assoc(pollcache_t *child, pollcache_t *parent) +{ + pcachelink_t *pl, **plpn; + + ASSERT(MUTEX_HELD(&child->pc_lock)); + ASSERT(MUTEX_HELD(&parent->pc_lock)); + + /* Search for an existing link we can reuse.
*/ + plpn = &child->pc_parents; + for (pl = child->pc_parents; pl != NULL; pl = *plpn) { + mutex_enter(&pl->pcl_lock); + if (pl->pcl_state == PCL_INVALID) { + /* Clean any invalid links while walking the list */ + *plpn = pl->pcl_parent_next; + pl->pcl_child_pc = NULL; + pl->pcl_parent_next = NULL; + pcachelink_locked_rele(pl); + } else if (pl->pcl_parent_pc == parent) { + /* Successfully found parent link */ + ASSERT(pl->pcl_state == PCL_VALID || + pl->pcl_state == PCL_STALE); + pl->pcl_state = PCL_VALID; + mutex_exit(&pl->pcl_lock); + return; + } else { + plpn = &pl->pcl_parent_next; + mutex_exit(&pl->pcl_lock); + } + } + + /* No existing link to the parent was found. Create a fresh one. */ + pl = kmem_zalloc(sizeof (pcachelink_t), KM_SLEEP); + mutex_init(&pl->pcl_lock, NULL, MUTEX_DEFAULT, NULL); + + pl->pcl_parent_pc = parent; + pl->pcl_child_next = parent->pc_children; + parent->pc_children = pl; + pl->pcl_refcnt++; + + pl->pcl_child_pc = child; + pl->pcl_parent_next = child->pc_parents; + child->pc_parents = pl; + pl->pcl_refcnt++; + + pl->pcl_state = PCL_VALID; +} + +/* + * Mark all child links in a pollcache as stale. Any invalid child links found + * during iteration are purged. + */ +static void +pcachelink_mark_stale(pollcache_t *pcp) +{ + pcachelink_t *pl, **plpn; + + ASSERT(MUTEX_HELD(&pcp->pc_lock)); + + plpn = &pcp->pc_children; + for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { + mutex_enter(&pl->pcl_lock); + if (pl->pcl_state == PCL_INVALID) { + /* + * Remove any invalid links while we are going to the + * trouble of walking the list. + */ + *plpn = pl->pcl_child_next; + pl->pcl_parent_pc = NULL; + pl->pcl_child_next = NULL; + pcachelink_locked_rele(pl); + } else { + pl->pcl_state = PCL_STALE; + plpn = &pl->pcl_child_next; + mutex_exit(&pl->pcl_lock); + } + } +} + +/* + * Purge all stale (or invalid) child links from a pollcache. 
+ */ +static void +pcachelink_purge_stale(pollcache_t *pcp) +{ + pcachelink_t *pl, **plpn; + + ASSERT(MUTEX_HELD(&pcp->pc_lock)); + + plpn = &pcp->pc_children; + for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { + mutex_enter(&pl->pcl_lock); + switch (pl->pcl_state) { + case PCL_STALE: + pl->pcl_state = PCL_INVALID; + /* FALLTHROUGH */ + case PCL_INVALID: + *plpn = pl->pcl_child_next; + pl->pcl_parent_pc = NULL; + pl->pcl_child_next = NULL; + pcachelink_locked_rele(pl); + break; + default: + plpn = &pl->pcl_child_next; + mutex_exit(&pl->pcl_lock); + } + } +} + +/* + * Purge all child and parent links from a pollcache, regardless of status. + */ +static void +pcachelink_purge_all(pollcache_t *pcp) +{ + pcachelink_t *pl, **plpn; + + ASSERT(MUTEX_HELD(&pcp->pc_lock)); + + plpn = &pcp->pc_parents; + for (pl = pcp->pc_parents; pl != NULL; pl = *plpn) { + mutex_enter(&pl->pcl_lock); + pl->pcl_state = PCL_INVALID; + *plpn = pl->pcl_parent_next; + pl->pcl_child_pc = NULL; + pl->pcl_parent_next = NULL; + pcachelink_locked_rele(pl); + } + + plpn = &pcp->pc_children; + for (pl = pcp->pc_children; pl != NULL; pl = *plpn) { + mutex_enter(&pl->pcl_lock); + pl->pcl_state = PCL_INVALID; + *plpn = pl->pcl_child_next; + pl->pcl_parent_pc = NULL; + pl->pcl_child_next = NULL; + pcachelink_locked_rele(pl); + } + + ASSERT(pcp->pc_parents == NULL); + ASSERT(pcp->pc_children == NULL); +} diff --git a/usr/src/uts/common/io/tty_pty.c b/usr/src/uts/common/io/tty_pty.c index 6c829dcd21..a8eea823be 100644 --- a/usr/src/uts/common/io/tty_pty.c +++ b/usr/src/uts/common/io/tty_pty.c @@ -1,6 +1,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015, Joyent, Inc. 
*/ /* @@ -988,7 +989,10 @@ ptcpoll(dev_t dev, #ifdef lint anyyet = anyyet; #endif - polllock(php, &pty->ptc_lock); + if (polllock(php, &pty->ptc_lock) != 0) { + *reventsp = POLLNVAL; + return (0); + } ASSERT(MUTEX_HELD(&pty->ptc_lock)); diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c index 18a5ded1c6..62f94729cf 100644 --- a/usr/src/uts/common/os/streamio.c +++ b/usr/src/uts/common/os/streamio.c @@ -8218,7 +8218,11 @@ strpoll( tq = qp->q_next->q_nfsrv; ASSERT(tq != NULL); - polllock(&stp->sd_pollist, QLOCK(tq)); + if (polllock(&stp->sd_pollist, QLOCK(tq)) != 0) { + releasestr(qp); + *reventsp = POLLNVAL; + return (0); + } if (events & POLLWRNORM) { queue_t *sqp; @@ -8228,7 +8232,12 @@ strpoll( else if ((sqp = stp->sd_struiowrq) != NULL) { /* Check sync stream barrier write q */ mutex_exit(QLOCK(tq)); - polllock(&stp->sd_pollist, QLOCK(sqp)); + if (polllock(&stp->sd_pollist, + QLOCK(sqp)) != 0) { + releasestr(qp); + *reventsp = POLLNVAL; + return (0); + } if (sqp->q_flag & QFULL) /* ensure pollwakeup() is done */ sqp->q_flag |= QWANTWSYNC; @@ -8241,7 +8250,12 @@ strpoll( goto chkrd; } mutex_exit(QLOCK(sqp)); - polllock(&stp->sd_pollist, QLOCK(tq)); + if (polllock(&stp->sd_pollist, + QLOCK(tq)) != 0) { + releasestr(qp); + *reventsp = POLLNVAL; + return (0); + } } else retevents |= POLLOUT; } @@ -8273,7 +8287,10 @@ chkrd: * Note: Need to do polllock() here since ps_lock may be * held. See bug 4191544. 
*/ - polllock(&stp->sd_pollist, &stp->sd_lock); + if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) { + *reventsp = POLLNVAL; + return (0); + } headlocked = 1; mp = qp->q_first; while (mp) { @@ -8326,7 +8343,10 @@ chkrd: if (!anyyet) { *phpp = &stp->sd_pollist; if (headlocked == 0) { - polllock(&stp->sd_pollist, &stp->sd_lock); + if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) { + *reventsp = POLLNVAL; + return (0); + } headlocked = 1; } stp->sd_rput_opt |= SR_POLLIN; diff --git a/usr/src/uts/common/sys/devpoll.h b/usr/src/uts/common/sys/devpoll.h index 4e4c76d9b0..3b6bd159c3 100644 --- a/usr/src/uts/common/sys/devpoll.h +++ b/usr/src/uts/common/sys/devpoll.h @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_DEVPOLL_H @@ -88,9 +88,6 @@ typedef struct dp_entry { mutex_enter(&(dpep)->dpe_lock); \ ASSERT((dpep)->dpe_refcnt > 0); \ (dpep)->dpe_refcnt--; \ - if ((dpep)->dpe_refcnt == 0) { \ - cv_broadcast(&(dpep)->dpe_cv); \ - } \ mutex_exit(&(dpep)->dpe_lock); \ } #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/poll.h b/usr/src/uts/common/sys/poll.h index efc8457a6a..75a588533f 100644 --- a/usr/src/uts/common/sys/poll.h +++ b/usr/src/uts/common/sys/poll.h @@ -31,7 +31,7 @@ */ /* - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_POLL_H @@ -130,8 +130,8 @@ extern void pollwakeup(pollhead_t *, short); /* * Internal routines. 
*/ -extern void polllock(pollhead_t *, kmutex_t *); -extern int pollunlock(void); +extern int polllock(pollhead_t *, kmutex_t *); +extern int pollunlock(int *); extern void pollrelock(int); extern void pollcleanup(void); extern void pollblockexit(struct fpollinfo *); diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h index 2e866ec4d4..67b47f9a1e 100644 --- a/usr/src/uts/common/sys/poll_impl.h +++ b/usr/src/uts/common/sys/poll_impl.h @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_POLL_IMPL_H @@ -36,7 +36,7 @@ * * Each kernel thread (1), if engaged in poll system call, has a reference to * a pollstate_t (2), which contains relevant flags and locks. The pollstate_t - * contains a pointer to a pcache_t (3), which caches the state of previous + * contains a pointer to a pollcache_t (3), which caches the state of previous * calls to poll. A bitmap (4) is stored inside the poll cache, where each * bit represents a file descriptor. The bits are set if the corresponding * device has a polled event pending. Only fds with their bit set will be @@ -45,7 +45,7 @@ * structures keep track of the pollfd_t arrays (6) passed in from userland. * Each polled file descriptor has a corresponding polldat_t which can be * chained onto a device's pollhead, and these are kept in a hash table (7) - * inside the pcache_t. The hash table allows efficient conversion of a + * inside the pollcache_t. The hash table allows efficient conversion of a * given fd to its corresponding polldat_t. * * (1) (2) @@ -76,7 +76,7 @@ * Both poll system call and /dev/poll use the pollcache_t structure * definition and the routines managing the structure. But poll(2) and * /dev/poll have their own copy of the structures. 
The /dev/poll driver - * table (1a) contains an array of pointers, each pointing at a pcache_t + * table (1a) contains an array of pointers, each pointing at a pollcache_t * struct (3). A device minor number is used as an device table index. * */ @@ -86,11 +86,25 @@ #include #include +#include #ifdef __cplusplus extern "C" { #endif +/* + * Typedefs + */ +struct pollcache; +struct pollstate; +struct pcachelink; +struct polldat; + +typedef struct pollcache pollcache_t; +typedef struct pollstate pollstate_t; +typedef struct pcachelink pcachelink_t; +typedef struct polldat polldat_t; + /* * description of pollcacheset structure */ @@ -103,19 +117,41 @@ typedef struct pollcacheset { #define POLLFDSETS 2 +/* + * Maximum depth for recursive poll operations. + */ +#define POLLMAXDEPTH 5 + /* * State information kept by each polling thread */ -typedef struct pollstate { +struct pollstate { pollfd_t *ps_pollfd; /* hold the current poll list */ size_t ps_nfds; /* size of ps_pollfd */ kmutex_t ps_lock; /* mutex for sleep/wakeup */ - struct pollcache *ps_pcache; /* cached poll fd set */ + pollcache_t *ps_pcache; /* cached poll fd set */ pollcacheset_t *ps_pcacheset; /* cached poll lists */ int ps_nsets; /* no.
of cached poll sets */ pollfd_t *ps_dpbuf; /* return pollfd buf used by devpoll */ size_t ps_dpbufsize; /* size of ps_dpbuf */ -} pollstate_t; + int ps_depth; /* epoll recursion depth */ + pollcache_t *ps_pc_stack[POLLMAXDEPTH]; /* epoll recursion state */ + pollcache_t *ps_contend_pc; /* pollcache waited on */ + pollstate_t *ps_contend_nextp; /* next in contender list */ + pollstate_t **ps_contend_pnextp; /* pointer-to-previous-next */ + int ps_flags; /* state flags */ +}; + +/* pollstate flags */ +#define POLLSTATE_STALEMATE 0x1 +#define POLLSTATE_ULFAIL 0x2 + +/* pollstate_enter results */ +#define PSE_SUCCESS 0 +#define PSE_FAIL_DEPTH 1 +#define PSE_FAIL_LOOP 2 +#define PSE_FAIL_DEADLOCK 3 +#define PSE_FAIL_POLLSTATE 4 /* * poll cache size defines @@ -143,27 +179,54 @@ typedef struct xref { #define POLLPOSINVAL (-1L) /* xf_position is invalid */ #define POLLPOSTRANS (-2L) /* xf_position is transient state */ + +typedef enum pclstate { + PCL_INIT = 0, /* just allocated/zeroed, prior */ + PCL_VALID, /* linked with both parent and child pollcaches */ + PCL_STALE, /* still linked but marked stale, pending refresh */ + PCL_INVALID, /* dissociated from one pollcache, awaiting cleanup */ + PCL_FREE /* only meant to indicate use-after-free */ +} pclstate_t; + +/* + * The pcachelink struct creates an association between parent and child + * pollcaches in a recursive /dev/poll operation. Fields are protected by + * pcl_lock although manipulation of pcl_child_next or pcl_parent_next also + * requires holding pc_lock in the respective pcl_parent_pc or pcl_child_pc + * pollcache. 
+ */ +struct pcachelink { + kmutex_t pcl_lock; /* protects contents */ + pclstate_t pcl_state; /* status of link entry */ + int pcl_refcnt; /* ref cnt of linked pcaches */ + pollcache_t *pcl_child_pc; /* child pollcache */ + pollcache_t *pcl_parent_pc; /* parent pollcache */ + pcachelink_t *pcl_child_next; /* next in child list */ + pcachelink_t *pcl_parent_next; /* next in parents list */ +}; + + /* * polldat is an entry for a cached poll fd. A polldat struct can be in * poll cache table as well as on pollhead ph_list, which is used by * pollwakeup to wake up a sleeping poller. There should be one polldat * per polled fd hanging off pollstate struct. */ -typedef struct polldat { +struct polldat { int pd_fd; /* cached poll fd */ int pd_events; /* union of all polled events */ file_t *pd_fp; /* used to detect fd reuse */ pollhead_t *pd_php; /* used to undo poll registration */ kthread_t *pd_thread; /* used for waking up a sleep thrd */ - struct pollcache *pd_pcache; /* a ptr to the pollcache of this fd */ - struct polldat *pd_next; /* next on pollhead's ph_list */ - struct polldat *pd_hashnext; /* next on pollhead's ph_list */ + pollcache_t *pd_pcache; /* a ptr to the pollcache of this fd */ + polldat_t *pd_next; /* next on pollhead's ph_list */ + polldat_t *pd_hashnext; /* next on pollhead's ph_list */ int pd_count; /* total count from all ref'ed sets */ int pd_nsets; /* num of xref sets, used by poll(2) */ xref_t *pd_ref; /* ptr to xref info, 1 for each set */ - struct port_kevent *pd_portev; /* associated port event struct */ + port_kevent_t *pd_portev; /* associated port event struct */ uint64_t pd_epolldata; /* epoll data, if any */ -} polldat_t; +}; /* * One cache for each thread that polls. Points to a bitmap (used by pollwakeup) @@ -172,7 +235,7 @@ typedef struct polldat { * of port_fdcache_t, both structs implement pc_lock with offset 0 (see also * pollrelock()). 
*/ -typedef struct pollcache { +struct pollcache { kmutex_t pc_lock; /* lock to protect pollcache */ ulong_t *pc_bitmap; /* point to poll fd bitmap */ polldat_t **pc_hash; /* points to a hash table of ptrs */ @@ -187,11 +250,12 @@ typedef struct pollcache { kcondvar_t pc_cv; /* cv to wait on if needed */ pid_t pc_pid; /* for check acc rights, devpoll only */ int pc_mapstart; /* where search start, devpoll only */ -} pollcache_t; + pcachelink_t *pc_parents; /* linked list of epoll parents */ + pcachelink_t *pc_children; /* linked list of epoll children */ +}; /* pc_flag */ #define PC_POLLWAKE 0x02 /* pollwakeup() occurred */ -#define PC_WRITEWANTED 0x04 /* writer wishes to modify the pollcache_t */ #if defined(_KERNEL) /* @@ -218,11 +282,15 @@ extern void pollhead_delete(pollhead_t *, polldat_t *); /* * poll state interfaces: * - * pollstate_create creates per-thread pollstate - * pollstate_destroy cleans up per-thread pollstate + * pollstate_create initializes per-thread pollstate + * pollstate_destroy cleans up per-thread pollstate + * pollstate_enter safely lock pollcache for pollstate + * pollstate_exit unlock pollcache from pollstate */ extern pollstate_t *pollstate_create(void); extern void pollstate_destroy(pollstate_t *); +extern int pollstate_enter(pollcache_t *); +extern void pollstate_exit(pollcache_t *); /* * public pcache interfaces: @@ -254,6 +322,7 @@ extern void pcache_destroy(pollcache_t *); * pcache_grow_map grows the pollcache bitmap * pcache_update_xref update cross ref (from polldat back to cacheset) info * pcache_clean_entry cleanup an entry in pcache and more... 
+ * pcache_wake_parents wake linked parent pollcaches */ extern polldat_t *pcache_lookup_fd(pollcache_t *, int); extern polldat_t *pcache_alloc_fd(int); @@ -263,6 +332,7 @@ extern void pcache_grow_hashtbl(pollcache_t *, nfds_t); extern void pcache_grow_map(pollcache_t *, int); extern void pcache_update_xref(pollcache_t *, int, ssize_t, int); extern void pcache_clean_entry(pollstate_t *, int); +extern void pcache_wake_parents(pollcache_t *); /* * pcacheset interfaces: diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c index c33156a4fc..cc125f127a 100644 --- a/usr/src/uts/common/syscall/poll.c +++ b/usr/src/uts/common/syscall/poll.c @@ -29,7 +29,7 @@ /* * Copyright (c) 2012 by Delphix. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ /* @@ -77,11 +77,13 @@ static struct { kstat_named_t pollcachehit; /* list matched 100% w/ cached one */ kstat_named_t pollcachephit; /* list matched < 100% w/ cached one */ kstat_named_t pollcachemiss; /* every list entry is dif from cache */ + kstat_named_t pollunlockfail; /* failed to perform pollunlock */ } pollstats = { { "polllistmiss", KSTAT_DATA_UINT64 }, { "pollcachehit", KSTAT_DATA_UINT64 }, { "pollcachephit", KSTAT_DATA_UINT64 }, - { "pollcachemiss", KSTAT_DATA_UINT64 } + { "pollcachemiss", KSTAT_DATA_UINT64 }, + { "pollunlockfail", KSTAT_DATA_UINT64 } }; kstat_named_t *pollstats_ptr = (kstat_named_t *)&pollstats; @@ -96,6 +98,10 @@ struct pplock { static struct pplock plocks[NPHLOCKS]; /* Hash array of pollhead locks */ +/* Contention lock & list for preventing deadlocks in recursive /dev/poll. 
*/ +static kmutex_t pollstate_contenders_lock; +static pollstate_t *pollstate_contenders = NULL; + #ifdef DEBUG static int pollchecksanity(pollstate_t *, nfds_t); static int pollcheckxref(pollstate_t *, int); @@ -223,19 +229,35 @@ static int plist_chkdupfd(file_t *, polldat_t *, pollstate_t *, pollfd_t *, int, * (which hold poll locks on entry to xx_poll(), then acquire foo) * and pollwakeup() threads (which hold foo, then acquire poll locks). * - * pollunlock(void) releases whatever poll locks the current thread holds, - * returning a cookie for use by pollrelock(); + * pollunlock(*cookie) releases whatever poll locks the current thread holds, + * setting a cookie for use by pollrelock(); * * pollrelock(cookie) reacquires previously dropped poll locks; * * polllock(php, mutex) does the common case: pollunlock(), * acquire the problematic mutex, pollrelock(). + * + * If polllock() or pollunlock() return non-zero, it indicates that a recursive + * /dev/poll is in progress and pollcache locks cannot be dropped. Callers + * must handle this by indicating a POLLNVAL in the revents of the VOP_POLL. */ int -pollunlock(void) +pollunlock(int *lockstate) { + pollstate_t *ps = curthread->t_pollstate; pollcache_t *pcp; - int lockstate = 0; + + ASSERT(lockstate != NULL); + + /* + * There is no way to safely perform a pollunlock() while in the depths + * of a recursive /dev/poll operation. + */ + if (ps != NULL && ps->ps_depth > 1) { + ps->ps_flags |= POLLSTATE_ULFAIL; + pollstats.pollunlockfail.value.ui64++; + return (-1); + } /* * t_pollcache is set by /dev/poll and event ports (port_fd.c). @@ -243,45 +265,56 @@ pollunlock(void) * the t_pollcache should be NULL. 
*/ if (curthread->t_pollcache == NULL) - pcp = curthread->t_pollstate->ps_pcache; + pcp = ps->ps_pcache; else pcp = curthread->t_pollcache; - if (mutex_owned(&pcp->pc_lock)) { - lockstate = 1; + if (!mutex_owned(&pcp->pc_lock)) { + *lockstate = 0; + } else { + *lockstate = 1; mutex_exit(&pcp->pc_lock); } - return (lockstate); + return (0); } void pollrelock(int lockstate) { + pollstate_t *ps = curthread->t_pollstate; pollcache_t *pcp; + /* Skip this whole ordeal if the pollcache was not locked to begin */ + if (lockstate == 0) + return; + /* * t_pollcache is set by /dev/poll and event ports (port_fd.c). * If the pollrelock/pollunlock is called as a result of poll(2), * the t_pollcache should be NULL. */ if (curthread->t_pollcache == NULL) - pcp = curthread->t_pollstate->ps_pcache; + pcp = ps->ps_pcache; else pcp = curthread->t_pollcache; - if (lockstate > 0) - mutex_enter(&pcp->pc_lock); + mutex_enter(&pcp->pc_lock); } /* ARGSUSED */ -void +int polllock(pollhead_t *php, kmutex_t *lp) { - if (!mutex_tryenter(lp)) { - int lockstate = pollunlock(); + if (mutex_tryenter(lp) == 0) { + int state; + + if (pollunlock(&state) != 0) { + return (-1); + } mutex_enter(lp); - pollrelock(lockstate); + pollrelock(state); } + return (0); } static int @@ -370,10 +403,7 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) * Need to allocate memory for pollstate before anything because * the mutex and cv are created in this space */ - if ((ps = t->t_pollstate) == NULL) { - t->t_pollstate = pollstate_create(); - ps = t->t_pollstate; - } + ps = pollstate_create(); if (ps->ps_pcache == NULL) ps->ps_pcache = pcache_alloc(); @@ -899,6 +929,7 @@ pollnotify(pollcache_t *pcp, int fd) BT_SET(pcp->pc_bitmap, fd); pcp->pc_flag |= PC_POLLWAKE; cv_broadcast(&pcp->pc_cv); + pcache_wake_parents(pcp); } /* @@ -2221,20 +2252,47 @@ pcache_clean_entry(pollstate_t *ps, int fd) } } +void +pcache_wake_parents(pollcache_t *pcp) +{ + pcachelink_t *pl, *pln; + + 
ASSERT(MUTEX_HELD(&pcp->pc_lock)); + + for (pl = pcp->pc_parents; pl != NULL; pl = pln) { + mutex_enter(&pl->pcl_lock); + if (pl->pcl_state == PCL_VALID) { + ASSERT(pl->pcl_parent_pc != NULL); + cv_broadcast(&pl->pcl_parent_pc->pc_cv); + } + pln = pl->pcl_parent_next; + mutex_exit(&pl->pcl_lock); + } +} + /* - * This is the first time this thread has ever polled, - * so we have to create its pollstate structure. - * This will persist for the life of the thread, - * until it calls pollcleanup(). + * Initialize thread pollstate structure. + * It will persist for the life of the thread, until it calls pollcleanup(). */ pollstate_t * -pollstate_create(void) +pollstate_create() { - pollstate_t *ps; + pollstate_t *ps = curthread->t_pollstate; - ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP); - ps->ps_nsets = POLLFDSETS; - ps->ps_pcacheset = pcacheset_create(ps->ps_nsets); + if (ps == NULL) { + /* + * This is the first time this thread has ever polled, so we + * have to create its pollstate structure. + */ + ps = kmem_zalloc(sizeof (pollstate_t), KM_SLEEP); + ps->ps_nsets = POLLFDSETS; + ps->ps_pcacheset = pcacheset_create(ps->ps_nsets); + curthread->t_pollstate = ps; + } else { + ASSERT(ps->ps_depth == 0); + ASSERT(ps->ps_flags == 0); + ASSERT(ps->ps_pc_stack[0] == 0); + } return (ps); } @@ -2259,6 +2317,186 @@ pollstate_destroy(pollstate_t *ps) kmem_free(ps, sizeof (pollstate_t)); } +static int +pollstate_contend(pollstate_t *ps, pollcache_t *pcp) +{ + pollstate_t *rem, *next; + pollcache_t *desired_pc; + int result = 0, depth_total; + + mutex_enter(&pollstate_contenders_lock); + /* + * There is a small chance that the pollcache of interest became + * available while we were waiting on the contenders lock. + */ + if (mutex_tryenter(&pcp->pc_lock) != 0) { + goto out; + } + + /* + * Walk the list of contended pollstates, searching for evidence of a + * deadlock condition. 
+ */ + depth_total = ps->ps_depth; + desired_pc = pcp; + for (rem = pollstate_contenders; rem != NULL; rem = next) { + int i, j; + next = rem->ps_contend_nextp; + + /* Is this pollstate holding the pollcache of interest? */ + for (i = 0; i < rem->ps_depth; i++) { + if (rem->ps_pc_stack[i] != desired_pc) { + continue; + } + + /* + * The remote pollstate holds the pollcache lock we + * desire. If it is waiting on a pollcache we hold, + * then we can report the obvious deadlock. + */ + ASSERT(rem->ps_contend_pc != NULL); + for (j = 0; j < ps->ps_depth; j++) { + if (rem->ps_contend_pc == ps->ps_pc_stack[j]) { + rem->ps_flags |= POLLSTATE_STALEMATE; + result = -1; + goto out; + } + } + + /* + * The remote pollstate is not blocking on a pollcache + * which would deadlock against us. That pollcache + * may, however, be held by a pollstate which would + * result in a deadlock. + * + * To detect such a condition, we continue walking + * through the list using the pollcache blocking the + * remote thread as our new search target. + * + * Return to the front of pollstate_contenders since it + * is not ordered to guarantee complete dependency + * traversal. The below depth tracking places an upper + * bound on iterations. + */ + desired_pc = rem->ps_contend_pc; + next = pollstate_contenders; + + /* + * The recursion depth of the remote pollstate is used + * to calculate a final depth for the local /dev/poll + * recursion, since those locks will be acquired + * eventually. If that value exceeds the defined + * limit, we can report the failure now instead of + * recursing to that failure depth. + */ + depth_total += (rem->ps_depth - i); + if (depth_total >= POLLMAXDEPTH) { + result = -1; + goto out; + } + } + } + + /* + * No deadlock partner was found. The only course of action is to + * record ourself as a contended pollstate and wait for the pollcache + * mutex to become available. 
+ */ + ps->ps_contend_pc = pcp; + ps->ps_contend_nextp = pollstate_contenders; + ps->ps_contend_pnextp = &pollstate_contenders; + if (pollstate_contenders != NULL) { + pollstate_contenders->ps_contend_pnextp = + &ps->ps_contend_nextp; + } + pollstate_contenders = ps; + + mutex_exit(&pollstate_contenders_lock); + mutex_enter(&pcp->pc_lock); + mutex_enter(&pollstate_contenders_lock); + + /* + * Our acquisition of the pollcache mutex may be due to another thread + * giving up in the face of deadlock with us. If that is the case, + * we too should report the failure. + */ + if ((ps->ps_flags & POLLSTATE_STALEMATE) != 0) { + result = -1; + ps->ps_flags &= ~POLLSTATE_STALEMATE; + mutex_exit(&pcp->pc_lock); + } + + /* Remove ourself from the contenders list. */ + if (ps->ps_contend_nextp != NULL) { + ps->ps_contend_nextp->ps_contend_pnextp = + ps->ps_contend_pnextp; + } + *ps->ps_contend_pnextp = ps->ps_contend_nextp; + ps->ps_contend_pc = NULL; + ps->ps_contend_nextp = NULL; + ps->ps_contend_pnextp = NULL; + +out: + mutex_exit(&pollstate_contenders_lock); + return (result); +} + +int +pollstate_enter(pollcache_t *pcp) +{ + pollstate_t *ps = curthread->t_pollstate; + int i; + + if (ps == NULL) { + /* + * The thread pollstate may not be initialized if VOP_POLL is + * called on a recursion-enabled /dev/poll handle from outside + * the poll() or /dev/poll codepaths. + */ + return (PSE_FAIL_POLLSTATE); + } + if (ps->ps_depth >= POLLMAXDEPTH) { + return (PSE_FAIL_DEPTH); + } + /* + * Check the desired pollcache against pollcaches we already have + * locked. Such a loop is the most simple deadlock scenario. 
+ */ + for (i = 0; i < ps->ps_depth; i++) { + if (ps->ps_pc_stack[i] == pcp) { + return (PSE_FAIL_LOOP); + } + } + ASSERT(ps->ps_pc_stack[i] == NULL); + + if (ps->ps_depth == 0) { + /* Locking initial the pollcache requires no caution */ + mutex_enter(&pcp->pc_lock); + } else if (mutex_tryenter(&pcp->pc_lock) == 0) { + if (pollstate_contend(ps, pcp) != 0) { + /* This pollcache cannot safely be locked. */ + return (PSE_FAIL_DEADLOCK); + } + } + + ps->ps_pc_stack[ps->ps_depth++] = pcp; + return (PSE_SUCCESS); +} + +void +pollstate_exit(pollcache_t *pcp) +{ + pollstate_t *ps = curthread->t_pollstate; + + VERIFY(ps != NULL); + VERIFY(ps->ps_pc_stack[ps->ps_depth - 1] == pcp); + + mutex_exit(&pcp->pc_lock); + ps->ps_pc_stack[--ps->ps_depth] = NULL; + VERIFY(ps->ps_depth >= 0); +} + + /* * We are holding the appropriate uf_lock entering this routine. * Bump up the ps_busy count to prevent the thread from exiting. -- cgit v1.2.3 From 6a72db4a7fa12c3e0d1c1cf91a07390739fa0fbf Mon Sep 17 00:00:00 2001 From: Bryan Cantrill Date: Fri, 4 Sep 2015 08:32:01 -0700 Subject: 6208 add support for timerfd Reviewed by: Gordon Ross Approved by: Dan McDonald --- usr/src/cmd/devfsadm/misc_link.c | 5 +- usr/src/lib/libc/amd64/Makefile | 1 + usr/src/lib/libc/i386/Makefile.com | 1 + usr/src/lib/libc/port/mapfile-vers | 7 + usr/src/lib/libc/port/sys/timerfd.c | 93 ++++ usr/src/lib/libc/sparc/Makefile.com | 1 + usr/src/lib/libc/sparcv9/Makefile.com | 1 + usr/src/man/man3c/Makefile | 6 + usr/src/man/man3c/timerfd_create.3c | 201 +++++++++ usr/src/man/man5/Makefile | 2 + usr/src/man/man5/timerfd.5 | 47 ++ usr/src/pkg/manifests/SUNWcs.mf | 4 + usr/src/pkg/manifests/system-header.mf | 1 + usr/src/pkg/manifests/system-library.man3c.inc | 3 + usr/src/pkg/manifests/system-library.man5.inc | 1 + usr/src/uts/common/Makefile.files | 2 + usr/src/uts/common/io/timerfd.c | 586 +++++++++++++++++++++++++ usr/src/uts/common/io/timerfd.conf | 16 + usr/src/uts/common/os/clock_highres.c | 7 +- 
usr/src/uts/common/os/clock_realtime.c | 16 +- usr/src/uts/common/os/timer.c | 13 +- usr/src/uts/common/sys/Makefile | 1 + usr/src/uts/common/sys/timer.h | 31 +- usr/src/uts/common/sys/timerfd.h | 81 ++++ usr/src/uts/intel/Makefile.intel | 2 + usr/src/uts/intel/timerfd/Makefile | 68 +++ usr/src/uts/sparc/Makefile.sparc | 1 + usr/src/uts/sparc/timerfd/Makefile | 68 +++ 28 files changed, 1246 insertions(+), 20 deletions(-) create mode 100644 usr/src/lib/libc/port/sys/timerfd.c create mode 100644 usr/src/man/man3c/timerfd_create.3c create mode 100644 usr/src/man/man5/timerfd.5 create mode 100644 usr/src/uts/common/io/timerfd.c create mode 100644 usr/src/uts/common/io/timerfd.conf create mode 100644 usr/src/uts/common/sys/timerfd.h create mode 100644 usr/src/uts/intel/timerfd/Makefile create mode 100644 usr/src/uts/sparc/timerfd/Makefile (limited to 'usr/src') diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index abb133bc6d..bf59fb5e6b 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
*/ #include @@ -126,6 +126,9 @@ static devfsadm_create_t misc_cbt[] = { "(^kdmouse$)|(^rootprop$)", TYPE_EXACT | DRV_RE, ILEVEL_0, node_name }, + { "pseudo", "ddi_pseudo", "timerfd", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name + }, { "pseudo", "ddi_pseudo", "tod", TYPE_EXACT | DRV_EXACT, ILEVEL_0, node_name }, diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index b5e54b19fa..dbda6c0c31 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -909,6 +909,7 @@ PORTSYS= \ tasksys.o \ time.o \ time_util.o \ + timerfd.o \ ucontext.o \ unlink.o \ ustat.o \ diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index d7e77502f2..25ba0a2743 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -949,6 +949,7 @@ PORTSYS= \ tasksys.o \ time.o \ time_util.o \ + timerfd.o \ ucontext.o \ unlink.o \ ustat.o \ diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 017c7c31bc..a0e21e250f 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -93,6 +93,13 @@ $if _x86 && _ELF64 $add amd64 $endif +SYMBOL_VERSION ILLUMOS_0.16 { # timerfd + protected: + timerfd_create; + timerfd_gettime; + timerfd_settime; +} ILLUMOS_0.15; + SYMBOL_VERSION ILLUMOS_0.15 { # epoll(3C) protected: epoll_create; diff --git a/usr/src/lib/libc/port/sys/timerfd.c b/usr/src/lib/libc/port/sys/timerfd.c new file mode 100644 index 0000000000..cb2e17adf7 --- /dev/null +++ b/usr/src/lib/libc/port/sys/timerfd.c @@ -0,0 +1,93 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include + +int +timerfd_create(int clockid, int flags) +{ + int oflags = O_RDWR; + int fd; + + if (flags & ~(TFD_NONBLOCK | TFD_CLOEXEC)) { + errno = EINVAL; + return (-1); + } + + if (flags & TFD_NONBLOCK) + oflags |= O_NONBLOCK; + + if (flags & TFD_CLOEXEC) + oflags |= O_CLOEXEC; + + if ((fd = open("/dev/timerfd", oflags)) < 0) + return (-1); + + if (ioctl(fd, TIMERFDIOC_CREATE, clockid) != 0) { + (void) close(fd); + return (-1); + } + + return (fd); +} + +int +timerfd_settime(int fd, int flags, const struct itimerspec *new_value, + struct itimerspec *old_value) +{ + timerfd_settime_t st; + int rval; + + if (flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) { + errno = EINVAL; + return (-1); + } + + st.tfd_settime_flags = flags; + st.tfd_settime_value = (uint64_t)(uintptr_t)new_value; + st.tfd_settime_ovalue = (uint64_t)(uintptr_t)old_value; + + rval = ioctl(fd, TIMERFDIOC_SETTIME, &st); + + if (rval == -1 && errno == ENOTTY) { + /* + * Linux has us return EINVAL when the file descriptor is valid + * but is not a timerfd file descriptor -- and LTP explicitly + * checks this case. + */ + errno = EINVAL; + } + + return (rval); +} + +int +timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + int rval = ioctl(fd, TIMERFDIOC_GETTIME, curr_value); + + if (rval == -1 && errno == ENOTTY) { + /* + * See comment in timerfd_settime(), above. 
+ */ + errno = EINVAL; + } + + return (rval); +} diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com index dc965fe6ac..94036d831e 100644 --- a/usr/src/lib/libc/sparc/Makefile.com +++ b/usr/src/lib/libc/sparc/Makefile.com @@ -983,6 +983,7 @@ PORTSYS= \ tasksys.o \ time.o \ time_util.o \ + timerfd.o \ ucontext.o \ unlink.o \ ustat.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com index 415aaf2be2..2156bad20b 100644 --- a/usr/src/lib/libc/sparcv9/Makefile.com +++ b/usr/src/lib/libc/sparcv9/Makefile.com @@ -927,6 +927,7 @@ PORTSYS= \ tasksys.o \ time.o \ time_util.o \ + timerfd.o \ ucontext.o \ unlink.o \ ustat.o \ diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile index f6cadebe95..c38d65a57a 100644 --- a/usr/src/man/man3c/Makefile +++ b/usr/src/man/man3c/Makefile @@ -473,6 +473,7 @@ MANFILES= __fbufsize.3c \ timer_delete.3c \ timer_settime.3c \ timeradd.3c \ + timerfd_create.3c \ tmpfile.3c \ tmpnam.3c \ toascii.3c \ @@ -1211,6 +1212,8 @@ MANLINKS= FD_CLR.3c \ timer_gettime.3c \ timerclear.3c \ timercmp.3c \ + timerfd_gettime.3c \ + timerfd_settime.3c \ timerisset.3c \ timersub.3c \ tmpnam_r.3c \ @@ -2250,6 +2253,9 @@ timercmp.3c := LINKSRC = timeradd.3c timerisset.3c := LINKSRC = timeradd.3c timersub.3c := LINKSRC = timeradd.3c +timerfd_gettime.3c := LINKSRC = timerfd_create.3c +timerfd_settime.3c := LINKSRC = timerfd_create.3c + tempnam.3c := LINKSRC = tmpnam.3c tmpnam_r.3c := LINKSRC = tmpnam.3c diff --git a/usr/src/man/man3c/timerfd_create.3c b/usr/src/man/man3c/timerfd_create.3c new file mode 100644 index 0000000000..84df47e245 --- /dev/null +++ b/usr/src/man/man3c/timerfd_create.3c @@ -0,0 +1,201 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. 
+.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright (c) 2015, Joyent, Inc. All Rights Reserved. +.\" +.Dd Feb 23, 2015 +.Dt TIMERFD 3C +.Os +.Sh NAME +.Nm timerfd_create , +.Nm timerfd_settime , +.Nm timerfd_gettime +.Nd create and manipulate timers via a file descriptor interface +.Sh SYNOPSIS +.In sys/timerfd.h +.Ft int +.Fo timerfd_create +.Fa "int clockid" +.Fa "int flags" +.Fc +.Ft int +.Fo timerfd_settime +.Fa "int fd" +.Fa "int flags" +.Fa "const struct itimerspec *restrict value" +.Fa "struct itimerspec *restrict ovalue" +.Fc +.Ft int +.Fo timerfd_gettime +.Fa "int fd" +.Fa "struct itimerspec *value" +.Fc +.Sh DESCRIPTION +These routines create and manipulate timers in which events are associated +with a file descriptor, allowing for timer-based events to be used +in file-descriptor based facilities like +.Xr poll 2 , +.Xr port_get 3C +or +.Xr epoll_wait 3C . +The +.Fn timerfd_create +function creates a timer with the clock +type specified by +.Fa clockid . +The +.Sy CLOCK_REALTIME +and +.Sy CLOCK_HIGHRES +clock types, as defined in +.Xr timer_create 3C , +are supported by +.Fn timerfd_create . +(Note that +.Sy CLOCK_MONOTONIC +may be used as an alias for +.Sy CLOCK_HIGHRES Ns .) +.Pp +The +.Fa flags +argument specifies additional parameters for the timer instance, and can have +any of the following values: +.Bl -hang -width Ds +.It Sy TFD_CLOEXEC +.Bd -filled -compact +Instance will be closed upon an +.Xr exec 2 ; +see +.Xr open 2 Ns 's +description of +.Sy O_CLOEXEC . +.Ed +.It Sy TFD_NONBLOCK +.Bd -filled -compact +Instance will be set to be non-blocking. A +.Xr read 2 +on a +.Sy timerfd +instance that has been initialized with +.Sy TFD_NONBLOCK +will return +.Sy EAGAIN +in lieu of blocking if the +timer has not expired since the last +.Fn timerfd_settime +or successful +.Fn read .
+.Ed +.El +.Pp +The following operations can be performed upon a +.Sy timerfd +instance: +.Bl -hang -width Ds +.It Sy read(2) +.Bd -filled -compact +Atomically reads and clears the number of timer expirations since the +last successful +.Xr read 2 +or +.Fn timerfd_settime . +Upon success, +the number of expirations will be copied into the eight byte buffer +passed to the system call. If there have been no expirations of the +timer since the last successful +.Xr read 2 +or +.Fn timerfd_settime , +.Xr read 2 +will block until at least the next expiration, +or return +.Sy EAGAIN +if the instance was created with +.Sy TFD_NONBLOCK . +Note that if multiple threads are blocked in +.Xr read 2 +for the same timer, only one of them will return upon +a single timer expiration. +.Pp +If the buffer specified to +.Xr read 2 +is less than +eight bytes in length, +.Sy EINVAL +will be returned. +.Ed +.It Sy poll(2), port_get(3C), epoll_wait(3C) +.Bd -filled -compact +Provide notification when the timer expires or has expired in the past without +a more recent +.Xr read 2 . +Note that threads being simultaneously +blocked in +.Xr read 2 +and +.Xr poll 2 +(or equivalents) for the same +timer constitute an application-level race; on a timer expiration, +the thread blocked in +.Xr poll 2 +may or may not return depending on how +it is scheduled with respect to the thread blocked in +.Xr read 2 . +.Ed +.It Sy timerfd_gettime() +.Bd -filled -compact +Returns the amount of time until the next timer expiration, with the +same functional signature and semantics as +.Xr timer_gettime 3C . +.Ed +.It Sy timerfd_settime() +.Bd -filled -compact +Sets or disarms the timer, with the +same functional signature and semantics as +.Xr timer_settime 3C . +.Ed +.El +.Sh RETURN VALUES +Upon successful completion, a file descriptor associated with the instance +is returned. Otherwise, +.Sy -1 +is returned and errno is set to indicate the error.
+.Sh ERRORS +The +.Fn timerfd_create +function will fail if: +.Bl -tag -width Er +.It Er EINVAL +The +.Fa flags +are invalid. +.It Er EMFILE +There are currently +.Pf { Sy OPEN_MAX Ns } +file descriptors open in the calling process. +.It Er EPERM +The +.Fa clockid +is +.Sy CLOCK_HIGHRES +and the +.Pf { Sy PRIV_PROC_CLOCK_HIGHRES Ns } +is not asserted in the effective set of the calling process. +.El +.Sh SEE ALSO +.Xr poll 2 , +.Xr port_get 3C , +.Xr epoll_wait 3C , +.Xr timer_create 3C , +.Xr timer_gettime 3C , +.Xr timer_settime 3C , +.Xr privileges 5 , +.Xr timerfd 5 diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile index 4784603013..c30af8e1d1 100644 --- a/usr/src/man/man5/Makefile +++ b/usr/src/man/man5/Makefile @@ -14,6 +14,7 @@ # Copyright (c) 2012 by Delphix. All rights reserved. # Copyright 2014 Nexenta Systems, Inc. # Copyright 2014 Garrett D'Amore +# Copyright (c) 2015, Joyent, Inc. All rights reserved. # include $(SRC)/Makefile.master @@ -121,6 +122,7 @@ MANFILES= Intro.5 \ tecla.5 \ term.5 \ threads.5 \ + timerfd.5 \ trusted_extensions.5 \ vgrindefs.5 \ zones.5 \ diff --git a/usr/src/man/man5/timerfd.5 b/usr/src/man/man5/timerfd.5 new file mode 100644 index 0000000000..3229095b49 --- /dev/null +++ b/usr/src/man/man5/timerfd.5 @@ -0,0 +1,47 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright (c) 2015, Joyent, Inc. All Rights Reserved.
+.\" +.Dd Feb 23, 2015 +.Dt TIMERFD 5 +.Os +.Sh NAME +.Nm timerfd +.Nd Linux-compatible timer notification facility +.Sh SYNOPSIS +.In sys/timerfd.h +.Sh DESCRIPTION +.Nm +is a Linux-borne facility for creating POSIX timers and +receiving their subsequent events via a file descriptor. +The facility itself is arguably unnecessary: +portable code can either use the timeout value present in +.Xr poll 2 / +.Xr port_get 3C +or -- if this is deemed of unacceptably poor resolution -- create a POSIX timer +via +.Xr timer_create 3C +and use the resulting signal to induce an +.Sy EINTR +to polling threads. (For code that need not be +portable, the +.Sy SIGEV_PORT +signal notification allows for explicit, event-oriented timer notification to be +sent to a specified port; see +.Xr signal.h 3HEAD +for details.) This facility therefore exists only to accommodate Linux-borne +applications and binaries; it is compatible with its Linux antecedent in both +binary interface and in semantics. +.Sh SEE ALSO +.Xr timerfd_create 3C , +.Xr timerfd_gettime 3C , +.Xr timerfd_settime 3C diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf index 85285ae7cd..47923f1f78 100644 --- a/usr/src/pkg/manifests/SUNWcs.mf +++ b/usr/src/pkg/manifests/SUNWcs.mf @@ -323,6 +323,7 @@ driver name=logindmux driver name=ptm clone_perms="ptmx 0666 root sys" driver name=pts perms="* 0644 root sys" perms="0 0620 root tty" \ perms="1 0620 root tty" perms="2 0620 root tty" perms="3 0620 root tty" +driver name=timerfd perms="* 0666 root sys" file path=etc/.login group=sys preserve=renamenew file path=etc/cron.d/.proto group=sys mode=0744 file path=etc/cron.d/at.deny group=sys preserve=true @@ -868,6 +869,7 @@ file path=usr/kernel/drv/$(ARCH64)/ksyms group=sys file path=usr/kernel/drv/$(ARCH64)/logindmux group=sys file path=usr/kernel/drv/$(ARCH64)/ptm group=sys file path=usr/kernel/drv/$(ARCH64)/pts group=sys +file path=usr/kernel/drv/$(ARCH64)/timerfd group=sys $(i386_ONLY)file 
path=usr/kernel/drv/dump group=sys file path=usr/kernel/drv/dump.conf group=sys $(i386_ONLY)file path=usr/kernel/drv/eventfd group=sys @@ -884,6 +886,8 @@ $(i386_ONLY)file path=usr/kernel/drv/ptm group=sys file path=usr/kernel/drv/ptm.conf group=sys $(i386_ONLY)file path=usr/kernel/drv/pts group=sys file path=usr/kernel/drv/pts.conf group=sys +$(i386_ONLY)file path=usr/kernel/drv/timerfd group=sys +file path=usr/kernel/drv/timerfd.conf group=sys file path=usr/kernel/exec/$(ARCH64)/javaexec group=sys mode=0755 file path=usr/kernel/exec/$(ARCH64)/shbinexec group=sys mode=0755 $(i386_ONLY)file path=usr/kernel/exec/javaexec group=sys mode=0755 diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index b72d713cd8..08f0b19416 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1516,6 +1516,7 @@ file path=usr/include/sys/time_impl.h file path=usr/include/sys/time_std_impl.h file path=usr/include/sys/timeb.h file path=usr/include/sys/timer.h +file path=usr/include/sys/timerfd.h file path=usr/include/sys/times.h file path=usr/include/sys/timex.h file path=usr/include/sys/timod.h diff --git a/usr/src/pkg/manifests/system-library.man3c.inc b/usr/src/pkg/manifests/system-library.man3c.inc index 27268505b3..ae061edac9 100644 --- a/usr/src/pkg/manifests/system-library.man3c.inc +++ b/usr/src/pkg/manifests/system-library.man3c.inc @@ -468,6 +468,7 @@ file path=usr/share/man/man3c/timer_create.3c file path=usr/share/man/man3c/timer_delete.3c file path=usr/share/man/man3c/timer_settime.3c file path=usr/share/man/man3c/timeradd.3c +file path=usr/share/man/man3c/timerfd_create.3c file path=usr/share/man/man3c/tmpfile.3c file path=usr/share/man/man3c/tmpnam.3c file path=usr/share/man/man3c/toascii.3c @@ -1281,6 +1282,8 @@ link path=usr/share/man/man3c/timer_getoverrun.3c target=timer_settime.3c link path=usr/share/man/man3c/timer_gettime.3c target=timer_settime.3c link 
path=usr/share/man/man3c/timerclear.3c target=timeradd.3c link path=usr/share/man/man3c/timercmp.3c target=timeradd.3c +link path=usr/share/man/man3c/timerfd_gettime.3c target=timerfd_create.3c +link path=usr/share/man/man3c/timerfd_settime.3c target=timerfd_create.3c link path=usr/share/man/man3c/timerisset.3c target=timeradd.3c link path=usr/share/man/man3c/timersub.3c target=timeradd.3c link path=usr/share/man/man3c/tmpnam_r.3c target=tmpnam.3c diff --git a/usr/src/pkg/manifests/system-library.man5.inc b/usr/src/pkg/manifests/system-library.man5.inc index fd222bcec4..63d883e984 100644 --- a/usr/src/pkg/manifests/system-library.man5.inc +++ b/usr/src/pkg/manifests/system-library.man5.inc @@ -61,6 +61,7 @@ file path=usr/share/man/man5/pkcs11_softtoken.5 file path=usr/share/man/man5/pkcs11_tpm.5 file path=usr/share/man/man5/regex.5 file path=usr/share/man/man5/regexp.5 +file path=usr/share/man/man5/timerfd.5 file path=usr/share/man/man5/threads.5 link path=usr/share/man/man5/advance.5 target=regexp.5 link path=usr/share/man/man5/compile.5 target=regexp.5 diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 3421b83719..cd9da11ac8 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1032,6 +1032,8 @@ GEN_DRV_OBJS += gen_drv.o TCLIENT_OBJS += tclient.o +TIMERFD_OBJS += timerfd.o + TPHCI_OBJS += tphci.o TVHCI_OBJS += tvhci.o diff --git a/usr/src/uts/common/io/timerfd.c b/usr/src/uts/common/io/timerfd.c new file mode 100644 index 0000000000..29eea9b24a --- /dev/null +++ b/usr/src/uts/common/io/timerfd.c @@ -0,0 +1,586 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ + +/* + * Support for the timerfd facility, a Linux-borne facility that allows + * POSIX.1b timers to be created and manipulated via a file descriptor + * interface. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct timerfd_state; +typedef struct timerfd_state timerfd_state_t; + +struct timerfd_state { + kmutex_t tfd_lock; /* lock protecting state */ + kcondvar_t tfd_cv; /* condvar */ + pollhead_t tfd_pollhd; /* poll head */ + uint64_t tfd_fired; /* # of times fired */ + itimer_t tfd_itimer; /* underlying itimer */ + timerfd_state_t *tfd_next; /* next state on global list */ +}; + +/* + * Internal global variables. + */ +static kmutex_t timerfd_lock; /* lock protecting state */ +static dev_info_t *timerfd_devi; /* device info */ +static vmem_t *timerfd_minor; /* minor number arena */ +static void *timerfd_softstate; /* softstate pointer */ +static timerfd_state_t *timerfd_state; /* global list of state */ + +static itimer_t * +timerfd_itimer_lock(timerfd_state_t *state) +{ + itimer_t *it = &state->tfd_itimer; + + mutex_enter(&state->tfd_lock); + + while (it->it_lock & ITLK_LOCKED) { + it->it_blockers++; + cv_wait(&it->it_cv, &state->tfd_lock); + it->it_blockers--; + } + + it->it_lock |= ITLK_LOCKED; + + mutex_exit(&state->tfd_lock); + + return (it); +} + +static void +timerfd_itimer_unlock(timerfd_state_t *state, itimer_t *it) +{ + VERIFY(it == &state->tfd_itimer); + VERIFY(it->it_lock & ITLK_LOCKED); + + mutex_enter(&state->tfd_lock); + + it->it_lock &= ~ITLK_LOCKED; + + if (it->it_blockers) + cv_signal(&it->it_cv); + + mutex_exit(&state->tfd_lock); +} + +static void +timerfd_fire(itimer_t *it) +{ + timerfd_state_t *state = it->it_frontend; + uint64_t oval; + + mutex_enter(&state->tfd_lock); + oval = state->tfd_fired++; + 
mutex_exit(&state->tfd_lock); + + if (oval == 0) { + cv_broadcast(&state->tfd_cv); + pollwakeup(&state->tfd_pollhd, POLLRDNORM | POLLIN); + } +} + +/*ARGSUSED*/ +static int +timerfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) +{ + timerfd_state_t *state; + major_t major = getemajor(*devp); + minor_t minor = getminor(*devp); + + if (minor != TIMERFDMNRN_TIMERFD) + return (ENXIO); + + mutex_enter(&timerfd_lock); + + minor = (minor_t)(uintptr_t)vmem_alloc(timerfd_minor, 1, + VM_BESTFIT | VM_SLEEP); + + if (ddi_soft_state_zalloc(timerfd_softstate, minor) != DDI_SUCCESS) { + vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); + mutex_exit(&timerfd_lock); + return (NULL); + } + + state = ddi_get_soft_state(timerfd_softstate, minor); + *devp = makedevice(major, minor); + + state->tfd_next = timerfd_state; + timerfd_state = state; + + mutex_exit(&timerfd_lock); + + return (0); +} + +/*ARGSUSED*/ +static int +timerfd_read(dev_t dev, uio_t *uio, cred_t *cr) +{ + timerfd_state_t *state; + minor_t minor = getminor(dev); + uint64_t val; + int err; + + if (uio->uio_resid < sizeof (val)) + return (EINVAL); + + state = ddi_get_soft_state(timerfd_softstate, minor); + + mutex_enter(&state->tfd_lock); + + while (state->tfd_fired == 0) { + if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { + mutex_exit(&state->tfd_lock); + return (EAGAIN); + } + + if (!cv_wait_sig_swap(&state->tfd_cv, &state->tfd_lock)) { + mutex_exit(&state->tfd_lock); + return (EINTR); + } + } + + /* + * Our tfd_fired is non-zero; slurp its value and then clear it. 
+ */ + val = state->tfd_fired; + state->tfd_fired = 0; + mutex_exit(&state->tfd_lock); + + err = uiomove(&val, sizeof (val), UIO_READ, uio); + + return (err); +} + +/*ARGSUSED*/ +static int +timerfd_poll(dev_t dev, short events, int anyyet, short *reventsp, + struct pollhead **phpp) +{ + timerfd_state_t *state; + minor_t minor = getminor(dev); + short revents = 0; + + state = ddi_get_soft_state(timerfd_softstate, minor); + + mutex_enter(&state->tfd_lock); + + if (state->tfd_fired > 0) + revents |= POLLRDNORM | POLLIN; + + if (!(*reventsp = revents & events) && !anyyet) + *phpp = &state->tfd_pollhd; + + mutex_exit(&state->tfd_lock); + + return (0); +} + +static int +timerfd_copyin(uintptr_t addr, itimerspec_t *dest) +{ + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyin((void *)addr, dest, sizeof (itimerspec_t)) != 0) + return (EFAULT); + } else { + itimerspec32_t dest32; + + if (copyin((void *)addr, &dest32, sizeof (itimerspec32_t)) != 0) + return (EFAULT); + + ITIMERSPEC32_TO_ITIMERSPEC(dest, &dest32); + } + + if (itimerspecfix(&dest->it_value) || + (itimerspecfix(&dest->it_interval) && + timerspecisset(&dest->it_value))) { + return (EINVAL); + } + + return (0); +} + +static int +timerfd_copyout(itimerspec_t *src, uintptr_t addr) +{ + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyout(src, (void *)addr, sizeof (itimerspec_t)) != 0) + return (EFAULT); + } else { + itimerspec32_t src32; + + if (ITIMERSPEC_OVERFLOW(src)) + return (EOVERFLOW); + + ITIMERSPEC_TO_ITIMERSPEC32(&src32, src); + + if (copyout(&src32, (void *)addr, sizeof (itimerspec32_t)) != 0) + return (EFAULT); + } + + return (0); +} + +/*ARGSUSED*/ +static int +timerfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +{ + itimerspec_t when, oval; + timerfd_state_t *state; + minor_t minor = getminor(dev); + int err; + itimer_t *it; + + state = ddi_get_soft_state(timerfd_softstate, minor); + + switch (cmd) { + case TIMERFDIOC_CREATE: { + if (arg == TIMERFD_MONOTONIC) + arg 
= CLOCK_MONOTONIC; + + it = timerfd_itimer_lock(state); + + if (it->it_backend != NULL) { + timerfd_itimer_unlock(state, it); + return (EEXIST); + } + + if ((it->it_backend = clock_get_backend(arg)) == NULL) { + timerfd_itimer_unlock(state, it); + return (EINVAL); + } + + /* + * We need to provide a proc structure only for purposes + * of locking CLOCK_REALTIME-based timers -- it is safe to + * provide p0 here. + */ + it->it_proc = &p0; + + err = it->it_backend->clk_timer_create(it, timerfd_fire); + + if (err != 0) { + it->it_backend = NULL; + timerfd_itimer_unlock(state, it); + return (err); + } + + it->it_frontend = state; + timerfd_itimer_unlock(state, it); + + return (0); + } + + case TIMERFDIOC_GETTIME: { + it = timerfd_itimer_lock(state); + + if (it->it_backend == NULL) { + timerfd_itimer_unlock(state, it); + return (ENODEV); + } + + err = it->it_backend->clk_timer_gettime(it, &when); + timerfd_itimer_unlock(state, it); + + if (err != 0) + return (err); + + if ((err = timerfd_copyout(&when, arg)) != 0) + return (err); + + return (0); + } + + case TIMERFDIOC_SETTIME: { + timerfd_settime_t st; + + if (copyin((void *)arg, &st, sizeof (st)) != 0) + return (EFAULT); + + if ((err = timerfd_copyin(st.tfd_settime_value, &when)) != 0) + return (err); + + it = timerfd_itimer_lock(state); + + if (it->it_backend == NULL) { + timerfd_itimer_unlock(state, it); + return (ENODEV); + } + + if (st.tfd_settime_ovalue != NULL) { + err = it->it_backend->clk_timer_gettime(it, &oval); + + if (err != 0) { + timerfd_itimer_unlock(state, it); + return (err); + } + } + + /* + * Before we set the time, we're going to clear tfd_fired. + * This can potentially race with the (old) timer firing, but + * the window is deceptively difficult to close: if we were + * to simply clear tfd_fired after the call to the backend + * returned, we would run the risk of plowing a firing of the + * new timer. 
Ultimately, the race can only be resolved by + * the backend, which would likely need to be extended with a + * function to call back into when the timer is between states + * (that is, after the timer can no longer fire with the old + * timer value, but before it can fire with the new one). + * This is straightforward enough for backends that set a + * timer's value by deleting the old one and adding the new + * one, but for those that modify the timer value in place + * (e.g., cyclics), the required serialization is necessarily + * delicate: the function would have to be callable from + * arbitrary interrupt context. While implementing all of + * this is possible, it does not (for the moment) seem worth + * it: if the timer is firing at essentially the same moment + * that it's being reprogrammed, there is a higher-level race + * with respect to timerfd usage that the program itself will + * have to properly resolve -- and it seems reasonable to + * simply allow the program to resolve it in this case. + */ + mutex_enter(&state->tfd_lock); + state->tfd_fired = 0; + mutex_exit(&state->tfd_lock); + + err = it->it_backend->clk_timer_settime(it, + st.tfd_settime_flags & TFD_TIMER_ABSTIME ? + TIMER_ABSTIME : TIMER_RELTIME, &when); + timerfd_itimer_unlock(state, it); + + if (err != 0 || st.tfd_settime_ovalue == NULL) + return (err); + + if ((err = timerfd_copyout(&oval, st.tfd_settime_ovalue)) != 0) + return (err); + + return (0); + } + + default: + break; + } + + return (ENOTTY); +} + +/*ARGSUSED*/ +static int +timerfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p) +{ + timerfd_state_t *state, **sp; + itimer_t *it; + minor_t minor = getminor(dev); + + state = ddi_get_soft_state(timerfd_softstate, minor); + + if (state->tfd_pollhd.ph_list != NULL) { + pollwakeup(&state->tfd_pollhd, POLLERR); + pollhead_clean(&state->tfd_pollhd); + } + + /* + * No one can get to this timer; we don't need to lock it -- we can + * just call on the backend to delete it.
+ */ + it = &state->tfd_itimer; + + if (it->it_backend != NULL) + it->it_backend->clk_timer_delete(it); + + mutex_enter(&timerfd_lock); + + /* + * Remove our state from our global list. + */ + for (sp = &timerfd_state; *sp != state; sp = &((*sp)->tfd_next)) + VERIFY(*sp != NULL); + + *sp = (*sp)->tfd_next; + + ddi_soft_state_free(timerfd_softstate, minor); + vmem_free(timerfd_minor, (void *)(uintptr_t)minor, 1); + + mutex_exit(&timerfd_lock); + + return (0); +} + +static int +timerfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +{ + switch (cmd) { + case DDI_ATTACH: + break; + + case DDI_RESUME: + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + mutex_enter(&timerfd_lock); + + if (ddi_soft_state_init(&timerfd_softstate, + sizeof (timerfd_state_t), 0) != 0) { + cmn_err(CE_NOTE, "/dev/timerfd failed to create soft state"); + mutex_exit(&timerfd_lock); + return (DDI_FAILURE); + } + + if (ddi_create_minor_node(devi, "timerfd", S_IFCHR, + TIMERFDMNRN_TIMERFD, DDI_PSEUDO, NULL) == DDI_FAILURE) { + cmn_err(CE_NOTE, "/dev/timerfd couldn't create minor node"); + ddi_soft_state_fini(&timerfd_softstate); + mutex_exit(&timerfd_lock); + return (DDI_FAILURE); + } + + ddi_report_dev(devi); + timerfd_devi = devi; + + timerfd_minor = vmem_create("timerfd_minor", (void *)TIMERFDMNRN_CLONE, + UINT32_MAX - TIMERFDMNRN_CLONE, 1, NULL, NULL, NULL, 0, + VM_SLEEP | VMC_IDENTIFIER); + + mutex_exit(&timerfd_lock); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +timerfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_SUSPEND: + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + mutex_enter(&timerfd_lock); + vmem_destroy(timerfd_minor); + + ddi_remove_minor_node(timerfd_devi, NULL); + timerfd_devi = NULL; + + ddi_soft_state_fini(&timerfd_softstate); + mutex_exit(&timerfd_lock); + + return (DDI_SUCCESS); +} + +/*ARGSUSED*/ +static int +timerfd_info(dev_info_t *dip, ddi_info_cmd_t infocmd, 
void *arg, void **result) +{ + int error; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)timerfd_devi; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + } + return (error); +} + +static struct cb_ops timerfd_cb_ops = { + timerfd_open, /* open */ + timerfd_close, /* close */ + nulldev, /* strategy */ + nulldev, /* print */ + nodev, /* dump */ + timerfd_read, /* read */ + nodev, /* write */ + timerfd_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + timerfd_poll, /* poll */ + ddi_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops timerfd_ops = { + DEVO_REV, /* devo_rev */ + 0, /* refcnt */ + timerfd_info, /* get_dev_info */ + nulldev, /* identify */ + nulldev, /* probe */ + timerfd_attach, /* attach */ + timerfd_detach, /* detach */ + nodev, /* reset */ + &timerfd_cb_ops, /* driver operations */ + NULL, /* bus operations */ + nodev, /* dev power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +static struct modldrv modldrv = { + &mod_driverops, /* module type (this is a pseudo driver) */ + "timerfd support", /* name of module */ + &timerfd_ops, /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +int +_init(void) +{ + return (mod_install(&modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + return (mod_remove(&modlinkage)); +} diff --git a/usr/src/uts/common/io/timerfd.conf b/usr/src/uts/common/io/timerfd.conf new file mode 100644 index 0000000000..c6ad86d051 --- /dev/null +++ b/usr/src/uts/common/io/timerfd.conf @@ -0,0 +1,16 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 Joyent, Inc. All rights reserved. +# + +name="timerfd" parent="pseudo" instance=0; diff --git a/usr/src/uts/common/os/clock_highres.c b/usr/src/uts/common/os/clock_highres.c index bcdf20c0bd..805813037d 100644 --- a/usr/src/uts/common/os/clock_highres.c +++ b/usr/src/uts/common/os/clock_highres.c @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2012, Joyent Inc. All rights reserved. + * Copyright (c) 2015, Joyent Inc. All rights reserved. */ #include @@ -66,7 +66,7 @@ clock_highres_getres(timespec_t *ts) /*ARGSUSED*/ static int -clock_highres_timer_create(itimer_t *it, struct sigevent *ev) +clock_highres_timer_create(itimer_t *it, void (*fire)(itimer_t *)) { /* * CLOCK_HIGHRES timers of sufficiently high resolution can deny @@ -80,6 +80,7 @@ clock_highres_timer_create(itimer_t *it, struct sigevent *ev) } it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP); + it->it_fire = fire; return (0); } @@ -95,7 +96,7 @@ clock_highres_fire(void *arg) old = *addr; } while (atomic_cas_64((uint64_t *)addr, old, new) != old); - timer_fire(it); + it->it_fire(it); } static int diff --git a/usr/src/uts/common/os/clock_realtime.c b/usr/src/uts/common/os/clock_realtime.c index ef3383fb28..4a75984b23 100644 --- a/usr/src/uts/common/os/clock_realtime.c +++ b/usr/src/uts/common/os/clock_realtime.c @@ -24,7 +24,9 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2015, Joyent Inc. All rights reserved. + */ #include #include @@ -80,8 +82,7 @@ clock_realtime_fire(void *arg) /* * First call into the timer subsystem to get the signal going. 
*/ - timer_fire(it); - + it->it_fire(it); val = &it->it_itime.it_value; interval = &it->it_itime.it_interval; @@ -171,9 +172,10 @@ clock_realtime_fire_first(void *arg) /*ARGSUSED*/ static int -clock_realtime_timer_create(itimer_t *it, struct sigevent *ev) +clock_realtime_timer_create(itimer_t *it, void (*fire)(itimer_t *)) { it->it_arg = kmem_zalloc(sizeof (timeout_id_t), KM_SLEEP); + it->it_fire = fire; return (0); } @@ -184,7 +186,7 @@ clock_realtime_timer_settime(itimer_t *it, int flags, { timeout_id_t tid, *tidp = it->it_arg; timespec_t now; - proc_t *p = curproc; + proc_t *p = it->it_proc; clock_t ticks; gethrestime(&now); @@ -246,7 +248,7 @@ static int clock_realtime_timer_gettime(itimer_t *it, struct itimerspec *when) { timespec_t now; - proc_t *p = curproc; + proc_t *p = it->it_proc; /* * We always keep it_itime up to date, so we just need to snapshot @@ -276,7 +278,7 @@ clock_realtime_timer_gettime(itimer_t *it, struct itimerspec *when) static int clock_realtime_timer_delete(itimer_t *it) { - proc_t *p = curproc; + proc_t *p = it->it_proc; timeout_id_t tid, *tidp = it->it_arg; mutex_enter(&p->p_lock); diff --git a/usr/src/uts/common/os/timer.c b/usr/src/uts/common/os/timer.c index 8559d8736c..b25a6cbcf1 100644 --- a/usr/src/uts/common/os/timer.c +++ b/usr/src/uts/common/os/timer.c @@ -269,6 +269,15 @@ clock_add_backend(clockid_t clock, clock_backend_t *backend) clock_backend[clock] = backend; } +clock_backend_t * +clock_get_backend(clockid_t clock) +{ + if (clock < 0 || clock >= CLOCK_MAX) + return (NULL); + + return (clock_backend[clock]); +} + int clock_settime(clockid_t clock, timespec_t *tp) { @@ -398,7 +407,7 @@ timer_signal(sigqueue_t *sigq) /* * This routine is called from the clock backend. */ -void +static void timer_fire(itimer_t *it) { proc_t *p; @@ -672,7 +681,7 @@ timer_create(clockid_t clock, struct sigevent *evp, timer_t *tid) * Call on the backend to verify the event argument (or return * EINVAL if this clock type does not support timers). 
*/ - if ((error = backend->clk_timer_create(it, &ev)) != 0) + if ((error = backend->clk_timer_create(it, timer_fire)) != 0) goto err; it->it_lwp = ttolwp(curthread); diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index c2bf2f0483..39288d5cc0 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -587,6 +587,7 @@ CHKHDRS= \ time_std_impl.h \ timeb.h \ timer.h \ + timerfd.h \ times.h \ timex.h \ timod.h \ diff --git a/usr/src/uts/common/sys/timer.h b/usr/src/uts/common/sys/timer.h index 604ddf5d83..ec349c962f 100644 --- a/usr/src/uts/common/sys/timer.h +++ b/usr/src/uts/common/sys/timer.h @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_TIMER_H #define _SYS_TIMER_H @@ -55,32 +59,45 @@ extern int timer_max; /* patchable via /etc/system */ struct clock_backend; -typedef struct itimer { +struct itimer; +typedef struct itimer itimer_t; + +struct itimer { itimerspec_t it_itime; hrtime_t it_hrtime; ushort_t it_flags; ushort_t it_lock; - void *it_arg; - sigqueue_t *it_sigq; - klwp_t *it_lwp; + void *it_arg; /* clock backend-specific data */ struct proc *it_proc; + union { + struct { + sigqueue_t *__it_sigq; + klwp_t *__it_lwp; + } __proc; + void *__it_frontend; + } __data; /* timer frontend-specific data */ kcondvar_t it_cv; int it_blockers; int it_pending; int it_overrun; struct clock_backend *it_backend; + void (*it_fire)(itimer_t *); kmutex_t it_mutex; void *it_portev; /* port_kevent_t pointer */ void *it_portsrc; /* port_source_t pointer */ int it_portfd; /* port file descriptor */ -} itimer_t; +}; + +#define it_sigq __data.__proc.__it_sigq +#define it_lwp __data.__proc.__it_lwp +#define it_frontend __data.__it_frontend typedef struct clock_backend { struct sigevent clk_default; int (*clk_clock_settime)(timespec_t *); int (*clk_clock_gettime)(timespec_t *); int (*clk_clock_getres)(timespec_t *); - int 
(*clk_timer_create)(itimer_t *, void (*)(itimer_t *)); int (*clk_timer_settime)(itimer_t *, int, const struct itimerspec *); int (*clk_timer_gettime)(itimer_t *, struct itimerspec *); int (*clk_timer_delete)(itimer_t *); @@ -88,8 +105,8 @@ typedef struct clock_backend { } clock_backend_t; extern void clock_add_backend(clockid_t clock, clock_backend_t *backend); +extern clock_backend_t *clock_get_backend(clockid_t clock); -extern void timer_fire(itimer_t *); extern void timer_lwpbind(); extern void timer_func(sigqueue_t *); diff --git a/usr/src/uts/common/sys/timerfd.h b/usr/src/uts/common/sys/timerfd.h new file mode 100644 index 0000000000..66cb50ac88 --- /dev/null +++ b/usr/src/uts/common/sys/timerfd.h @@ -0,0 +1,81 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. All rights reserved. + */ + +/* + * Header file to support the timerfd facility. + */ + +#ifndef _SYS_TIMERFD_H +#define _SYS_TIMERFD_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * To assure binary compatibility with Linux, these values are fixed at their + * Linux equivalents, not their native ones. + */ +#define TFD_CLOEXEC 02000000 /* LX_O_CLOEXEC */ +#define TFD_NONBLOCK 04000 /* LX_O_NONBLOCK */ +#define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) + +/* + * These ioctl values are specific to the native implementation; applications + * shouldn't be using them directly, and they should therefore be safe to + * change without breaking apps.
+ */ +#define TIMERFDIOC (('t' << 24) | ('f' << 16) | ('d' << 8)) +#define TIMERFDIOC_CREATE (TIMERFDIOC | 1) /* create timer */ +#define TIMERFDIOC_SETTIME (TIMERFDIOC | 2) /* timerfd_settime() */ +#define TIMERFDIOC_GETTIME (TIMERFDIOC | 3) /* timerfd_gettime() */ + +typedef struct timerfd_settime { + uint64_t tfd_settime_flags; /* flags (e.g., TFD_TIMER_ABSTIME) */ + uint64_t tfd_settime_value; /* pointer to value */ + uint64_t tfd_settime_ovalue; /* pointer to old value, if any */ +} timerfd_settime_t; + +#ifndef _KERNEL + +extern int timerfd_create(int, int); +extern int timerfd_settime(int, int, + const struct itimerspec *, struct itimerspec *); +extern int timerfd_gettime(int, struct itimerspec *); + +#else + +#define TIMERFDMNRN_TIMERFD 0 +#define TIMERFDMNRN_CLONE 1 +#define TIMERFD_VALMAX (ULLONG_MAX - 1ULL) + +/* + * Fortunately, the values for the Linux clocks that are valid for timerfd + * (namely, CLOCK_REALTIME and CLOCK_MONOTONIC) don't overlap with our values + * for the same. + */ +#define TIMERFD_MONOTONIC 1 /* Linux value for CLOCK_MONOTONIC */ + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_TIMERFD_H */ diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 64b4962cae..18d05f23d0 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -21,6 +21,7 @@ # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2013 Andrew Stormont. All rights reserved. +# Copyright (c) 2015, Joyent, Inc. All rights reserved.
# # This makefile contains the common definitions for all intel @@ -338,6 +339,7 @@ DRV_KMODS += sysevent DRV_KMODS += sysmsg DRV_KMODS += tcp DRV_KMODS += tcp6 +DRV_KMODS += timerfd DRV_KMODS += tl DRV_KMODS += tnf DRV_KMODS += tpm diff --git a/usr/src/uts/intel/timerfd/Makefile b/usr/src/uts/intel/timerfd/Makefile new file mode 100644 index 0000000000..28cf8fd41e --- /dev/null +++ b/usr/src/uts/intel/timerfd/Makefile @@ -0,0 +1,68 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 Joyent, Inc. All rights reserved. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = timerfd +OBJECTS = $(TIMERFD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(TIMERFD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +CERRWARN += -_gcc=-Wno-parentheses + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. 
+# +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc index d7c45a396e..29e4cd7ccf 100644 --- a/usr/src/uts/sparc/Makefile.sparc +++ b/usr/src/uts/sparc/Makefile.sparc @@ -21,6 +21,7 @@ # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013 Andrew Stormont. All rights reserved. +# Copyright (c) 2015, Joyent, Inc. All rights reserved. # Copyright 2015 Nexenta Systems, Inc. All rights reserved. diff --git a/usr/src/uts/sparc/timerfd/Makefile b/usr/src/uts/sparc/timerfd/Makefile new file mode 100644 index 0000000000..969ca6dd1a --- /dev/null +++ b/usr/src/uts/sparc/timerfd/Makefile @@ -0,0 +1,68 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015 Joyent, Inc. All rights reserved. +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = timerfd +OBJECTS = $(TIMERFD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(TIMERFD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/common/io + +# +# Include common rules. +# +include $(UTSBASE)/sparc/Makefile.sparc + +CERRWARN += -_gcc=-Wno-parentheses + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/sparc/Makefile.targ -- cgit v1.2.3 From 21227944c2bcc086121a5428f3f9d2496ba646f5 Mon Sep 17 00:00:00 2001 From: Gordon Ross Date: Sat, 2 May 2015 16:01:22 -0400 Subject: 5876 sys/regset.h pollutes name space Reviewed by: Josef 'Jeff' Sipek Reviewed by: Jonathan Perkin Reviewed by: Alexander Pyhalov Approved by: Albert Lee --- usr/src/cmd/csh/i386/signal.c | 3 +- usr/src/cmd/csh/sparc/signal.c | 3 +- usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c | 2 +- usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c | 3 +- usr/src/lib/libc/amd64/gen/makectxt.c | 3 +- usr/src/lib/libc/amd64/gen/siglongjmp.c | 3 +- usr/src/lib/libc/i386/gen/makectxt.c | 3 +- usr/src/lib/libc/i386/gen/siglongjmp.c | 3 +- usr/src/lib/libc/port/threads/sigaction.c | 2 +- usr/src/lib/libc/sparc/gen/makectxt.c | 3 +- usr/src/lib/libc/sparc/gen/siglongjmp.c | 3 +- usr/src/lib/libc/sparcv9/gen/makectxt.c | 3 +- usr/src/lib/libc/sparcv9/gen/siglongjmp.c | 1 + usr/src/lib/libm/common/m9x/__fex_hdlr.c | 1 + usr/src/lib/libm/common/m9x/__fex_i386.c | 1 + usr/src/lib/libm/common/m9x/__fex_sse.c | 1 + usr/src/lib/libm/common/m9x/fex_log.c | 1 + usr/src/lib/libproc/common/P32ton.c | 6 +- usr/src/pkg/manifests/system-header.mf | 1 + usr/src/uts/common/sys/core.h | 8 +- usr/src/uts/common/sys/prsystm.h | 4 +- usr/src/uts/intel/ia32/os/archdep.c | 4 +- usr/src/uts/intel/ia32/os/fpu.c | 2 +- usr/src/uts/intel/sys/Makefile | 1 + usr/src/uts/intel/sys/fp.h | 90 +++++- usr/src/uts/intel/sys/mcontext.h | 191 ++++++++++++ usr/src/uts/intel/sys/pcb.h | 3 + usr/src/uts/intel/sys/procfs_isa.h | 6 +- usr/src/uts/intel/sys/regset.h | 313 +------------------ usr/src/uts/intel/sys/ucontext.h | 4 +- usr/src/uts/sparc/fpu/fpu_simulator.c | 1 + 
usr/src/uts/sparc/sys/Makefile | 3 +- usr/src/uts/sparc/sys/fpu/fpu_simulator.h | 2 +- usr/src/uts/sparc/sys/fpu/fpusystm.h | 4 +- usr/src/uts/sparc/sys/mcontext.h | 304 ++++++++++++++++++ usr/src/uts/sparc/sys/procfs_isa.h | 6 +- usr/src/uts/sparc/sys/regset.h | 427 +------------------------- usr/src/uts/sparc/sys/ucontext.h | 6 +- usr/src/uts/sparc/syscall/getcontext.c | 8 +- usr/src/uts/sparc/v7/sys/machpcb.h | 6 +- usr/src/uts/sparc/v9/fpu/fpu.c | 14 +- usr/src/uts/sparc/v9/os/v9dep.c | 14 +- usr/src/uts/sparc/v9/sys/machpcb.h | 2 +- usr/src/uts/sun4/os/trap.c | 4 +- 44 files changed, 672 insertions(+), 801 deletions(-) create mode 100644 usr/src/uts/intel/sys/mcontext.h create mode 100644 usr/src/uts/sparc/sys/mcontext.h (limited to 'usr/src') diff --git a/usr/src/cmd/csh/i386/signal.c b/usr/src/cmd/csh/i386/signal.c index d023a9bd88..59b3eb54e4 100644 --- a/usr/src/cmd/csh/i386/signal.c +++ b/usr/src/cmd/csh/i386/signal.c @@ -12,8 +12,6 @@ * specifies the terms and conditions for redistribution. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * 4.3BSD signal compatibility functions * @@ -28,6 +26,7 @@ #include #include #include +#include #include #include "signal.h" #include diff --git a/usr/src/cmd/csh/sparc/signal.c b/usr/src/cmd/csh/sparc/signal.c index 9502940859..cac56044fa 100644 --- a/usr/src/cmd/csh/sparc/signal.c +++ b/usr/src/cmd/csh/sparc/signal.c @@ -12,8 +12,6 @@ * specifies the terms and conditions for redistribution. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * 4.3BSD signal compatibility functions * @@ -28,6 +26,7 @@ #include #include #include +#include #include #include "signal.h" #include diff --git a/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c b/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c index ed61bade77..c11e08ba0d 100644 --- a/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c +++ b/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c @@ -448,7 +448,7 @@ pt_fpregs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_tgt_t *t = mdb.m_target; mdb_tgt_tid_t tid; prfpregset_t fprs; - struct fpchip_state fps; + struct _fpchip_state fps; char buf[256]; uint_t top; int i; diff --git a/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c b/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c index 1d3781f1e9..0a7f46f5d1 100644 --- a/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c +++ b/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Context-saving routine used for pipelines. Designed for use only * with kmdb_setcontext, and with the assumption that func() will never @@ -36,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/usr/src/lib/libc/amd64/gen/makectxt.c b/usr/src/lib/libc/amd64/gen/makectxt.c index 5b2ce15313..c8fd3c7742 100644 --- a/usr/src/lib/libc/amd64/gen/makectxt.c +++ b/usr/src/lib/libc/amd64/gen/makectxt.c @@ -27,14 +27,13 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _makecontext = makecontext #include "lint.h" #include #include #include +#include /* * The ucontext_t that the user passes in must have been primed with a diff --git a/usr/src/lib/libc/amd64/gen/siglongjmp.c b/usr/src/lib/libc/amd64/gen/siglongjmp.c index 07a89b485f..6637c19625 100644 --- a/usr/src/lib/libc/amd64/gen/siglongjmp.c +++ b/usr/src/lib/libc/amd64/gen/siglongjmp.c @@ -24,13 +24,12 @@ * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include +#include #include #include diff --git a/usr/src/lib/libc/i386/gen/makectxt.c b/usr/src/lib/libc/i386/gen/makectxt.c index d72a67a481..1e6f995d84 100644 --- a/usr/src/lib/libc/i386/gen/makectxt.c +++ b/usr/src/lib/libc/i386/gen/makectxt.c @@ -27,13 +27,12 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _makecontext = makecontext #include "lint.h" #include #include +#include #include /* diff --git a/usr/src/lib/libc/i386/gen/siglongjmp.c b/usr/src/lib/libc/i386/gen/siglongjmp.c index ea23fcf749..17261b3efd 100644 --- a/usr/src/lib/libc/i386/gen/siglongjmp.c +++ b/usr/src/lib/libc/i386/gen/siglongjmp.c @@ -28,13 +28,12 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include +#include #include #include diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c index 8e9df7ab60..571e211f97 100644 --- a/usr/src/lib/libc/port/threads/sigaction.c +++ b/usr/src/lib/libc/port/threads/sigaction.c @@ -208,7 +208,7 @@ call_user_handler(int sig, siginfo_t *sip, ucontext_t *ucp) if (--fp->fpu_qcnt > 0) { unsigned char i; - struct fq *fqp; + struct _fq *fqp; fqp = fp->fpu_q; for (i = 0; i < fp->fpu_qcnt; i++) diff --git a/usr/src/lib/libc/sparc/gen/makectxt.c b/usr/src/lib/libc/sparc/gen/makectxt.c index 7519bdabcb..fa5725525e 100644 --- a/usr/src/lib/libc/sparc/gen/makectxt.c +++ b/usr/src/lib/libc/sparc/gen/makectxt.c @@ -27,8 +27,6 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _makecontext = makecontext #pragma weak ___makecontext_v2 = __makecontext_v2 @@ -38,6 +36,7 @@ #include #include #include +#include /* * The ucontext_t that the user passes in must have been primed with a diff --git 
a/usr/src/lib/libc/sparc/gen/siglongjmp.c b/usr/src/lib/libc/sparc/gen/siglongjmp.c index 83a640397d..90b406cf8d 100644 --- a/usr/src/lib/libc/sparc/gen/siglongjmp.c +++ b/usr/src/lib/libc/sparc/gen/siglongjmp.c @@ -27,14 +27,13 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include #include +#include #include #include #include diff --git a/usr/src/lib/libc/sparcv9/gen/makectxt.c b/usr/src/lib/libc/sparcv9/gen/makectxt.c index 7519bdabcb..fa5725525e 100644 --- a/usr/src/lib/libc/sparcv9/gen/makectxt.c +++ b/usr/src/lib/libc/sparcv9/gen/makectxt.c @@ -27,8 +27,6 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #pragma weak _makecontext = makecontext #pragma weak ___makecontext_v2 = __makecontext_v2 @@ -38,6 +36,7 @@ #include #include #include +#include /* * The ucontext_t that the user passes in must have been primed with a diff --git a/usr/src/lib/libc/sparcv9/gen/siglongjmp.c b/usr/src/lib/libc/sparcv9/gen/siglongjmp.c index 5d4ca06426..424942966f 100644 --- a/usr/src/lib/libc/sparcv9/gen/siglongjmp.c +++ b/usr/src/lib/libc/sparcv9/gen/siglongjmp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/usr/src/lib/libm/common/m9x/__fex_hdlr.c b/usr/src/lib/libm/common/m9x/__fex_hdlr.c index 7732d0a9fe..5b7ac8d933 100644 --- a/usr/src/lib/libm/common/m9x/__fex_hdlr.c +++ b/usr/src/lib/libm/common/m9x/__fex_hdlr.c @@ -40,6 +40,7 @@ #include #endif #include +#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git a/usr/src/lib/libm/common/m9x/__fex_i386.c b/usr/src/lib/libm/common/m9x/__fex_i386.c index c6ffa593be..5743bd414e 100644 --- a/usr/src/lib/libm/common/m9x/__fex_i386.c +++ b/usr/src/lib/libm/common/m9x/__fex_i386.c @@ -40,6 +40,7 @@ #include #endif #include +#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git 
a/usr/src/lib/libm/common/m9x/__fex_sse.c b/usr/src/lib/libm/common/m9x/__fex_sse.c index df83dff212..e679e71929 100644 --- a/usr/src/lib/libm/common/m9x/__fex_sse.c +++ b/usr/src/lib/libm/common/m9x/__fex_sse.c @@ -34,6 +34,7 @@ #else #include #endif +#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git a/usr/src/lib/libm/common/m9x/fex_log.c b/usr/src/lib/libm/common/m9x/fex_log.c index 336358da4c..6a8e759cc0 100644 --- a/usr/src/lib/libm/common/m9x/fex_log.c +++ b/usr/src/lib/libm/common/m9x/fex_log.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/usr/src/lib/libproc/common/P32ton.c b/usr/src/lib/libproc/common/P32ton.c index 78219550c8..acb52058c9 100644 --- a/usr/src/lib/libproc/common/P32ton.c +++ b/usr/src/lib/libproc/common/P32ton.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -267,7 +265,7 @@ prfpregset_32_to_n(const prfpregset32_t *src, prfpregset_t *dst) #elif defined(__amd64) struct _fpstate32 *src32 = (struct _fpstate32 *)src; - struct fpchip_state *dst64 = (struct fpchip_state *)dst; + struct _fpchip_state *dst64 = (struct _fpchip_state *)dst; int i; (void) memcpy(dst64->st, src32->_st, sizeof (src32->_st)); @@ -598,7 +596,7 @@ prfpregset_n_to_32(const prfpregset_t *src, prfpregset32_t *dst) #elif defined(__amd64) struct _fpstate32 *dst32 = (struct _fpstate32 *)dst; - struct fpchip_state *src64 = (struct fpchip_state *)src; + struct _fpchip_state *src64 = (struct _fpchip_state *)src; uint32_t top; int i; diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 08f0b19416..f4f92a63e0 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1219,6 +1219,7 @@ $(i386_ONLY)file path=usr/include/sys/mc_amd.h $(i386_ONLY)file path=usr/include/sys/mc_intel.h $(i386_ONLY)file path=usr/include/sys/mca_amd.h $(i386_ONLY)file 
path=usr/include/sys/mca_x86.h +file path=usr/include/sys/mcontext.h file path=usr/include/sys/md4.h file path=usr/include/sys/md5.h file path=usr/include/sys/md5_consts.h diff --git a/usr/src/uts/common/sys/core.h b/usr/src/uts/common/sys/core.h index 28ee6faaff..543ed5fada 100644 --- a/usr/src/uts/common/sys/core.h +++ b/usr/src/uts/common/sys/core.h @@ -20,6 +20,8 @@ * CDDL HEADER END */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -27,8 +29,6 @@ #ifndef _SYS_CORE_H #define _SYS_CORE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifndef _KERNEL #include #endif /* _KERNEL */ @@ -63,9 +63,9 @@ struct core { int c_dsize; /* Data size (bytes) */ int c_ssize; /* Stack size (bytes) */ char c_cmdname[CORE_NAMELEN + 1]; /* Command name */ - struct fpu c_fpu; /* external FPU state */ + struct _fpu c_fpu; /* external FPU state */ #if defined(__sparc) - struct fq c_fpu_q[MAXFPQ]; /* fpu exception queue */ + struct _fq c_fpu_q[_MAXFPQ]; /* fpu exception queue */ #endif int c_ucode; /* Exception no. 
from u_code */ }; diff --git a/usr/src/uts/common/sys/prsystm.h b/usr/src/uts/common/sys/prsystm.h index ebded76dce..6f5d4b914f 100644 --- a/usr/src/uts/common/sys/prsystm.h +++ b/usr/src/uts/common/sys/prsystm.h @@ -108,9 +108,9 @@ extern int pr_watch_emul(struct regs *, caddr_t, enum seg_rw); extern void pr_free_watched_pages(proc_t *); extern int pr_allstopped(proc_t *, int); #if defined(__sparc) -struct gwindows; +struct _gwindows; extern int prnwindows(klwp_t *); -extern void prgetwindows(klwp_t *, struct gwindows *); +extern void prgetwindows(klwp_t *, struct _gwindows *); #if defined(__sparcv9) /* 32-bit adb macros should not see these defs */ extern void prgetasregs(klwp_t *, asrset_t); extern void prsetasregs(klwp_t *, asrset_t); diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index d83b16d673..2a33f306c2 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -193,7 +193,7 @@ fpregset_to_fxsave(const fpregset_t *fp, struct fxsave_state *fx) #if defined(__amd64) bcopy(fp, fx, sizeof (*fx)); #else - const struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; + const struct _fpchip_state *fc = &fp->fp_reg_set.fpchip_state; fnsave_to_fxsave((const struct fnsave_state *)fc, fx); fx->fx_mxcsr = fc->mxcsr; @@ -214,7 +214,7 @@ fxsave_to_fpregset(const struct fxsave_state *fx, fpregset_t *fp) #if defined(__amd64) bcopy(fx, fp, sizeof (*fx)); #else - struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; + struct _fpchip_state *fc = &fp->fp_reg_set.fpchip_state; fxsave_to_fnsave(fx, (struct fnsave_state *)fc); fc->mxcsr = fx->fx_mxcsr; diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index b7022cf0e5..dc3e286ad5 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -416,7 +416,7 @@ fpnoextflt(struct regs *rp) ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); #if defined(__i386) - ASSERT(sizeof (struct fpu) == sizeof 
(struct __old_fpu)); + ASSERT(sizeof (struct _fpu) == sizeof (struct __old_fpu)); #endif /* __i386 */ #endif /* !__lint */ diff --git a/usr/src/uts/intel/sys/Makefile b/usr/src/uts/intel/sys/Makefile index 192ada41a5..5cfbdec4fc 100644 --- a/usr/src/uts/intel/sys/Makefile +++ b/usr/src/uts/intel/sys/Makefile @@ -56,6 +56,7 @@ HDRS = \ mc_intel.h \ mca_amd.h \ mca_x86.h \ + mcontext.h \ mutex_impl.h \ obpdefs.h \ old_procfs.h \ diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index 4956e2d318..3373484dec 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -19,6 +19,8 @@ * CDDL HEADER END */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -168,6 +170,91 @@ extern "C" { "\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm" \ "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie" +/* + * This structure is written to memory by an 'fnsave' instruction + */ +struct fnsave_state { + uint16_t f_fcw; + uint16_t __f_ign0; + uint16_t f_fsw; + uint16_t __f_ign1; + uint16_t f_ftw; + uint16_t __f_ign2; + uint32_t f_eip; + uint16_t f_cs; + uint16_t f_fop; + uint32_t f_dp; + uint16_t f_ds; + uint16_t __f_ign3; + union { + uint16_t fpr_16[5]; /* 80-bits of x87 state */ + } f_st[8]; +}; /* 108 bytes */ + +/* + * This structure is written to memory by an 'fxsave' instruction + * Note the variant behaviour of this instruction between long mode + * and legacy environments! 
+ */ +struct fxsave_state { + uint16_t fx_fcw; + uint16_t fx_fsw; + uint16_t fx_fctw; /* compressed tag word */ + uint16_t fx_fop; +#if defined(__amd64) + uint64_t fx_rip; + uint64_t fx_rdp; +#else + uint32_t fx_eip; + uint16_t fx_cs; + uint16_t __fx_ign0; + uint32_t fx_dp; + uint16_t fx_ds; + uint16_t __fx_ign1; +#endif + uint32_t fx_mxcsr; + uint32_t fx_mxcsr_mask; + union { + uint16_t fpr_16[5]; /* 80-bits of x87 state */ + u_longlong_t fpr_mmx; /* 64-bit mmx register */ + uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */ + } fx_st[8]; +#if defined(__amd64) + upad128_t fx_xmm[16]; /* 128-bit registers */ + upad128_t __fx_ign2[6]; +#else + upad128_t fx_xmm[8]; /* 128-bit registers */ + upad128_t __fx_ign2[14]; +#endif +}; /* 512 bytes */ + +/* + * This structure is written to memory by an 'xsave' instruction. + * First 512 byte is compatible with the format of an 'fxsave' area. + */ +struct xsave_state { + struct fxsave_state xs_fxsave; + uint64_t xs_xstate_bv; /* 512 */ + uint64_t xs_rsv_mbz[2]; + uint64_t xs_reserved[5]; + upad128_t xs_ymm[16]; /* avx - 576 */ +}; /* 832 bytes, asserted in fpnoextflt() */ + +/* + * Kernel's FPU save area + */ +typedef struct { + union _kfpu_u { + struct fxsave_state kfpu_fx; +#if defined(__i386) + struct fnsave_state kfpu_fn; +#endif + struct xsave_state kfpu_xs; + } kfpu_u; + uint32_t kfpu_status; /* saved at #mf exception */ + uint32_t kfpu_xstatus; /* saved at #xm exception */ +} kfpu_t; + extern int fp_kind; /* kind of fp support */ extern int fp_save_mech; /* fp save/restore mechanism */ extern int fpu_exists; /* FPU hw exists */ @@ -188,9 +275,6 @@ extern void fpxsave_ctxt(void *); extern void xsave_ctxt(void *); extern void (*fpsave_ctxt)(void *); -struct fnsave_state; -struct fxsave_state; -struct xsave_state; extern void fxsave_insn(struct fxsave_state *); extern void fpsave(struct fnsave_state *); extern void fprestore(struct fnsave_state *); diff --git a/usr/src/uts/intel/sys/mcontext.h 
b/usr/src/uts/intel/sys/mcontext.h new file mode 100644 index 0000000000..23d0aba364 --- /dev/null +++ b/usr/src/uts/intel/sys/mcontext.h @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. + */ +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Essential struct definitions for mcontext_t needed by ucontext.h + * These were formerly in regset.h, which now includes this file. + */ + +#ifndef _SYS_MCONTEXT_H +#define _SYS_MCONTEXT_H + +#include + +#if !defined(_ASM) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. 
+ */ +#if defined(__amd64) +#define _NGREG 28 +#else +#define _NGREG 19 +#endif + +#if !defined(_ASM) + +#if defined(_LP64) || defined(_I32LPx) +typedef long greg_t; +#else +typedef int greg_t; +#endif + +#if defined(_SYSCALL32) + +typedef int32_t greg32_t; +typedef int64_t greg64_t; + +#endif /* _SYSCALL32 */ + +typedef greg_t gregset_t[_NGREG]; + +#if defined(_SYSCALL32) + +#define _NGREG32 19 +#define _NGREG64 28 + +typedef greg32_t gregset32_t[_NGREG32]; +typedef greg64_t gregset64_t[_NGREG64]; + +#endif /* _SYSCALL32 */ + +/* + * Floating point definitions. + */ + +#if defined(__amd64) + +typedef struct _fpu { + union { + struct _fpchip_state { + uint16_t cw; + uint16_t sw; + uint8_t fctw; + uint8_t __fx_rsvd; + uint16_t fop; + uint64_t rip; + uint64_t rdp; + uint32_t mxcsr; + uint32_t mxcsr_mask; + union { + uint16_t fpr_16[5]; + upad128_t __fpr_pad; + } st[8]; + upad128_t xmm[16]; + upad128_t __fx_ign2[6]; + uint32_t status; /* sw at exception */ + uint32_t xstatus; /* mxcsr at exception */ + } fpchip_state; + uint32_t f_fpregs[130]; + } fp_reg_set; +} fpregset_t; + +#else /* __i386 */ + +/* + * This definition of the floating point structure is binary + * compatible with the Intel386 psABI definition, and source + * compatible with that specification for x87-style floating point. + * It also allows SSE/SSE2 state to be accessed on machines that + * possess such hardware capabilities. 
+ */ +typedef struct _fpu { + union { + struct _fpchip_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + upad128_t xmm[8]; /* %xmm0-%xmm7 */ + } fpchip_state; + struct _fp_emul_space { /* for emulator(s) */ + uint8_t fp_emul[246]; + uint8_t fp_epad[2]; + } fp_emul_space; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset_t; + +#endif /* __i386 */ + +#if defined(_SYSCALL32) + +/* Kernel view of user i386 fpu structure */ + +typedef struct fpu32 { + union { + struct fpchip32_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + uint32_t xmm[8][4]; /* %xmm0-%xmm7 */ + } fpchip_state; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset32_t; + +#endif /* _SYSCALL32 */ + +/* + * Structure mcontext defines the complete hardware machine state. + * (This structure is specified in the i386 ABI suppl.) 
+ */ +typedef struct { + gregset_t gregs; /* general register set */ + fpregset_t fpregs; /* floating point register set */ +} mcontext_t; + +#if defined(_SYSCALL32) + +typedef struct { + gregset32_t gregs; /* general register set */ + fpregset32_t fpregs; /* floating point register set */ +} mcontext32_t; + +#endif /* _SYSCALL32 */ + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MCONTEXT_H */ diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h index 3a690bd980..defd116eba 100644 --- a/usr/src/uts/intel/sys/pcb.h +++ b/usr/src/uts/intel/sys/pcb.h @@ -28,6 +28,9 @@ #include #include +#ifndef _ASM +#include /* kfpu_t */ +#endif #ifdef __cplusplus extern "C" { diff --git a/usr/src/uts/intel/sys/procfs_isa.h b/usr/src/uts/intel/sys/procfs_isa.h index b7a6cc3582..5af0386d9b 100644 --- a/usr/src/uts/intel/sys/procfs_isa.h +++ b/usr/src/uts/intel/sys/procfs_isa.h @@ -27,8 +27,6 @@ #ifndef _SYS_PROCFS_ISA_H #define _SYS_PROCFS_ISA_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Instruction Set Architecture specific component of * i386 version @@ -69,7 +67,7 @@ typedef uchar_t instr_t; #define NPRGREG _NGREG #define prgreg_t greg_t #define prgregset_t gregset_t -#define prfpregset fpu +#define prfpregset _fpu #define prfpregset_t fpregset_t #if defined(_SYSCALL32) @@ -87,7 +85,7 @@ typedef uchar_t instr32_t; #define NPRGREG32 _NGREG #define prgreg32_t greg_t #define prgregset32_t gregset_t -#define prfpregset32 fpu +#define prfpregset32 _fpu #define prfpregset32_t fpregset_t #endif #endif /* _SYSCALL32 */ diff --git a/usr/src/uts/intel/sys/regset.h b/usr/src/uts/intel/sys/regset.h index 5436ae0be3..12b8f9e888 100644 --- a/usr/src/uts/intel/sys/regset.h +++ b/usr/src/uts/intel/sys/regset.h @@ -19,6 +19,8 @@ * CDDL HEADER END */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. 
*/ @@ -34,13 +36,12 @@ #if !defined(_ASM) #include #endif +#include #ifdef __cplusplus extern "C" { #endif -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) - /* * The names and offsets defined here should be specified by the * AMD64 ABI suppl. @@ -134,181 +135,16 @@ extern "C" { #endif /* __i386 */ -#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ - -/* - * A gregset_t is defined as an array type for compatibility with the reference - * source. This is important due to differences in the way the C language - * treats arrays and structures as parameters. - */ -#if defined(__amd64) -#define _NGREG 28 -#else -#define _NGREG 19 -#endif -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) #define NGREG _NGREG -#endif #if !defined(_ASM) -#if defined(_LP64) || defined(_I32LPx) -typedef long greg_t; -#else -typedef int greg_t; -#endif - -#if defined(_SYSCALL32) - -typedef int32_t greg32_t; -typedef int64_t greg64_t; - -#endif /* _SYSCALL32 */ - -typedef greg_t gregset_t[_NGREG]; - -#if defined(_SYSCALL32) - -#define _NGREG32 19 -#define _NGREG64 28 - -typedef greg32_t gregset32_t[_NGREG32]; -typedef greg64_t gregset64_t[_NGREG64]; - -#endif /* _SYSCALL32 */ - -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) - -/* - * Floating point definitions. - */ - -/* - * This structure is written to memory by an 'fnsave' instruction - */ -struct fnsave_state { - uint16_t f_fcw; - uint16_t __f_ign0; - uint16_t f_fsw; - uint16_t __f_ign1; - uint16_t f_ftw; - uint16_t __f_ign2; - uint32_t f_eip; - uint16_t f_cs; - uint16_t f_fop; - uint32_t f_dp; - uint16_t f_ds; - uint16_t __f_ign3; - union { - uint16_t fpr_16[5]; /* 80-bits of x87 state */ - } f_st[8]; -}; /* 108 bytes */ - -/* - * This structure is written to memory by an 'fxsave' instruction - * Note the variant behaviour of this instruction between long mode - * and legacy environments! 
- */ -struct fxsave_state { - uint16_t fx_fcw; - uint16_t fx_fsw; - uint16_t fx_fctw; /* compressed tag word */ - uint16_t fx_fop; -#if defined(__amd64) - uint64_t fx_rip; - uint64_t fx_rdp; -#else - uint32_t fx_eip; - uint16_t fx_cs; - uint16_t __fx_ign0; - uint32_t fx_dp; - uint16_t fx_ds; - uint16_t __fx_ign1; -#endif - uint32_t fx_mxcsr; - uint32_t fx_mxcsr_mask; - union { - uint16_t fpr_16[5]; /* 80-bits of x87 state */ - u_longlong_t fpr_mmx; /* 64-bit mmx register */ - uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */ - } fx_st[8]; -#if defined(__amd64) - upad128_t fx_xmm[16]; /* 128-bit registers */ - upad128_t __fx_ign2[6]; -#else - upad128_t fx_xmm[8]; /* 128-bit registers */ - upad128_t __fx_ign2[14]; -#endif -}; /* 512 bytes */ - -/* - * This structure is written to memory by an 'xsave' instruction. - * First 512 byte is compatible with the format of an 'fxsave' area. - */ -struct xsave_state { - struct fxsave_state xs_fxsave; - uint64_t xs_xstate_bv; /* 512 */ - uint64_t xs_rsv_mbz[2]; - uint64_t xs_reserved[5]; - upad128_t xs_ymm[16]; /* avx - 576 */ -}; /* 832 bytes, asserted in fpnoextflt() */ - -#if defined(__amd64) - -typedef struct fpu { - union { - struct fpchip_state { - uint16_t cw; - uint16_t sw; - uint8_t fctw; - uint8_t __fx_rsvd; - uint16_t fop; - uint64_t rip; - uint64_t rdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - union { - uint16_t fpr_16[5]; - upad128_t __fpr_pad; - } st[8]; - upad128_t xmm[16]; - upad128_t __fx_ign2[6]; - uint32_t status; /* sw at exception */ - uint32_t xstatus; /* mxcsr at exception */ - } fpchip_state; - uint32_t f_fpregs[130]; - } fp_reg_set; -} fpregset_t; - -#else /* __i386 */ - -/* - * This definition of the floating point structure is binary - * compatible with the Intel386 psABI definition, and source - * compatible with that specification for x87-style floating point. - * It also allows SSE/SSE2 state to be accessed on machines that - * possess such hardware capabilities. 
- */ -typedef struct fpu { - union { - struct fpchip_state { - uint32_t state[27]; /* 287/387 saved state */ - uint32_t status; /* saved at exception */ - uint32_t mxcsr; /* SSE control and status */ - uint32_t xstatus; /* SSE mxcsr at exception */ - uint32_t __pad[2]; /* align to 128-bits */ - upad128_t xmm[8]; /* %xmm0-%xmm7 */ - } fpchip_state; - struct fp_emul_space { /* for emulator(s) */ - uint8_t fp_emul[246]; - uint8_t fp_epad[2]; - } fp_emul_space; - uint32_t f_fpregs[95]; /* union of the above */ - } fp_reg_set; -} fpregset_t; - +#ifdef __i386 /* * (This structure definition is specified in the i386 ABI supplement) + * It's likely we can just get rid of the struct __old_fpu or maybe + * move it to $SRC/uts/intel/ia32/os/fpu.c which appears to be the + * only place that uses it. See: www.illumos.org/issues/6284 */ typedef struct __old_fpu { union { @@ -327,74 +163,19 @@ typedef struct __old_fpu { } fp_reg_set; long f_wregs[33]; /* saved weitek state */ } __old_fpregset_t; - #endif /* __i386 */ -#if defined(_SYSCALL32) - -/* Kernel view of user i386 fpu structure */ - -typedef struct fpu32 { - union { - struct fpchip32_state { - uint32_t state[27]; /* 287/387 saved state */ - uint32_t status; /* saved at exception */ - uint32_t mxcsr; /* SSE control and status */ - uint32_t xstatus; /* SSE mxcsr at exception */ - uint32_t __pad[2]; /* align to 128-bits */ - uint32_t xmm[8][4]; /* %xmm0-%xmm7 */ - } fpchip_state; - uint32_t f_fpregs[95]; /* union of the above */ - } fp_reg_set; -} fpregset32_t; - -#endif /* _SYSCALL32 */ - -/* - * Kernel's FPU save area - */ -typedef struct { - union _kfpu_u { - struct fxsave_state kfpu_fx; -#if defined(__i386) - struct fnsave_state kfpu_fn; -#endif - struct xsave_state kfpu_xs; - } kfpu_u; - uint32_t kfpu_status; /* saved at #mf exception */ - uint32_t kfpu_xstatus; /* saved at #xm exception */ -} kfpu_t; - #if defined(__amd64) -#define NDEBUGREG 16 +#define _NDEBUGREG 16 #else -#define NDEBUGREG 8 +#define _NDEBUGREG 8 
#endif typedef struct dbregset { - unsigned long debugreg[NDEBUGREG]; + unsigned long debugreg[_NDEBUGREG]; } dbregset_t; -/* - * Structure mcontext defines the complete hardware machine state. - * (This structure is specified in the i386 ABI suppl.) - */ -typedef struct { - gregset_t gregs; /* general register set */ - fpregset_t fpregs; /* floating point register set */ -} mcontext_t; - -#if defined(_SYSCALL32) - -typedef struct { - gregset32_t gregs; /* general register set */ - fpregset32_t fpregs; /* floating point register set */ -} mcontext32_t; - -#endif /* _SYSCALL32 */ - #endif /* _ASM */ -#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ /* * The version of privregs.h that is used on implementations that run on @@ -420,80 +201,6 @@ typedef struct { #include #endif /* __i386 (!_KERNEL && !_XPG4_2 || __EXTENSIONS__) */ -/* - * The following is here for XPG4.2 standards compliance. - * regset.h is included in ucontext.h for the definition of - * mcontext_t, all of which breaks XPG4.2 namespace. - */ - -#if defined(_XPG4_2) && !defined(__EXTENSIONS__) && !defined(_ASM) - -/* - * The following is here for UNIX 95 compliance (XPG Issue 4, Version 2 - * System Interfaces and Headers). The structures included here are identical - * to those visible elsewhere in this header except that the structure - * element names have been changed in accordance with the X/Open namespace - * rules. Specifically, depending on the name and scope, the names have - * been prepended with a single or double underscore (_ or __). See the - * structure definitions in the non-X/Open namespace for more detailed - * comments describing each of these structures. 
- */ - -#if defined(__amd64) - -typedef struct __fpu { - union { - struct __fpchip_state { - uint16_t __fx_cw; - uint16_t __fx_sw; - uint16_t __fx_ctw; - uint16_t __fx_op; - uint64_t __fx_rip; - uint64_t __fx_rdp; - uint32_t __fx_mxcsr; - uint32_t __fx_mxcsr_mask; - union { - uint16_t __fpr_16[5]; - upad128_t __fpr_pad; - } __fx_st[8]; - upad128_t __fx_xmm[16]; - upad128_t __fx_ign2[6]; - uint32_t __status; - uint32_t __xstatus; - } __fpchip_state; - uint32_t __f_fpregs[130]; - } __fp_reg_set; -} fpregset_t; - -#else /* __i386 */ - -typedef struct __fpu { - union { - struct __fpchip_state { - uint32_t __state[27]; /* 287/387 saved state */ - uint32_t __status; /* saved at exception */ - uint32_t __mxcsr; /* SSE control and status */ - uint32_t __xstatus; /* SSE mxcsr at exception */ - uint32_t __pad[2]; /* align to 128-bits */ - upad128_t __xmm[8]; /* %xmm0-%xmm7 */ - } __fpchip_state; - struct __fp_emul_space { /* for emulator(s) */ - uint8_t __fp_emul[246]; - uint8_t __fp_epad[2]; - } __fp_emul_space; - uint32_t __f_fpregs[95]; /* union of the above */ - } __fp_reg_set; -} fpregset_t; - -#endif /* __i386 */ - -typedef struct { - gregset_t __gregs; /* general register set */ - fpregset_t __fpregs; /* floating point register set */ -} mcontext_t; - -#endif /* _XPG4_2 && !__EXTENSIONS__ && !_ASM */ - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/intel/sys/ucontext.h b/usr/src/uts/intel/sys/ucontext.h index acd6ddc99e..5f64fa9393 100644 --- a/usr/src/uts/intel/sys/ucontext.h +++ b/usr/src/uts/intel/sys/ucontext.h @@ -20,6 +20,8 @@ */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,7 +35,7 @@ #include #include -#include +#include #if !defined(_XPG4_2) || defined(__EXTENSIONS__) #include #endif diff --git a/usr/src/uts/sparc/fpu/fpu_simulator.c b/usr/src/uts/sparc/fpu/fpu_simulator.c index aeafdc515d..16393bc9d1 100644 --- a/usr/src/uts/sparc/fpu/fpu_simulator.c +++ b/usr/src/uts/sparc/fpu/fpu_simulator.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/usr/src/uts/sparc/sys/Makefile b/usr/src/uts/sparc/sys/Makefile index 1ae69ac3cc..975639fa06 100644 --- a/usr/src/uts/sparc/sys/Makefile +++ b/usr/src/uts/sparc/sys/Makefile @@ -22,8 +22,6 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# include ../../../Makefile.master @@ -41,6 +39,7 @@ HDRS= \ machlock.h \ machsig.h \ machtypes.h \ + mcontext.h \ old_procfs.h \ pcb.h \ procfs_isa.h \ diff --git a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h index 4d83a2c795..0087877f55 100644 --- a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h +++ b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h @@ -455,7 +455,7 @@ extern enum ftt_type fp_emulator( fp_inst_type *pinst, /* Pointer to FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ struct rwindow *pwindow, /* Pointer to locals and ins. */ - struct fpu *pfpu); /* Pointer to FPU register block. */ + struct _fpu *pfpu); /* Pointer to FPU register block. */ /* * fp_traps handles passing exception conditions to the kernel. 
diff --git a/usr/src/uts/sparc/sys/fpu/fpusystm.h b/usr/src/uts/sparc/sys/fpu/fpusystm.h index 6b013aa52d..cf178fa87f 100644 --- a/usr/src/uts/sparc/sys/fpu/fpusystm.h +++ b/usr/src/uts/sparc/sys/fpu/fpusystm.h @@ -27,8 +27,6 @@ #ifndef _SYS_FPU_FPUSYSTM_H #define _SYS_FPU_FPUSYSTM_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * ISA-dependent FPU interfaces */ @@ -39,7 +37,7 @@ extern "C" { #ifdef _KERNEL -struct fpu; +struct _fpu; struct regs; #if !defined(DEBUG) && !defined(NEED_FPU_EXISTS) diff --git a/usr/src/uts/sparc/sys/mcontext.h b/usr/src/uts/sparc/sys/mcontext.h new file mode 100644 index 0000000000..f35d0c1d22 --- /dev/null +++ b/usr/src/uts/sparc/sys/mcontext.h @@ -0,0 +1,304 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + */ + +/* + * Essential struct definitions for mcontext_t needed by ucontext.h + * These were formerly in regset.h, which now includes this file. 
+ */ + +#ifndef _SYS_MCONTEXT_H +#define _SYS_MCONTEXT_H + +#include + +#if !defined(_ASM) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. + * + * Note that NGREG is really (sizeof (struct regs) / sizeof (greg_t)), + * but that the SPARC V8 ABI defines it absolutely to be 19. + */ +#if defined(__sparcv9) +#define _NGREG 21 +#else /* __sparcv9 */ +#define _NGREG 19 +#endif /* __sparcv9 */ + +#ifndef _ASM + +#if defined(_LP64) || defined(_I32LPx) +typedef long greg_t; +#else +typedef int greg_t; +#endif + +#if defined(_SYSCALL32) + +typedef int32_t greg32_t; +typedef int64_t greg64_t; + +#endif /* _SYSCALL32 */ + +typedef greg_t gregset_t[_NGREG]; + +#if defined(_SYSCALL32) + +#define _NGREG32 19 +#define _NGREG64 21 + +typedef greg32_t gregset32_t[_NGREG32]; +typedef greg64_t gregset64_t[_NGREG64]; + +#endif /* _SYSCALL32 */ + +/* + * Floating point definitions. + */ + +#define _MAXFPQ 16 /* max # of fpu queue entries currently supported */ + +/* + * struct _fq defines the minimal format of a floating point instruction queue + * entry. The size of entries in the floating point queue are implementation + * dependent. The union FQu is guaranteed to be the first field in any ABI + * conformant system implementation. Any additional fields provided by an + * implementation should not be used by applications designed to be ABI conformant.
+ */ + +struct _fpq { + unsigned int *fpq_addr; /* address */ + unsigned int fpq_instr; /* instruction */ +}; + +struct _fq { + union { /* FPU inst/addr queue */ + double whole; + struct _fpq fpq; + } FQu; +}; + +#if defined(_SYSCALL32) + +struct fpq32 { + caddr32_t fpq_addr; /* address */ + uint32_t fpq_instr; /* instruction */ +}; + +struct fq32 { + union { /* FPU inst/addr queue */ + double whole; + struct fpq32 fpq; + } FQu; +}; + +#endif /* _SYSCALL32 */ + +/* + * struct fpu is the floating point processor state. struct fpu is the sum + * total of all possible floating point state which includes the state of + * external floating point hardware, fpa registers, etc..., if it exists. + * + * A floating point instruction queue may or may not be associated with + * the floating point processor state. If a queue does exist, the field + * fpu_q will point to an array of fpu_qcnt entries where each entry is + * fpu_q_entrysize long. fpu_q_entry has a lower bound of sizeof (union FQu) + * and no upper bound. If no floating point queue entries are associated + * with the processor state, fpu_qcnt will be zero and fpu_q will be NULL.
+ */ + +#if defined(__sparcv9) + +struct _fpu { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[32]; /* 32 doubles */ + long double fpu_qregs[16]; /* 16 quads */ + } fpu_fr; + struct _fq *fpu_q; /* ptr to array of FQ entries */ + uint64_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag specifying fpu in use */ +}; + +#else /* __sparcv9 */ + +struct _fpu { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[16]; /* 16 doubles */ + } fpu_fr; + struct _fq *fpu_q; /* ptr to array of FQ entries */ + uint32_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag signifying fpu in use */ +}; + +#endif /* __sparcv9 */ + +typedef struct _fpu fpregset_t; + +#if defined(_SYSCALL32) + +/* Kernel view of user sparcv7/v8 fpu structure */ + +struct fpu32 { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[16]; /* 16 doubles */ + } fpu_fr; + caddr32_t fpu_q; /* ptr to array of FQ entries */ + uint32_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag signifying fpu in use */ +}; + +typedef struct fpu32 fpregset32_t; + +#endif /* _SYSCALL32 */ + +#if defined(_KERNEL) || defined(_KMDB) +/* + * The ABI uses struct fpu, so we use this to describe the kernel's view of the + * fpu. 
+ */ +typedef struct { + union _fpu_fr { /* V9 FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + uint64_t fpu_dregs[32]; /* 32 doubles */ + long double fpu_qregs[16]; /* 16 quads */ + } fpu_fr; + uint64_t fpu_fsr; /* FPU status register */ + uint32_t fpu_fprs; /* fprs register */ + struct _fq *fpu_q; + uint8_t fpu_qcnt; + uint8_t fpu_q_entrysize; + uint8_t fpu_en; /* flag signifying fpu in use */ +} kfpu_t; +#endif /* _KERNEL || _KMDB */ + +/* + * The following structure is for associating extra register state with + * the ucontext structure and is kept within the uc_mcontext filler area. + * + * If (xrs_id == XRS_ID) then the xrs_ptr field is a valid pointer to + * extra register state. The exact format of the extra register state + * pointed to by xrs_ptr is platform-dependent. + * + * Note: a platform may or may not manage extra register state. + */ +typedef struct { + unsigned int xrs_id; /* indicates xrs_ptr validity */ + caddr_t xrs_ptr; /* ptr to extra reg state */ +} xrs_t; + +#define _XRS_ID 0x78727300 /* the string "xrs" */ + +#if defined(_SYSCALL32) + +typedef struct { + uint32_t xrs_id; /* indicates xrs_ptr validity */ + caddr32_t xrs_ptr; /* ptr to extra reg state */ +} xrs32_t; + +#endif /* _SYSCALL32 */ + +#if defined(__sparcv9) + +/* + * Ancillary State Registers + * + * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. + * ASRs 16 through 31 are available to user programs, though the meaning + * and content of these registers is implementation dependent. + */ +typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ + +#endif /* __sparcv9 */ + +/* + * Structure mcontext defines the complete hardware machine state. If + * the field `gwins' is non NULL, it points to a save area for register + * window frames. If `gwins' is NULL, the register windows were saved + * on the user's stack. + * + * The filler of 21 longs is historical (now filler[19] plus the xrs_t + * field). 
The value was selected to provide binary compatibility with + * statically linked ICL binaries. It is in the ABI (do not change). It + * actually appears in the ABI as a single filler of 44 is in the field + * uc_filler of struct ucontext. It is split here so that ucontext.h can + * (hopefully) remain architecture independent. + * + * Note that 2 longs of the filler are used to hold extra register state info. + */ +typedef struct { + gregset_t gregs; /* general register set */ + struct _gwindows *gwins; /* POSSIBLE pointer to register windows */ + fpregset_t fpregs; /* floating point register set */ + xrs_t xrs; /* POSSIBLE extra register state association */ +#if defined(__sparcv9) + asrset_t asrs; /* ancillary registers */ + long filler[4]; /* room for expansion */ +#else /* __sparcv9 */ + long filler[19]; +#endif /* __sparcv9 */ +} mcontext_t; + +#if defined(_SYSCALL32) + +typedef struct { + gregset32_t gregs; /* general register set */ + caddr32_t gwins; /* POSSIBLE pointer to register windows */ + fpregset32_t fpregs; /* floating point register set */ + xrs32_t xrs; /* POSSIBLE extra register state association */ + int32_t filler[19]; +} mcontext32_t; + +#endif /* _SYSCALL32 */ + +#endif /* _ASM */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MCONTEXT_H */ diff --git a/usr/src/uts/sparc/sys/procfs_isa.h b/usr/src/uts/sparc/sys/procfs_isa.h index a1c98655b5..4c9ffec7c1 100644 --- a/usr/src/uts/sparc/sys/procfs_isa.h +++ b/usr/src/uts/sparc/sys/procfs_isa.h @@ -27,8 +27,6 @@ #ifndef _SYS_PROCFS_ISA_H #define _SYS_PROCFS_ISA_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Instruction Set Architecture specific component of * sparc v8/v9 version @@ -169,7 +167,7 @@ typedef struct prfpregset { uint8_t pr_q_entrysize; /* # of bytes per FQ entry */ uint8_t pr_en; /* flag signifying fpu in use */ char pr_pad[13]; /* ensure sizeof(prfpregset)%16 == 0 */ - struct fq pr_q[16]; /* contains the FQ array */ + struct _fq pr_q[16]; /* contains the FQ array */ } 
prfpregset_t; #else typedef struct prfpregset { @@ -182,7 +180,7 @@ typedef struct prfpregset { uint8_t pr_qcnt; /* # of entries in saved FQ */ uint8_t pr_q_entrysize; /* # of bytes per FQ entry */ uint8_t pr_en; /* flag signifying fpu in use */ - struct fq pr_q[32]; /* contains the FQ array */ + struct _fq pr_q[32]; /* contains the FQ array */ } prfpregset_t; #endif /* __sparcv9 */ diff --git a/usr/src/uts/sparc/sys/regset.h b/usr/src/uts/sparc/sys/regset.h index 26e7119a38..cebf5939a3 100644 --- a/usr/src/uts/sparc/sys/regset.h +++ b/usr/src/uts/sparc/sys/regset.h @@ -23,6 +23,8 @@ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,13 +32,12 @@ #ifndef _SYS_REGSET_H #define _SYS_REGSET_H -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */ - #include #if !defined(_ASM) #include #endif +#include #ifdef __cplusplus extern "C" { @@ -46,8 +47,6 @@ extern "C" { * Location of the users' stored registers relative to R0. * Usage is as an index into a gregset_t array or as u.u_ar0[XX]. */ -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) - #if defined(__sparcv9) #define REG_CCR (0) #if defined(_SYSCALL32) @@ -87,53 +86,12 @@ extern "C" { #define REG_SP REG_O6 #define REG_R0 REG_O0 #define REG_R1 REG_O1 -#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ - -/* - * A gregset_t is defined as an array type for compatibility with the reference - * source. This is important due to differences in the way the C language - * treats arrays and structures as parameters. - * - * Note that NGREG is really (sizeof (struct regs) / sizeof (greg_t)), - * but that the SPARC V8 ABI defines it absolutely to be 19. 
- */ -#if defined(__sparcv9) -#define _NGREG 21 -#else /* __sparcv9 */ -#define _NGREG 19 -#endif /* __sparcv9 */ -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) -#define NGREG _NGREG -#endif #ifndef _ASM -#if defined(_LP64) || defined(_I32LPx) -typedef long greg_t; -#else -typedef int greg_t; -#endif - -#if defined(_SYSCALL32) - -typedef int32_t greg32_t; -typedef int64_t greg64_t; - -#endif /* _SYSCALL32 */ - -typedef greg_t gregset_t[_NGREG]; - -#if defined(_SYSCALL32) - -#define _NGREG32 19 -#define _NGREG64 21 - -typedef greg32_t gregset32_t[_NGREG32]; -typedef greg64_t gregset64_t[_NGREG64]; - -#endif /* _SYSCALL32 */ +#define NGREG _NGREG +#define MAXFPQ _MAXFPQ -#if !defined(_XPG4_2) || defined(__EXTENSIONS__) /* * The following structures define how a register window can appear on the * stack. This structure is available (when required) through the `gwins' @@ -143,7 +101,7 @@ typedef greg64_t gregset64_t[_NGREG64]; */ #define SPARC_MAXREGWINDOW 31 /* max windows in SPARC arch. */ -struct rwindow { +struct rwindow { greg_t rw_local[8]; /* locals */ greg_t rw_in[8]; /* ins */ }; @@ -170,7 +128,7 @@ extern void rwindow_32ton(struct rwindow32 *, struct rwindow *); #define rw_fp rw_in[6] /* frame pointer */ #define rw_rtn rw_in[7] /* return address */ -typedef struct gwindows { +typedef struct _gwindows { int wbcnt; greg_t *spbuf[SPARC_MAXREGWINDOW]; struct rwindow wbuf[SPARC_MAXREGWINDOW]; @@ -192,62 +150,6 @@ typedef struct gwindows64 { #endif /* _SYSCALL32 */ - -/* - * Floating point definitions. - */ - -#define MAXFPQ 16 /* max # of fpu queue entries currently supported */ - -/* - * struct fq defines the minimal format of a floating point instruction queue - * entry. The size of entries in the floating point queue are implementation - * dependent. The union FQu is guarenteed to be the first field in any ABI - * conformant system implementation. 
Any additional fields provided by an - * implementation should not be used applications designed to be ABI conformant. - */ - -struct fpq { - unsigned int *fpq_addr; /* address */ - unsigned int fpq_instr; /* instruction */ -}; - -struct fq { - union { /* FPU inst/addr queue */ - double whole; - struct fpq fpq; - } FQu; -}; - -#if defined(_SYSCALL32) - -struct fpq32 { - caddr32_t fpq_addr; /* address */ - uint32_t fpq_instr; /* instruction */ -}; - -struct fq32 { - union { /* FPU inst/addr queue */ - double whole; - struct fpq32 fpq; - } FQu; -}; - -#endif /* _SYSCALL32 */ - -/* - * struct fpu is the floating point processor state. struct fpu is the sum - * total of all possible floating point state which includes the state of - * external floating point hardware, fpa registers, etc..., if it exists. - * - * A floating point instuction queue may or may not be associated with - * the floating point processor state. If a queue does exist, the field - * fpu_q will point to an array of fpu_qcnt entries where each entry is - * fpu_q_entrysize long. fpu_q_entry has a lower bound of sizeof (union FQu) - * and no upper bound. If no floating point queue entries are associated - * with the processor state, fpu_qcnt will be zeo and fpu_q will be NULL. - */ - /* * The following #define's are obsolete and may be removed in a future release. * The corresponding integer types should be used instead (i.e. uint64_t). 
@@ -258,160 +160,9 @@ struct fq32 { #define V9_FPU_FSR_TYPE uint64_t #define V9_FPU_FPRS_TYPE uint32_t -#if defined(__sparcv9) - -struct fpu { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[32]; /* 32 doubles */ - long double fpu_qregs[16]; /* 16 quads */ - } fpu_fr; - struct fq *fpu_q; /* ptr to array of FQ entries */ - uint64_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag specifying fpu in use */ -}; - -#else /* __sparcv9 */ - -struct fpu { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[16]; /* 16 doubles */ - } fpu_fr; - struct fq *fpu_q; /* ptr to array of FQ entries */ - uint32_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag signifying fpu in use */ -}; - -#endif /* __sparcv9 */ - -typedef struct fpu fpregset_t; - -#if defined(_SYSCALL32) - -/* Kernel view of user sparcv7/v8 fpu structure */ - -struct fpu32 { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[16]; /* 16 doubles */ - } fpu_fr; - caddr32_t fpu_q; /* ptr to array of FQ entries */ - uint32_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag signifying fpu in use */ -}; - -typedef struct fpu32 fpregset32_t; - -#endif /* _SYSCALL32 */ +#define XRS_ID _XRS_ID -#if defined(_KERNEL) || defined(_KMDB) -/* - * The ABI uses struct fpu, so we use this to describe the kernel's view of the - * fpu. 
- */ -typedef struct { - union _fpu_fr { /* V9 FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - uint64_t fpu_dregs[32]; /* 32 doubles */ - long double fpu_qregs[16]; /* 16 quads */ - } fpu_fr; - uint64_t fpu_fsr; /* FPU status register */ - uint32_t fpu_fprs; /* fprs register */ - struct fq *fpu_q; - uint8_t fpu_qcnt; - uint8_t fpu_q_entrysize; - uint8_t fpu_en; /* flag signifying fpu in use */ -} kfpu_t; -#endif /* _KERNEL || _KMDB */ - -/* - * The following structure is for associating extra register state with - * the ucontext structure and is kept within the uc_mcontext filler area. - * - * If (xrs_id == XRS_ID) then the xrs_ptr field is a valid pointer to - * extra register state. The exact format of the extra register state - * pointed to by xrs_ptr is platform-dependent. - * - * Note: a platform may or may not manage extra register state. - */ -typedef struct { - unsigned int xrs_id; /* indicates xrs_ptr validity */ - caddr_t xrs_ptr; /* ptr to extra reg state */ -} xrs_t; - -#define XRS_ID 0x78727300 /* the string "xrs" */ - -#if defined(_SYSCALL32) - -typedef struct { - uint32_t xrs_id; /* indicates xrs_ptr validity */ - caddr32_t xrs_ptr; /* ptr to extra reg state */ -} xrs32_t; - -#endif /* _SYSCALL32 */ - -#if defined(__sparcv9) - -/* - * Ancillary State Registers - * - * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. - * ASRs 16 through 31 are available to user programs, though the meaning - * and content of these registers is implementation dependent. - */ -typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ - -#endif /* __sparcv9 */ - -/* - * Structure mcontext defines the complete hardware machine state. If - * the field `gwins' is non NULL, it points to a save area for register - * window frames. If `gwins' is NULL, the register windows were saved - * on the user's stack. - * - * The filler of 21 longs is historical (now filler[19] plus the xrs_t - * field). 
The value was selected to provide binary compatibility with - * statically linked ICL binaries. It is in the ABI (do not change). It - * actually appears in the ABI as a single filler of 44 is in the field - * uc_filler of struct ucontext. It is split here so that ucontext.h can - * (hopefully) remain architecture independent. - * - * Note that 2 longs of the filler are used to hold extra register state info. - */ -typedef struct { - gregset_t gregs; /* general register set */ - gwindows_t *gwins; /* POSSIBLE pointer to register windows */ - fpregset_t fpregs; /* floating point register set */ - xrs_t xrs; /* POSSIBLE extra register state association */ -#if defined(__sparcv9) - asrset_t asrs; /* ancillary registers */ - long filler[4]; /* room for expansion */ -#else /* __sparcv9 */ - long filler[19]; -#endif /* __sparcv9 */ -} mcontext_t; - -#if defined(_SYSCALL32) - -typedef struct { - gregset32_t gregs; /* general register set */ - caddr32_t gwins; /* POSSIBLE pointer to register windows */ - fpregset32_t fpregs; /* floating point register set */ - xrs32_t xrs; /* POSSIBLE extra register state association */ - int32_t filler[19]; -} mcontext32_t; - -#endif /* _SYSCALL32 */ - -#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ -#endif /* _ASM */ +#endif /* !_ASM */ /* * The version of privregs.h that is used on implementations that run @@ -428,7 +179,7 @@ typedef struct { * 'struct regs' to match the content of a 32-bit core file, or a ucontext_t. * * Note that the ucontext_t actually describes the general registers in - * terms of the gregset_t data type, as described in this file. Note also + * terms of the gregset_t data type, as described in mcontext.h. Note also * that the core file content is defined by core(4) in terms of data types * defined by procfs -- see proc(4). */ @@ -438,164 +189,6 @@ typedef struct { #endif /* !_KERNEL && !_XPG4_2 || __EXTENSIONS__ */ #endif /* __sparcv9 */ -/* - * The following is here for XPG4.2 standards compliance.
- * regset.h is included in ucontext.h for the definition of - * mcontext_t, all of which breaks XPG4.2 namespace. - */ - -#if defined(_XPG4_2) && !defined(__EXTENSIONS__) -/* - * The following is here for UNIX 95 compliance (XPG Issue 4, Version 2 - * System Interfaces and Headers. The structures included here are identical - * to those visible elsewhere in this header except that the structure - * element names have been changed in accordance with the X/Open namespace - * rules. Specifically, depending on the name and scope, the names have - * been prepended with a single or double underscore (_ or __). See the - * structure definitions in the non-X/Open namespace for more detailed - * comments describing each of these structures. - */ - -#ifndef _ASM - -/* - * The following structures define how a register window can appear on the - * stack. - */ -#define _SPARC_MAXREGWINDOW 31 /* max windows in SPARC arch. */ - -struct __rwindow { - greg_t __rw_local[8]; /* locals */ - greg_t __rw_in[8]; /* ins */ -}; - -#define __rw_fp __rw_in[6] /* frame pointer */ -#define __rw_rtn __rw_in[7] /* return address */ - -struct __gwindows { - int __wbcnt; - greg_t *__spbuf[_SPARC_MAXREGWINDOW]; - struct __rwindow __wbuf[_SPARC_MAXREGWINDOW]; -}; - -typedef struct __gwindows gwindows_t; - -/* - * The fq structure defines the minimal format of a floating point - * instruction queue entry. - */ - -struct __fpq { - unsigned int *__fpq_addr; /* address */ - unsigned int __fpq_instr; /* instruction */ -}; - -struct __fq { - union { /* FPU inst/addr queue */ - double __whole; - struct __fpq __fpq; - } _FQu; -}; - -/* - * The fpu structure is the floating point processor state. - */ - -/* - * The following #define's are obsolete and may be removed in a future release. - * The corresponding integer types should be used instead (i.e. uint64_t). 
- */ -#define _FPU_REGS_TYPE uint32_t -#define _FPU_DREGS_TYPE uint64_t -#define _V7_FPU_FSR_TYPE uint32_t -#define _V9_FPU_FSR_TYPE uint64_t -#define _V9_FPU_FPRS_TYPE uint32_t - -#if defined(__sparcv9) - -/* - * SPARC Version 9 floating point - */ - -struct __fpu { - union { /* FPU floating point regs */ - uint32_t __fpu_regs[32]; /* 32 singles */ - double __fpu_dregs[32]; /* 32 doubles */ - long double __fpu_qregs[16]; /* 16 quads */ - } __fpu_fr; - struct __fq *__fpu_q; /* ptr to array of FQ entries */ - uint64_t __fpu_fsr; /* FPU status register */ - uint8_t __fpu_qcnt; /* # of entries in saved FQ */ - uint8_t __fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t __fpu_en; /* flag signifying fpu in use */ -}; - -#else /* __sparcv9 */ - -/* - * SPARC Version 7 and 8 floating point - */ - -struct __fpu { - union { /* FPU floating point regs */ - uint32_t __fpu_regs[32]; /* 32 singles */ - double __fpu_dregs[16]; /* 16 doubles */ - } __fpu_fr; - struct __fq *__fpu_q; /* ptr to array of FQ entries */ - uint32_t __fpu_fsr; /* FPU status register */ - uint8_t __fpu_qcnt; /* # of entries in saved FQ */ - uint8_t __fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t __fpu_en; /* flag signifying fpu in use */ -}; - -#endif /* __sparcv9 */ - -typedef struct __fpu fpregset_t; - -/* - * The xrs_t structure is for associating extra register state with - * the ucontext structure and is kept within the uc_mcontext filler area. - */ -typedef struct { - unsigned int __xrs_id; /* indicates xrs_ptr validity */ - caddr_t __xrs_ptr; /* ptr to extra reg state */ -} xrs_t; - -#define _XRS_ID 0x78727300 /* the string "xrs" */ - -#if defined(__sparcv9) - -/* - * Ancillary State Registers - * - * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. - * ASRs 16 through 31 are available to user programs, though the meaning - * and content of these registers is implementation dependent. 
- */ -typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ - -#endif /* __sparcv9 */ - -/* - * Structure mcontext defines the complete hardware machine state. - */ -typedef struct { - gregset_t __gregs; /* general register set */ - gwindows_t *__gwins; /* POSSIBLE pointer to register windows */ - fpregset_t __fpregs; /* floating point register set */ - xrs_t __xrs; /* POSSIBLE extra register state association */ -#if defined(__sparcv9) - asrset_t __asrs; /* ancillary registers */ - long __filler[4]; /* room for expansion */ -#else /* __sparcv9 */ - long __filler[19]; -#endif /* __sparcv9 */ -} mcontext_t; - -#endif /* _ASM */ -#endif /* defined(_XPG4_2) && !defined(__EXTENSIONS__) */ - - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/sparc/sys/ucontext.h b/usr/src/uts/sparc/sys/ucontext.h index e0cfea55bd..35eff287af 100644 --- a/usr/src/uts/sparc/sys/ucontext.h +++ b/usr/src/uts/sparc/sys/ucontext.h @@ -20,6 +20,8 @@ */ /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -33,7 +35,7 @@ #include #include -#include +#include #if !defined(_XPG4_2) || defined(__EXTENSIONS__) #include #endif @@ -105,7 +107,7 @@ typedef struct ucontext32 { #ifdef _KERNEL extern void ucontext_32ton(const ucontext32_t *, ucontext_t *, - const struct fq32 *, struct fq *); + const struct fq32 *, struct _fq *); extern void fpuregset_nto32(const fpregset_t *, fpregset32_t *, struct fq32 *); #endif diff --git a/usr/src/uts/sparc/syscall/getcontext.c b/usr/src/uts/sparc/syscall/getcontext.c index 437eef5e1a..c9adb54b86 100644 --- a/usr/src/uts/sparc/syscall/getcontext.c +++ b/usr/src/uts/sparc/syscall/getcontext.c @@ -176,7 +176,7 @@ int getsetcontext(int flag, void *arg) { ucontext_t uc; - struct fq fpu_q[MAXFPQ]; /* to hold floating queue */ + struct _fq fpu_q[MAXFPQ]; /* to hold floating queue */ fpregset_t *fpp; gwindows_t *gwin = NULL; /* to hold windows */ caddr_t xregs = NULL; @@ -214,7 +214,7 @@ getsetcontext(int flag, void *arg) * a later setcontext(2). */ uc.uc_mcontext.fpregs.fpu_qcnt = 0; - uc.uc_mcontext.fpregs.fpu_q = (struct fq *)NULL; + uc.uc_mcontext.fpregs.fpu_q = (struct _fq *)NULL; if (copyout(&uc, arg, sizeof (ucontext_t))) return (set_errno(EFAULT)); @@ -255,7 +255,7 @@ getsetcontext(int flag, void *arg) if ((fpp->fpu_q) && (fpp->fpu_qcnt)) { if (fpp->fpu_qcnt > MAXFPQ || fpp->fpu_q_entrysize <= 0 || - fpp->fpu_q_entrysize > sizeof (struct fq)) + fpp->fpu_q_entrysize > sizeof (struct _fq)) return (set_errno(EINVAL)); if (copyin(fpp->fpu_q, fpu_q, fpp->fpu_qcnt * fpp->fpu_q_entrysize)) @@ -424,7 +424,7 @@ getsetcontext32(int flag, void *arg) { ucontext32_t uc; ucontext_t ucnat; - struct fq fpu_qnat[MAXFPQ]; /* to hold "native" floating queue */ + struct _fq fpu_qnat[MAXFPQ]; /* to hold "native" floating queue */ struct fq32 fpu_q[MAXFPQ]; /* to hold 32 bit floating queue */ fpregset32_t *fpp; gwindows32_t *gwin = NULL; /* to hold windows */ diff --git a/usr/src/uts/sparc/v7/sys/machpcb.h b/usr/src/uts/sparc/v7/sys/machpcb.h index 
a964018519..f0b76ee7a6 100644 --- a/usr/src/uts/sparc/v7/sys/machpcb.h +++ b/usr/src/uts/sparc/v7/sys/machpcb.h @@ -26,8 +26,6 @@ #ifndef _SYS_MACHPCB_H #define _SYS_MACHPCB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -71,8 +69,8 @@ typedef struct machpcb { int mpcb_uwm; /* user window mask */ int mpcb_swm; /* shared user/kernel window mask */ int mpcb_wbcnt; /* number of saved windows in pcb_wbuf */ - struct fpu mpcb_fpu; /* fpu state */ - struct fq mpcb_fpu_q[MAXFPQ]; /* fpu exception queue */ + struct _fpu mpcb_fpu; /* fpu state */ + struct _fq mpcb_fpu_q[_MAXFPQ]; /* fpu exception queue */ int mpcb_flags; /* various state flags */ int mpcb_wocnt; /* window overflow count */ int mpcb_wucnt; /* window underflow count */ diff --git a/usr/src/uts/sparc/v9/fpu/fpu.c b/usr/src/uts/sparc/v9/fpu/fpu.c index a341eb120d..a84612e030 100644 --- a/usr/src/uts/sparc/v9/fpu/fpu.c +++ b/usr/src/uts/sparc/v9/fpu/fpu.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -291,7 +289,7 @@ fp_disabled(struct regs *rp) } if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc, rp, (ulong_t *)rp->r_sp, fp)) { - fp->fpu_q_entrysize = sizeof (struct fpq); + fp->fpu_q_entrysize = sizeof (struct _fpq); fp_traps(&fpsd, ftt, rp); } } @@ -320,7 +318,7 @@ void fp_runq(struct regs *rp) { kfpu_t *fp = lwptofpu(curthread->t_lwp); - struct fq *fqp = fp->fpu_q; + struct _fq *fqp = fp->fpu_q; fp_simd_type fpsd; uint64_t gsr = get_gsr(fp); @@ -345,7 +343,7 @@ fp_runq(struct regs *rp) */ if (fqp != fp->fpu_q) { int i; - struct fq *fqdp; + struct _fq *fqdp; /* * We need to normalize the floating queue so @@ -359,7 +357,7 @@ fp_runq(struct regs *rp) } fqp = fp->fpu_q; } - fp->fpu_q_entrysize = sizeof (struct fpq); + fp->fpu_q_entrysize = sizeof (struct _fpq); /* * fpu_simulator uses the fp registers directly but it @@ -467,14 +465,14 @@ fp_precise(struct regs *rp) * problem for a restorecontext of a v8 fp 
queue on a * v9 system, which seems like the .000000001% case (on v9)! */ - struct fpq *pfpq = &fp->fpu_q->FQu.fpq; + struct _fpq *pfpq = &fp->fpu_q->FQu.fpq; fp_simd_type fpsd; int fptrap; pfpq->fpq_addr = (uint_t *)rp->r_pc; pfpq->fpq_instr = kluge.i; fp->fpu_qcnt = 1; - fp->fpu_q_entrysize = sizeof (struct fpq); + fp->fpu_q_entrysize = sizeof (struct _fpq); kpreempt_disable(); (void) flush_user_windows_to_stack(NULL); diff --git a/usr/src/uts/sparc/v9/os/v9dep.c b/usr/src/uts/sparc/v9/os/v9dep.c index 751837788e..f54dab4f5d 100644 --- a/usr/src/uts/sparc/v9/os/v9dep.c +++ b/usr/src/uts/sparc/v9/os/v9dep.c @@ -73,7 +73,7 @@ static uint_t mkpsr(uint64_t tstate, uint32_t fprs); #ifdef _SYSCALL32_IMPL static void fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, - const struct fq32 *sfq, struct fq *dfq); + const struct fq32 *sfq, struct _fq *dfq); #endif /* _SYSCALL32_IMPL */ /* @@ -122,7 +122,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp) */ if (fp->fpu_qcnt > MAXFPQ) /* plug security holes */ fp->fpu_qcnt = MAXFPQ; - fp->fpu_q_entrysize = sizeof (struct fq); + fp->fpu_q_entrysize = sizeof (struct _fq); /* * For v9 kernel, copy all of the fp regs. 
@@ -1098,7 +1098,7 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) sulword_noerr(&fp->uc.uc_mcontext.gwins, (ulong_t)NULL); if (fpq_size != 0) { - struct fq *fqp = (struct fq *)sp; + struct _fq *fqp = (struct _fq *)sp; sulword_noerr(&fp->uc.uc_mcontext.fpregs.fpu_q, (ulong_t)fqp); copyout_noerr(mpcb->mpcb_fpu_q, fqp, fpq_size); @@ -1730,7 +1730,7 @@ fpuregset_nto32(const fpregset_t *src, fpregset32_t *dest, struct fq32 *dfq) dest->fpu_en = src->fpu_en; if ((src->fpu_qcnt) && (dfq != NULL)) { - struct fq *sfq = src->fpu_q; + struct _fq *sfq = src->fpu_q; for (i = 0; i < src->fpu_qcnt; i++, dfq++, sfq++) { dfq->FQu.fpq.fpq_addr = (caddr32_t)(uintptr_t)sfq->FQu.fpq.fpq_addr; @@ -1748,7 +1748,7 @@ fpuregset_nto32(const fpregset_t *src, fpregset32_t *dest, struct fq32 *dfq) */ static void fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, - const struct fq32 *sfq, struct fq *dfq) + const struct fq32 *sfq, struct _fq *dfq) { int i; @@ -1758,7 +1758,7 @@ fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, dest->fpu_q = dfq; dest->fpu_fsr = (uint64_t)src->fpu_fsr; if ((dest->fpu_qcnt = src->fpu_qcnt) > 0) - dest->fpu_q_entrysize = sizeof (struct fpq); + dest->fpu_q_entrysize = sizeof (struct _fpq); else dest->fpu_q_entrysize = 0; dest->fpu_en = src->fpu_en; @@ -1774,7 +1774,7 @@ fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, void ucontext_32ton(const ucontext32_t *src, ucontext_t *dest, - const struct fq32 *sfq, struct fq *dfq) + const struct fq32 *sfq, struct _fq *dfq) { int i; diff --git a/usr/src/uts/sparc/v9/sys/machpcb.h b/usr/src/uts/sparc/v9/sys/machpcb.h index 298dae5100..e4b1972d8a 100644 --- a/usr/src/uts/sparc/v9/sys/machpcb.h +++ b/usr/src/uts/sparc/v9/sys/machpcb.h @@ -72,7 +72,7 @@ typedef struct machpcb { int mpcb_wbcnt; /* number of saved windows in pcb_wbuf */ uint_t mpcb_wstate; /* per-lwp %wstate */ kfpu_t *mpcb_fpu; /* fpu state */ - struct fq mpcb_fpu_q[MAXFPQ]; /* fpu exception queue */ + struct _fq 
mpcb_fpu_q[_MAXFPQ]; /* fpu exception queue */ caddr_t mpcb_illexcaddr; /* address of last illegal instruction */ uint_t mpcb_illexcinsn; /* last illegal instruction */ uint_t mpcb_illexccnt; /* count of illegal instruction attempts */ diff --git a/usr/src/uts/sun4/os/trap.c b/usr/src/uts/sun4/os/trap.c index 654b83c953..094620625a 100644 --- a/usr/src/uts/sun4/os/trap.c +++ b/usr/src/uts/sun4/os/trap.c @@ -1366,7 +1366,7 @@ fpu_trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t code) int mstate; char *badaddr; kfpu_t *fp; - struct fpq *pfpq; + struct _fpq *pfpq; uint32_t inst; utrap_handler_t *utrapp; @@ -1431,7 +1431,7 @@ fpu_trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t code) pfpq->fpq_addr = (uint32_t *)rp->r_pc; pfpq->fpq_instr = inst; fp->fpu_qcnt = 1; - fp->fpu_q_entrysize = sizeof (struct fpq); + fp->fpu_q_entrysize = sizeof (struct _fpq); #ifdef SF_V9_TABLE_28 /* * Spitfire and blackbird followed the SPARC V9 manual -- cgit v1.2.3 From 9a686fbc186e8e2a64e9a5094d44c7d6fa0ea167 Mon Sep 17 00:00:00 2001 From: Paul Dagnelie Date: Sun, 18 Oct 2015 09:53:48 -0700 Subject: 6328 Fix cstyle errors in zfs codebase Reviewed by: Matthew Ahrens Reviewed by: Alex Reece Reviewed by: Richard Elling Reviewed by: Jorgen Lundman Approved by: Robert Mustacchi --- usr/src/cmd/zoneadm/zoneadm.c | 12 +++++++----- usr/src/cmd/zoneadmd/vplat.c | 4 +++- usr/src/cmd/zpool/zpool_main.c | 3 ++- usr/src/common/net/wanboot/p12aux.h | 9 ++++++--- usr/src/common/zfs/zfeature_common.c | 3 ++- usr/src/grub/grub-0.97/stage2/fsys_zfs.c | 4 ++-- usr/src/lib/libzfs/common/libzfs_iter.c | 5 +++-- usr/src/lib/libzfs/common/libzfs_pool.c | 5 +++-- usr/src/lib/libzfs/common/libzfs_util.c | 5 +++-- usr/src/lib/libzpool/common/kernel.c | 4 ++-- usr/src/uts/common/fs/zfs/dmu.c | 9 +++++---- usr/src/uts/common/fs/zfs/dsl_dataset.c | 2 +- usr/src/uts/common/fs/zfs/space_reftree.c | 4 ++-- usr/src/uts/common/fs/zfs/sys/zrlock.h | 9 +++------ usr/src/uts/common/fs/zfs/vdev_label.c | 
6 +++--- usr/src/uts/common/fs/zfs/zap_leaf.c | 4 ++-- usr/src/uts/common/fs/zfs/zfeature.c | 6 ++++-- usr/src/uts/common/fs/zfs/zfs_dir.c | 4 ++-- usr/src/uts/common/fs/zfs/zfs_ioctl.c | 2 +- usr/src/uts/common/fs/zfs/zfs_log.c | 13 +++++++------ usr/src/uts/common/fs/zfs/zfs_replay.c | 4 ++-- usr/src/uts/common/fs/zfs/zfs_vnops.c | 6 +++--- usr/src/uts/common/fs/zfs/zio.c | 6 +++--- usr/src/uts/common/fs/zfs/zio_checksum.c | 4 ++-- usr/src/uts/common/fs/zfs/zrlock.c | 8 ++------ 25 files changed, 75 insertions(+), 66 deletions(-) (limited to 'usr/src') diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 6d80fcd8c3..21bc9248f4 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. */ /* @@ -3120,7 +3121,7 @@ done: static void warn_dev_match(zone_dochandle_t s_handle, char *source_zone, - zone_dochandle_t t_handle, char *target_zone) + zone_dochandle_t t_handle, char *target_zone) { int err; struct zone_devtab s_devtab; @@ -3242,7 +3243,7 @@ print_fs_warnings(struct zone_fstab *s_fstab, struct zone_fstab *t_fstab) static void warn_fs_match(zone_dochandle_t s_handle, char *source_zone, - zone_dochandle_t t_handle, char *target_zone) + zone_dochandle_t t_handle, char *target_zone) { int err; struct zone_fstab s_fstab; @@ -3286,7 +3287,7 @@ warn_fs_match(zone_dochandle_t s_handle, char *source_zone, */ static void warn_ip_match(zone_dochandle_t s_handle, char *source_zone, - zone_dochandle_t t_handle, char *target_zone) + zone_dochandle_t t_handle, char *target_zone) { int err; struct zone_nwiftab s_nwiftab; @@ -3341,7 +3342,7 @@ warn_ip_match(zone_dochandle_t s_handle, char *source_zone, static void warn_dataset_match(zone_dochandle_t s_handle, char *source, - zone_dochandle_t t_handle, char *target) + 
zone_dochandle_t t_handle, char *target) { int err; struct zone_dstab s_dstab; @@ -3505,7 +3506,8 @@ copy_zone(char *src, char *dst) /* ARGSUSED */ int -zfm_print(const struct mnttab *p, void *r) { +zfm_print(const struct mnttab *p, void *r) +{ zerror(" %s\n", p->mnt_mountp); return (0); } diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index b9954b81b3..437385ee09 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, Joyent Inc. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. */ /* @@ -4292,7 +4293,8 @@ remove_mlps(zlog_t *zlogp, zoneid_t zoneid) } int -prtmount(const struct mnttab *fs, void *x) { +prtmount(const struct mnttab *fs, void *x) +{ zerror((zlog_t *)x, B_FALSE, " %s", fs->mnt_mountp); return (0); } diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 982057d1b5..18af5a2763 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -192,7 +192,8 @@ static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; static const char * -get_usage(zpool_help_t idx) { +get_usage(zpool_help_t idx) +{ switch (idx) { case HELP_ADD: return (gettext("\tadd [-fn] ...\n")); diff --git a/usr/src/common/net/wanboot/p12aux.h b/usr/src/common/net/wanboot/p12aux.h index 76c71c9cb6..be07683c37 100644 --- a/usr/src/common/net/wanboot/p12aux.h +++ b/usr/src/common/net/wanboot/p12aux.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2015 by Delphix. All rights reserved. + */ + #ifndef _P12AUX_H #define _P12AUX_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #ifdef __cplusplus @@ -42,7 +44,8 @@ extern "C" { * * My apologies. 
*/ -DECLARE_STACK_OF(EVP_PKEY) +/* LINTED */ +DECLARE_STACK_OF(EVP_PKEY); #define sk_EVP_PKEY_new_null() SKM_sk_new_null(EVP_PKEY) #define sk_EVP_PKEY_free(st) SKM_sk_free(EVP_PKEY, (st)) diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c index f75894b44d..a001255289 100644 --- a/usr/src/common/zfs/zfeature_common.c +++ b/usr/src/common/zfs/zfeature_common.c @@ -118,7 +118,8 @@ zfeature_lookup_name(const char *name, spa_feature_t *res) } boolean_t -zfeature_depends_on(spa_feature_t fid, spa_feature_t check) { +zfeature_depends_on(spa_feature_t fid, spa_feature_t check) +{ zfeature_info_t *feature = &spa_feature_table[fid]; for (int i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) { diff --git a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c index 8c0d137e42..f9bc6fda49 100644 --- a/usr/src/grub/grub-0.97/stage2/fsys_zfs.c +++ b/usr/src/grub/grub-0.97/stage2/fsys_zfs.c @@ -567,7 +567,7 @@ dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack) */ static int mzap_lookup(mzap_phys_t *zapobj, int objsize, const char *name, - uint64_t *value) + uint64_t *value) { int i, chunks; mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk; @@ -855,7 +855,7 @@ zap_iterate(dnode_phys_t *zap_dnode, zap_cb_t *cb, void *arg, char *stack) */ static int dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf, - char *stack) + char *stack) { uint64_t blkid, blksz; /* the block id this object dnode is in */ int epbs; /* shift of number of dnodes in a block */ diff --git a/usr/src/lib/libzfs/common/libzfs_iter.c b/usr/src/lib/libzfs/common/libzfs_iter.c index 5fdfe1d591..466c89757c 100644 --- a/usr/src/lib/libzfs/common/libzfs_iter.c +++ b/usr/src/lib/libzfs/common/libzfs_iter.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. 
All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ @@ -308,7 +308,8 @@ typedef struct { } snapspec_arg_t; static int -snapspec_cb(zfs_handle_t *zhp, void *arg) { +snapspec_cb(zfs_handle_t *zhp, void *arg) +{ snapspec_arg_t *ssa = arg; char *shortsnapname; int err = 0; diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index 0cc3ce4e58..3c99295179 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -22,7 +22,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ @@ -1855,7 +1855,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) * and the like. */ static int -ctd_check_path(char *str) { +ctd_check_path(char *str) +{ /* * If it starts with a slash, check the last component. */ diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index 507a72ce02..043816d35e 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. - * Copyright (c) 2011, 2014 by Delphix. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ /* @@ -1518,7 +1518,8 @@ zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, * and bs are undefined. 
*/ int -zfs_get_hole_count(const char *path, uint64_t *count, uint64_t *bs) { +zfs_get_hole_count(const char *path, uint64_t *count, uint64_t *bs) +{ int fd, err; struct stat64 ss; uint64_t fill; diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index a74276e95e..9b8a35476c 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ @@ -500,7 +500,7 @@ vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, /*ARGSUSED*/ int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, - int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) + int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) { ssize_t iolen, split; diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 5b24268af1..ceb08e227f 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -1670,7 +1670,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, - dmu_tx_t *tx) + dmu_tx_t *tx) { dnode_t *dn; int err; @@ -1685,7 +1685,7 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, - dmu_tx_t *tx) + dmu_tx_t *tx) { dnode_t *dn; @@ -1705,7 +1705,7 @@ dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, - dmu_tx_t *tx) + dmu_tx_t *tx) { dnode_t *dn; @@ -1873,7 +1873,8 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) * ID and wait 
for that to be synced. */ int -dmu_object_wait_synced(objset_t *os, uint64_t object) { +dmu_object_wait_synced(objset_t *os, uint64_t object) +{ dnode_t *dn; int error, i; diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index b06369ec13..31dc0df041 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -3449,7 +3449,7 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, */ boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, - uint64_t earlier_txg) + uint64_t earlier_txg) { dsl_pool_t *dp = later->ds_dir->dd_pool; int error; diff --git a/usr/src/uts/common/fs/zfs/space_reftree.c b/usr/src/uts/common/fs/zfs/space_reftree.c index a508092c53..3d990596f7 100644 --- a/usr/src/uts/common/fs/zfs/space_reftree.c +++ b/usr/src/uts/common/fs/zfs/space_reftree.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #include @@ -103,7 +103,7 @@ space_reftree_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) void space_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end, - int64_t refcnt) + int64_t refcnt) { space_reftree_add_node(t, start, refcnt); space_reftree_add_node(t, end, -refcnt); diff --git a/usr/src/uts/common/fs/zfs/sys/zrlock.h b/usr/src/uts/common/fs/zfs/sys/zrlock.h index dcd63f7b5b..b6eba1a18f 100644 --- a/usr/src/uts/common/fs/zfs/sys/zrlock.h +++ b/usr/src/uts/common/fs/zfs/sys/zrlock.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. 
*/ #ifndef _SYS_ZRLOCK_H @@ -44,12 +45,8 @@ typedef struct zrlock { extern void zrl_init(zrlock_t *); extern void zrl_destroy(zrlock_t *); -#ifdef ZFS_DEBUG -#define zrl_add(_z) zrl_add_debug((_z), __func__) -extern void zrl_add_debug(zrlock_t *, const char *); -#else -extern void zrl_add(zrlock_t *); -#endif +#define zrl_add(_z) zrl_add_impl((_z), __func__) +extern void zrl_add_impl(zrlock_t *, const char *); extern void zrl_remove(zrlock_t *); extern int zrl_tryenter(zrlock_t *); extern void zrl_exit(zrlock_t *); diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 7bbd7f2bde..c9b0641cfd 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ /* @@ -179,7 +179,7 @@ vdev_label_number(uint64_t psize, uint64_t offset) static void vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, - uint64_t size, zio_done_func_t *done, void *private, int flags) + uint64_t size, zio_done_func_t *done, void *private, int flags) { ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); @@ -193,7 +193,7 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, static void vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, - uint64_t size, zio_done_func_t *done, void *private, int flags) + uint64_t size, zio_done_func_t *done, void *private, int flags) { ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == diff --git a/usr/src/uts/common/fs/zfs/zap_leaf.c b/usr/src/uts/common/fs/zfs/zap_leaf.c index 96358f7bd8..c8c3660756 100644 --- a/usr/src/uts/common/fs/zfs/zap_leaf.c +++ 
b/usr/src/uts/common/fs/zfs/zap_leaf.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ /* @@ -535,7 +535,7 @@ zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen, int zap_entry_update(zap_entry_handle_t *zeh, - uint8_t integer_size, uint64_t num_integers, const void *buf) + uint8_t integer_size, uint64_t num_integers, const void *buf) { int delta_chunks; zap_leaf_t *l = zeh->zeh_leaf; diff --git a/usr/src/uts/common/fs/zfs/zfeature.c b/usr/src/uts/common/fs/zfs/zfeature.c index 1833e1e270..35ce827979 100644 --- a/usr/src/uts/common/fs/zfs/zfeature.c +++ b/usr/src/uts/common/fs/zfs/zfeature.c @@ -269,7 +269,8 @@ feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature, static int -feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) { +feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) +{ uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj; ASSERT(zfeature_depends_on(feature->fi_feature, @@ -489,7 +490,8 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid) * Returns B_FALSE otherwise (i.e. if the feature is not enabled). */ boolean_t -spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) { +spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) +{ int err; ASSERT(VALID_FEATURE_FID(fid)); diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c index bd7424b55b..c2f876ecfa 100644 --- a/usr/src/uts/common/fs/zfs/zfs_dir.c +++ b/usr/src/uts/common/fs/zfs/zfs_dir.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 
*/ #include @@ -801,7 +801,7 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, */ int zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, - boolean_t *unlinkedp) + boolean_t *unlinkedp) { znode_t *dzp = dl->dl_dzp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index f813116e41..c863cbd399 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -5544,7 +5544,7 @@ zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) static void zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, - zfs_secpolicy_func_t *secpolicy) + zfs_secpolicy_func_t *secpolicy) { zfs_ioctl_register_legacy(ioc, func, secpolicy, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); diff --git a/usr/src/uts/common/fs/zfs/zfs_log.c b/usr/src/uts/common/fs/zfs/zfs_log.c index 47d32a45c3..4fb57e9c22 100644 --- a/usr/src/uts/common/fs/zfs/zfs_log.c +++ b/usr/src/uts/common/fs/zfs/zfs_log.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. 
*/ #include @@ -343,7 +344,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, */ void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, char *name, uint64_t foid) + znode_t *dzp, char *name, uint64_t foid) { itx_t *itx; lr_remove_t *lr; @@ -367,7 +368,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, */ void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *dzp, znode_t *zp, char *name) + znode_t *dzp, znode_t *zp, char *name) { itx_t *itx; lr_link_t *lr; @@ -422,7 +423,7 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, */ void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, - znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) + znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) { itx_t *itx; lr_rename_t *lr; @@ -450,7 +451,7 @@ ssize_t zfs_immediate_write_sz = 32768; void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, offset_t off, ssize_t resid, int ioflag) + znode_t *zp, offset_t off, ssize_t resid, int ioflag) { itx_wr_state_t write_state; boolean_t slogging; @@ -527,7 +528,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, */ void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, uint64_t off, uint64_t len) + znode_t *zp, uint64_t off, uint64_t len) { itx_t *itx; lr_truncate_t *lr; @@ -550,7 +551,7 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, */ void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, - znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) + znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) { itx_t *itx; lr_setattr_t *lr; diff --git a/usr/src/uts/common/fs/zfs/zfs_replay.c b/usr/src/uts/common/fs/zfs/zfs_replay.c index 3f98aaed79..c4223079d0 100644 --- a/usr/src/uts/common/fs/zfs/zfs_replay.c +++ b/usr/src/uts/common/fs/zfs/zfs_replay.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 
2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #include @@ -55,7 +55,7 @@ static void zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, - uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) + uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) { bzero(vap, sizeof (*vap)); vap->va_mask = (uint_t)mask; diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index fe740a5d28..49b1587640 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. */ @@ -4137,7 +4137,7 @@ top: /* ARGSUSED */ static int zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, - size_t *lenp, int flags, cred_t *cr) + size_t *lenp, int flags, cred_t *cr) { pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); return (0); @@ -4163,7 +4163,7 @@ zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, /* ARGSUSED */ static int zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, - size_t *lenp, int flags, cred_t *cr) + size_t *lenp, int flags, cred_t *cr) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 7fa795ea8c..e129247936 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -267,7 +267,7 @@ zio_data_buf_free(void *buf, size_t size) */ static void zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, - zio_transform_func_t *transform) + zio_transform_func_t *transform) { zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), 
KM_SLEEP); @@ -994,8 +994,8 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, zio_t * zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, - int type, zio_priority_t priority, enum zio_flag flags, - zio_done_func_t *done, void *private) + int type, zio_priority_t priority, enum zio_flag flags, + zio_done_func_t *done, void *private) { zio_t *zio; diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index b471ad9047..4bef6a3e11 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. */ @@ -245,7 +245,7 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) */ void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, - void *data, uint64_t size) + void *data, uint64_t size) { blkptr_t *bp = zio->io_bp; uint64_t offset = zio->io_offset; diff --git a/usr/src/uts/common/fs/zfs/zrlock.c b/usr/src/uts/common/fs/zfs/zrlock.c index 22151843e0..7f6beeed61 100644 --- a/usr/src/uts/common/fs/zfs/zrlock.c +++ b/usr/src/uts/common/fs/zfs/zrlock.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright (c) 2014, 2015 by Delphix. All rights reserved. 
*/ /* @@ -69,11 +69,7 @@ zrl_destroy(zrlock_t *zrl) } void -#ifdef ZFS_DEBUG -zrl_add_debug(zrlock_t *zrl, const char *zc) -#else -zrl_add(zrlock_t *zrl) -#endif +zrl_add_impl(zrlock_t *zrl, const char *zc) { uint32_t n = (uint32_t)zrl->zr_refcount; -- cgit v1.2.3 From 61679b0b6826b0ae7e3f751acd91412fcfa45d1e Mon Sep 17 00:00:00 2001 From: Gordon Ross Date: Sun, 18 Oct 2015 13:53:39 -0400 Subject: backout 5876 sys/regset.h pollutes name space backout commit 21227944c2bcc086121a5428f3f9d2496ba646f5. --- usr/src/cmd/csh/i386/signal.c | 3 +- usr/src/cmd/csh/sparc/signal.c | 3 +- usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c | 2 +- usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c | 3 +- usr/src/lib/libc/amd64/gen/makectxt.c | 3 +- usr/src/lib/libc/amd64/gen/siglongjmp.c | 3 +- usr/src/lib/libc/i386/gen/makectxt.c | 3 +- usr/src/lib/libc/i386/gen/siglongjmp.c | 3 +- usr/src/lib/libc/port/threads/sigaction.c | 2 +- usr/src/lib/libc/sparc/gen/makectxt.c | 3 +- usr/src/lib/libc/sparc/gen/siglongjmp.c | 3 +- usr/src/lib/libc/sparcv9/gen/makectxt.c | 3 +- usr/src/lib/libc/sparcv9/gen/siglongjmp.c | 1 - usr/src/lib/libm/common/m9x/__fex_hdlr.c | 1 - usr/src/lib/libm/common/m9x/__fex_i386.c | 1 - usr/src/lib/libm/common/m9x/__fex_sse.c | 1 - usr/src/lib/libm/common/m9x/fex_log.c | 1 - usr/src/lib/libproc/common/P32ton.c | 6 +- usr/src/pkg/manifests/system-header.mf | 1 - usr/src/uts/common/sys/core.h | 8 +- usr/src/uts/common/sys/prsystm.h | 4 +- usr/src/uts/intel/ia32/os/archdep.c | 4 +- usr/src/uts/intel/ia32/os/fpu.c | 2 +- usr/src/uts/intel/sys/Makefile | 1 - usr/src/uts/intel/sys/fp.h | 90 +----- usr/src/uts/intel/sys/mcontext.h | 191 ------------ usr/src/uts/intel/sys/pcb.h | 3 - usr/src/uts/intel/sys/procfs_isa.h | 6 +- usr/src/uts/intel/sys/regset.h | 313 ++++++++++++++++++- usr/src/uts/intel/sys/ucontext.h | 4 +- usr/src/uts/sparc/fpu/fpu_simulator.c | 1 - usr/src/uts/sparc/sys/Makefile | 3 +- usr/src/uts/sparc/sys/fpu/fpu_simulator.h | 2 +- 
usr/src/uts/sparc/sys/fpu/fpusystm.h | 4 +- usr/src/uts/sparc/sys/mcontext.h | 304 ------------------ usr/src/uts/sparc/sys/procfs_isa.h | 6 +- usr/src/uts/sparc/sys/regset.h | 427 +++++++++++++++++++++++++- usr/src/uts/sparc/sys/ucontext.h | 6 +- usr/src/uts/sparc/syscall/getcontext.c | 8 +- usr/src/uts/sparc/v7/sys/machpcb.h | 6 +- usr/src/uts/sparc/v9/fpu/fpu.c | 14 +- usr/src/uts/sparc/v9/os/v9dep.c | 14 +- usr/src/uts/sparc/v9/sys/machpcb.h | 2 +- usr/src/uts/sun4/os/trap.c | 4 +- 44 files changed, 801 insertions(+), 672 deletions(-) delete mode 100644 usr/src/uts/intel/sys/mcontext.h delete mode 100644 usr/src/uts/sparc/sys/mcontext.h (limited to 'usr/src') diff --git a/usr/src/cmd/csh/i386/signal.c b/usr/src/cmd/csh/i386/signal.c index 59b3eb54e4..d023a9bd88 100644 --- a/usr/src/cmd/csh/i386/signal.c +++ b/usr/src/cmd/csh/i386/signal.c @@ -12,6 +12,8 @@ * specifies the terms and conditions for redistribution. */ +#pragma ident "%Z%%M% %I% %E% SMI" + /* * 4.3BSD signal compatibility functions * @@ -26,7 +28,6 @@ #include #include #include -#include #include #include "signal.h" #include diff --git a/usr/src/cmd/csh/sparc/signal.c b/usr/src/cmd/csh/sparc/signal.c index cac56044fa..9502940859 100644 --- a/usr/src/cmd/csh/sparc/signal.c +++ b/usr/src/cmd/csh/sparc/signal.c @@ -12,6 +12,8 @@ * specifies the terms and conditions for redistribution. 
*/ +#pragma ident "%Z%%M% %I% %E% SMI" + /* * 4.3BSD signal compatibility functions * @@ -26,7 +28,6 @@ #include #include #include -#include #include #include "signal.h" #include diff --git a/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c b/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c index c11e08ba0d..ed61bade77 100644 --- a/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c +++ b/usr/src/cmd/mdb/intel/mdb/proc_amd64dep.c @@ -448,7 +448,7 @@ pt_fpregs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_tgt_t *t = mdb.m_target; mdb_tgt_tid_t tid; prfpregset_t fprs; - struct _fpchip_state fps; + struct fpchip_state fps; char buf[256]; uint_t top; int i; diff --git a/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c b/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c index 0a7f46f5d1..1d3781f1e9 100644 --- a/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c +++ b/usr/src/cmd/mdb/sparc/kmdb/kmdb_makecontext.c @@ -24,6 +24,8 @@ * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + /* * Context-saving routine used for pipelines. Designed for use only * with kmdb_setcontext, and with the assumption that func() will never @@ -34,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/usr/src/lib/libc/amd64/gen/makectxt.c b/usr/src/lib/libc/amd64/gen/makectxt.c index c8fd3c7742..5b2ce15313 100644 --- a/usr/src/lib/libc/amd64/gen/makectxt.c +++ b/usr/src/lib/libc/amd64/gen/makectxt.c @@ -27,13 +27,14 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _makecontext = makecontext #include "lint.h" #include #include #include -#include /* * The ucontext_t that the user passes in must have been primed with a diff --git a/usr/src/lib/libc/amd64/gen/siglongjmp.c b/usr/src/lib/libc/amd64/gen/siglongjmp.c index 6637c19625..07a89b485f 100644 --- a/usr/src/lib/libc/amd64/gen/siglongjmp.c +++ b/usr/src/lib/libc/amd64/gen/siglongjmp.c @@ -24,12 +24,13 @@ * Use is subject to license terms. 
*/ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include -#include #include #include diff --git a/usr/src/lib/libc/i386/gen/makectxt.c b/usr/src/lib/libc/i386/gen/makectxt.c index 1e6f995d84..d72a67a481 100644 --- a/usr/src/lib/libc/i386/gen/makectxt.c +++ b/usr/src/lib/libc/i386/gen/makectxt.c @@ -27,12 +27,13 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _makecontext = makecontext #include "lint.h" #include #include -#include #include /* diff --git a/usr/src/lib/libc/i386/gen/siglongjmp.c b/usr/src/lib/libc/i386/gen/siglongjmp.c index 17261b3efd..ea23fcf749 100644 --- a/usr/src/lib/libc/i386/gen/siglongjmp.c +++ b/usr/src/lib/libc/i386/gen/siglongjmp.c @@ -28,12 +28,13 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include -#include #include #include diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c index 571e211f97..8e9df7ab60 100644 --- a/usr/src/lib/libc/port/threads/sigaction.c +++ b/usr/src/lib/libc/port/threads/sigaction.c @@ -208,7 +208,7 @@ call_user_handler(int sig, siginfo_t *sip, ucontext_t *ucp) if (--fp->fpu_qcnt > 0) { unsigned char i; - struct _fq *fqp; + struct fq *fqp; fqp = fp->fpu_q; for (i = 0; i < fp->fpu_qcnt; i++) diff --git a/usr/src/lib/libc/sparc/gen/makectxt.c b/usr/src/lib/libc/sparc/gen/makectxt.c index fa5725525e..7519bdabcb 100644 --- a/usr/src/lib/libc/sparc/gen/makectxt.c +++ b/usr/src/lib/libc/sparc/gen/makectxt.c @@ -27,6 +27,8 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _makecontext = makecontext #pragma weak ___makecontext_v2 = __makecontext_v2 @@ -36,7 +38,6 @@ #include #include #include -#include /* * The ucontext_t that the user passes in must have been primed with a diff --git 
a/usr/src/lib/libc/sparc/gen/siglongjmp.c b/usr/src/lib/libc/sparc/gen/siglongjmp.c index 90b406cf8d..83a640397d 100644 --- a/usr/src/lib/libc/sparc/gen/siglongjmp.c +++ b/usr/src/lib/libc/sparc/gen/siglongjmp.c @@ -27,13 +27,14 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _siglongjmp = siglongjmp #include "lint.h" #include #include #include -#include #include #include #include diff --git a/usr/src/lib/libc/sparcv9/gen/makectxt.c b/usr/src/lib/libc/sparcv9/gen/makectxt.c index fa5725525e..7519bdabcb 100644 --- a/usr/src/lib/libc/sparcv9/gen/makectxt.c +++ b/usr/src/lib/libc/sparcv9/gen/makectxt.c @@ -27,6 +27,8 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ +#pragma ident "%Z%%M% %I% %E% SMI" + #pragma weak _makecontext = makecontext #pragma weak ___makecontext_v2 = __makecontext_v2 @@ -36,7 +38,6 @@ #include #include #include -#include /* * The ucontext_t that the user passes in must have been primed with a diff --git a/usr/src/lib/libc/sparcv9/gen/siglongjmp.c b/usr/src/lib/libc/sparcv9/gen/siglongjmp.c index 424942966f..5d4ca06426 100644 --- a/usr/src/lib/libc/sparcv9/gen/siglongjmp.c +++ b/usr/src/lib/libc/sparcv9/gen/siglongjmp.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/usr/src/lib/libm/common/m9x/__fex_hdlr.c b/usr/src/lib/libm/common/m9x/__fex_hdlr.c index 5b7ac8d933..7732d0a9fe 100644 --- a/usr/src/lib/libm/common/m9x/__fex_hdlr.c +++ b/usr/src/lib/libm/common/m9x/__fex_hdlr.c @@ -40,7 +40,6 @@ #include #endif #include -#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git a/usr/src/lib/libm/common/m9x/__fex_i386.c b/usr/src/lib/libm/common/m9x/__fex_i386.c index 5743bd414e..c6ffa593be 100644 --- a/usr/src/lib/libm/common/m9x/__fex_i386.c +++ b/usr/src/lib/libm/common/m9x/__fex_i386.c @@ -40,7 +40,6 @@ #include #endif #include -#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git 
a/usr/src/lib/libm/common/m9x/__fex_sse.c b/usr/src/lib/libm/common/m9x/__fex_sse.c index e679e71929..df83dff212 100644 --- a/usr/src/lib/libm/common/m9x/__fex_sse.c +++ b/usr/src/lib/libm/common/m9x/__fex_sse.c @@ -34,7 +34,6 @@ #else #include #endif -#include #include "fex_handler.h" #include "fenv_inlines.h" diff --git a/usr/src/lib/libm/common/m9x/fex_log.c b/usr/src/lib/libm/common/m9x/fex_log.c index 6a8e759cc0..336358da4c 100644 --- a/usr/src/lib/libm/common/m9x/fex_log.c +++ b/usr/src/lib/libm/common/m9x/fex_log.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/usr/src/lib/libproc/common/P32ton.c b/usr/src/lib/libproc/common/P32ton.c index acb52058c9..78219550c8 100644 --- a/usr/src/lib/libproc/common/P32ton.c +++ b/usr/src/lib/libproc/common/P32ton.c @@ -24,6 +24,8 @@ * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include #include #include @@ -265,7 +267,7 @@ prfpregset_32_to_n(const prfpregset32_t *src, prfpregset_t *dst) #elif defined(__amd64) struct _fpstate32 *src32 = (struct _fpstate32 *)src; - struct _fpchip_state *dst64 = (struct _fpchip_state *)dst; + struct fpchip_state *dst64 = (struct fpchip_state *)dst; int i; (void) memcpy(dst64->st, src32->_st, sizeof (src32->_st)); @@ -596,7 +598,7 @@ prfpregset_n_to_32(const prfpregset_t *src, prfpregset32_t *dst) #elif defined(__amd64) struct _fpstate32 *dst32 = (struct _fpstate32 *)dst; - struct _fpchip_state *src64 = (struct _fpchip_state *)src; + struct fpchip_state *src64 = (struct fpchip_state *)src; uint32_t top; int i; diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index f4f92a63e0..08f0b19416 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1219,7 +1219,6 @@ $(i386_ONLY)file path=usr/include/sys/mc_amd.h $(i386_ONLY)file path=usr/include/sys/mc_intel.h $(i386_ONLY)file path=usr/include/sys/mca_amd.h $(i386_ONLY)file 
path=usr/include/sys/mca_x86.h -file path=usr/include/sys/mcontext.h file path=usr/include/sys/md4.h file path=usr/include/sys/md5.h file path=usr/include/sys/md5_consts.h diff --git a/usr/src/uts/common/sys/core.h b/usr/src/uts/common/sys/core.h index 543ed5fada..28ee6faaff 100644 --- a/usr/src/uts/common/sys/core.h +++ b/usr/src/uts/common/sys/core.h @@ -20,8 +20,6 @@ * CDDL HEADER END */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -29,6 +27,8 @@ #ifndef _SYS_CORE_H #define _SYS_CORE_H +#pragma ident "%Z%%M% %I% %E% SMI" + #ifndef _KERNEL #include #endif /* _KERNEL */ @@ -63,9 +63,9 @@ struct core { int c_dsize; /* Data size (bytes) */ int c_ssize; /* Stack size (bytes) */ char c_cmdname[CORE_NAMELEN + 1]; /* Command name */ - struct _fpu c_fpu; /* external FPU state */ + struct fpu c_fpu; /* external FPU state */ #if defined(__sparc) - struct _fq c_fpu_q[_MAXFPQ]; /* fpu exception queue */ + struct fq c_fpu_q[MAXFPQ]; /* fpu exception queue */ #endif int c_ucode; /* Exception no. 
from u_code */ }; diff --git a/usr/src/uts/common/sys/prsystm.h b/usr/src/uts/common/sys/prsystm.h index 6f5d4b914f..ebded76dce 100644 --- a/usr/src/uts/common/sys/prsystm.h +++ b/usr/src/uts/common/sys/prsystm.h @@ -108,9 +108,9 @@ extern int pr_watch_emul(struct regs *, caddr_t, enum seg_rw); extern void pr_free_watched_pages(proc_t *); extern int pr_allstopped(proc_t *, int); #if defined(__sparc) -struct _gwindows; +struct gwindows; extern int prnwindows(klwp_t *); -extern void prgetwindows(klwp_t *, struct _gwindows *); +extern void prgetwindows(klwp_t *, struct gwindows *); #if defined(__sparcv9) /* 32-bit adb macros should not see these defs */ extern void prgetasregs(klwp_t *, asrset_t); extern void prsetasregs(klwp_t *, asrset_t); diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 2a33f306c2..d83b16d673 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -193,7 +193,7 @@ fpregset_to_fxsave(const fpregset_t *fp, struct fxsave_state *fx) #if defined(__amd64) bcopy(fp, fx, sizeof (*fx)); #else - const struct _fpchip_state *fc = &fp->fp_reg_set.fpchip_state; + const struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; fnsave_to_fxsave((const struct fnsave_state *)fc, fx); fx->fx_mxcsr = fc->mxcsr; @@ -214,7 +214,7 @@ fxsave_to_fpregset(const struct fxsave_state *fx, fpregset_t *fp) #if defined(__amd64) bcopy(fx, fp, sizeof (*fx)); #else - struct _fpchip_state *fc = &fp->fp_reg_set.fpchip_state; + struct fpchip_state *fc = &fp->fp_reg_set.fpchip_state; fxsave_to_fnsave(fx, (struct fnsave_state *)fc); fc->mxcsr = fx->fx_mxcsr; diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index dc3e286ad5..b7022cf0e5 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -416,7 +416,7 @@ fpnoextflt(struct regs *rp) ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); #if defined(__i386) - ASSERT(sizeof (struct _fpu) == sizeof 
(struct __old_fpu)); + ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu)); #endif /* __i386 */ #endif /* !__lint */ diff --git a/usr/src/uts/intel/sys/Makefile b/usr/src/uts/intel/sys/Makefile index 5cfbdec4fc..192ada41a5 100644 --- a/usr/src/uts/intel/sys/Makefile +++ b/usr/src/uts/intel/sys/Makefile @@ -56,7 +56,6 @@ HDRS = \ mc_intel.h \ mca_amd.h \ mca_x86.h \ - mcontext.h \ mutex_impl.h \ obpdefs.h \ old_procfs.h \ diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index 3373484dec..4956e2d318 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -19,8 +19,6 @@ * CDDL HEADER END */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -170,91 +168,6 @@ extern "C" { "\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm" \ "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie" -/* - * This structure is written to memory by an 'fnsave' instruction - */ -struct fnsave_state { - uint16_t f_fcw; - uint16_t __f_ign0; - uint16_t f_fsw; - uint16_t __f_ign1; - uint16_t f_ftw; - uint16_t __f_ign2; - uint32_t f_eip; - uint16_t f_cs; - uint16_t f_fop; - uint32_t f_dp; - uint16_t f_ds; - uint16_t __f_ign3; - union { - uint16_t fpr_16[5]; /* 80-bits of x87 state */ - } f_st[8]; -}; /* 108 bytes */ - -/* - * This structure is written to memory by an 'fxsave' instruction - * Note the variant behaviour of this instruction between long mode - * and legacy environments! 
- */ -struct fxsave_state { - uint16_t fx_fcw; - uint16_t fx_fsw; - uint16_t fx_fctw; /* compressed tag word */ - uint16_t fx_fop; -#if defined(__amd64) - uint64_t fx_rip; - uint64_t fx_rdp; -#else - uint32_t fx_eip; - uint16_t fx_cs; - uint16_t __fx_ign0; - uint32_t fx_dp; - uint16_t fx_ds; - uint16_t __fx_ign1; -#endif - uint32_t fx_mxcsr; - uint32_t fx_mxcsr_mask; - union { - uint16_t fpr_16[5]; /* 80-bits of x87 state */ - u_longlong_t fpr_mmx; /* 64-bit mmx register */ - uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */ - } fx_st[8]; -#if defined(__amd64) - upad128_t fx_xmm[16]; /* 128-bit registers */ - upad128_t __fx_ign2[6]; -#else - upad128_t fx_xmm[8]; /* 128-bit registers */ - upad128_t __fx_ign2[14]; -#endif -}; /* 512 bytes */ - -/* - * This structure is written to memory by an 'xsave' instruction. - * First 512 byte is compatible with the format of an 'fxsave' area. - */ -struct xsave_state { - struct fxsave_state xs_fxsave; - uint64_t xs_xstate_bv; /* 512 */ - uint64_t xs_rsv_mbz[2]; - uint64_t xs_reserved[5]; - upad128_t xs_ymm[16]; /* avx - 576 */ -}; /* 832 bytes, asserted in fpnoextflt() */ - -/* - * Kernel's FPU save area - */ -typedef struct { - union _kfpu_u { - struct fxsave_state kfpu_fx; -#if defined(__i386) - struct fnsave_state kfpu_fn; -#endif - struct xsave_state kfpu_xs; - } kfpu_u; - uint32_t kfpu_status; /* saved at #mf exception */ - uint32_t kfpu_xstatus; /* saved at #xm exception */ -} kfpu_t; - extern int fp_kind; /* kind of fp support */ extern int fp_save_mech; /* fp save/restore mechanism */ extern int fpu_exists; /* FPU hw exists */ @@ -275,6 +188,9 @@ extern void fpxsave_ctxt(void *); extern void xsave_ctxt(void *); extern void (*fpsave_ctxt)(void *); +struct fnsave_state; +struct fxsave_state; +struct xsave_state; extern void fxsave_insn(struct fxsave_state *); extern void fpsave(struct fnsave_state *); extern void fprestore(struct fnsave_state *); diff --git a/usr/src/uts/intel/sys/mcontext.h 
b/usr/src/uts/intel/sys/mcontext.h deleted file mode 100644 index 23d0aba364..0000000000 --- a/usr/src/uts/intel/sys/mcontext.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * - * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. - */ -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * Essential struct definitions for mcontext_t needed by ucontext.h - * These were formerly in regset.h, which now includes this file. - */ - -#ifndef _SYS_MCONTEXT_H -#define _SYS_MCONTEXT_H - -#include - -#if !defined(_ASM) -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * A gregset_t is defined as an array type for compatibility with the reference - * source. This is important due to differences in the way the C language - * treats arrays and structures as parameters. 
- */ -#if defined(__amd64) -#define _NGREG 28 -#else -#define _NGREG 19 -#endif - -#if !defined(_ASM) - -#if defined(_LP64) || defined(_I32LPx) -typedef long greg_t; -#else -typedef int greg_t; -#endif - -#if defined(_SYSCALL32) - -typedef int32_t greg32_t; -typedef int64_t greg64_t; - -#endif /* _SYSCALL32 */ - -typedef greg_t gregset_t[_NGREG]; - -#if defined(_SYSCALL32) - -#define _NGREG32 19 -#define _NGREG64 28 - -typedef greg32_t gregset32_t[_NGREG32]; -typedef greg64_t gregset64_t[_NGREG64]; - -#endif /* _SYSCALL32 */ - -/* - * Floating point definitions. - */ - -#if defined(__amd64) - -typedef struct _fpu { - union { - struct _fpchip_state { - uint16_t cw; - uint16_t sw; - uint8_t fctw; - uint8_t __fx_rsvd; - uint16_t fop; - uint64_t rip; - uint64_t rdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - union { - uint16_t fpr_16[5]; - upad128_t __fpr_pad; - } st[8]; - upad128_t xmm[16]; - upad128_t __fx_ign2[6]; - uint32_t status; /* sw at exception */ - uint32_t xstatus; /* mxcsr at exception */ - } fpchip_state; - uint32_t f_fpregs[130]; - } fp_reg_set; -} fpregset_t; - -#else /* __i386 */ - -/* - * This definition of the floating point structure is binary - * compatible with the Intel386 psABI definition, and source - * compatible with that specification for x87-style floating point. - * It also allows SSE/SSE2 state to be accessed on machines that - * possess such hardware capabilities. 
- */ -typedef struct _fpu { - union { - struct _fpchip_state { - uint32_t state[27]; /* 287/387 saved state */ - uint32_t status; /* saved at exception */ - uint32_t mxcsr; /* SSE control and status */ - uint32_t xstatus; /* SSE mxcsr at exception */ - uint32_t __pad[2]; /* align to 128-bits */ - upad128_t xmm[8]; /* %xmm0-%xmm7 */ - } fpchip_state; - struct _fp_emul_space { /* for emulator(s) */ - uint8_t fp_emul[246]; - uint8_t fp_epad[2]; - } fp_emul_space; - uint32_t f_fpregs[95]; /* union of the above */ - } fp_reg_set; -} fpregset_t; - -#endif /* __i386 */ - -#if defined(_SYSCALL32) - -/* Kernel view of user i386 fpu structure */ - -typedef struct fpu32 { - union { - struct fpchip32_state { - uint32_t state[27]; /* 287/387 saved state */ - uint32_t status; /* saved at exception */ - uint32_t mxcsr; /* SSE control and status */ - uint32_t xstatus; /* SSE mxcsr at exception */ - uint32_t __pad[2]; /* align to 128-bits */ - uint32_t xmm[8][4]; /* %xmm0-%xmm7 */ - } fpchip_state; - uint32_t f_fpregs[95]; /* union of the above */ - } fp_reg_set; -} fpregset32_t; - -#endif /* _SYSCALL32 */ - -/* - * Structure mcontext defines the complete hardware machine state. - * (This structure is specified in the i386 ABI suppl.) 
- */ -typedef struct { - gregset_t gregs; /* general register set */ - fpregset_t fpregs; /* floating point register set */ -} mcontext_t; - -#if defined(_SYSCALL32) - -typedef struct { - gregset32_t gregs; /* general register set */ - fpregset32_t fpregs; /* floating point register set */ -} mcontext32_t; - -#endif /* _SYSCALL32 */ - -#endif /* _ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MCONTEXT_H */ diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h index defd116eba..3a690bd980 100644 --- a/usr/src/uts/intel/sys/pcb.h +++ b/usr/src/uts/intel/sys/pcb.h @@ -28,9 +28,6 @@ #include #include -#ifndef _ASM -#include /* kfpu_t */ -#endif #ifdef __cplusplus extern "C" { diff --git a/usr/src/uts/intel/sys/procfs_isa.h b/usr/src/uts/intel/sys/procfs_isa.h index 5af0386d9b..b7a6cc3582 100644 --- a/usr/src/uts/intel/sys/procfs_isa.h +++ b/usr/src/uts/intel/sys/procfs_isa.h @@ -27,6 +27,8 @@ #ifndef _SYS_PROCFS_ISA_H #define _SYS_PROCFS_ISA_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * Instruction Set Architecture specific component of * i386 version @@ -67,7 +69,7 @@ typedef uchar_t instr_t; #define NPRGREG _NGREG #define prgreg_t greg_t #define prgregset_t gregset_t -#define prfpregset _fpu +#define prfpregset fpu #define prfpregset_t fpregset_t #if defined(_SYSCALL32) @@ -85,7 +87,7 @@ typedef uchar_t instr32_t; #define NPRGREG32 _NGREG #define prgreg32_t greg_t #define prgregset32_t gregset_t -#define prfpregset32 _fpu +#define prfpregset32 fpu #define prfpregset32_t fpregset_t #endif #endif /* _SYSCALL32 */ diff --git a/usr/src/uts/intel/sys/regset.h b/usr/src/uts/intel/sys/regset.h index 12b8f9e888..5436ae0be3 100644 --- a/usr/src/uts/intel/sys/regset.h +++ b/usr/src/uts/intel/sys/regset.h @@ -19,8 +19,6 @@ * CDDL HEADER END */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. 
*/ @@ -36,12 +34,13 @@ #if !defined(_ASM) #include #endif -#include #ifdef __cplusplus extern "C" { #endif +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + /* * The names and offsets defined here should be specified by the * AMD64 ABI suppl. @@ -135,16 +134,181 @@ extern "C" { #endif /* __i386 */ +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. + */ +#if defined(__amd64) +#define _NGREG 28 +#else +#define _NGREG 19 +#endif +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) #define NGREG _NGREG +#endif #if !defined(_ASM) -#ifdef __i386 +#if defined(_LP64) || defined(_I32LPx) +typedef long greg_t; +#else +typedef int greg_t; +#endif + +#if defined(_SYSCALL32) + +typedef int32_t greg32_t; +typedef int64_t greg64_t; + +#endif /* _SYSCALL32 */ + +typedef greg_t gregset_t[_NGREG]; + +#if defined(_SYSCALL32) + +#define _NGREG32 19 +#define _NGREG64 28 + +typedef greg32_t gregset32_t[_NGREG32]; +typedef greg64_t gregset64_t[_NGREG64]; + +#endif /* _SYSCALL32 */ + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + +/* + * Floating point definitions. + */ + +/* + * This structure is written to memory by an 'fnsave' instruction + */ +struct fnsave_state { + uint16_t f_fcw; + uint16_t __f_ign0; + uint16_t f_fsw; + uint16_t __f_ign1; + uint16_t f_ftw; + uint16_t __f_ign2; + uint32_t f_eip; + uint16_t f_cs; + uint16_t f_fop; + uint32_t f_dp; + uint16_t f_ds; + uint16_t __f_ign3; + union { + uint16_t fpr_16[5]; /* 80-bits of x87 state */ + } f_st[8]; +}; /* 108 bytes */ + +/* + * This structure is written to memory by an 'fxsave' instruction + * Note the variant behaviour of this instruction between long mode + * and legacy environments! 
+ */ +struct fxsave_state { + uint16_t fx_fcw; + uint16_t fx_fsw; + uint16_t fx_fctw; /* compressed tag word */ + uint16_t fx_fop; +#if defined(__amd64) + uint64_t fx_rip; + uint64_t fx_rdp; +#else + uint32_t fx_eip; + uint16_t fx_cs; + uint16_t __fx_ign0; + uint32_t fx_dp; + uint16_t fx_ds; + uint16_t __fx_ign1; +#endif + uint32_t fx_mxcsr; + uint32_t fx_mxcsr_mask; + union { + uint16_t fpr_16[5]; /* 80-bits of x87 state */ + u_longlong_t fpr_mmx; /* 64-bit mmx register */ + uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */ + } fx_st[8]; +#if defined(__amd64) + upad128_t fx_xmm[16]; /* 128-bit registers */ + upad128_t __fx_ign2[6]; +#else + upad128_t fx_xmm[8]; /* 128-bit registers */ + upad128_t __fx_ign2[14]; +#endif +}; /* 512 bytes */ + +/* + * This structure is written to memory by an 'xsave' instruction. + * First 512 byte is compatible with the format of an 'fxsave' area. + */ +struct xsave_state { + struct fxsave_state xs_fxsave; + uint64_t xs_xstate_bv; /* 512 */ + uint64_t xs_rsv_mbz[2]; + uint64_t xs_reserved[5]; + upad128_t xs_ymm[16]; /* avx - 576 */ +}; /* 832 bytes, asserted in fpnoextflt() */ + +#if defined(__amd64) + +typedef struct fpu { + union { + struct fpchip_state { + uint16_t cw; + uint16_t sw; + uint8_t fctw; + uint8_t __fx_rsvd; + uint16_t fop; + uint64_t rip; + uint64_t rdp; + uint32_t mxcsr; + uint32_t mxcsr_mask; + union { + uint16_t fpr_16[5]; + upad128_t __fpr_pad; + } st[8]; + upad128_t xmm[16]; + upad128_t __fx_ign2[6]; + uint32_t status; /* sw at exception */ + uint32_t xstatus; /* mxcsr at exception */ + } fpchip_state; + uint32_t f_fpregs[130]; + } fp_reg_set; +} fpregset_t; + +#else /* __i386 */ + +/* + * This definition of the floating point structure is binary + * compatible with the Intel386 psABI definition, and source + * compatible with that specification for x87-style floating point. + * It also allows SSE/SSE2 state to be accessed on machines that + * possess such hardware capabilities. 
+ */ +typedef struct fpu { + union { + struct fpchip_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + upad128_t xmm[8]; /* %xmm0-%xmm7 */ + } fpchip_state; + struct fp_emul_space { /* for emulator(s) */ + uint8_t fp_emul[246]; + uint8_t fp_epad[2]; + } fp_emul_space; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset_t; + /* * (This structure definition is specified in the i386 ABI supplement) - * It's likely we can just get rid of the struct __old_fpu or maybe - * move it to $SRC/uts/intel/ia32/os/fpu.c which appears to be the - * only place that uses it. See: www.illumos.org/issues/6284 */ typedef struct __old_fpu { union { @@ -163,19 +327,74 @@ typedef struct __old_fpu { } fp_reg_set; long f_wregs[33]; /* saved weitek state */ } __old_fpregset_t; + #endif /* __i386 */ +#if defined(_SYSCALL32) + +/* Kernel view of user i386 fpu structure */ + +typedef struct fpu32 { + union { + struct fpchip32_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + uint32_t xmm[8][4]; /* %xmm0-%xmm7 */ + } fpchip_state; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset32_t; + +#endif /* _SYSCALL32 */ + +/* + * Kernel's FPU save area + */ +typedef struct { + union _kfpu_u { + struct fxsave_state kfpu_fx; +#if defined(__i386) + struct fnsave_state kfpu_fn; +#endif + struct xsave_state kfpu_xs; + } kfpu_u; + uint32_t kfpu_status; /* saved at #mf exception */ + uint32_t kfpu_xstatus; /* saved at #xm exception */ +} kfpu_t; + #if defined(__amd64) -#define _NDEBUGREG 16 +#define NDEBUGREG 16 #else -#define _NDEBUGREG 8 +#define NDEBUGREG 8 #endif typedef 
struct dbregset { - unsigned long debugreg[_NDEBUGREG]; + unsigned long debugreg[NDEBUGREG]; } dbregset_t; +/* + * Structure mcontext defines the complete hardware machine state. + * (This structure is specified in the i386 ABI suppl.) + */ +typedef struct { + gregset_t gregs; /* general register set */ + fpregset_t fpregs; /* floating point register set */ +} mcontext_t; + +#if defined(_SYSCALL32) + +typedef struct { + gregset32_t gregs; /* general register set */ + fpregset32_t fpregs; /* floating point register set */ +} mcontext32_t; + +#endif /* _SYSCALL32 */ + #endif /* _ASM */ +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ /* * The version of privregs.h that is used on implementations that run on @@ -201,6 +420,80 @@ typedef struct dbregset { #include #endif /* __i386 (!_KERNEL && !_XPG4_2 || __EXTENSIONS__) */ +/* + * The following is here for XPG4.2 standards compliance. + * regset.h is included in ucontext.h for the definition of + * mcontext_t, all of which breaks XPG4.2 namespace. + */ + +#if defined(_XPG4_2) && !defined(__EXTENSIONS__) && !defined(_ASM) + +/* + * The following is here for UNIX 95 compliance (XPG Issue 4, Version 2 + * System Interfaces and Headers). The structures included here are identical + * to those visible elsewhere in this header except that the structure + * element names have been changed in accordance with the X/Open namespace + * rules. Specifically, depending on the name and scope, the names have + * been prepended with a single or double underscore (_ or __). See the + * structure definitions in the non-X/Open namespace for more detailed + * comments describing each of these structures. 
+ */ + +#if defined(__amd64) + +typedef struct __fpu { + union { + struct __fpchip_state { + uint16_t __fx_cw; + uint16_t __fx_sw; + uint16_t __fx_ctw; + uint16_t __fx_op; + uint64_t __fx_rip; + uint64_t __fx_rdp; + uint32_t __fx_mxcsr; + uint32_t __fx_mxcsr_mask; + union { + uint16_t __fpr_16[5]; + upad128_t __fpr_pad; + } __fx_st[8]; + upad128_t __fx_xmm[16]; + upad128_t __fx_ign2[6]; + uint32_t __status; + uint32_t __xstatus; + } __fpchip_state; + uint32_t __f_fpregs[130]; + } __fp_reg_set; +} fpregset_t; + +#else /* __i386 */ + +typedef struct __fpu { + union { + struct __fpchip_state { + uint32_t __state[27]; /* 287/387 saved state */ + uint32_t __status; /* saved at exception */ + uint32_t __mxcsr; /* SSE control and status */ + uint32_t __xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + upad128_t __xmm[8]; /* %xmm0-%xmm7 */ + } __fpchip_state; + struct __fp_emul_space { /* for emulator(s) */ + uint8_t __fp_emul[246]; + uint8_t __fp_epad[2]; + } __fp_emul_space; + uint32_t __f_fpregs[95]; /* union of the above */ + } __fp_reg_set; +} fpregset_t; + +#endif /* __i386 */ + +typedef struct { + gregset_t __gregs; /* general register set */ + fpregset_t __fpregs; /* floating point register set */ +} mcontext_t; + +#endif /* _XPG4_2 && !__EXTENSIONS__ && !_ASM */ + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/intel/sys/ucontext.h b/usr/src/uts/intel/sys/ucontext.h index 5f64fa9393..acd6ddc99e 100644 --- a/usr/src/uts/intel/sys/ucontext.h +++ b/usr/src/uts/intel/sys/ucontext.h @@ -20,8 +20,6 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,7 +33,7 @@ #include #include -#include +#include #if !defined(_XPG4_2) || defined(__EXTENSIONS__) #include #endif diff --git a/usr/src/uts/sparc/fpu/fpu_simulator.c b/usr/src/uts/sparc/fpu/fpu_simulator.c index 16393bc9d1..aeafdc515d 100644 --- a/usr/src/uts/sparc/fpu/fpu_simulator.c +++ b/usr/src/uts/sparc/fpu/fpu_simulator.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/usr/src/uts/sparc/sys/Makefile b/usr/src/uts/sparc/sys/Makefile index 975639fa06..1ae69ac3cc 100644 --- a/usr/src/uts/sparc/sys/Makefile +++ b/usr/src/uts/sparc/sys/Makefile @@ -22,6 +22,8 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +#ident "%Z%%M% %I% %E% SMI" +# include ../../../Makefile.master @@ -39,7 +41,6 @@ HDRS= \ machlock.h \ machsig.h \ machtypes.h \ - mcontext.h \ old_procfs.h \ pcb.h \ procfs_isa.h \ diff --git a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h index 0087877f55..4d83a2c795 100644 --- a/usr/src/uts/sparc/sys/fpu/fpu_simulator.h +++ b/usr/src/uts/sparc/sys/fpu/fpu_simulator.h @@ -455,7 +455,7 @@ extern enum ftt_type fp_emulator( fp_inst_type *pinst, /* Pointer to FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ struct rwindow *pwindow, /* Pointer to locals and ins. */ - struct _fpu *pfpu); /* Pointer to FPU register block. */ + struct fpu *pfpu); /* Pointer to FPU register block. */ /* * fp_traps handles passing exception conditions to the kernel. 
diff --git a/usr/src/uts/sparc/sys/fpu/fpusystm.h b/usr/src/uts/sparc/sys/fpu/fpusystm.h index cf178fa87f..6b013aa52d 100644 --- a/usr/src/uts/sparc/sys/fpu/fpusystm.h +++ b/usr/src/uts/sparc/sys/fpu/fpusystm.h @@ -27,6 +27,8 @@ #ifndef _SYS_FPU_FPUSYSTM_H #define _SYS_FPU_FPUSYSTM_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * ISA-dependent FPU interfaces */ @@ -37,7 +39,7 @@ extern "C" { #ifdef _KERNEL -struct _fpu; +struct fpu; struct regs; #if !defined(DEBUG) && !defined(NEED_FPU_EXISTS) diff --git a/usr/src/uts/sparc/sys/mcontext.h b/usr/src/uts/sparc/sys/mcontext.h deleted file mode 100644 index f35d0c1d22..0000000000 --- a/usr/src/uts/sparc/sys/mcontext.h +++ /dev/null @@ -1,304 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - - -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - * - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - */ - -/* - * Essential struct definitions for mcontext_t needed by ucontext.h - * These were formerly in regset.h, which now includes this file. 
- */ - -#ifndef _SYS_MCONTEXT_H -#define _SYS_MCONTEXT_H - -#include - -#if !defined(_ASM) -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * A gregset_t is defined as an array type for compatibility with the reference - * source. This is important due to differences in the way the C language - * treats arrays and structures as parameters. - * - * Note that NGREG is really (sizeof (struct regs) / sizeof (greg_t)), - * but that the SPARC V8 ABI defines it absolutely to be 19. - */ -#if defined(__sparcv9) -#define _NGREG 21 -#else /* __sparcv9 */ -#define _NGREG 19 -#endif /* __sparcv9 */ - -#ifndef _ASM - -#if defined(_LP64) || defined(_I32LPx) -typedef long greg_t; -#else -typedef int greg_t; -#endif - -#if defined(_SYSCALL32) - -typedef int32_t greg32_t; -typedef int64_t greg64_t; - -#endif /* _SYSCALL32 */ - -typedef greg_t gregset_t[_NGREG]; - -#if defined(_SYSCALL32) - -#define _NGREG32 19 -#define _NGREG64 21 - -typedef greg32_t gregset32_t[_NGREG32]; -typedef greg64_t gregset64_t[_NGREG64]; - -#endif /* _SYSCALL32 */ - -/* - * Floating point definitions. - */ - -#define _MAXFPQ 16 /* max # of fpu queue entries currently supported */ - -/* - * struct _fq defines the minimal format of a floating point instruction queue - * entry. The size of entries in the floating point queue are implementation - * dependent. The union FQu is guarenteed to be the first field in any ABI - * conformant system implementation. Any additional fields provided by an - * implementation should not be used applications designed to be ABI conformant. 
- */ - -struct _fpq { - unsigned int *fpq_addr; /* address */ - unsigned int fpq_instr; /* instruction */ -}; - -struct _fq { - union { /* FPU inst/addr queue */ - double whole; - struct _fpq fpq; - } FQu; -}; - -#if defined(_SYSCALL32) - -struct fpq32 { - caddr32_t fpq_addr; /* address */ - uint32_t fpq_instr; /* instruction */ -}; - -struct fq32 { - union { /* FPU inst/addr queue */ - double whole; - struct fpq32 fpq; - } FQu; -}; - -#endif /* _SYSCALL32 */ - -/* - * struct fpu is the floating point processor state. struct fpu is the sum - * total of all possible floating point state which includes the state of - * external floating point hardware, fpa registers, etc..., if it exists. - * - * A floating point instuction queue may or may not be associated with - * the floating point processor state. If a queue does exist, the field - * fpu_q will point to an array of fpu_qcnt entries where each entry is - * fpu_q_entrysize long. fpu_q_entry has a lower bound of sizeof (union FQu) - * and no upper bound. If no floating point queue entries are associated - * with the processor state, fpu_qcnt will be zeo and fpu_q will be NULL. 
- */ - -#if defined(__sparcv9) - -struct _fpu { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[32]; /* 32 doubles */ - long double fpu_qregs[16]; /* 16 quads */ - } fpu_fr; - struct _fq *fpu_q; /* ptr to array of FQ entries */ - uint64_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag specifying fpu in use */ -}; - -#else /* __sparcv9 */ - -struct _fpu { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[16]; /* 16 doubles */ - } fpu_fr; - struct _fq *fpu_q; /* ptr to array of FQ entries */ - uint32_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag signifying fpu in use */ -}; - -#endif /* __sparcv9 */ - -typedef struct _fpu fpregset_t; - -#if defined(_SYSCALL32) - -/* Kernel view of user sparcv7/v8 fpu structure */ - -struct fpu32 { - union { /* FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - double fpu_dregs[16]; /* 16 doubles */ - } fpu_fr; - caddr32_t fpu_q; /* ptr to array of FQ entries */ - uint32_t fpu_fsr; /* FPU status register */ - uint8_t fpu_qcnt; /* # of entries in saved FQ */ - uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ - uint8_t fpu_en; /* flag signifying fpu in use */ -}; - -typedef struct fpu32 fpregset32_t; - -#endif /* _SYSCALL32 */ - -#if defined(_KERNEL) || defined(_KMDB) -/* - * The ABI uses struct fpu, so we use this to describe the kernel's view of the - * fpu. 
- */ -typedef struct { - union _fpu_fr { /* V9 FPU floating point regs */ - uint32_t fpu_regs[32]; /* 32 singles */ - uint64_t fpu_dregs[32]; /* 32 doubles */ - long double fpu_qregs[16]; /* 16 quads */ - } fpu_fr; - uint64_t fpu_fsr; /* FPU status register */ - uint32_t fpu_fprs; /* fprs register */ - struct _fq *fpu_q; - uint8_t fpu_qcnt; - uint8_t fpu_q_entrysize; - uint8_t fpu_en; /* flag signifying fpu in use */ -} kfpu_t; -#endif /* _KERNEL || _KMDB */ - -/* - * The following structure is for associating extra register state with - * the ucontext structure and is kept within the uc_mcontext filler area. - * - * If (xrs_id == XRS_ID) then the xrs_ptr field is a valid pointer to - * extra register state. The exact format of the extra register state - * pointed to by xrs_ptr is platform-dependent. - * - * Note: a platform may or may not manage extra register state. - */ -typedef struct { - unsigned int xrs_id; /* indicates xrs_ptr validity */ - caddr_t xrs_ptr; /* ptr to extra reg state */ -} xrs_t; - -#define _XRS_ID 0x78727300 /* the string "xrs" */ - -#if defined(_SYSCALL32) - -typedef struct { - uint32_t xrs_id; /* indicates xrs_ptr validity */ - caddr32_t xrs_ptr; /* ptr to extra reg state */ -} xrs32_t; - -#endif /* _SYSCALL32 */ - -#if defined(__sparcv9) - -/* - * Ancillary State Registers - * - * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. - * ASRs 16 through 31 are available to user programs, though the meaning - * and content of these registers is implementation dependent. - */ -typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ - -#endif /* __sparcv9 */ - -/* - * Structure mcontext defines the complete hardware machine state. If - * the field `gwins' is non NULL, it points to a save area for register - * window frames. If `gwins' is NULL, the register windows were saved - * on the user's stack. - * - * The filler of 21 longs is historical (now filler[19] plus the xrs_t - * field). 
The value was selected to provide binary compatibility with - * statically linked ICL binaries. It is in the ABI (do not change). It - * actually appears in the ABI as a single filler of 44 is in the field - * uc_filler of struct ucontext. It is split here so that ucontext.h can - * (hopefully) remain architecture independent. - * - * Note that 2 longs of the filler are used to hold extra register state info. - */ -typedef struct { - gregset_t gregs; /* general register set */ - struct _gwindows *gwins; /* POSSIBLE pointer to register windows */ - fpregset_t fpregs; /* floating point register set */ - xrs_t xrs; /* POSSIBLE extra register state association */ -#if defined(__sparcv9) - asrset_t asrs; /* ancillary registers */ - long filler[4]; /* room for expansion */ -#else /* __sparcv9 */ - long filler[19]; -#endif /* __sparcv9 */ -} mcontext_t; - -#if defined(_SYSCALL32) - -typedef struct { - gregset32_t gregs; /* general register set */ - caddr32_t gwins; /* POSSIBLE pointer to register windows */ - fpregset32_t fpregs; /* floating point register set */ - xrs32_t xrs; /* POSSIBLE extra register state association */ - int32_t filler[19]; -} mcontext32_t; - -#endif /* _SYSCALL32 */ - -#endif /* _ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MCONTEXT_H */ diff --git a/usr/src/uts/sparc/sys/procfs_isa.h b/usr/src/uts/sparc/sys/procfs_isa.h index 4c9ffec7c1..a1c98655b5 100644 --- a/usr/src/uts/sparc/sys/procfs_isa.h +++ b/usr/src/uts/sparc/sys/procfs_isa.h @@ -27,6 +27,8 @@ #ifndef _SYS_PROCFS_ISA_H #define _SYS_PROCFS_ISA_H +#pragma ident "%Z%%M% %I% %E% SMI" + /* * Instruction Set Architecture specific component of * sparc v8/v9 version @@ -167,7 +169,7 @@ typedef struct prfpregset { uint8_t pr_q_entrysize; /* # of bytes per FQ entry */ uint8_t pr_en; /* flag signifying fpu in use */ char pr_pad[13]; /* ensure sizeof(prfpregset)%16 == 0 */ - struct _fq pr_q[16]; /* contains the FQ array */ + struct fq pr_q[16]; /* contains the FQ array */ } 
prfpregset_t; #else typedef struct prfpregset { @@ -180,7 +182,7 @@ typedef struct prfpregset { uint8_t pr_qcnt; /* # of entries in saved FQ */ uint8_t pr_q_entrysize; /* # of bytes per FQ entry */ uint8_t pr_en; /* flag signifying fpu in use */ - struct _fq pr_q[32]; /* contains the FQ array */ + struct fq pr_q[32]; /* contains the FQ array */ } prfpregset_t; #endif /* __sparcv9 */ diff --git a/usr/src/uts/sparc/sys/regset.h b/usr/src/uts/sparc/sys/regset.h index cebf5939a3..26e7119a38 100644 --- a/usr/src/uts/sparc/sys/regset.h +++ b/usr/src/uts/sparc/sys/regset.h @@ -23,8 +23,6 @@ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -32,12 +30,13 @@ #ifndef _SYS_REGSET_H #define _SYS_REGSET_H +#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.1 */ + #include #if !defined(_ASM) #include #endif -#include #ifdef __cplusplus extern "C" { @@ -47,6 +46,8 @@ extern "C" { * Location of the users' stored registers relative to R0. * Usage is as an index into a gregset_t array or as u.u_ar0[XX]. */ +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) + #if defined(__sparcv9) #define REG_CCR (0) #if defined(_SYSCALL32) @@ -86,12 +87,53 @@ extern "C" { #define REG_SP REG_O6 #define REG_R0 REG_O0 #define REG_R1 REG_O1 +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. + * + * Note that NGREG is really (sizeof (struct regs) / sizeof (greg_t)), + * but that the SPARC V8 ABI defines it absolutely to be 19. 
+ */ +#if defined(__sparcv9) +#define _NGREG 21 +#else /* __sparcv9 */ +#define _NGREG 19 +#endif /* __sparcv9 */ +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) +#define NGREG _NGREG +#endif #ifndef _ASM -#define NGREG _NGREG -#define MAXFPQ _MAXFPQ +#if defined(_LP64) || defined(_I32LPx) +typedef long greg_t; +#else +typedef int greg_t; +#endif + +#if defined(_SYSCALL32) + +typedef int32_t greg32_t; +typedef int64_t greg64_t; + +#endif /* _SYSCALL32 */ + +typedef greg_t gregset_t[_NGREG]; + +#if defined(_SYSCALL32) +#define _NGREG32 19 +#define _NGREG64 21 + +typedef greg32_t gregset32_t[_NGREG32]; +typedef greg64_t gregset64_t[_NGREG64]; + +#endif /* _SYSCALL32 */ + +#if !defined(_XPG4_2) || defined(__EXTENSIONS__) /* * The following structures define how a register window can appear on the * stack. This structure is available (when required) through the `gwins' @@ -101,7 +143,7 @@ extern "C" { */ #define SPARC_MAXREGWINDOW 31 /* max windows in SPARC arch. */ -struct rwindow { +struct rwindow { greg_t rw_local[8]; /* locals */ greg_t rw_in[8]; /* ins */ }; @@ -128,7 +170,7 @@ extern void rwindow_32ton(struct rwindow32 *, struct rwindow *); #define rw_fp rw_in[6] /* frame pointer */ #define rw_rtn rw_in[7] /* return address */ -typedef struct _gwindows { +typedef struct gwindows { int wbcnt; greg_t *spbuf[SPARC_MAXREGWINDOW]; struct rwindow wbuf[SPARC_MAXREGWINDOW]; @@ -150,6 +192,62 @@ typedef struct gwindows64 { #endif /* _SYSCALL32 */ + +/* + * Floating point definitions. + */ + +#define MAXFPQ 16 /* max # of fpu queue entries currently supported */ + +/* + * struct fq defines the minimal format of a floating point instruction queue + * entry. The size of entries in the floating point queue are implementation + * dependent. The union FQu is guaranteed to be the first field in any ABI + * conformant system implementation. Any additional fields provided by an + * implementation should not be used by applications designed to be ABI conformant.
+ */ + +struct fpq { + unsigned int *fpq_addr; /* address */ + unsigned int fpq_instr; /* instruction */ +}; + +struct fq { + union { /* FPU inst/addr queue */ + double whole; + struct fpq fpq; + } FQu; +}; + +#if defined(_SYSCALL32) + +struct fpq32 { + caddr32_t fpq_addr; /* address */ + uint32_t fpq_instr; /* instruction */ +}; + +struct fq32 { + union { /* FPU inst/addr queue */ + double whole; + struct fpq32 fpq; + } FQu; +}; + +#endif /* _SYSCALL32 */ + +/* + * struct fpu is the floating point processor state. struct fpu is the sum + * total of all possible floating point state which includes the state of + * external floating point hardware, fpa registers, etc..., if it exists. + * + * A floating point instruction queue may or may not be associated with + * the floating point processor state. If a queue does exist, the field + * fpu_q will point to an array of fpu_qcnt entries where each entry is + * fpu_q_entrysize long. fpu_q_entrysize has a lower bound of sizeof (union FQu) + * and no upper bound. If no floating point queue entries are associated + * with the processor state, fpu_qcnt will be zero and fpu_q will be NULL. + */ + /* * The following #define's are obsolete and may be removed in a future release. * The corresponding integer types should be used instead (i.e. uint64_t).
@@ -160,9 +258,160 @@ typedef struct gwindows64 { #define V9_FPU_FSR_TYPE uint64_t #define V9_FPU_FPRS_TYPE uint32_t -#define XRS_ID _XRS_ID +#if defined(__sparcv9) + +struct fpu { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[32]; /* 32 doubles */ + long double fpu_qregs[16]; /* 16 quads */ + } fpu_fr; + struct fq *fpu_q; /* ptr to array of FQ entries */ + uint64_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag specifying fpu in use */ +}; + +#else /* __sparcv9 */ + +struct fpu { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[16]; /* 16 doubles */ + } fpu_fr; + struct fq *fpu_q; /* ptr to array of FQ entries */ + uint32_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag signifying fpu in use */ +}; + +#endif /* __sparcv9 */ + +typedef struct fpu fpregset_t; + +#if defined(_SYSCALL32) + +/* Kernel view of user sparcv7/v8 fpu structure */ + +struct fpu32 { + union { /* FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + double fpu_dregs[16]; /* 16 doubles */ + } fpu_fr; + caddr32_t fpu_q; /* ptr to array of FQ entries */ + uint32_t fpu_fsr; /* FPU status register */ + uint8_t fpu_qcnt; /* # of entries in saved FQ */ + uint8_t fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t fpu_en; /* flag signifying fpu in use */ +}; + +typedef struct fpu32 fpregset32_t; + +#endif /* _SYSCALL32 */ -#endif /* !_ASM */ +#if defined(_KERNEL) || defined(_KMDB) +/* + * The ABI uses struct fpu, so we use this to describe the kernel's view of the + * fpu. 
+ */ +typedef struct { + union _fpu_fr { /* V9 FPU floating point regs */ + uint32_t fpu_regs[32]; /* 32 singles */ + uint64_t fpu_dregs[32]; /* 32 doubles */ + long double fpu_qregs[16]; /* 16 quads */ + } fpu_fr; + uint64_t fpu_fsr; /* FPU status register */ + uint32_t fpu_fprs; /* fprs register */ + struct fq *fpu_q; + uint8_t fpu_qcnt; + uint8_t fpu_q_entrysize; + uint8_t fpu_en; /* flag signifying fpu in use */ +} kfpu_t; +#endif /* _KERNEL || _KMDB */ + +/* + * The following structure is for associating extra register state with + * the ucontext structure and is kept within the uc_mcontext filler area. + * + * If (xrs_id == XRS_ID) then the xrs_ptr field is a valid pointer to + * extra register state. The exact format of the extra register state + * pointed to by xrs_ptr is platform-dependent. + * + * Note: a platform may or may not manage extra register state. + */ +typedef struct { + unsigned int xrs_id; /* indicates xrs_ptr validity */ + caddr_t xrs_ptr; /* ptr to extra reg state */ +} xrs_t; + +#define XRS_ID 0x78727300 /* the string "xrs" */ + +#if defined(_SYSCALL32) + +typedef struct { + uint32_t xrs_id; /* indicates xrs_ptr validity */ + caddr32_t xrs_ptr; /* ptr to extra reg state */ +} xrs32_t; + +#endif /* _SYSCALL32 */ + +#if defined(__sparcv9) + +/* + * Ancillary State Registers + * + * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. + * ASRs 16 through 31 are available to user programs, though the meaning + * and content of these registers is implementation dependent. + */ +typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ + +#endif /* __sparcv9 */ + +/* + * Structure mcontext defines the complete hardware machine state. If + * the field `gwins' is non NULL, it points to a save area for register + * window frames. If `gwins' is NULL, the register windows were saved + * on the user's stack. + * + * The filler of 21 longs is historical (now filler[19] plus the xrs_t + * field). 
The value was selected to provide binary compatibility with + * statically linked ICL binaries. It is in the ABI (do not change). It + * actually appears in the ABI as a single filler of 44 is in the field + * uc_filler of struct ucontext. It is split here so that ucontext.h can + * (hopefully) remain architecture independent. + * + * Note that 2 longs of the filler are used to hold extra register state info. + */ +typedef struct { + gregset_t gregs; /* general register set */ + gwindows_t *gwins; /* POSSIBLE pointer to register windows */ + fpregset_t fpregs; /* floating point register set */ + xrs_t xrs; /* POSSIBLE extra register state association */ +#if defined(__sparcv9) + asrset_t asrs; /* ancillary registers */ + long filler[4]; /* room for expansion */ +#else /* __sparcv9 */ + long filler[19]; +#endif /* __sparcv9 */ +} mcontext_t; + +#if defined(_SYSCALL32) + +typedef struct { + gregset32_t gregs; /* general register set */ + caddr32_t gwins; /* POSSIBLE pointer to register windows */ + fpregset32_t fpregs; /* floating point register set */ + xrs32_t xrs; /* POSSIBLE extra register state association */ + int32_t filler[19]; +} mcontext32_t; + +#endif /* _SYSCALL32 */ + +#endif /* !defined(_XPG4_2) || defined(__EXTENSIONS__) */ +#endif /* _ASM */ /* * The version of privregs.h that is used on implementations that run @@ -179,7 +428,7 @@ typedef struct gwindows64 { * 'struct regs' to match the content of a 32-bit core file, or a ucontext_t. * * Note that the ucontext_t actually describes the general registers in - * terms of the gregset_t data type, as described in mcontex.h. Note also + * terms of the gregset_t data type, as described in this file. Note also * that the core file content is defined by core(4) in terms of data types * defined by procfs -- see proc(4). */ @@ -189,6 +438,164 @@ typedef struct gwindows64 { #endif /* !_KERNEL && !_XPG4_2 || __EXTENSIONS__ */ #endif /* __sparcv9 */ +/* + * The following is here for XPG4.2 standards compliance. 
+ * regset.h is included in ucontext.h for the definition of + * mcontext_t, all of which breaks XPG4.2 namespace. + */ + +#if defined(_XPG4_2) && !defined(__EXTENSIONS__) +/* + * The following is here for UNIX 95 compliance (XPG Issue 4, Version 2 + * System Interfaces and Headers). The structures included here are identical + * to those visible elsewhere in this header except that the structure + * element names have been changed in accordance with the X/Open namespace + * rules. Specifically, depending on the name and scope, the names have + * been prepended with a single or double underscore (_ or __). See the + * structure definitions in the non-X/Open namespace for more detailed + * comments describing each of these structures. + */ + +#ifndef _ASM + +/* + * The following structures define how a register window can appear on the + * stack. + */ +#define _SPARC_MAXREGWINDOW 31 /* max windows in SPARC arch. */ + +struct __rwindow { + greg_t __rw_local[8]; /* locals */ + greg_t __rw_in[8]; /* ins */ +}; + +#define __rw_fp __rw_in[6] /* frame pointer */ +#define __rw_rtn __rw_in[7] /* return address */ + +struct __gwindows { + int __wbcnt; + greg_t *__spbuf[_SPARC_MAXREGWINDOW]; + struct __rwindow __wbuf[_SPARC_MAXREGWINDOW]; +}; + +typedef struct __gwindows gwindows_t; + +/* + * The fq structure defines the minimal format of a floating point + * instruction queue entry. + */ + +struct __fpq { + unsigned int *__fpq_addr; /* address */ + unsigned int __fpq_instr; /* instruction */ +}; + +struct __fq { + union { /* FPU inst/addr queue */ + double __whole; + struct __fpq __fpq; + } _FQu; +}; + +/* + * The fpu structure is the floating point processor state. + */ + +/* + * The following #define's are obsolete and may be removed in a future release. + * The corresponding integer types should be used instead (i.e. uint64_t).
+ */ +#define _FPU_REGS_TYPE uint32_t +#define _FPU_DREGS_TYPE uint64_t +#define _V7_FPU_FSR_TYPE uint32_t +#define _V9_FPU_FSR_TYPE uint64_t +#define _V9_FPU_FPRS_TYPE uint32_t + +#if defined(__sparcv9) + +/* + * SPARC Version 9 floating point + */ + +struct __fpu { + union { /* FPU floating point regs */ + uint32_t __fpu_regs[32]; /* 32 singles */ + double __fpu_dregs[32]; /* 32 doubles */ + long double __fpu_qregs[16]; /* 16 quads */ + } __fpu_fr; + struct __fq *__fpu_q; /* ptr to array of FQ entries */ + uint64_t __fpu_fsr; /* FPU status register */ + uint8_t __fpu_qcnt; /* # of entries in saved FQ */ + uint8_t __fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t __fpu_en; /* flag signifying fpu in use */ +}; + +#else /* __sparcv9 */ + +/* + * SPARC Version 7 and 8 floating point + */ + +struct __fpu { + union { /* FPU floating point regs */ + uint32_t __fpu_regs[32]; /* 32 singles */ + double __fpu_dregs[16]; /* 16 doubles */ + } __fpu_fr; + struct __fq *__fpu_q; /* ptr to array of FQ entries */ + uint32_t __fpu_fsr; /* FPU status register */ + uint8_t __fpu_qcnt; /* # of entries in saved FQ */ + uint8_t __fpu_q_entrysize; /* # of bytes per FQ entry */ + uint8_t __fpu_en; /* flag signifying fpu in use */ +}; + +#endif /* __sparcv9 */ + +typedef struct __fpu fpregset_t; + +/* + * The xrs_t structure is for associating extra register state with + * the ucontext structure and is kept within the uc_mcontext filler area. + */ +typedef struct { + unsigned int __xrs_id; /* indicates xrs_ptr validity */ + caddr_t __xrs_ptr; /* ptr to extra reg state */ +} xrs_t; + +#define _XRS_ID 0x78727300 /* the string "xrs" */ + +#if defined(__sparcv9) + +/* + * Ancillary State Registers + * + * The SPARC V9 architecture defines 25 ASRs, numbered from 7 through 31. + * ASRs 16 through 31 are available to user programs, though the meaning + * and content of these registers is implementation dependent. 
+ */ +typedef int64_t asrset_t[16]; /* %asr16 - > %asr31 */ + +#endif /* __sparcv9 */ + +/* + * Structure mcontext defines the complete hardware machine state. + */ +typedef struct { + gregset_t __gregs; /* general register set */ + gwindows_t *__gwins; /* POSSIBLE pointer to register windows */ + fpregset_t __fpregs; /* floating point register set */ + xrs_t __xrs; /* POSSIBLE extra register state association */ +#if defined(__sparcv9) + asrset_t __asrs; /* ancillary registers */ + long __filler[4]; /* room for expansion */ +#else /* __sparcv9 */ + long __filler[19]; +#endif /* __sparcv9 */ +} mcontext_t; + +#endif /* _ASM */ +#endif /* defined(_XPG4_2) && !defined(__EXTENSIONS__) */ + + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/sparc/sys/ucontext.h b/usr/src/uts/sparc/sys/ucontext.h index 35eff287af..e0cfea55bd 100644 --- a/usr/src/uts/sparc/sys/ucontext.h +++ b/usr/src/uts/sparc/sys/ucontext.h @@ -20,8 +20,6 @@ */ /* - * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -35,7 +33,7 @@ #include #include -#include +#include #if !defined(_XPG4_2) || defined(__EXTENSIONS__) #include #endif @@ -107,7 +105,7 @@ typedef struct ucontext32 { #ifdef _KERNEL extern void ucontext_32ton(const ucontext32_t *, ucontext_t *, - const struct fq32 *, struct _fq *); + const struct fq32 *, struct fq *); extern void fpuregset_nto32(const fpregset_t *, fpregset32_t *, struct fq32 *); #endif diff --git a/usr/src/uts/sparc/syscall/getcontext.c b/usr/src/uts/sparc/syscall/getcontext.c index c9adb54b86..437eef5e1a 100644 --- a/usr/src/uts/sparc/syscall/getcontext.c +++ b/usr/src/uts/sparc/syscall/getcontext.c @@ -176,7 +176,7 @@ int getsetcontext(int flag, void *arg) { ucontext_t uc; - struct _fq fpu_q[MAXFPQ]; /* to hold floating queue */ + struct fq fpu_q[MAXFPQ]; /* to hold floating queue */ fpregset_t *fpp; gwindows_t *gwin = NULL; /* to hold windows */ caddr_t xregs = NULL; @@ -214,7 +214,7 @@ getsetcontext(int flag, void *arg) * a later setcontext(2). */ uc.uc_mcontext.fpregs.fpu_qcnt = 0; - uc.uc_mcontext.fpregs.fpu_q = (struct _fq *)NULL; + uc.uc_mcontext.fpregs.fpu_q = (struct fq *)NULL; if (copyout(&uc, arg, sizeof (ucontext_t))) return (set_errno(EFAULT)); @@ -255,7 +255,7 @@ getsetcontext(int flag, void *arg) if ((fpp->fpu_q) && (fpp->fpu_qcnt)) { if (fpp->fpu_qcnt > MAXFPQ || fpp->fpu_q_entrysize <= 0 || - fpp->fpu_q_entrysize > sizeof (struct _fq)) + fpp->fpu_q_entrysize > sizeof (struct fq)) return (set_errno(EINVAL)); if (copyin(fpp->fpu_q, fpu_q, fpp->fpu_qcnt * fpp->fpu_q_entrysize)) @@ -424,7 +424,7 @@ getsetcontext32(int flag, void *arg) { ucontext32_t uc; ucontext_t ucnat; - struct _fq fpu_qnat[MAXFPQ]; /* to hold "native" floating queue */ + struct fq fpu_qnat[MAXFPQ]; /* to hold "native" floating queue */ struct fq32 fpu_q[MAXFPQ]; /* to hold 32 bit floating queue */ fpregset32_t *fpp; gwindows32_t *gwin = NULL; /* to hold windows */ diff --git a/usr/src/uts/sparc/v7/sys/machpcb.h b/usr/src/uts/sparc/v7/sys/machpcb.h index 
f0b76ee7a6..a964018519 100644 --- a/usr/src/uts/sparc/v7/sys/machpcb.h +++ b/usr/src/uts/sparc/v7/sys/machpcb.h @@ -26,6 +26,8 @@ #ifndef _SYS_MACHPCB_H #define _SYS_MACHPCB_H +#pragma ident "%Z%%M% %I% %E% SMI" + #include #include #include @@ -69,8 +71,8 @@ typedef struct machpcb { int mpcb_uwm; /* user window mask */ int mpcb_swm; /* shared user/kernel window mask */ int mpcb_wbcnt; /* number of saved windows in pcb_wbuf */ - struct _fpu mpcb_fpu; /* fpu state */ - struct _fq mpcb_fpu_q[_MAXFPQ]; /* fpu exception queue */ + struct fpu mpcb_fpu; /* fpu state */ + struct fq mpcb_fpu_q[MAXFPQ]; /* fpu exception queue */ int mpcb_flags; /* various state flags */ int mpcb_wocnt; /* window overflow count */ int mpcb_wucnt; /* window underflow count */ diff --git a/usr/src/uts/sparc/v9/fpu/fpu.c b/usr/src/uts/sparc/v9/fpu/fpu.c index a84612e030..a341eb120d 100644 --- a/usr/src/uts/sparc/v9/fpu/fpu.c +++ b/usr/src/uts/sparc/v9/fpu/fpu.c @@ -24,6 +24,8 @@ * Use is subject to license terms. */ +#pragma ident "%Z%%M% %I% %E% SMI" + #include #include #include @@ -289,7 +291,7 @@ fp_disabled(struct regs *rp) } if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc, rp, (ulong_t *)rp->r_sp, fp)) { - fp->fpu_q_entrysize = sizeof (struct _fpq); + fp->fpu_q_entrysize = sizeof (struct fpq); fp_traps(&fpsd, ftt, rp); } } @@ -318,7 +320,7 @@ void fp_runq(struct regs *rp) { kfpu_t *fp = lwptofpu(curthread->t_lwp); - struct _fq *fqp = fp->fpu_q; + struct fq *fqp = fp->fpu_q; fp_simd_type fpsd; uint64_t gsr = get_gsr(fp); @@ -343,7 +345,7 @@ fp_runq(struct regs *rp) */ if (fqp != fp->fpu_q) { int i; - struct _fq *fqdp; + struct fq *fqdp; /* * We need to normalize the floating queue so @@ -357,7 +359,7 @@ fp_runq(struct regs *rp) } fqp = fp->fpu_q; } - fp->fpu_q_entrysize = sizeof (struct _fpq); + fp->fpu_q_entrysize = sizeof (struct fpq); /* * fpu_simulator uses the fp registers directly but it @@ -465,14 +467,14 @@ fp_precise(struct regs *rp) * problem for a restorecontext of a v8 fp 
queue on a * v9 system, which seems like the .000000001% case (on v9)! */ - struct _fpq *pfpq = &fp->fpu_q->FQu.fpq; + struct fpq *pfpq = &fp->fpu_q->FQu.fpq; fp_simd_type fpsd; int fptrap; pfpq->fpq_addr = (uint_t *)rp->r_pc; pfpq->fpq_instr = kluge.i; fp->fpu_qcnt = 1; - fp->fpu_q_entrysize = sizeof (struct _fpq); + fp->fpu_q_entrysize = sizeof (struct fpq); kpreempt_disable(); (void) flush_user_windows_to_stack(NULL); diff --git a/usr/src/uts/sparc/v9/os/v9dep.c b/usr/src/uts/sparc/v9/os/v9dep.c index f54dab4f5d..751837788e 100644 --- a/usr/src/uts/sparc/v9/os/v9dep.c +++ b/usr/src/uts/sparc/v9/os/v9dep.c @@ -73,7 +73,7 @@ static uint_t mkpsr(uint64_t tstate, uint32_t fprs); #ifdef _SYSCALL32_IMPL static void fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, - const struct fq32 *sfq, struct _fq *dfq); + const struct fq32 *sfq, struct fq *dfq); #endif /* _SYSCALL32_IMPL */ /* @@ -122,7 +122,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp) */ if (fp->fpu_qcnt > MAXFPQ) /* plug security holes */ fp->fpu_qcnt = MAXFPQ; - fp->fpu_q_entrysize = sizeof (struct _fq); + fp->fpu_q_entrysize = sizeof (struct fq); /* * For v9 kernel, copy all of the fp regs. 
@@ -1098,7 +1098,7 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)()) sulword_noerr(&fp->uc.uc_mcontext.gwins, (ulong_t)NULL); if (fpq_size != 0) { - struct _fq *fqp = (struct _fq *)sp; + struct fq *fqp = (struct fq *)sp; sulword_noerr(&fp->uc.uc_mcontext.fpregs.fpu_q, (ulong_t)fqp); copyout_noerr(mpcb->mpcb_fpu_q, fqp, fpq_size); @@ -1730,7 +1730,7 @@ fpuregset_nto32(const fpregset_t *src, fpregset32_t *dest, struct fq32 *dfq) dest->fpu_en = src->fpu_en; if ((src->fpu_qcnt) && (dfq != NULL)) { - struct _fq *sfq = src->fpu_q; + struct fq *sfq = src->fpu_q; for (i = 0; i < src->fpu_qcnt; i++, dfq++, sfq++) { dfq->FQu.fpq.fpq_addr = (caddr32_t)(uintptr_t)sfq->FQu.fpq.fpq_addr; @@ -1748,7 +1748,7 @@ fpuregset_nto32(const fpregset_t *src, fpregset32_t *dest, struct fq32 *dfq) */ static void fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, - const struct fq32 *sfq, struct _fq *dfq) + const struct fq32 *sfq, struct fq *dfq) { int i; @@ -1758,7 +1758,7 @@ fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, dest->fpu_q = dfq; dest->fpu_fsr = (uint64_t)src->fpu_fsr; if ((dest->fpu_qcnt = src->fpu_qcnt) > 0) - dest->fpu_q_entrysize = sizeof (struct _fpq); + dest->fpu_q_entrysize = sizeof (struct fpq); else dest->fpu_q_entrysize = 0; dest->fpu_en = src->fpu_en; @@ -1774,7 +1774,7 @@ fpuregset_32ton(const fpregset32_t *src, fpregset_t *dest, void ucontext_32ton(const ucontext32_t *src, ucontext_t *dest, - const struct fq32 *sfq, struct _fq *dfq) + const struct fq32 *sfq, struct fq *dfq) { int i; diff --git a/usr/src/uts/sparc/v9/sys/machpcb.h b/usr/src/uts/sparc/v9/sys/machpcb.h index e4b1972d8a..298dae5100 100644 --- a/usr/src/uts/sparc/v9/sys/machpcb.h +++ b/usr/src/uts/sparc/v9/sys/machpcb.h @@ -72,7 +72,7 @@ typedef struct machpcb { int mpcb_wbcnt; /* number of saved windows in pcb_wbuf */ uint_t mpcb_wstate; /* per-lwp %wstate */ kfpu_t *mpcb_fpu; /* fpu state */ - struct _fq mpcb_fpu_q[_MAXFPQ]; /* fpu exception queue */ + struct fq 
mpcb_fpu_q[MAXFPQ]; /* fpu exception queue */ caddr_t mpcb_illexcaddr; /* address of last illegal instruction */ uint_t mpcb_illexcinsn; /* last illegal instruction */ uint_t mpcb_illexccnt; /* count of illegal instruction attempts */ diff --git a/usr/src/uts/sun4/os/trap.c b/usr/src/uts/sun4/os/trap.c index 094620625a..654b83c953 100644 --- a/usr/src/uts/sun4/os/trap.c +++ b/usr/src/uts/sun4/os/trap.c @@ -1366,7 +1366,7 @@ fpu_trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t code) int mstate; char *badaddr; kfpu_t *fp; - struct _fpq *pfpq; + struct fpq *pfpq; uint32_t inst; utrap_handler_t *utrapp; @@ -1431,7 +1431,7 @@ fpu_trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t code) pfpq->fpq_addr = (uint32_t *)rp->r_pc; pfpq->fpq_instr = inst; fp->fpu_qcnt = 1; - fp->fpu_q_entrysize = sizeof (struct _fpq); + fp->fpu_q_entrysize = sizeof (struct fpq); #ifdef SF_V9_TABLE_28 /* * Spitfire and blackbird followed the SPARC V9 manual -- cgit v1.2.3 From 2a446632681882e5dfe50f2f301401588f1ce734 Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Sun, 18 Oct 2015 17:08:27 -0700 Subject: 6328 Fix cstyle errors in zfs codebase (fix studio) --- usr/src/common/net/wanboot/p12aux.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'usr/src') diff --git a/usr/src/common/net/wanboot/p12aux.h b/usr/src/common/net/wanboot/p12aux.h index be07683c37..a4de6781aa 100644 --- a/usr/src/common/net/wanboot/p12aux.h +++ b/usr/src/common/net/wanboot/p12aux.h @@ -44,8 +44,8 @@ extern "C" { * * My apologies. */ -/* LINTED */ -DECLARE_STACK_OF(EVP_PKEY); +/* CSTYLED */ +DECLARE_STACK_OF(EVP_PKEY) #define sk_EVP_PKEY_new_null() SKM_sk_new_null(EVP_PKEY) #define sk_EVP_PKEY_free(st) SKM_sk_free(EVP_PKEY, (st)) -- cgit v1.2.3