diff options
Diffstat (limited to 'usr/src/cmd')
88 files changed, 32174 insertions, 4585 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 5d5404299b..2fe0d555a8 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -24,12 +24,13 @@ # Copyright (c) 2014 Joyent, Inc. All rights reserved. # Copyright (c) 2012 by Delphix. All rights reserved. # Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved. +# Copyright 2014 Garrett D'Amore <garrett@damore.org> include ../Makefile.master # -# Note that the commands 'perl', and 'man' are first in -# the list, violating alphabetical order. This is because they are very +# Note that if the 'lp' command were built, it would be first in +# the list, violating alphabetical order. This is because it is very # long-running and should be given the most wall-clock time for a # parallel build. # @@ -51,7 +52,6 @@ COMMON_SUBDIRS= \ allocate \ availdevs \ perl \ - man \ Adm \ abi \ adbgen \ @@ -250,6 +250,8 @@ COMMON_SUBDIRS= \ mail \ mailx \ makekey \ + man \ + mandoc \ mdb \ mesg \ mkdir \ @@ -497,11 +499,10 @@ sparc_SUBDIRS= \ vntsd # -# Commands that are messaged. Note that 'man' comes first -# (see previous comment about 'man'). +# Commands that are messaged. Note that 'lp' comes first +# (see previous comment about 'lp'.) # MSGSUBDIRS= \ - man \ abi \ acctadm \ allocate \ @@ -614,6 +615,7 @@ MSGSUBDIRS= \ ls \ luxadm \ mailx \ + man \ mesg \ mkdir \ mkpwdict \ diff --git a/usr/src/cmd/cmd-inet/etc/default/inetinit.dfl b/usr/src/cmd/cmd-inet/etc/default/inetinit.dfl index 11b5056010..ffec19bdbf 100644 --- a/usr/src/cmd/cmd-inet/etc/default/inetinit.dfl +++ b/usr/src/cmd/cmd-inet/etc/default/inetinit.dfl @@ -29,7 +29,7 @@ # 1 = Improved sequential generation, with random variance in increment. # 2 = RFC 1948 sequence number generation, unique-per-connection-ID. # -TCP_STRONG_ISS=1 +TCP_STRONG_ISS=2 # # ACCEPT6TO4RELAY sets the policy for 6to4 tunnels communicating with 6to4 # Relay Routers as defined in RFC 3056. 
Traffic sent from a 6to4 site to a diff --git a/usr/src/cmd/hostid/smf/svc-hostid b/usr/src/cmd/hostid/smf/svc-hostid index 91edb71a95..818a863600 100644 --- a/usr/src/cmd/hostid/smf/svc-hostid +++ b/usr/src/cmd/hostid/smf/svc-hostid @@ -23,6 +23,8 @@ # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # +# Copyright 2014 Garrett D'Amore <garrett@damore.org> +# . /lib/svc/share/smf_include.sh @@ -36,8 +38,7 @@ if smf_is_globalzone; then else host=`/usr/bin/hostid` echo "# DO NOT EDIT" > /etc/hostid - r=`echo "0x${host}" | /usr/bin/perl -e \ - 'while(<STDIN>){chop;tr/!-~/P-~!-O/;print $_,"\n";}exit 0;'` + r=`echo "0x${host}" | /usr/bin/tr 'P-~!-O' '!-OP-~'` printf "\"%s\"\n" $r >> /etc/hostid fi fi diff --git a/usr/src/cmd/man/Makefile b/usr/src/cmd/man/Makefile index bf87071d3e..911bae6340 100644 --- a/usr/src/cmd/man/Makefile +++ b/usr/src/cmd/man/Makefile @@ -1,61 +1,47 @@ # -# CDDL HEADER START +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. # -# The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. # -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -#ident "%Z%%M% %I% %E% SMI" + # -# Copyright (c) 1990 by Sun Microsystems, Inc. +# Copyright 2012 Nexenta Systems, Inc. All rights reserved. +# Copyright 2014 Garrett D'Amore <garrett@damore.org> # -# cmd/man/Makefile -include ../Makefile.cmd -SUBDIRS = src +PROG= man +LINKS= apropos whatis catman +LIBLINKS = makewhatis +OBJS= makewhatis.o man.o stringlist.o +SRCS= $(OBJS:%.o=%.c) + +include $(SRC)/cmd/Makefile.cmd + +CFLAGS += $(CCVERBOSE) -all := TARGET= all -install := TARGET= install -clean := TARGET= clean -clobber := TARGET= clobber -lint := TARGET= lint -_msg := TARGET= catalog +ROOTLINKS= $(LINKS:%=$(ROOTBIN)/%) $(LIBLINKS:%=$(ROOTLIB)/%) -#for message catalog files -POFILE = man.po -POFILES = src/src.po +.KEEP_STATE : -.KEEP_STATE: +all: $(PROG) -all install clean lint: $(SUBDIRS) +clean: + $(RM) $(OBJS) -clobber: $(SUBDIRS) local_clobber +install: all $(ROOTPROG) $(ROOTLINKS) -local_clobber: - $(RM) $(CLOBBERFILES) +lint: lint_SRCS -_msg: $(SUBDIRS) - $(RM) $(POFILE) - cat $(POFILES) > $(POFILE) - $(RM) $(MSGDOMAIN)/$(POFILE) - cp $(POFILE) $(MSGDOMAIN) +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) -$(SUBDIRS): FRC - @cd $@; pwd; $(MAKE) $(TARGET) +$(ROOTLINKS): $(ROOTPROG) + $(RM) $@; $(LN) $(ROOTPROG) $@ -FRC: +include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/cmd/man/THIRDPARTYLICENSE b/usr/src/cmd/man/THIRDPARTYLICENSE new file mode 100644 index 0000000000..f6ba743968 --- /dev/null +++ b/usr/src/cmd/man/THIRDPARTYLICENSE @@ -0,0 +1,92 @@ +man.c: + +Copyright (c) 1980 Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. 
Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgement: + This product includes software developed by the University + of California, Berkeley and its contributors. + 4. Neither the name of the University nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +makewhatis.c: + +Copyright (c) 2002 John Rochester +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer, + in this position and unchanged. +2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +stringlist.c, stringlist.h: + +Copyright (c) 1994 Christos Zoulas +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +4. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/usr/src/cmd/man/src/THIRDPARTYLICENSE.descrip b/usr/src/cmd/man/THIRDPARTYLICENSE.descrip index 3fe27c64b4..3fe27c64b4 100644 --- a/usr/src/cmd/man/src/THIRDPARTYLICENSE.descrip +++ b/usr/src/cmd/man/THIRDPARTYLICENSE.descrip diff --git a/usr/src/cmd/man/makewhatis.c b/usr/src/cmd/man/makewhatis.c new file mode 100644 index 0000000000..c5428e4633 --- /dev/null +++ b/usr/src/cmd/man/makewhatis.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2002 John Rochester + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 Garrett D'Amore <garrett@damore.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> + +#include <ctype.h> +#include <dirent.h> +#include <err.h> +#include <signal.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "man.h" +#include "stringlist.h" + + +/* Information collected about each man page in a section */ +struct page_info { + char *filename; + char *name; + char *suffix; + ino_t inode; +}; + +/* An expanding string */ +struct sbuf { + char *content; /* the start of the buffer */ + char *end; /* just past the end of the content */ + char *last; /* the last allocated character */ +}; + +/* Remove the last amount characters from the sbuf */ +#define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) +/* Return the length of the sbuf content */ +#define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) + +typedef char *edited_copy(char *from, char *to, int length); + +/* + * While the whatis line is 
being formed, it is stored in whatis_proto.
+ * When finished, it is reformatted into whatis_final and then appended
+ * to whatis_lines.
+ */
+static struct sbuf *whatis_proto;
+static struct sbuf *whatis_final;
+static stringlist *whatis_lines;	/* collected output lines */
+
+static char tempfile[MAXPATHLEN];	/* path of temporary file, if any */
+
+#define	MDOC_COMMANDS	"ArDvErEvFlLiNmPa"
+
+
+/* Free a struct page_info and its content */
+static void
+free_page_info(struct page_info *info)
+{
+
+	free(info->filename);
+	free(info->name);
+	free(info->suffix);
+	free(info);
+}
+
+/*
+ * Allocate and fill in a new struct page_info given the
+ * name of the man section directory and the dirent of the file.
+ * If the file is not a man page, return NULL.
+ *
+ * A man page is a regular file named "<name>.<suffix>" whose suffix is
+ * purely alphanumeric (possibly empty).  As a side effect, d_name is
+ * truncated at that final '.' when the name has the right shape.
+ * Allocation failures are fatal.
+ */
+static struct page_info *
+new_page_info(char *dir, struct dirent *dirent)
+{
+	struct page_info *info;
+	char *suffix;
+	struct stat st;
+
+	/*
+	 * Walk backwards over the alphanumeric suffix; whatever stops the
+	 * walk must be the '.' separating it from the page name.  This is
+	 * done before allocating anything so a reject costs nothing.
+	 */
+	suffix = &dirent->d_name[strlen(dirent->d_name)];
+	while (suffix > dirent->d_name) {
+		suffix--;
+		if (suffix == dirent->d_name ||
+		    !isalnum((unsigned char)*suffix))
+			break;
+	}
+	if (*suffix != '.')
+		return (NULL);
+
+	if ((info = malloc(sizeof (struct page_info))) == NULL)
+		err(1, "malloc");
+	/* Build the full path before d_name is cut at the '.'. */
+	if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1)
+		err(1, "asprintf");
+	*suffix++ = '\0';
+	info->name = strdup(dirent->d_name);
+	info->suffix = strdup(suffix);
+	/* Both strings are dereferenced later (strcmp, snprintf). */
+	if (info->name == NULL || info->suffix == NULL)
+		err(1, "strdup");
+	if (stat(info->filename, &st) < 0) {
+		warn("%s", info->filename);
+		free_page_info(info);
+		return (NULL);
+	}
+	if (!S_ISREG(st.st_mode)) {
+		free_page_info(info);
+		return (NULL);
+	}
+	info->inode = st.st_ino;
+	return (info);
+}
+
+/*
+ * Reset sbuf length to 0.
+ */
+static void
+sbuf_clear(struct sbuf *sbuf)
+{
+
+	sbuf->end = sbuf->content;
+}
+
+/*
+ * Allocate a new sbuf.
+ */
+static struct sbuf *
+new_sbuf(void)
+{
+	struct sbuf *sb = malloc(sizeof (struct sbuf));
+
+	if (sb == NULL)
+		err(1, "malloc");
+	sb->content = (char *)malloc(LINE_ALLOC);
+	if (sb->content == NULL)
+		err(1, "malloc");
+	/* "last" addresses the final usable byte of the allocation. */
+	sb->last = sb->content + LINE_ALLOC - 1;
+	sbuf_clear(sb);
+
+	return (sb);
+}
+
+/*
+ * Make sure the sbuf can take nchars more characters, enlarging the
+ * allocation in multiples of 128 bytes when it cannot.  Running out
+ * of memory removes any temporary file and terminates the program.
+ */
+static void
+sbuf_need(struct sbuf *sbuf, int nchars)
+{
+	char *bigger;
+	size_t newsize, used;
+	size_t grow;
+
+	/* The request still fits: nothing to do. */
+	if (sbuf->end + nchars <= sbuf->last)
+		return;
+
+	/* Smallest multiple of 128 that covers the request. */
+	for (grow = 128; grow < nchars; grow += 128)
+		;
+
+	used = sbuf->end - sbuf->content;
+	newsize = (sbuf->last + 1 - sbuf->content) + grow;
+
+	bigger = realloc(sbuf->content, newsize);
+	if (bigger == NULL) {
+		perror("realloc");
+		if (tempfile[0] != '\0')
+			(void) unlink(tempfile);
+		exit(1);
+	}
+
+	/* The block may have moved; re-derive every pointer from it. */
+	sbuf->content = bigger;
+	sbuf->end = bigger + used;
+	sbuf->last = bigger + newsize - 1;
+}
+
+/*
+ * Copy length bytes of text onto the end of the sbuf.
+ * Non-positive lengths are ignored.
+ */
+static void
+sbuf_append(struct sbuf *sbuf, const char *text, int length)
+{
+	if (length <= 0)
+		return;
+
+	sbuf_need(sbuf, length);
+	(void) memcpy(sbuf->end, text, length);
+	sbuf->end += length;
+}
+
+/*
+ * Copy a whole null-terminated string onto the end of the sbuf.
+ */
+static void
+sbuf_append_str(struct sbuf *sbuf, char *text)
+{
+	int length = strlen(text);
+
+	sbuf_append(sbuf, text, length);
+}
+
+/*
+ * Run a null-terminated string through an editing copy function,
+ * storing the result at the end of the sbuf.  Room is reserved for
+ * the unedited length.
+ */
+static void
+sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
+{
+	int length = strlen(text);
+
+	if (length <= 0)
+		return;
+
+	sbuf_need(sbuf, length);
+	sbuf->end = copy(text, sbuf->end, length);
+}
+
+/*
+ * Strip any of a set of chars from the end of the sbuf.
+ */
+static void
+sbuf_strip(struct sbuf *sbuf, const char *set)
+{
+
+	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
+		sbuf->end--;
+}
+
+/*
+ * Return the null-terminated string built by the sbuf.
+ * The terminator is written in place at the current end.
+ */
+static char *
+sbuf_content(struct sbuf *sbuf)
+{
+
+	*sbuf->end = '\0';
+	return (sbuf->content);
+}
+
+/*
+ * Return true if no man page exists in the directory with
+ * any of the names in the stringlist.  Each name is tried as
+ * "name.suffix" on its own and with ".gz" and ".bz2" appended.
+ */
+static int
+no_page_exists(char *dir, stringlist *names, char *suffix)
+{
+	char path[MAXPATHLEN];
+	char *suffixes[] = { "", ".gz", ".bz2", NULL };
+	size_t i;
+	int j;
+
+	for (i = 0; i < names->sl_cur; i++) {
+		for (j = 0; suffixes[j] != NULL; j++) {
+			(void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s",
+			    dir, names->sl_str[i], suffix, suffixes[j]);
+			if (access(path, F_OK) == 0) {
+				return (0);
+			}
+		}
+	}
+	return (1);
+}
+
+/*
+ * Signal handler: remove the temporary file, if any, and terminate.
+ * Only async-signal-safe calls are allowed here, hence _exit() rather
+ * than exit(), which would run atexit handlers and flush stdio.
+ */
+/* ARGSUSED sig */
+static void
+trap_signal(int sig)
+{
+
+	if (tempfile[0] != '\0')
+		(void) unlink(tempfile);
+
+	_exit(1);
+}
+
+/*
+ * Attempt to open an output file.
+ * Return NULL if unsuccessful.
+ *
+ * Output goes to "<name>.tmp", whose path is recorded in tempfile.
+ * On failure tempfile is cleared so that a later signal cannot unlink
+ * a file this process never created, and whatis_lines is left alone.
+ */
+static FILE *
+open_output(char *name)
+{
+	FILE *output;
+	int len;
+
+	len = snprintf(tempfile, MAXPATHLEN, "%s.tmp", name);
+	if (len < 0 || len >= MAXPATHLEN) {
+		/* A truncated path would name some other file. */
+		warnx("%s.tmp: path too long", name);
+		tempfile[0] = '\0';
+		return (NULL);
+	}
+	if ((output = fopen(tempfile, "w")) == NULL) {
+		warn("%s", tempfile);
+		tempfile[0] = '\0';
+		return (NULL);
+	}
+	/* Allocate the line list only once there is somewhere to write. */
+	whatis_lines = sl_init();
+	return (output);
+}
+
+/* qsort() comparator ordering an array of strings with strcmp(). */
+static int
+linesort(const void *a, const void *b)
+{
+
+	return (strcmp((*(const char * const *)a), (*(const char * const *)b)));
+}
+
+/*
+ * Write the unique sorted lines to the output file.
+ */
+static void
+finish_output(FILE *output, char *name)
+{
+	size_t i;
+	char *prev = NULL;
+	int failed;
+
+	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *),
+	    linesort);
+	for (i = 0; i < whatis_lines->sl_cur; i++) {
+		char *line = whatis_lines->sl_str[i];
+		/* Sorted input puts duplicates next to each other. */
+		if (prev != NULL && strcmp(line, prev) == 0)
+			continue;
+		prev = line;
+		(void) fputs(line, output);
+		(void) putc('\n', output);
+	}
+	/*
+	 * Only replace the existing file when every line reached the
+	 * temporary file; otherwise keep the old one and discard the
+	 * partial output.
+	 */
+	failed = ferror(output);
+	if (fclose(output) != 0)
+		failed = 1;
+	sl_free(whatis_lines, 1);
+	if (failed) {
+		warn("%s", tempfile);
+	} else if (rename(tempfile, name) != 0) {
+		warn("%s", name);
+		failed = 1;
+	}
+	if (failed)
+		(void) unlink(tempfile);
+	/* The temporary file is gone either way; forget its name. */
+	tempfile[0] = '\0';
+}
+
+/* Open the temporary output for the whatis file of a man directory. */
+static FILE *
+open_whatis(char *mandir)
+{
+	char filename[MAXPATHLEN];
+
+	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
+	return (open_output(filename));
+}
+
+/* Sort, write and install the whatis file of a man directory. */
+static void
+finish_whatis(FILE *output, char *mandir)
+{
+	char filename[MAXPATHLEN];
+
+	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
+	finish_output(output, filename);
+}
+
+/*
+ * Remove trailing spaces from a string, returning a pointer to just
+ * beyond the new last character.  A string consisting only of
+ * whitespace becomes empty.
+ */
+static char *
+trim_rhs(char *str)
+{
+	char *rhs;
+
+	rhs = &str[strlen(str)];
+	/* Test rhs[-1] so the pointer never moves before the string. */
+	while (rhs > str && isspace((unsigned char)rhs[-1]))
+		rhs--;
+	*rhs = '\0';
+	return (rhs);
+}
+
+/*
+ * Return a pointer to the next non-space character in the string.
+ */
+static char *
+skip_spaces(char *s)
+{
+
+	while (*s != '\0' && isspace((unsigned char)*s))
+		s++;
+
+	return (s);
+}
+
+/*
+ * Return whether the line is of one of the forms:
+ *	.Sh NAME
+ *	.Sh "NAME"
+ *	etc.
+ * assuming that section_start is ".Sh".
+ */
+static int
+name_section_line(char *line, const char *section_start)
+{
+	char *rhs;
+
+	if (strncmp(line, section_start, 3) != 0)
+		return (0);
+	line = skip_spaces(line + 3);
+	/* Note: trimming (and unquoting below) edits the caller's line. */
+	rhs = trim_rhs(line);
+	if (*line == '"') {
+		line++;
+		if (*--rhs == '"')
+			*rhs = '\0';
+	}
+	if (strcmp(line, "NAME") == 0)
+		return (1);
+
+	return (0);
+}
+
+/*
+ * Copy characters while removing the most common nroff/troff markup:
+ *	\(em, \(mi, \s[+-]N, \&
+ *	\fF, \f(fo, \f[font]
+ *	\*s, \*(st, \*[stringvar]
+ * Any other escape loses its backslash and keeps the character after
+ * it.  A backslash that ends the input is dropped.  Returns a pointer
+ * just past the last byte written; the output is never longer than
+ * the input and is not null-terminated.
+ */
+static char *
+de_nroff_copy(char *from, char *to, int fromlen)
+{
+	char *from_end = &from[fromlen];
+
+	while (from < from_end) {
+		switch (*from) {
+		case '\\':
+			switch (*++from) {
+			case '\0':
+				/*
+				 * Trailing backslash: stop here instead of
+				 * copying the terminator into the output.
+				 */
+				continue;
+			case '(':
+				if (strncmp(&from[1], "em", 2) == 0 ||
+				    strncmp(&from[1], "mi", 2) == 0) {
+					from += 3;
+					continue;
+				}
+				break;
+			case 's':
+				/* Point size change: optional sign, digits. */
+				from++;
+				if (*from == '-' || *from == '+')
+					from++;
+				while (isdigit((unsigned char)*from))
+					from++;
+				continue;
+			case 'f':
+			case '*':
+				if (*++from == '(') {
+					from += 3;
+				} else if (*from == '[') {
+					while (*++from != ']' &&
+					    from < from_end)
+						;
+					from++;
+				} else {
+					from++;
+				}
+				continue;
+			case '&':
+				from++;
+				continue;
+			}
+			break;
+		}
+		*to++ = *from++;
+	}
+	return (to);
+}
+
+/*
+ * Append a string with the nroff formatting removed.
+ */
+static void
+add_nroff(char *text)
+{
+
+	sbuf_append_edited(whatis_proto, text, de_nroff_copy);
+}
+
+/*
+ * Appends "name(suffix), " to whatis_final
+ * Empty names are skipped.
+ */
+static void
+add_whatis_name(char *name, char *suffix)
+{
+
+	if (*name != '\0') {
+		sbuf_append_str(whatis_final, name);
+		sbuf_append(whatis_final, "(", 1);
+		sbuf_append_str(whatis_final, suffix);
+		sbuf_append(whatis_final, "), ", 3);
+	}
+}
+
+/*
+ * Processes an old-style man(7) line. This ignores commands with only
+ * a single number argument.
+ */
+static void
+process_man_line(char *line)
+{
+	char *p;
+
+	if (*line == '.') {
+		/* Step over the request name and look at its arguments. */
+		while (isalpha((unsigned char)*++line))
+			;
+		p = line = skip_spaces(line);
+		while (*p != '\0') {
+			if (!isdigit((unsigned char)*p))
+				break;
+			p++;
+		}
+		/* Nothing but digits (or nothing at all): not text. */
+		if (*p == '\0')
+			return;
+	} else
+		line = skip_spaces(line);
+	if (*line != '\0') {
+		add_nroff(line);
+		sbuf_append(whatis_proto, " ", 1);
+	}
+}
+
+/*
+ * Processes a new-style mdoc(7) line.
+ *
+ * Plain text lines are appended as they are.  Macro lines are split
+ * into arguments (the line is edited in place to do so); arguments
+ * that are themselves one of MDOC_COMMANDS are dropped, "Ns"
+ * suppresses the following space, and the second argument of .Xr is
+ * rendered as "(section)".
+ */
+static void
+process_mdoc_line(char *line)
+{
+	int xref;
+	int arg = 0;
+	char *line_end = &line[strlen(line)];
+	int orig_length = sbuf_length(whatis_proto);
+	char *next;
+
+	if (*line == '\0')
+		return;
+	if (line[0] != '.' || !isupper((unsigned char)line[1]) ||
+	    !islower((unsigned char)line[2])) {
+		add_nroff(skip_spaces(line));
+		sbuf_append(whatis_proto, " ", 1);
+		return;
+	}
+	xref = strncmp(line, ".Xr", 3) == 0;
+	line += 3;
+	while ((line = skip_spaces(line)) < line_end) {
+		if (*line == '"') {
+			/*
+			 * Quoted argument: delete the closing quote, and
+			 * treat a doubled quote as one literal quote.
+			 */
+			next = ++line;
+			for (;;) {
+				next = strchr(next, '"');
+				if (next == NULL)
+					break;
+				(void) memmove(next, next + 1, strlen(next));
+				line_end--;
+				if (*next != '"')
+					break;
+				next++;
+			}
+		} else {
+			next = strpbrk(line, " \t");
+		}
+		if (next != NULL)
+			*next++ = '\0';
+		else
+			next = line_end;
+		if (isupper((unsigned char)*line) &&
+		    islower((unsigned char)line[1]) && line[2] == '\0') {
+			if (strcmp(line, "Ns") == 0) {
+				arg = 0;
+				line = next;
+				continue;
+			}
+			/*
+			 * Look the two-letter word up in the macro table.
+			 * The table is the haystack, the word the needle.
+			 */
+			if (strstr(MDOC_COMMANDS, line) != NULL) {
+				line = next;
+				continue;
+			}
+		}
+		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
+			if (xref) {
+				/*
+				 * The section has now been written inside
+				 * the parentheses; do not append it again.
+				 */
+				sbuf_append(whatis_proto, "(", 1);
+				add_nroff(line);
+				sbuf_append(whatis_proto, ")", 1);
+				xref = 0;
+				arg++;
+				line = next;
+				continue;
+			}
+			sbuf_append(whatis_proto, " ", 1);
+		}
+		add_nroff(line);
+		arg++;
+		line = next;
+	}
+	if (sbuf_length(whatis_proto) > orig_length)
+		sbuf_append(whatis_proto, " ", 1);
+}
+
+/*
+ * Collect a list of comma-separated names from the text.
+ */
+static void
+collect_names(stringlist *names, char *text)
+{
+	char *arg;
+
+	/* The text is cut up in place; the list points into it. */
+	for (;;) {
+		arg = text;
+		text = strchr(text, ',');
+		if (text != NULL)
+			*text++ = '\0';
+		(void) sl_add(names, arg);
+		if (text == NULL)
+			return;
+		if (*text == ' ')
+			text++;
+	}
+}
+
+enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
+
+/*
+ * Process a man page source into a single whatis line and add it
+ * to whatis_lines.
+ *
+ * The NAME section is gathered into whatis_proto, split into a name
+ * list and a description, and rebuilt in whatis_final.  Pages without
+ * a usable NAME section add nothing.
+ */
+static void
+process_page(struct page_info *page, char *section_dir)
+{
+	FILE *fp;
+	stringlist *names;
+	char *descr;
+	char *entry;
+	int state = STATE_UNKNOWN;
+	size_t i;
+	char *buf = NULL;	/* owned by getline(); never advanced */
+	char *line;		/* cursor into buf, later into the sbuf */
+	size_t linecap = 0;
+
+	sbuf_clear(whatis_proto);
+	if ((fp = fopen(page->filename, "r")) == NULL) {
+		warn("%s", page->filename);
+		return;
+	}
+	while (getline(&buf, &linecap, fp) > 0) {
+		/*
+		 * Work on a copy of the pointer: getline() must get back
+		 * exactly the pointer it allocated.
+		 */
+		line = buf;
+		/* Skip comments */
+		if (strncmp(line, ".\\\"", 3) == 0)
+			continue;
+		switch (state) {
+		/* Haven't reached the NAME section yet */
+		case STATE_UNKNOWN:
+			if (name_section_line(line, ".SH"))
+				state = STATE_MANSTYLE;
+			else if (name_section_line(line, ".Sh"))
+				state = STATE_MDOCNAME;
+			continue;
+		/* Inside an old-style .SH NAME section */
+		case STATE_MANSTYLE:
+			if (strncmp(line, ".SH", 3) == 0 ||
+			    strncmp(line, ".SS", 3) == 0)
+				break;
+			(void) trim_rhs(line);
+			if (strcmp(line, ".") == 0)
+				continue;
+			if (strncmp(line, ".IX", 3) == 0) {
+				line += 3;
+				line = skip_spaces(line);
+			}
+			process_man_line(line);
+			continue;
+		/* Inside a new-style .Sh NAME section (the .Nm part) */
+		case STATE_MDOCNAME:
+			(void) trim_rhs(line);
+			if (strncmp(line, ".Nm", 3) == 0) {
+				process_mdoc_line(line);
+				continue;
+			} else {
+				if (strcmp(line, ".") == 0)
+					continue;
+				sbuf_append(whatis_proto, "- ", 2);
+				state = STATE_MDOCDESC;
+			}
+			/* FALLTHROUGH */
+		/* Inside a new-style .Sh NAME section (after the .Nm-s) */
+		case STATE_MDOCDESC:
+			if (strncmp(line, ".Sh", 3) == 0)
+				break;
+			(void) trim_rhs(line);
+			if (strcmp(line, ".") == 0)
+				continue;
+			process_mdoc_line(line);
+			continue;
+		}
+		/* Reached the section after NAME: stop reading. */
+		break;
+	}
+	free(buf);
+	(void) fclose(fp);
+	sbuf_strip(whatis_proto, " \t.-");
+	line = sbuf_content(whatis_proto);
+	/*
+	 * Line now contains the appropriate data, but without the
+	 * proper indentation or the section appended to each name.
+	 */
+	descr = strstr(line, " - ");
+	if (descr == NULL) {
+		descr = strchr(line, ' ');
+		if (descr == NULL)
+			return;
+		*descr++ = '\0';
+	} else {
+		*descr = '\0';
+		descr += 3;
+	}
+	names = sl_init();
+	collect_names(names, line);
+	sbuf_clear(whatis_final);
+	if (!sl_find(names, page->name) &&
+	    no_page_exists(section_dir, names, page->suffix)) {
+		/*
+		 * Add the page name since that's the only
+		 * thing that man(1) will find.
+		 */
+		add_whatis_name(page->name, page->suffix);
+	}
+	for (i = 0; i < names->sl_cur; i++)
+		add_whatis_name(names->sl_str[i], page->suffix);
+	sl_free(names, 0);
+	/*
+	 * Remove last ", " -- but only if a name was actually written,
+	 * otherwise the end pointer would move before the buffer.
+	 */
+	if (sbuf_length(whatis_final) >= 2)
+		sbuf_retract(whatis_final, 2);
+	while (sbuf_length(whatis_final) < INDENT)
+		sbuf_append(whatis_final, " ", 1);
+	sbuf_append(whatis_final, " - ", 3);
+	sbuf_append_str(whatis_final, skip_spaces(descr));
+	if ((entry = strdup(sbuf_content(whatis_final))) == NULL)
+		err(1, "strdup");
+	(void) sl_add(whatis_lines, entry);
+}
+
+/*
+ * Sort pages first by inode number, then by name.
+ */
+static int
+pagesort(const void *a, const void *b)
+{
+	const struct page_info *p1 = *(struct page_info * const *) a;
+	const struct page_info *p2 = *(struct page_info * const *) b;
+
+	if (p1->inode == p2->inode)
+		return (strcmp(p1->name, p2->name));
+
+	/*
+	 * Compare rather than subtract: the difference of two ino_t
+	 * values does not fit in an int and can come out with the
+	 * wrong sign.
+	 */
+	return (p1->inode < p2->inode ? -1 : 1);
+}
+
+/*
+ * Process a single man section.
+ */
+static void
+process_section(char *section_dir)
+{
+	struct dirent **entries;
+	int nentries;
+	struct page_info **pages;
+	int npages = 0;
+	int i;
+	ino_t prev_inode = 0;
+
+	/* Scan the man section directory for pages */
+	nentries = scandir(section_dir, &entries, NULL, alphasort);
+	if (nentries < 0) {
+		/* Not a directory, unreadable, ...: skip this section. */
+		warn("%s", section_dir);
+		return;
+	}
+	if (nentries == 0) {
+		free(entries);
+		return;
+	}
+
+	/* Collect information about man pages */
+	pages = (struct page_info **)calloc(nentries,
+	    sizeof (struct page_info *));
+	if (pages == NULL)
+		err(1, "calloc");
+	for (i = 0; i < nentries; i++) {
+		struct page_info *info = new_page_info(section_dir, entries[i]);
+		if (info != NULL)
+			pages[npages++] = info;
+		free(entries[i]);
+	}
+	free(entries);
+	qsort(pages, npages, sizeof (struct page_info *), pagesort);
+
+	/*
+	 * Process each unique page.  Hard links to one file sort next to
+	 * each other, so only the first page of each inode is processed.
+	 */
+	for (i = 0; i < npages; i++) {
+		struct page_info *page = pages[i];
+		if (i == 0 || page->inode != prev_inode) {
+			prev_inode = page->inode;
+			process_page(page, section_dir);
+		}
+		free_page_info(page);
+	}
+	free(pages);
+}
+
+/*
+ * Return whether the directory entry is a man page section.
+ * That is, whether its name is "man" followed only by alphanumerics.
+ */
+static int
+select_sections(const struct dirent *entry)
+{
+	const char *p = &entry->d_name[3];
+
+	if (strncmp(entry->d_name, "man", 3) != 0)
+		return (0);
+	while (*p != '\0') {
+		if (!isalnum((unsigned char)*p++))
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Process a single top-level man directory by finding all the
+ * sub-directories named man* and processing each one in turn.
+ */
+void
+mwpath(char *path)
+{
+	FILE *fp = NULL;
+	struct dirent **entries = NULL;
+	int nsections;
+	int i;
+
+	/* Make sure an interrupted run does not leave its temp file. */
+	(void) signal(SIGINT, trap_signal);
+	(void) signal(SIGHUP, trap_signal);
+	(void) signal(SIGQUIT, trap_signal);
+	(void) signal(SIGTERM, trap_signal);
+
+	/*
+	 * The scratch buffers are cleared before every use, so allocate
+	 * them once and reuse them for every directory.
+	 */
+	if (whatis_proto == NULL)
+		whatis_proto = new_sbuf();
+	if (whatis_final == NULL)
+		whatis_final = new_sbuf();
+
+	nsections = scandir(path, &entries, select_sections, alphasort);
+	if (nsections < 0) {
+		/* entries is not valid here; there is nothing to free. */
+		warn("%s", path);
+		return;
+	}
+	if ((fp = open_whatis(path)) == NULL) {
+		for (i = 0; i < nsections; i++)
+			free(entries[i]);
+		free(entries);
+		return;
+	}
+	for (i = 0; i < nsections; i++) {
+		char section_dir[MAXPATHLEN];
+
+		(void) snprintf(section_dir, MAXPATHLEN, "%s/%s",
+		    path, entries[i]->d_name);
+		process_section(section_dir);
+		free(entries[i]);
+	}
+	free(entries);
+	finish_whatis(fp, path);
+}
diff --git a/usr/src/cmd/man/man.c b/usr/src/cmd/man/man.c
new file mode 100644
index 0000000000..8038cabbac
--- /dev/null
+++ b/usr/src/cmd/man/man.c
@@ -0,0 +1,1622 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, Josef 'Jeff' Sipek <jeffpc@31bits.net>. All rights reserved.
+ * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org> + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T. */ +/* All rights reserved. */ + +/* + * University Copyright- Copyright (c) 1982, 1986, 1988 + * The Regents of the University of California + * All Rights Reserved + * + * University Acknowledgment- Portions of this document are derived from + * software developed by the University of California, Berkeley, and its + * contributors. + */ + +/* + * Find and display reference manual pages. This version includes makewhatis + * functionality as well. + */ + +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/termios.h> +#include <sys/types.h> + +#include <ctype.h> +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <limits.h> +#include <locale.h> +#include <malloc.h> +#include <memory.h> +#include <regex.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "man.h" + + +/* Mapping of old directories to new directories */ +static const struct map_entry { + char *old_name; + char *new_name; +} map[] = { + { "3b", "3ucb" }, + { "3e", "3elf" }, + { "3g", "3gen" }, + { "3k", "3kstat" }, + { "3n", "3socket" }, + { "3r", "3rt" }, + { "3s", "3c" }, + { "3t", "3thr" }, + { "3x", "3curses" }, + { "3xc", "3xcurses" }, + { "3xn", "3xnet" }, + { NULL, NULL } +}; + +struct suffix { + char *ds; + char *fs; +}; + +/* + * Flags that control behavior of build_manpath() + * + * BMP_ISPATH pathv is a vector constructed from PATH. + * Perform appropriate path translations for + * manpath. + * BMP_APPEND_DEFMANDIR Add DEFMANDIR to the end if it hasn't + * already appeared earlier. + * BMP_FALLBACK_DEFMANDIR Append /usr/share/man only if no other + * manpath (including derived from PATH) + * elements are valid. 
+ */ +#define BMP_ISPATH 1 +#define BMP_APPEND_DEFMANDIR 2 +#define BMP_FALLBACK_DEFMANDIR 4 + +/* + * When doing equality comparisons of directories, device and inode + * comparisons are done. The secnode and dupnode structures are used + * to form a list of lists for this processing. + */ +struct secnode { + char *secp; + struct secnode *next; +}; +struct dupnode { + dev_t dev; /* from struct stat st_dev */ + ino_t ino; /* from struct stat st_ino */ + struct secnode *secl; /* sections already considered */ + struct dupnode *next; +}; + +/* + * Map directories that may appear in PATH to the corresponding + * man directory. + */ +static struct pathmap { + char *bindir; + char *mandir; + dev_t dev; + ino_t ino; +} bintoman[] = { + { "/sbin", "/usr/share/man,1m", 0, 0 }, + { "/usr/sbin", "/usr/share/man,1m", 0, 0 }, + { "/usr/ucb", "/usr/share/man,1b", 0, 0 }, + { "/usr/bin", "/usr/share/man,1,1m,1s,1t,1c", 0, 0 }, + { "/usr/xpg4/bin", "/usr/share/man,1", 0, 0 }, + { "/usr/xpg6/bin", "/usr/share/man,1", 0, 0 }, + { NULL, NULL, 0, 0 } +}; + +struct man_node { + char *path; /* mandir path */ + char **secv; /* submandir suffices */ + int defsrch; /* hint for man -p */ + int frompath; /* hint for man -d */ + struct man_node *next; +}; + +static int all = 0; +static int apropos = 0; +static int debug = 0; +static int found = 0; +static int list = 0; +static int makewhatis = 0; +static int printmp = 0; +static int sargs = 0; +static int psoutput = 0; +static int lintout = 0; +static int whatis = 0; +static int makewhatishere = 0; + +static char *mansec; +static char *pager = NULL; + +static char *addlocale(char *); +static struct man_node *build_manpath(char **, int); +static void do_makewhatis(struct man_node *); +static char *check_config(char *); +static int cmp(const void *, const void *); +static int dupcheck(struct man_node *, struct dupnode **); +static int format(char *, char *, char *, char *); +static void free_dupnode(struct dupnode *); +static void 
free_manp(struct man_node *manp); +static void freev(char **); +static void fullpaths(struct man_node **); +static void get_all_sect(struct man_node *); +static int getdirs(char *, char ***, int); +static void getpath(struct man_node *, char **); +static void getsect(struct man_node *, char **); +static void init_bintoman(void); +static void lower(char *); +static void mandir(char **, char *, char *, int); +static int manual(struct man_node *, char *); +static char *map_section(char *, char *); +static char *path_to_manpath(char *); +static void print_manpath(struct man_node *); +static void search_whatis(char *, char *); +static int searchdir(char *, char *, char *); +static void sortdir(DIR *, char ***); +static char **split(char *, char); +static void usage_man(void); +static void usage_whatapro(void); +static void usage_catman(void); +static void usage_makewhatis(void); +static void whatapro(struct man_node *, char *); + +static char language[MAXPATHLEN]; /* LC_MESSAGES */ +static char localedir[MAXPATHLEN]; /* locale specific path component */ + +static char *newsection = NULL; + +static int manwidth = 0; + +extern const char *__progname; + +int +main(int argc, char **argv) +{ + int c, i; + char **pathv; + char *manpath = NULL; + static struct man_node *mandirs = NULL; + int bmp_flags = 0; + int ret = 0; + char *opts; + char *mwstr; + int catman = 0; + + (void) setlocale(LC_ALL, ""); + (void) strcpy(language, setlocale(LC_MESSAGES, (char *)NULL)); + if (strcmp("C", language) != 0) + (void) strlcpy(localedir, language, MAXPATHLEN); + +#if !defined(TEXT_DOMAIN) +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + if (strcmp(__progname, "apropos") == 0) { + apropos++; + opts = "M:ds:"; + } else if (strcmp(__progname, "whatis") == 0) { + apropos++; + whatis++; + opts = "M:ds:"; + } else if (strcmp(__progname, "catman") == 0) { + catman++; + makewhatis++; + opts = "P:M:w"; + } else if (strcmp(__progname, "makewhatis") == 0) { + 
makewhatis++; + makewhatishere++; + manpath = "."; + opts = ""; + } else { + opts = "FM:P:T:adfklprs:tw"; + if (argc > 1 && strcmp(argv[1], "-") == 0) { + pager = "cat"; + optind++; + } + } + + opterr = 0; + while ((c = getopt(argc, argv, opts)) != -1) { + switch (c) { + case 'M': /* Respecify path for man pages */ + manpath = optarg; + break; + case 'a': + all++; + break; + case 'd': + debug++; + break; + case 'f': + whatis++; + /*FALLTHROUGH*/ + case 'k': + apropos++; + break; + case 'l': + list++; + all++; + break; + case 'p': + printmp++; + break; + case 's': + mansec = optarg; + sargs++; + break; + case 'r': + lintout++; + break; + case 't': + psoutput++; + break; + case 'T': + case 'P': + case 'F': + /* legacy options, compatibility only and ignored */ + break; + case 'w': + makewhatis++; + break; + case '?': + default: + if (apropos) + usage_whatapro(); + else if (catman) + usage_catman(); + else if (makewhatishere) + usage_makewhatis(); + else + usage_man(); + } + } + argc -= optind; + argv += optind; + + if (argc == 0) { + if (apropos) { + (void) fprintf(stderr, gettext("%s what?\n"), + __progname); + exit(1); + } else if (!printmp && !makewhatis) { + (void) fprintf(stderr, + gettext("What manual page do you want?\n")); + exit(1); + } + } + + init_bintoman(); + if (manpath == NULL && (manpath = getenv("MANPATH")) == NULL) { + if ((manpath = getenv("PATH")) != NULL) + bmp_flags = BMP_ISPATH | BMP_APPEND_DEFMANDIR; + else + manpath = DEFMANDIR; + } + pathv = split(manpath, ':'); + mandirs = build_manpath(pathv, bmp_flags); + freev(pathv); + fullpaths(&mandirs); + + if (makewhatis) { + do_makewhatis(mandirs); + exit(0); + } + + if (printmp) { + print_manpath(mandirs); + exit(0); + } + + /* Collect environment information */ + if (isatty(STDOUT_FILENO) && (mwstr = getenv("MANWIDTH")) != NULL && + *mwstr != '\0') { + if (strcasecmp(mwstr, "tty") == 0) { + struct winsize ws; + + if (ioctl(0, TIOCGWINSZ, &ws) != 0) + warn("TIOCGWINSZ"); + else + manwidth = 
ws.ws_col; + } else { + manwidth = (int)strtol(mwstr, (char **)NULL, 10); + if (manwidth < 0) + manwidth = 0; + } + } + if (manwidth != 0) { + DPRINTF("-- Using non-standard page width: %d\n", manwidth); + } + + if (pager == NULL) { + if ((pager = getenv("PAGER")) == NULL || *pager == '\0') + pager = PAGER; + } + DPRINTF("-- Using pager: %s\n", pager); + + for (i = 0; i < argc; i++) { + char *cmd; + static struct man_node *mp; + char *pv[2]; + + /* + * If full path to command specified, customize + * the manpath accordingly. + */ + if ((cmd = strrchr(argv[i], '/')) != NULL) { + *cmd = '\0'; + if ((pv[0] = strdup(argv[i])) == NULL) + err(1, "strdup"); + pv[1] = NULL; + *cmd = '/'; + mp = build_manpath(pv, + BMP_ISPATH | BMP_FALLBACK_DEFMANDIR); + } else { + mp = mandirs; + } + + if (apropos) + whatapro(mp, argv[i]); + else + ret += manual(mp, argv[i]); + + if (mp != NULL && mp != mandirs) { + free(pv[0]); + free_manp(mp); + } + } + + return (ret == 0 ? 0 : 1); +} + +/* + * This routine builds the manpage structure from MANPATH or PATH, + * depending on flags. See BMP_* definitions above for valid + * flags. + */ +static struct man_node * +build_manpath(char **pathv, int flags) +{ + struct man_node *manpage = NULL; + struct man_node *currp = NULL; + struct man_node *lastp = NULL; + char **p; + char **q; + char *mand = NULL; + char *mandir = DEFMANDIR; + int s; + struct dupnode *didup = NULL; + struct stat sb; + + s = sizeof (struct man_node); + for (p = pathv; *p != NULL; ) { + if (flags & BMP_ISPATH) { + if ((mand = path_to_manpath(*p)) == NULL) + goto next; + free(*p); + *p = mand; + } + q = split(*p, ','); + if (stat(q[0], &sb) != 0 || (sb.st_mode & S_IFDIR) == 0) { + freev(q); + goto next; + } + + if (access(q[0], R_OK | X_OK) == 0) { + /* + * Some element exists. Do not append DEFMANDIR as a + * fallback. 
+ */ + flags &= ~BMP_FALLBACK_DEFMANDIR; + + if ((currp = (struct man_node *)calloc(1, s)) == NULL) + err(1, "calloc"); + + currp->frompath = (flags & BMP_ISPATH); + + if (manpage == NULL) + lastp = manpage = currp; + + getpath(currp, p); + getsect(currp, p); + + /* + * If there are no new elements in this path, + * do not add it to the manpage list. + */ + if (dupcheck(currp, &didup) != 0) { + freev(currp->secv); + free(currp); + } else { + currp->next = NULL; + if (currp != manpage) + lastp->next = currp; + lastp = currp; + } + } + freev(q); +next: + /* + * Special handling of appending DEFMANDIR. After all pathv + * elements have been processed, append DEFMANDIR if needed. + */ + if (p == &mandir) + break; + p++; + if (*p != NULL) + continue; + if (flags & (BMP_APPEND_DEFMANDIR | BMP_FALLBACK_DEFMANDIR)) { + p = &mandir; + flags &= ~BMP_ISPATH; + } + } + + free_dupnode(didup); + + return (manpage); +} + +/* + * Store the mandir path into the manp structure. + */ +static void +getpath(struct man_node *manp, char **pv) +{ + char *s = *pv; + int i = 0; + + while (*s != '\0' && *s != ',') + i++, s++; + + if ((manp->path = (char *)malloc(i + 1)) == NULL) + err(1, "malloc"); + (void) strlcpy(manp->path, *pv, i + 1); +} + +/* + * Store the mandir's corresponding sections (submandir + * directories) into the manp structure. 
+ */ +static void +getsect(struct man_node *manp, char **pv) +{ + char *sections; + char **sectp; + + /* Just store all sections when doing makewhatis or apropos/whatis */ + if (makewhatis || apropos) { + manp->defsrch = 1; + DPRINTF("-- Adding %s\n", manp->path); + manp->secv = NULL; + get_all_sect(manp); + } else if (sargs) { + manp->secv = split(mansec, ','); + for (sectp = manp->secv; *sectp; sectp++) + lower(*sectp); + } else if ((sections = strchr(*pv, ',')) != NULL) { + DPRINTF("-- Adding %s: MANSECTS=%s\n", manp->path, sections); + manp->secv = split(++sections, ','); + for (sectp = manp->secv; *sectp; sectp++) + lower(*sectp); + if (*manp->secv == NULL) + get_all_sect(manp); + } else if ((sections = check_config(*pv)) != NULL) { + manp->defsrch = 1; + DPRINTF("-- Adding %s: from %s, MANSECTS=%s\n", manp->path, + CONFIG, sections); + manp->secv = split(sections, ','); + for (sectp = manp->secv; *sectp; sectp++) + lower(*sectp); + if (*manp->secv == NULL) + get_all_sect(manp); + } else { + manp->defsrch = 1; + DPRINTF("-- Adding %s: default sort order\n", manp->path); + manp->secv = NULL; + get_all_sect(manp); + } +} + +/* + * Get suffices of all sub-mandir directories in a mandir. 
+ */ +static void +get_all_sect(struct man_node *manp) +{ + DIR *dp; + char **dirv; + char **dv; + char **p; + char *prev = NULL; + char *tmp = NULL; + int maxentries = MAXTOKENS; + int entries = 0; + + if ((dp = opendir(manp->path)) == 0) + return; + + sortdir(dp, &dirv); + + (void) closedir(dp); + + if (manp->secv == NULL) { + if ((manp->secv = malloc(maxentries * sizeof (char *))) == NULL) + err(1, "malloc"); + } + + for (dv = dirv, p = manp->secv; *dv; dv++) { + if (strcmp(*dv, CONFIG) == 0) { + free(*dv); + continue; + } + + free(tmp); + if ((tmp = strdup(*dv + 3)) == NULL) + err(1, "strdup"); + + if (prev != NULL && strcmp(prev, tmp) == 0) { + free(*dv); + continue; + } + + free(prev); + if ((prev = strdup(*dv + 3)) == NULL) + err(1, "strdup"); + + if ((*p = strdup(*dv + 3)) == NULL) + err(1, "strdup"); + + p++; entries++; + + if (entries == maxentries) { + maxentries += MAXTOKENS; + if ((manp->secv = realloc(manp->secv, + sizeof (char *) * maxentries)) == NULL) + err(1, "realloc"); + p = manp->secv + entries; + } + free(*dv); + } + free(tmp); + free(prev); + *p = NULL; + free(dirv); +} + +/* + * Build whatis databases. 
+ */ +static void +do_makewhatis(struct man_node *manp) +{ + struct man_node *p; + char *ldir; + + for (p = manp; p != NULL; p = p->next) { + ldir = addlocale(p->path); + if (*localedir != '\0' && getdirs(ldir, NULL, 0) > 0) + mwpath(ldir); + free(ldir); + mwpath(p->path); + } +} + +/* + * Count mandirs under the given manpath + */ +static int +getdirs(char *path, char ***dirv, int flag) +{ + DIR *dp; + struct dirent *d; + int n = 0; + int maxentries = MAXDIRS; + char **dv = NULL; + + if ((dp = opendir(path)) == NULL) + return (0); + + if (flag) { + if ((*dirv = malloc(sizeof (char *) * + maxentries)) == NULL) + err(1, "malloc"); + dv = *dirv; + } + while ((d = readdir(dp))) { + if (strncmp(d->d_name, "man", 3) != 0) + continue; + n++; + + if (flag) { + if ((*dv = strdup(d->d_name + 3)) == NULL) + err(1, "strdup"); + dv++; + if ((dv - *dirv) == maxentries) { + int entries = maxentries; + + maxentries += MAXTOKENS; + if ((*dirv = realloc(*dirv, + sizeof (char *) * maxentries)) == NULL) + err(1, "realloc"); + dv = *dirv + entries; + } + } + } + + (void) closedir(dp); + return (n); +} + + +/* + * Find matching whatis or apropos entries. 
+ */ +static void +whatapro(struct man_node *manp, char *word) +{ + char whatpath[MAXPATHLEN]; + struct man_node *b; + char *ldir; + + for (b = manp; b != NULL; b = b->next) { + if (*localedir != '\0') { + ldir = addlocale(b->path); + if (getdirs(ldir, NULL, 0) != 0) { + (void) snprintf(whatpath, sizeof (whatpath), + "%s/%s", ldir, WHATIS); + search_whatis(whatpath, word); + } + free(ldir); + } + (void) snprintf(whatpath, sizeof (whatpath), "%s/%s", b->path, + WHATIS); + search_whatis(whatpath, word); + } +} + +static void +search_whatis(char *whatpath, char *word) +{ + FILE *fp; + char *line = NULL; + size_t linecap = 0; + char *pkwd; + regex_t preg; + char **ss = NULL; + char s[MAXNAMELEN]; + int i; + + if ((fp = fopen(whatpath, "r")) == NULL) { + perror(whatpath); + return; + } + + DPRINTF("-- Found %s: %s\n", WHATIS, whatpath); + + /* Build keyword regex */ + if (asprintf(&pkwd, "%s%s%s", (whatis) ? "\\<" : "", + word, (whatis) ? "\\>" : "") == -1) + err(1, "asprintf"); + + if (regcomp(&preg, pkwd, REG_BASIC | REG_ICASE | REG_NOSUB) != 0) + err(1, "regcomp"); + + if (sargs) + ss = split(mansec, ','); + + while (getline(&line, &linecap, fp) > 0) { + if (regexec(&preg, line, 0, NULL, 0) == 0) { + if (sargs) { + /* Section-restricted search */ + for (i = 0; ss[i] != NULL; i++) { + (void) snprintf(s, sizeof (s), "(%s)", + ss[i]); + if (strstr(line, s) != NULL) { + (void) printf("%s", line); + break; + } + } + } else { + (void) printf("%s", line); + } + } + } + + if (ss != NULL) + freev(ss); + free(pkwd); + (void) fclose(fp); +} + + +/* + * Split a string by specified separator. 
+ */ +static char ** +split(char *s1, char sep) +{ + char **tokv, **vp; + char *mp = s1, *tp; + int maxentries = MAXTOKENS; + int entries = 0; + + if ((tokv = vp = malloc(maxentries * sizeof (char *))) == NULL) + err(1, "malloc"); + + for (; mp && *mp; mp = tp) { + tp = strchr(mp, sep); + if (mp == tp) { + tp++; + continue; + } + if (tp) { + size_t len; + + len = tp - mp; + if ((*vp = (char *)malloc(sizeof (char) * + len + 1)) == NULL) + err(1, "malloc"); + (void) strncpy(*vp, mp, len); + *(*vp + len) = '\0'; + tp++; + vp++; + } else { + if ((*vp = strdup(mp)) == NULL) + err(1, "strdup"); + vp++; + } + entries++; + if (entries == maxentries) { + maxentries += MAXTOKENS; + if ((tokv = realloc(tokv, + maxentries * sizeof (char *))) == NULL) + err(1, "realloc"); + vp = tokv + entries; + } + } + *vp = 0; + + return (tokv); +} + +/* + * Free a vector allocated by split() + */ +static void +freev(char **v) +{ + int i; + if (v != NULL) { + for (i = 0; v[i] != NULL; i++) { + free(v[i]); + } + free(v); + } +} + +/* + * Convert paths to full paths if necessary + */ +static void +fullpaths(struct man_node **manp_head) +{ + char *cwd = NULL; + char *p; + int cwd_gotten = 0; + struct man_node *manp = *manp_head; + struct man_node *b; + struct man_node *prev = NULL; + + for (b = manp; b != NULL; b = b->next) { + if (*(b->path) == '/') { + prev = b; + continue; + } + + if (!cwd_gotten) { + cwd = getcwd(NULL, MAXPATHLEN); + cwd_gotten = 1; + } + + if (cwd) { + /* Relative manpath with cwd: make absolute */ + if (asprintf(&p, "%s/%s", cwd, b->path) == -1) + err(1, "asprintf"); + free(b->path); + b->path = p; + } else { + /* Relative manpath but no cwd: omit path entry */ + if (prev) + prev->next = b->next; + else + *manp_head = b->next; + + free_manp(b); + } + } + free(cwd); +} + +/* + * Free a man_node structure and its contents + */ +static void +free_manp(struct man_node *manp) +{ + char **p; + + free(manp->path); + p = manp->secv; + while ((p != NULL) && (*p != NULL)) { + 
free(*p); + p++; + } + free(manp->secv); + free(manp); +} + + +/* + * Map (in place) to lower case. + */ +static void +lower(char *s) +{ + + if (s == 0) + return; + while (*s) { + if (isupper(*s)) + *s = tolower(*s); + s++; + } +} + + +/* + * Compare function for qsort(). + * Sort first by section, then by prefix. + */ +static int +cmp(const void *arg1, const void *arg2) +{ + int n; + char **p1 = (char **)arg1; + char **p2 = (char **)arg2; + + /* By section */ + if ((n = strcmp(*p1 + 3, *p2 + 3)) != 0) + return (n); + + /* By prefix reversed */ + return (strncmp(*p2, *p1, 3)); +} + + +/* + * Find a manpage. + */ +static int +manual(struct man_node *manp, char *name) +{ + struct man_node *p; + struct man_node *local; + int ndirs = 0; + char *ldir; + char *ldirs[2]; + char *fullname = name; + char *slash; + + if ((slash = strrchr(name, '/')) != NULL) + name = slash + 1; + + /* For each path in MANPATH */ + found = 0; + + for (p = manp; p != NULL; p = p->next) { + DPRINTF("-- Searching mandir: %s\n", p->path); + + if (*localedir != '\0') { + ldir = addlocale(p->path); + ndirs = getdirs(ldir, NULL, 0); + if (ndirs != 0) { + ldirs[0] = ldir; + ldirs[1] = NULL; + local = build_manpath(ldirs, 0); + DPRINTF("-- Locale specific subdir: %s\n", + ldir); + mandir(local->secv, ldir, name, 1); + free_manp(local); + } + free(ldir); + } + + /* + * Locale mandir not valid, man page in locale + * mandir not found, or -a option present + */ + if (ndirs == 0 || !found || all) + mandir(p->secv, p->path, name, 0); + + if (found && !all) + break; + } + + if (!found) { + if (sargs) { + (void) fprintf(stderr, gettext( + "No manual entry for %s in section(s) %s\n"), + fullname, mansec); + } else { + (void) fprintf(stderr, + gettext("No manual entry for %s\n"), fullname); + } + + } + + return (!found); +} + + +/* + * For a specified manual directory, read, store and sort section subdirs. + * For each section specified, find and search matching subdirs. 
+ */ +static void +mandir(char **secv, char *path, char *name, int lspec) +{ + DIR *dp; + char **dirv; + char **dv, **pdv; + int len, dslen; + + if ((dp = opendir(path)) == NULL) + return; + + if (lspec) + DPRINTF("-- Searching mandir: %s\n", path); + + sortdir(dp, &dirv); + + /* Search in the order specified by MANSECTS */ + for (; *secv; secv++) { + len = strlen(*secv); + for (dv = dirv; *dv; dv++) { + dslen = strlen(*dv + 3); + if (dslen > len) + len = dslen; + if (**secv == '\\') { + if (strcmp(*secv + 1, *dv + 3) != 0) + continue; + } else if (strncasecmp(*secv, *dv + 3, len) != 0) { + if (!all && + (newsection = map_section(*secv, path)) + == NULL) { + continue; + } + if (newsection == NULL) + newsection = ""; + if (strncmp(newsection, *dv + 3, len) != 0) { + continue; + } + } + + if (searchdir(path, *dv, name) == 0) + continue; + + if (!all) { + pdv = dirv; + while (*pdv) { + free(*pdv); + pdv++; + } + (void) closedir(dp); + free(dirv); + return; + } + + if (all && **dv == 'm' && *(dv + 1) && + strcmp(*(dv + 1) + 3, *dv + 3) == 0) + dv++; + } + } + pdv = dirv; + while (*pdv != NULL) { + free(*pdv); + pdv++; + } + free(dirv); + (void) closedir(dp); +} + +/* + * Sort directories. 
+ */ +static void +sortdir(DIR *dp, char ***dirv) +{ + struct dirent *d; + char **dv; + int maxentries = MAXDIRS; + int entries = 0; + + if ((dv = *dirv = malloc(sizeof (char *) * + maxentries)) == NULL) + err(1, "malloc"); + dv = *dirv; + + while ((d = readdir(dp))) { + if (strcmp(d->d_name, ".") == 0 || + strcmp(d->d_name, "..") == 0) + continue; + + if (strncmp(d->d_name, "man", 3) == 0 || + strncmp(d->d_name, "cat", 3) == 0) { + if ((*dv = strdup(d->d_name)) == NULL) + err(1, "strdup"); + dv++; + entries++; + if (entries == maxentries) { + maxentries += MAXDIRS; + if ((*dirv = realloc(*dirv, + sizeof (char *) * maxentries)) == NULL) + err(1, "realloc"); + dv = *dirv + entries; + } + } + } + *dv = 0; + + qsort((void *)*dirv, dv - *dirv, sizeof (char *), cmp); + +} + + +/* + * Search a section subdir for a given manpage. + */ +static int +searchdir(char *path, char *dir, char *name) +{ + DIR *sdp; + struct dirent *sd; + char sectpath[MAXPATHLEN]; + char file[MAXNAMLEN]; + char dname[MAXPATHLEN]; + char *last; + int nlen; + + (void) snprintf(sectpath, sizeof (sectpath), "%s/%s", path, dir); + (void) snprintf(file, sizeof (file), "%s.", name); + + if ((sdp = opendir(sectpath)) == NULL) + return (0); + + while ((sd = readdir(sdp))) { + char *pname; + + if ((pname = strdup(sd->d_name)) == NULL) + err(1, "strdup"); + if ((last = strrchr(pname, '.')) != NULL && + (strcmp(last, ".gz") == 0 || strcmp(last, ".bz2") == 0)) + *last = '\0'; + last = strrchr(pname, '.'); + nlen = last - pname; + (void) snprintf(dname, sizeof (dname), "%.*s.", nlen, pname); + if (strcmp(dname, file) == 0 || + strcmp(pname, name) == 0) { + (void) format(path, dir, name, sd->d_name); + (void) closedir(sdp); + free(pname); + return (1); + } + free(pname); + } + (void) closedir(sdp); + + return (0); +} + +/* + * Check the hash table of old directory names to see if there is a + * new directory name. 
+ */ +static char * +map_section(char *section, char *path) +{ + int i; + char fullpath[MAXPATHLEN]; + + if (list) /* -l option fall through */ + return (NULL); + + for (i = 0; map[i].new_name != NULL; i++) { + if (strcmp(section, map[i].old_name) == 0) { + (void) snprintf(fullpath, sizeof (fullpath), + "%s/man%s", path, map[i].new_name); + if (!access(fullpath, R_OK | X_OK)) { + return (map[i].new_name); + } else { + return (NULL); + } + } + } + + return (NULL); +} + +/* + * Format the manpage. + */ +static int +format(char *path, char *dir, char *name, char *pg) +{ + char manpname[MAXPATHLEN], catpname[MAXPATHLEN]; + char cmdbuf[BUFSIZ], tmpbuf[BUFSIZ]; + char *cattool; + int utf8 = 0; + struct stat sbman, sbcat; + + found++; + + if (list) { + (void) printf(gettext("%s(%s)\t-M %s\n"), name, dir + 3, path); + return (-1); + } + + (void) snprintf(manpname, sizeof (manpname), "%s/man%s/%s", path, + dir + 3, pg); + (void) snprintf(catpname, sizeof (catpname), "%s/cat%s/%s", path, + dir + 3, pg); + + /* Can't do PS output if manpage doesn't exist */ + if (stat(manpname, &sbman) != 0 && (psoutput|lintout)) + return (-1); + + /* + * If both manpage and catpage do not exist, manpname is + * broken symlink, most likely. + */ + if (stat(catpname, &sbcat) != 0 && stat(manpname, &sbman) != 0) + err(1, "%s", manpname); + + /* Setup cattool */ + if (fnmatch("*.gz", manpname, 0) == 0) + cattool = "gzcat"; + else if (fnmatch("*.bz2", manpname, 0) == 0) + cattool = "bzcat"; + else + cattool = "cat"; + + /* Preprocess UTF-8 input with preconv (could be smarter) */ + if (strstr(path, "UTF-8") != NULL) + utf8 = 1; + + if (psoutput) { + (void) snprintf(cmdbuf, BUFSIZ, + "cd %s; %s %s%s | mandoc -Tps | lp -Tpostscript", + path, cattool, manpname, + utf8 ? " | " PRECONV " -e UTF-8" : ""); + DPRINTF("-- Using manpage: %s\n", manpname); + goto cmd; + } else if (lintout) { + (void) snprintf(cmdbuf, BUFSIZ, + "cd %s; %s %s%s | mandoc -Tlint", + path, cattool, manpname, + utf8 ? 
" | " PRECONV " -e UTF-8" : ""); + DPRINTF("-- Linting manpage: %s\n", manpname); + goto cmd; + } + + /* + * Output catpage if: + * - manpage doesn't exist + * - output width is standard and catpage is recent enough + */ + if (stat(manpname, &sbman) != 0 || (manwidth == 0 && + stat(catpname, &sbcat) == 0 && sbcat.st_mtime >= sbman.st_mtime)) { + DPRINTF("-- Using catpage: %s\n", catpname); + (void) snprintf(cmdbuf, BUFSIZ, "%s %s", pager, catpname); + goto cmd; + } + + DPRINTF("-- Using manpage: %s\n", manpname); + if (manwidth > 0) + (void) snprintf(tmpbuf, BUFSIZ, "-Owidth=%d ", manwidth); + (void) snprintf(cmdbuf, BUFSIZ, "cd %s; %s %s%s | mandoc -T%s %s| %s", + path, cattool, manpname, + utf8 ? " | " PRECONV " -e UTF-8 " : "", + utf8 ? "utf8" : "ascii", (manwidth > 0) ? tmpbuf : "", pager); + +cmd: + DPRINTF("-- Command: %s\n", cmdbuf); + + if (!debug) + return (system(cmdbuf) == 0); + else + return (0); +} + +/* + * Add <localedir> to the path. + */ +static char * +addlocale(char *path) +{ + char *tmp; + + if (asprintf(&tmp, "%s/%s", path, localedir) == -1) + err(1, "asprintf"); + + return (tmp); +} + +/* + * Get the order of sections from man.cf. + */ +static char * +check_config(char *path) +{ + FILE *fp; + char *rc = NULL; + char *sect; + char fname[MAXPATHLEN]; + char *line = NULL; + size_t linecap = 0; + + (void) snprintf(fname, MAXPATHLEN, "%s/%s", path, CONFIG); + + if ((fp = fopen(fname, "r")) == NULL) + return (NULL); + + while (getline(&line, &linecap, fp) > 0) { + if ((rc = strstr(line, "MANSECTS")) != NULL) + break; + } + + (void) fclose(fp); + + if (rc == NULL || (sect = strchr(line, '=')) == NULL) + return (NULL); + else + return (++sect); +} + + +/* + * Initialize the bintoman array with appropriate device and inode info. 
+ */ +static void +init_bintoman(void) +{ + int i; + struct stat sb; + + for (i = 0; bintoman[i].bindir != NULL; i++) { + if (stat(bintoman[i].bindir, &sb) == 0) { + bintoman[i].dev = sb.st_dev; + bintoman[i].ino = sb.st_ino; + } else { + bintoman[i].dev = NODEV; + } + } +} + +/* + * If a duplicate is found, return 1. + * If a duplicate is not found, add it to the dupnode list and return 0. + */ +static int +dupcheck(struct man_node *mnp, struct dupnode **dnp) +{ + struct dupnode *curdnp; + struct secnode *cursnp; + struct stat sb; + int i; + int rv = 1; + int dupfound; + + /* If the path doesn't exist, treat it as a duplicate */ + if (stat(mnp->path, &sb) != 0) + return (1); + + /* If no sections were found in the man dir, treat it as duplicate */ + if (mnp->secv == NULL) + return (1); + + /* + * Find the dupnode structure for the previous time this directory + * was looked at. Device and inode numbers are compared so that + * directories that are reached via different paths (e.g. /usr/man and + * /usr/share/man) are treated as equivalent. + */ + for (curdnp = *dnp; curdnp != NULL; curdnp = curdnp->next) { + if (curdnp->dev == sb.st_dev && curdnp->ino == sb.st_ino) + break; + } + + /* + * First time this directory has been seen. Add a new node to the + * head of the list. Since all entries are guaranteed to be unique + * copy all sections to new node. + */ + if (curdnp == NULL) { + if ((curdnp = calloc(1, sizeof (struct dupnode))) == NULL) + err(1, "calloc"); + for (i = 0; mnp->secv[i] != NULL; i++) { + if ((cursnp = calloc(1, sizeof (struct secnode))) + == NULL) + err(1, "calloc"); + cursnp->next = curdnp->secl; + curdnp->secl = cursnp; + if ((cursnp->secp = strdup(mnp->secv[i])) == NULL) + err(1, "strdup"); + } + curdnp->dev = sb.st_dev; + curdnp->ino = sb.st_ino; + curdnp->next = *dnp; + *dnp = curdnp; + return (0); + } + + /* + * Traverse the section vector in the man_node and the section list + * in dupnode cache to eliminate all duplicates from man_node. 
+ */ + for (i = 0; mnp->secv[i] != NULL; i++) { + dupfound = 0; + for (cursnp = curdnp->secl; cursnp != NULL; + cursnp = cursnp->next) { + if (strcmp(mnp->secv[i], cursnp->secp) == 0) { + dupfound = 1; + break; + } + } + if (dupfound) { + mnp->secv[i][0] = '\0'; + continue; + } + + + /* + * Update curdnp and set return value to indicate that this + * was not all duplicates. + */ + if ((cursnp = calloc(1, sizeof (struct secnode))) == NULL) + err(1, "calloc"); + cursnp->next = curdnp->secl; + curdnp->secl = cursnp; + if ((cursnp->secp = strdup(mnp->secv[i])) == NULL) + err(1, "strdup"); + rv = 0; + } + + return (rv); +} + +/* + * Given a bindir, return corresponding mandir. + */ +static char * +path_to_manpath(char *bindir) +{ + char *mand, *p; + int i; + struct stat sb; + + /* First look for known translations for specific bin paths */ + if (stat(bindir, &sb) != 0) { + return (NULL); + } + for (i = 0; bintoman[i].bindir != NULL; i++) { + if (sb.st_dev == bintoman[i].dev && + sb.st_ino == bintoman[i].ino) { + if ((mand = strdup(bintoman[i].mandir)) == NULL) + err(1, "strdup"); + if ((p = strchr(mand, ',')) != NULL) + *p = '\0'; + if (stat(mand, &sb) != 0) { + free(mand); + return (NULL); + } + if (p != NULL) + *p = ','; + return (mand); + } + } + + /* + * No specific translation found. Try `dirname $bindir`/share/man + * and `dirname $bindir`/man + */ + if ((mand = malloc(MAXPATHLEN)) == NULL) + err(1, "malloc"); + if (strlcpy(mand, bindir, MAXPATHLEN) >= MAXPATHLEN) { + free(mand); + return (NULL); + } + + /* + * Advance to end of buffer, strip trailing /'s then remove last + * directory component. 
+ */ + for (p = mand; *p != '\0'; p++) + ; + for (; p > mand && *p == '/'; p--) + ; + for (; p > mand && *p != '/'; p--) + ; + if (p == mand && *p == '.') { + if (realpath("..", mand) == NULL) { + free(mand); + return (NULL); + } + for (; *p != '\0'; p++) + ; + } else { + *p = '\0'; + } + + if (strlcat(mand, "/share/man", MAXPATHLEN) >= MAXPATHLEN) { + free(mand); + return (NULL); + } + + if ((stat(mand, &sb) == 0) && S_ISDIR(sb.st_mode)) { + return (mand); + } + + /* + * Strip the /share/man off and try /man + */ + *p = '\0'; + if (strlcat(mand, "/man", MAXPATHLEN) >= MAXPATHLEN) { + free(mand); + return (NULL); + } + if ((stat(mand, &sb) == 0) && S_ISDIR(sb.st_mode)) { + return (mand); + } + + /* + * No man or share/man directory found + */ + free(mand); + return (NULL); +} + +/* + * Free a linked list of dupnode structs. + */ +void +free_dupnode(struct dupnode *dnp) { + struct dupnode *dnp2; + struct secnode *snp; + + while (dnp != NULL) { + dnp2 = dnp; + dnp = dnp->next; + while (dnp2->secl != NULL) { + snp = dnp2->secl; + dnp2->secl = dnp2->secl->next; + free(snp->secp); + free(snp); + } + free(dnp2); + } +} + +/* + * Print manp linked list to stdout. + */ +void +print_manpath(struct man_node *manp) +{ + char colon[2] = "\0\0"; + char **secp; + + for (; manp != NULL; manp = manp->next) { + (void) printf("%s%s", colon, manp->path); + colon[0] = ':'; + + /* + * If man.cf or a directory scan was used to create section + * list, do not print section list again. If the output of + * man -p is used to set MANPATH, subsequent runs of man + * will re-read man.cf and/or scan man directories as + * required. + */ + if (manp->defsrch != 0) + continue; + + for (secp = manp->secv; *secp != NULL; secp++) { + /* + * Section deduplication may have eliminated some + * sections from the vector. 
Avoid displaying this + * detail which would appear as ",," in output + */ + if ((*secp)[0] != '\0') + (void) printf(",%s", *secp); + } + } + (void) printf("\n"); +} + +static void +usage_man(void) +{ + + (void) fprintf(stderr, gettext( +"usage: man [-alptw] [-M path] [-s section] name ...\n" +" man [-M path] [-s section] -k keyword ...\n" +" man [-M path] [-s section] -f keyword ...\n")); + + exit(1); +} + +static void +usage_whatapro(void) +{ + + (void) fprintf(stderr, gettext( +"usage: %s [-M path] [-s section] keyword ...\n"), + whatis ? "whatis" : "apropos"); + + exit(1); +} + +static void +usage_catman(void) +{ + (void) fprintf(stderr, gettext( +"usage: catman [-M path] [-w]\n")); + + exit(1); +} + +static void +usage_makewhatis(void) +{ + (void) fprintf(stderr, gettext("usage: makewhatis\n")); + + exit(1); +} diff --git a/usr/src/cmd/man/man.h b/usr/src/cmd/man/man.h new file mode 100644 index 0000000000..e6803a7969 --- /dev/null +++ b/usr/src/cmd/man/man.h @@ -0,0 +1,40 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org> + */ + +/* + * Common definitions. DPRINTF prints only when debug is set and is else-safe. + */ + +#ifndef _MAN_H_ +#define _MAN_H_ + +#define CONFIG "man.cf" +#define DEFMANDIR "/usr/share/man" +#define INDENT 24 +#define PAGER "less -ins" +#define WHATIS "whatis" +#define PRECONV "/usr/lib/mandoc_preconv" + +#define LINE_ALLOC 4096 +#define MAXDIRS 128 +#define MAXTOKENS 64 + +#define DPRINTF if (!debug) { } else \ + (void) printf + +void mwpath(char *path); + +#endif /* _MAN_H_ */ diff --git a/usr/src/cmd/man/src/Makefile b/usr/src/cmd/man/src/Makefile deleted file mode 100644 index 32135e8093..0000000000 --- a/usr/src/cmd/man/src/Makefile +++ /dev/null @@ -1,88 +0,0 @@ -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. -# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# Copyright 2009 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms.
-# -# Makefile for cmd/man/src -# - -include ../../Makefile.cmd - -BINPROG = man -LIBPROG = getNAME -LIBSHELL = makewhatis -LNKPROG = catman apropos whatis - -PROG = $(BINPROG) $(LIBPROG) - -SRCS = man.c getNAME.c - -SHSRCS = makewhatis.sh - -OBJS = $(SRCS:%.c=%.o) - - -CLOBBERFILES += $(LNKPROG) $(LIBSHELL) - -ROOTPROG = $(BINPROG:%=$(ROOTBIN)/%) -ROOTLIBPROG = $(LIBPROG:%=$(ROOTLIB)/%) -ROOTLIBSHELL = $(LIBSHELL:%=$(ROOTLIB)/%) -ROOTLNKPROG = $(LNKPROG:%=$(ROOTBIN)/%) - -CERRWARN += -_gcc=-Wno-implicit-function-declaration -CERRWARN += -_gcc=-Wno-uninitialized -CERRWARN += -_gcc=-Wno-unused-variable - -# for messaging catalog files -POFILE= src.po -POFILES= $(SRCS:%.c=%.po) - -.KEEP_STATE : - -all : $(PROG) $(LIBSHELL) $(LNKPROG) - -makewhatis : $$@.sh - cat $@.sh > $@ - -install : all $(ROOTPROG) $(ROOTLIBPROG) \ - $(ROOTLIBSHELL) $(ROOTLNKPROG) - -$(ROOTLNKPROG) : $(ROOTBIN)/man - -$(RM) $@; $(LN) $(ROOTBIN)/man $@ - -$(LNKPROG) : man - -$(RM) $@; $(LN) man $@ - -catalog: $(POFILE) - -$(POFILE): $(POFILES) - $(RM) $@ - cat $(POFILES) > $@ - - -clean : - -strip : - $(STRIP) $(PROG) - -lint : lint_SRCS - -include ../../Makefile.targ diff --git a/usr/src/cmd/man/src/THIRDPARTYLICENSE b/usr/src/cmd/man/src/THIRDPARTYLICENSE deleted file mode 100644 index 4dedd9d2a7..0000000000 --- a/usr/src/cmd/man/src/THIRDPARTYLICENSE +++ /dev/null @@ -1,32 +0,0 @@ -Copyright (c) 1980 Regents of the University of California. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - 3. 
All advertising materials mentioning features or use of this - software must display the following acknowledgement: - This product includes software developed by the University - of California, Berkeley and its contributors. - 4. Neither the name of the University nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/usr/src/cmd/man/src/getNAME.c b/usr/src/cmd/man/src/getNAME.c deleted file mode 100644 index 77d03dd866..0000000000 --- a/usr/src/cmd/man/src/getNAME.c +++ /dev/null @@ -1,789 +0,0 @@ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -/* - * Copyright 1980 Regents of the University of California. - * All rights reserved. The Berkeley software License Agreement - * specifies the terms and conditions for redistribution. - */ - -#pragma ident "%Z%%M% %I% %E% SMI" - -/* - * Get name sections from manual pages. 
- * -t for building toc - * -i for building intro entries - * other apropos database - */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <string.h> -#include <unistd.h> -#include <limits.h> -#include <locale.h> -#include <wchar.h> -#include <errno.h> -#include <sys/param.h> - -#define PLEN 3 /* prefix length "man" */ - -static char path[MAXPATHLEN+1]; -static int tocrc; -static int intro; -static char *progname; - -static void trimln(char *); -static void roff_trim(char *cp); -static void doname(char *); -static void section(char *, char *); -static void split(char *, char *); -static void dorefname(char *); -static void troffpage(char *); -static void sgmlpage(char *); - -/* - * Test to see if this is an SGML manpage or a regular manpage - * Unless the first line begins with <!DOCTYPE, we assume it isn't. - */ -static int -issgml(FILE *fp) -{ - static const char magic[] = "<!DOCTYPE"; - char buf[sizeof (magic)]; - size_t n = sizeof (magic) - 1; - - if (read(fileno(fp), buf, n) != n || - lseek(fileno(fp), 0, SEEK_SET) != 0) - return (0); - return (strncmp(magic, buf, n) == 0); -} - -int -main(int argc, char *argv[]) -{ - int c; - - (void) setlocale(LC_ALL, ""); - - progname = argv[0]; - - while ((c = getopt(argc, argv, "it")) != EOF) - switch (c) { - case 't': - tocrc++; - break; - case 'i': - intro++; - break; - case '?': - default: - (void) fprintf(stderr, - "usage: %s [-i][-t] files..\n", progname); - exit(1); - } - - if (getcwd(path, sizeof (path)) == NULL) { - (void) fprintf(stderr, "%s: getcwd: %s\n", progname, path); - exit(1); - } - - for (; optind < argc; optind++) { - char *name = argv[optind]; - - if (freopen(name, "r", stdin) == 0) { - (void) fprintf(stderr, - "%s: %s: %s\n", progname, name, strerror(errno)); - continue; - } - - /* - * Most of the info we care about is in the first kbyte - */ - (void) setvbuf(stdin, NULL, _IOFBF, 1024); - - if (issgml(stdin)) - sgmlpage(name); - else - troffpage(name); - } - - return (0); -} - 
-/* - * Parse a troff-format manpage - */ -static void -troffpage(char *name) -{ - char headbuf[BUFSIZ]; - char linbuf[BUFSIZ]; - char *strptr; - int i = 0; - - for (;;) { - if (fgets(headbuf, sizeof (headbuf), stdin) == NULL) - return; - if (headbuf[0] != '.') - continue; - if (headbuf[1] == 'T' && headbuf[2] == 'H') - break; - if (headbuf[1] == 't' && headbuf[2] == 'h') - break; - } - for (;;) { - if (fgets(linbuf, sizeof (linbuf), stdin) == NULL) - return; - if (linbuf[0] != '.') - continue; - if (linbuf[1] == 'S' && linbuf[2] == 'H') - break; - if (linbuf[1] == 's' && linbuf[2] == 'h') - break; - } - trimln(headbuf); - if (tocrc) - doname(name); - if (!intro) - section(name, headbuf); - for (;;) { - if (fgets(linbuf, sizeof (linbuf), stdin) == NULL) - break; - if (linbuf[0] == '.') { - if (linbuf[1] == 'S' && linbuf[2] == 'H') - break; - if (linbuf[1] == 's' && linbuf[2] == 'h') - break; - if (linbuf[1] == '\\' && linbuf[2] == '"') - continue; - } - trimln(linbuf); - roff_trim(linbuf); - if (intro) { - split(linbuf, name); - continue; - } - if (i != 0) - (void) printf(" "); - i++; - (void) printf("%s", linbuf); - } - (void) printf("\n"); -} - - -/* - * Substitute section defined in page with new section spec - * of the form xx/yy where xx is the section suffix of the - * directory and yy is the filename extension (unless xx - * and yy are equal, in which case xx is the section). - * Pages should be placed in their proper directory with the - * proper name to simplify things. 
- * - * For example take the following names: - * man1/ar.1v (1/1V) - * man1/find.1 (1) - * man1/loco (1/) - * - */ -static void -section(char *name, char *buf) -{ - char scratch[MAXPATHLEN+1]; - char *p = buf; - char *dir, *fname; - char *dp, *np; - int i; - int plen = PLEN; - - /* - * split dirname and filename - */ - (void) strcpy(scratch, name); - if ((fname = strrchr(scratch, '/')) == NULL) { - fname = name; - dir = path; - } else { - dir = scratch; - *fname = 0; - fname++; - } - dp = strrchr(dir, '/'); - - if (*(dp+1) == 's') - plen = PLEN + 1; - - if (dp != NULL) { - dp = dp+plen+1; - } else { - dp = dir+plen; - } - np = strrchr(fname, '.'); - if (np != NULL) { - ++np; - } else { - np = ""; - } - for (i = 0; i < 2; i++) { - while (*p && *p != ' ' && *p != '\t') - p++; - if (!*p) - break; - while (*p && (*p == ' ' || *p == '\t')) - p++; - if (!*p) - break; - } - *p++ = 0; - (void) printf("%s", buf); - if (strcmp(np, dp) == 0) - (void) printf("%s", dp); - else - (void) printf("%s/%s", dp, np); - while (*p && *p != ' ' && *p != '\t') - p++; - (void) printf("%s\t", p); -} - -static void -trimln(char *cp) -{ - while (*cp) - cp++; - if (*--cp == '\n') - *cp = 0; -} - -static void -roff_trim(char *cp) -{ - if (*cp == '.') { - while ((*cp != ' ') && (*cp != '\0')) { - strcpy(cp, cp+1); - } - strcpy(cp, cp+1); - } - while (*cp) { - if (strncmp(cp, "\\f", 2) == 0) { - if ((*(cp+2) >= 48) && (*(cp+2) <= 57)) { - strcpy(cp, cp+3); - } - if (*(cp+2) == '(') { - strcpy(cp, cp+5); - } - } - cp++; - } -} - -static void -doname(char *name) -{ - char *dp = name, *ep; - -again: - while (*dp && *dp != '.') - (void) putchar(*dp++); - if (*dp) - for (ep = dp+1; *ep; ep++) - if (*ep == '.') { - (void) putchar(*dp++); - goto again; - } - (void) putchar('('); - if (*dp) - dp++; - while (*dp) - (void) putchar(*dp++); - (void) putchar(')'); - (void) putchar(' '); -} - -static void -split(char *line, char *name) -{ - char *cp, *dp; - char *sp, *sep; - - cp = strchr(line, '-'); - if (cp 
== 0) - return; - sp = cp + 1; - for (--cp; *cp == ' ' || *cp == '\t' || *cp == '\\'; cp--) - ; - *++cp = '\0'; - while (*sp && (*sp == ' ' || *sp == '\t')) - sp++; - for (sep = "", dp = line; dp && *dp; dp = cp, sep = "\n") { - cp = strchr(dp, ','); - if (cp) { - char *tp; - - for (tp = cp - 1; *tp == ' ' || *tp == '\t'; tp--) - ; - *++tp = '\0'; - for (++cp; *cp == ' ' || *cp == '\t'; cp++) - ; - } - (void) printf("%s%s\t", sep, dp); - dorefname(name); - (void) printf("\t%s", sp); - } -} - -static void -dorefname(char *name) -{ - char *dp = name, *ep; - -again: - while (*dp && *dp != '.') - (void) putchar(*dp++); - if (*dp) - for (ep = dp+1; *ep; ep++) - if (*ep == '.') { - (void) putchar(*dp++); - goto again; - } - (void) putchar('.'); - if (*dp) - dp++; - while (*dp) - (void) putchar(*dp++); -} - -/* - * The rest of the routines in the file form a simplistic parser - * for SGML manpages. We assume the input is syntactically correct - * SGML, and that the fields occur in the input file in order. 
- */ - -/* - * Some utilities for constructing arbitrary length wide character strings - */ - -typedef struct { - wchar_t *str; - size_t size; - long index; -} string_t; - -#define DEF_STR_SIZE 16 -#define DEF_STR_GROWTH 16 - -static void -outofspace(char *where) -{ - (void) fprintf(stderr, "%s: '%s' - out of memory\n", progname, where); - exit(1); -} - -static string_t * -newstring(size_t initial) -{ - string_t *s = malloc(sizeof (*s)); - - if (s == NULL) - outofspace("new s"); - - initial *= sizeof (wchar_t); - if (initial < DEF_STR_SIZE) - initial = DEF_STR_SIZE; - - s->str = malloc(initial); - if (s->str == NULL) - outofspace("new str"); - - s->size = initial; - s->index = 0; - *s->str = L'\0'; - return (s); -} - -static void -delstring(string_t **s) -{ - free((*s)->str); - (*s)->str = NULL; - free(*s); - *s = NULL; -} - -static wchar_t * -getwstring(string_t *s) -{ - static const wchar_t wnull = L'\0'; - - if (s) - return (s->str); - return ((wchar_t *)&wnull); -} - -static char * -getcstring(string_t *s) -{ - size_t len = (wcslen(s->str) + 1) * MB_CUR_MAX; - char *cstr = malloc(len); - char *p = cstr; - wchar_t *wp = s->str; - - if (p == NULL) - outofspace("getc"); - while (*wp) - p += wctomb(p, *wp++); - *p = '\0'; - return (cstr); -} - -static void -appendwstring(string_t *s, const wchar_t *str) -{ - size_t len = wcslen(str) + 1; - - s->size += sizeof (wchar_t) * len; - s->str = realloc(s->str, s->size); - if (s->str == NULL) - outofspace("appendw"); - (void) wcscat(s->str, str); - s->index = wcslen(s->str) + 1; -} - -static void -putwstring(string_t *s, wchar_t wc) -{ - if ((s->index + 1) * sizeof (wchar_t) >= s->size) { - s->size += DEF_STR_GROWTH; - s->str = realloc(s->str, s->size); - if (s->str == NULL) - outofspace("put"); - } - s->str[s->index++] = wc; -} - -/* - * Find the closing > of an SGML comment block - * (allowing for multibyte, embedded, comments) - */ -static void -eatcomments(void) -{ - int pending = 1; - - while (pending) - switch 
(getwchar()) { - default: - break; - case L'<': - pending++; - break; - case L'>': - pending--; - break; - case WEOF: - return; - } -} - -/* - * Find the next token on stdin. - * Handles nested comment strings, and removes any trailing newlines - * from the stream after the closing '>'. - */ -static int -find_token(char *tokbuf, size_t tokbuflen) -{ - int c; - wint_t wc; - char *tokp; - -top: - while ((wc = getwchar()) != WEOF) - if (wc == L'<') - break; - - if (wc == WEOF && errno == EILSEQ) - return (0); - - switch (c = getchar()) { - case EOF: - return (0); - default: - (void) ungetc(c, stdin); - break; - case '!': - eatcomments(); - goto top; - } - - tokp = tokbuf; - - while ((c = getchar()) != EOF) { - if (c == '>') { - while ((c = getchar()) != EOF) - if (c != '\n') { - (void) ungetc(c, stdin); - break; - } - *tokp = '\0'; - return (1); - } - if (tokp - tokbuf < tokbuflen) - *tokp++ = (char)c; - } - - return (0); -} - -/* - * This structure is filled out during the parsing of each page we encounter - */ -typedef struct { - char *name; - string_t *title; - string_t *volnum; - string_t *date; - string_t *names; - string_t *purpose; -} manpage_t; - -static void -warning(manpage_t *m, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - (void) fprintf(stderr, "%s: %s - ", progname, m->name); - (void) vfprintf(stderr, fmt, ap); - va_end(ap); -} - -/* - * Fetch a string from stdin, terminated by the endtoken. - * These strings may be localized, so do this with wide characters. - * Hack: skip over (completely ignore) all other tokens - * Hack: map all &blort; constructs to spaces. 
- */ -static string_t * -filestring(manpage_t *m, size_t initial, char *endtoken) -{ - char tokbuf[BUFSIZ * MB_LEN_MAX]; - string_t *s = newstring(initial); - wint_t wc; - - while ((wc = getwchar()) != WEOF) - switch (wc) { - case L'\n': - if ((wc = getwchar()) != WEOF) - (void) ungetwc(wc, stdin); - if (wc != L'<') - putwstring(s, L' '); - break; - case L'<': - (void) ungetwc(wc, stdin); - if (!find_token(tokbuf, sizeof (tokbuf)) || - strcasecmp(endtoken, tokbuf) == 0) - goto done; - break; - case L'&': - while ((wc = getwchar()) != WEOF) - if (wc == L';') - break; - wc = L' '; - /* FALLTHROUGH */ - default: - putwstring(s, wc); - break; - } - - if (errno == EILSEQ) - warning(m, "%s while parsing %s\n", strerror(errno), endtoken); -done: - putwstring(s, L'\0'); - return (s); -} - -/* - * <refentrytitle> TITLE </refentrytitle> - */ -static int -refentrytitle(manpage_t *m) -{ - if (m->title != NULL) - warning(m, "repeated refentrytitle\n"); - m->title = filestring(m, 8, "/refentrytitle"); - return (1); -} - -/* - * <manvolnum> MANVOLNUM </manvolnum> - */ -static int -manvolnum(manpage_t *m) -{ - if (m->volnum != NULL) - warning(m, "repeated manvolnum\n"); - m->volnum = filestring(m, 3, "/manvolnum"); - return (1); -} - -/* - * <refmiscinfo class="date"> DATE </refmiscinfo> - */ -static int -refmiscinfo_date(manpage_t *m) -{ - if (m->date != NULL) - warning(m, "repeated date\n"); - m->date = filestring(m, 11, "/refmiscinfo"); - return (1); -} - -/* - * .. 
</refmeta> - */ -static int -print_refmeta(manpage_t *m) -{ - char headbuf[BUFSIZ]; - - (void) snprintf(headbuf, sizeof (headbuf), ".TH %ws %ws \"%ws\"", - getwstring(m->title), getwstring(m->volnum), getwstring(m->date)); - - trimln(headbuf); - if (tocrc) - doname(m->name); - if (!intro) - section(m->name, headbuf); - - if (m->title) - delstring(&m->title); - if (m->volnum) - delstring(&m->volnum); - if (m->date) - delstring(&m->date); - - return (1); -} - -static int -appendname(manpage_t *m, char *term) -{ - string_t *r = filestring(m, 0, term); - - if (m->names) { - appendwstring(m->names, L", "); - appendwstring(m->names, getwstring(r)); - delstring(&r); - } else - m->names = r; - return (1); -} - -/* - * <refdescriptor> REFDESCRIPTOR </refdescriptor> - */ -static int -refdescriptor(manpage_t *m) -{ - return (appendname(m, "/refdescriptor")); -} - -/* - * <refname> REFNAME </refname> - */ -static int -refname(manpage_t *m) -{ - return (appendname(m, "/refname")); -} - -/* - * <refpurpose> PURPOSE </refpurpose> - */ -static int -refpurpose(manpage_t *m) -{ - if (m->purpose != NULL) - warning(m, "repeated refpurpose\n"); - m->purpose = filestring(m, 0, "/refpurpose"); - return (1); -} - -/* - * .. </refnamediv> - this is our chance to bail out. - */ -static int -terminate(manpage_t *m) -{ - if (m->names) { - appendwstring(m->names, L" \\- "); - appendwstring(m->names, getwstring(m->purpose)); - if (intro) { - char *buf = getcstring(m->names); - split(buf, m->name); - free(buf); - } else - (void) printf("%ws", getwstring(m->names)); - } - - if (m->names) - delstring(&m->names); - if (m->purpose) - delstring(&m->purpose); - - (void) printf("\n"); - return (0); -} - - -/* - * Basic control structure of the SGML "parser". - * It's very simplistic - when named tags are encountered in the - * input stream, control is transferred to the corresponding routine. - * No checking is done for correct pairing of tags. 
A few other hacks - * are sneaked into the lexical routines above. - * Output is generated after seeing the /refmeta and /refnamediv - * closing tags. - */ -static const struct { - char *name; - int (*action)(manpage_t *); -} acts[] = { - { "refentrytitle", refentrytitle }, - { "manvolnum", manvolnum }, - { "refmiscinfo class=\"date\"", refmiscinfo_date }, - { "/refmeta", print_refmeta }, - { "refdescriptor", refdescriptor }, - { "refname", refname }, - { "refpurpose", refpurpose }, - { "/refnamediv", terminate }, - { 0 } -}; - -static void -sgmlpage(char *name) -{ - int rc = 1, a; - char tokbuf[BUFSIZ]; - manpage_t manpage, *m = &manpage; - - (void) memset(m, 0, sizeof (*m)); - m->name = name; - - do { - if (!find_token(tokbuf, sizeof (tokbuf))) - break; - for (a = 0; acts[a].name; a++) { - if (strcasecmp(acts[a].name, tokbuf) != 0) - continue; - rc = acts[a].action(m); - break; - } - } while (rc); -} diff --git a/usr/src/cmd/man/src/makewhatis.sh b/usr/src/cmd/man/src/makewhatis.sh deleted file mode 100644 index da5fbf8613..0000000000 --- a/usr/src/cmd/man/src/makewhatis.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/sh - -# -# CDDL HEADER START -# -# The contents of this file are subject to the terms of the -# Common Development and Distribution License (the "License"). -# You may not use this file except in compliance with the License. -# -# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE -# or http://www.opensolaris.org/os/licensing. -# See the License for the specific language governing permissions -# and limitations under the License. -# -# When distributing Covered Code, include this CDDL HEADER in each -# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
-# If applicable, add the following below this CDDL HEADER, with the -# fields enclosed by brackets "[]" replaced with your own identifying -# information: Portions Copyright [yyyy] [name of copyright owner] -# -# CDDL HEADER END -# -# -# Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. -# -# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T -# All Rights Reserved -# - -# University Copyright- Copyright (c) 1982, 1986, 1988 -# The Regents of the University of California -# All Rights Reserved -# -# University Acknowledgment- Portions of this document are derived from -# software developed by the University of California, Berkeley, and its -# contributors - -PATH=/usr/xpg4/bin:$PATH - -tmpdir=/tmp/whatis.$$ -trap "rm -rf $tmpdir; exit 1" 1 2 13 15 - -mkdir -m 700 $tmpdir || { - echo "${0}: could not create temporary directory" 1&>2 - exit 1 -} - -[ -d $1 ] || exit 1 - -cd $1 -top=`pwd` -for i in man?* sman?* -do - if [ -d $i ] ; then - cd $i - if test "`echo *`" != "*" ; then - /usr/lib/getNAME * - fi - cd $top - fi -done >$tmpdir/whatisx -sed <$tmpdir/whatisx \ - -e 's/\\-/-/' \ - -e 's/\\\*-/-/' \ - -e 's/ VAX-11//' \ - -e 's/\\f[PRIB01234]//g' \ - -e 's/\\s[-+0-9]*//g' \ - -e 's/\\&//g' \ - -e '/ - /!d' \ - -e 's/.TH [^ ]* \([^ ]*\).* \(.*\) -/\2 (\1) -/' \ - -e 's/ / /g' | \ -awk '{ title = substr($0, 1, index($0, "- ") - 1) - synop = substr($0, index($0, "- ")) - count = split(title, n, " ") - for (i=1; i<count; i++) { - if ( (pos = index(n[i], ",")) || (pos = index(n[i], ":")) ) - n[i] = substr(n[i], 1, pos-1) - printf("%s\t%s %s\t%s\n", n[i], n[1], n[count], synop) - } -}' >$tmpdir/whatis -/usr/bin/expand -16,32,36,40,44,48,52,56,60,64,68,72,76,80,84,88,92,96,100 \ - $tmpdir/whatis | LC_CTYPE=C LC_COLLATE=C sort -u | \ - /usr/bin/unexpand -a > windex -chmod 644 windex >/dev/null 2>&1 -rm -rf $tmpdir -exit 0 diff --git a/usr/src/cmd/man/src/man.c b/usr/src/cmd/man/src/man.c deleted file mode 100644 index a2ef30da5b..0000000000 --- 
a/usr/src/cmd/man/src/man.c +++ /dev/null @@ -1,3341 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, Josef 'Jeff' Sipek <jeffpc@31bits.net>. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. - */ - -/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T. */ -/* All rights reserved. */ - -/* - * University Copyright- Copyright (c) 1982, 1986, 1988 - * The Regents of the University of California - * All Rights Reserved - * - * University Acknowledgment- Portions of this document are derived from - * software developed by the University of California, Berkeley, and its - * contributors. - */ - - -/* - * man - * links to apropos, whatis, and catman - * This version uses more for underlining and paging. 
- */ - -#include <stdio.h> -#include <ctype.h> -#include <sgtty.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <signal.h> -#include <string.h> -#include <malloc.h> -#include <dirent.h> -#include <errno.h> -#include <fcntl.h> -#include <locale.h> -#include <stdlib.h> -#include <unistd.h> -#include <memory.h> -#include <limits.h> -#include <wchar.h> - -#define MACROF "tmac.an" /* name of <locale> macro file */ -#define TMAC_AN "-man" /* default macro file */ - -/* - * The default search path for man subtrees. - */ - -#define MANDIR "/usr/share/man" /* default mandir */ -#define MAKEWHATIS "/usr/lib/makewhatis" -#define WHATIS "windex" -#define TEMPLATE "/tmp/mpXXXXXX" -#define CONFIG "man.cf" - -/* - * Names for formatting and display programs. The values given - * below are reasonable defaults, but sites with source may - * wish to modify them to match the local environment. The - * value for TCAT is particularly problematic as there's no - * accepted standard value available for it. (The definition - * below assumes C.A.T. troff output and prints it). - */ - -#define MORE "more -s" /* default paging filter */ -#define CAT_S "/usr/bin/cat -s" /* for '-' opt (no more) */ -#define CAT_ "/usr/bin/cat" /* for when output is not a tty */ -#define TROFF "troff" /* local name for troff */ -#define TCAT "lp -c -T troff" /* command to "display" troff output */ - -#define SOLIMIT 10 /* maximum allowed .so chain length */ -#define MAXDIRS 128 /* max # of subdirs per manpath */ -#define MAXPAGES 128 /* max # for multiple pages */ -#define PLEN 3 /* prefix length {man, cat, fmt} */ -#define TMPLEN 7 /* length of tmpfile prefix */ -#define MAXTOKENS 64 - -#define DOT_SO ".so " -#define PREPROC_SPEC "'\\\" " - -#define DPRINTF if (debug && !catmando) \ - (void) printf - -#define sys(s) (debug ? 
((void)puts(s), 0) : system(s)) -#define eq(a, b) (strcmp(a, b) == 0) -#define match(a, b, c) (strncmp(a, b, c) == 0) - -#define ISDIR(A) ((A.st_mode & S_IFMT) == S_IFDIR) - -#define SROFF_CMD "/usr/lib/sgml/sgml2roff" /* sgml converter */ -#define MANDIRNAME "man" /* man directory */ -#define SGMLDIR "sman" /* sman directory */ -#define SGML_SYMBOL "<!DOCTYPE" /* a sgml file should contain this */ -#define SGML_SYMBOL_LEN 9 /* length of SGML_SYMBOL */ - -/* - * Directory mapping of old directories to new directories - */ - -typedef struct { - char *old_name; - char *new_name; -} map_entry; - -static const map_entry map[] = { - { "3b", "3ucb" }, - { "3e", "3elf" }, - { "3g", "3gen" }, - { "3k", "3kstat" }, - { "3n", "3socket" }, - { "3r", "3rt" }, - { "3s", "3c" }, - { "3t", "3thr" }, - { "3x", "3curses" }, - { "3xc", "3xcurses" }, - { "3xn", "3xnet" } -}; - -/* - * A list of known preprocessors to precede the formatter itself - * in the formatting pipeline. Preprocessors are specified by - * starting a manual page with a line of the form: - * '\" X - * where X is a string consisting of letters from the p_tag fields - * below. - */ -static const struct preprocessor { - char p_tag; - char *p_nroff, - *p_troff, - *p_stdin_char; -} preprocessors [] = { - {'c', "cw", "cw", "-"}, - {'e', "neqn /usr/share/lib/pub/eqnchar", - "eqn /usr/share/lib/pub/eqnchar", "-"}, - {'p', "gpic", "gpic", "-"}, - {'r', "refer", "refer", "-"}, - {'t', "tbl", "tbl", ""}, - {'v', "vgrind -f", "vgrind -f", "-"}, - {0, 0, 0, 0} -}; - -struct suffix { - char *ds; - char *fs; -}; - -/* - * Flags that control behavior of build_manpath() - * - * BMP_ISPATH pathv is a vector constructed from PATH. - * Perform appropriate path translations for - * manpath. - * BMP_APPEND_MANDIR Add /usr/share/man to the end if it - * hasn't already appeared earlier. - * BMP_FALLBACK_MANDIR Append /usr/share/man only if no other - * manpath (including derived from PATH) - * elements are valid. 
- */ -#define BMP_ISPATH 1 -#define BMP_APPEND_MANDIR 2 -#define BMP_FALLBACK_MANDIR 4 - -/* - * When doing equality comparisons of directories, device and inode - * comparisons are done. The dupsec and dupnode structures are used - * to form a list of lists for this processing. - */ -struct secnode { - char *secp; - struct secnode *next; -}; -struct dupnode { - dev_t dev; /* from struct stat st_dev */ - ino_t ino; /* from struct stat st_ino */ - struct secnode *secl; /* sections already considered */ - struct dupnode *next; -}; - -/* - * Map directories that may appear in PATH to the corresponding - * man directory - */ -static struct pathmap { - char *bindir; - char *mandir; - dev_t dev; - ino_t ino; -} bintoman[] = { - {"/sbin", "/usr/share/man,1m", 0, 0}, - {"/usr/sbin", "/usr/share/man,1m", 0, 0}, - {"/usr/ucb", "/usr/share/man,1b", 0, 0}, - {"/usr/bin/X11", "/usr/X11/share/man", 0, 0}, - /* - * Restrict to section 1 so that whatis /usr/{,xpg4,xpg6}/bin/ls - * does not confuse users with section 1 and 1b - */ - {"/usr/bin", "/usr/share/man,1,1m,1s,1t,1c", 0, 0}, - {"/usr/xpg4/bin", "/usr/share/man,1", 0, 0}, - {"/usr/xpg6/bin", "/usr/share/man,1", 0, 0}, - {NULL, NULL, 0, 0} -}; - -/* - * Subdirectories to search for unformatted/formatted man page - * versions, in nroff and troff variations. The searching - * code in manual() is structured to expect there to be two - * subdirectories apiece, the first for unformatted files - * and the second for formatted ones. - */ -static char *nroffdirs[] = { "man", "cat", 0 }; -static char *troffdirs[] = { "man", "fmt", 0 }; - -#define MAN_USAGE "\ -usage:\tman [-] [-adFlprt] [-M path] [-T macro-package ] [ -s section ] \ -name ...\n\ -\tman [-M path] -k keyword ...\n\tman [-M path] -f file ..." 
-#define CATMAN_USAGE "\ -usage:\tcatman [-p] [-c|-ntw] [-M path] [-T macro-package ] [sections]" - -static char *opts[] = { - "FfkrpP:M:T:ts:lad", /* man */ - "wpnP:M:T:tc" /* catman */ -}; - -struct man_node { - char *path; /* mandir path */ - char **secv; /* submandir suffices */ - int defsrch; /* hint for man -p to avoid section list */ - int frompath; /* hint for man -d and catman -p */ - struct man_node *next; -}; - -static char *pages[MAXPAGES]; -static char **endp = pages; - -/* - * flags (options) - */ -static int nomore; -static int troffit; -static int debug; -static int Tflag; -static int sargs; -static int margs; -static int force; -static int found; -static int list; -static int all; -static int whatis; -static int apropos; -static int catmando; -static int nowhatis; -static int whatonly; -static int compargs; /* -c option for catman */ -static int printmp; - -static char *CAT = CAT_; -static char macros[MAXPATHLEN]; -static char *mansec; -static char *pager; -static char *troffcmd; -static char *troffcat; -static char **subdirs; - -static char *check_config(char *); -static struct man_node *build_manpath(char **, int); -static void getpath(struct man_node *, char **); -static void getsect(struct man_node *, char **); -static void get_all_sect(struct man_node *); -static void catman(struct man_node *, char **, int); -static int makecat(char *, char **, int); -static int getdirs(char *, char ***, short); -static void whatapro(struct man_node *, char *, int); -static void lookup_windex(char *, char *, char **); -static int icmp(wchar_t *, wchar_t *); -static void more(char **, int); -static void cleanup(char **); -static void bye(int); -static char **split(char *, char); -static void freev(char **); -static void fullpaths(struct man_node **); -static void lower(char *); -static int cmp(const void *, const void *); -static int manual(struct man_node *, char *); -static void mandir(char **, char *, char *); -static void sortdir(DIR *, char ***); -static 
int searchdir(char *, char *, char *); -static int windex(char **, char *, char *); -static void section(struct suffix *, char *); -static int bfsearch(FILE *, char **, char *, char **); -static int compare(char *, char *, char **); -static int format(char *, char *, char *, char *); -static char *addlocale(char *); -static int get_manconfig(FILE *, char *); -static void malloc_error(void); -static int sgmlcheck(const char *); -static char *map_section(char *, char *); -static void free_manp(struct man_node *manp); -static void init_bintoman(void); -static char *path_to_manpath(char *); -static int dupcheck(struct man_node *, struct dupnode **); -static void free_dupnode(struct dupnode *); -static void print_manpath(struct man_node *, char *); - -/* - * This flag is used when the SGML-to-troff converter - * is absent - all the SGML searches are bypassed. - */ -static int no_sroff = 0; - -/* - * This flag is used to describe the case where we've found - * an SGML formatted manpage in the sman directory, we haven't - * found a troff formatted manpage, and we don't have the SGML to troff - * conversion utility on the system. 
- */ -static int sman_no_man_no_sroff; - -static char language[PATH_MAX + 1]; /* LC_MESSAGES */ -static char localedir[PATH_MAX + 1]; /* locale specific path component */ - -static int defaultmandir = 1; /* if processing default mandir, 1 */ - -static char *newsection = NULL; - -int -main(int argc, char *argv[]) -{ - int badopts = 0; - int c; - char **pathv; - char *cmdname; - char *manpath = NULL; - static struct man_node *manpage = NULL; - int bmp_flags = 0; - int err = 0; - - if (access(SROFF_CMD, F_OK | X_OK) != 0) - no_sroff = 1; - - (void) setlocale(LC_ALL, ""); - (void) strcpy(language, setlocale(LC_MESSAGES, (char *)0)); - if (strcmp("C", language) != 0) - (void) sprintf(localedir, "%s", language); - -#if !defined(TEXT_DOMAIN) -#define TEXT_DOMAIN "SYS_TEST" -#endif - (void) textdomain(TEXT_DOMAIN); - - (void) strcpy(macros, TMAC_AN); - - /* - * get base part of command name - */ - if ((cmdname = strrchr(argv[0], '/')) != NULL) - cmdname++; - else - cmdname = argv[0]; - - if (eq(cmdname, "apropos") || eq(cmdname, "whatis")) { - whatis++; - apropos = (*cmdname == 'a'); - if ((optind = 1) == argc) { - (void) fprintf(stderr, gettext("%s what?\n"), cmdname); - exit(2); - } - goto doargs; - } else if (eq(cmdname, "catman")) - catmando++; - - opterr = 0; - while ((c = getopt(argc, argv, opts[catmando])) != -1) - switch (c) { - - /* - * man specific options - */ - case 'k': - apropos++; - /*FALLTHROUGH*/ - case 'f': - whatis++; - break; - case 'F': - force++; /* do lookups the hard way */ - break; - case 's': - mansec = optarg; - sargs++; - break; - case 'r': - nomore++, troffit++; - break; - case 'l': - list++; /* implies all */ - /*FALLTHROUGH*/ - case 'a': - all++; - break; - case 'd': - debug++; - break; - /* - * man and catman use -p differently. In catman it - * enables debug mode and in man it prints the (possibly - * derived from PATH or name operand) MANPATH. 
- */ - case 'p': - if (catmando == 0) { - printmp++; - } else { - debug++; - } - break; - case 'n': - nowhatis++; - break; - case 'w': - whatonly++; - break; - case 'c': /* n|troff compatibility */ - if (no_sroff) - (void) fprintf(stderr, gettext( - "catman: SGML conversion not " - "available -- -c flag ignored\n")); - else - compargs++; - continue; - - /* - * shared options - */ - case 'P': /* Backwards compatibility */ - case 'M': /* Respecify path for man pages. */ - manpath = optarg; - margs++; - break; - case 'T': /* Respecify man macros */ - (void) strcpy(macros, optarg); - Tflag++; - break; - case 't': - troffit++; - break; - case '?': - badopts++; - } - - /* - * Bad options or no args? - * (man -p and catman don't need args) - */ - if (badopts || (!catmando && !printmp && optind == argc)) { - (void) fprintf(stderr, "%s\n", catmando ? - gettext(CATMAN_USAGE) : gettext(MAN_USAGE)); - exit(2); - } - - if (compargs && (nowhatis || whatonly || troffit)) { - (void) fprintf(stderr, "%s\n", gettext(CATMAN_USAGE)); - (void) fprintf(stderr, gettext( - "-c option cannot be used with [-w][-n][-t]\n")); - exit(2); - } - - if (sargs && margs && catmando) { - (void) fprintf(stderr, "%s\n", gettext(CATMAN_USAGE)); - exit(2); - } - - if (troffit == 0 && nomore == 0 && !isatty(fileno(stdout))) - nomore++; - - /* - * Collect environment information. - */ - if (troffit) { - if ((troffcmd = getenv("TROFF")) == NULL) - troffcmd = TROFF; - if ((troffcat = getenv("TCAT")) == NULL) - troffcat = TCAT; - } else { - if (((pager = getenv("PAGER")) == NULL) || - (*pager == NULL)) - pager = MORE; - } - -doargs: - subdirs = troffit ? 
troffdirs : nroffdirs; - - init_bintoman(); - - if (manpath == NULL && (manpath = getenv("MANPATH")) == NULL) { - if ((manpath = getenv("PATH")) != NULL) { - bmp_flags = BMP_ISPATH | BMP_APPEND_MANDIR; - } else { - manpath = MANDIR; - } - } - - pathv = split(manpath, ':'); - - manpage = build_manpath(pathv, bmp_flags); - - /* release pathv allocated by split() */ - freev(pathv); - - /* - * If the TROFF environment variable hasn't been set, set PATH to - * ensure that we find nroff in /usr/bin, regardless of the user's - * PATH. (If the TROFF environment variable has been set, we - * assume that the environment has been constructed to execute the - * the nroff/troff found in the PATH with any options that may be - * specific to that implementation -- e.g., groff.) - */ - if (getenv("TROFF") == NULL && putenv("PATH=/usr/bin") != 0) { - perror("putenv"); - exit(1); - } - - fullpaths(&manpage); - - if (catmando) { - catman(manpage, argv+optind, argc-optind); - exit(0); - } - - /* - * The manual routine contains windows during which - * termination would leave a temp file behind. Thus - * we blanket the whole thing with a clean-up routine. 
- */ - if (signal(SIGINT, SIG_IGN) == SIG_DFL) { - (void) signal(SIGINT, bye); - (void) signal(SIGQUIT, bye); - (void) signal(SIGTERM, bye); - } - - /* - * "man -p" without operands - */ - if ((printmp != 0) && (optind == argc)) { - print_manpath(manpage, NULL); - exit(0); - } - - for (; optind < argc; optind++) { - if (strcmp(argv[optind], "-") == 0) { - nomore++; - CAT = CAT_S; - } else { - char *cmd; - static struct man_node *mp; - char *pv[2]; - - /* - * If full path to command specified, customize - * manpath accordingly - */ - if ((cmd = strrchr(argv[optind], '/')) != NULL) { - *cmd = '\0'; - if ((pv[0] = strdup(argv[optind])) == NULL) { - malloc_error(); - } - pv[1] = NULL; - *cmd = '/'; - mp = build_manpath(pv, - BMP_ISPATH|BMP_FALLBACK_MANDIR); - } else { - mp = manpage; - } - - if (whatis) { - whatapro(mp, argv[optind], apropos); - } else if (printmp != 0) { - print_manpath(mp, argv[optind]); - } else { - err += manual(mp, argv[optind]); - } - - if (mp != NULL && mp != manpage) { - free(pv[0]); - free_manp(mp); - } - } - } - return (err == 0 ? 0 : 1); - /*NOTREACHED*/ -} - -/* - * This routine builds the manpage structure from MANPATH or PATH, - * depending on flags. See BMP_* definitions above for valid - * flags. - * - * Assumes pathv elements were malloc'd, as done by split(). - * Elements may be freed and reallocated to have different contents. 
- */ - -static struct man_node * -build_manpath(char **pathv, int flags) -{ - struct man_node *manpage = NULL; - struct man_node *currp = NULL; - struct man_node *lastp = NULL; - char **p; - char **q; - char *mand = NULL; - char *mandir = MANDIR; - int s; - struct dupnode *didup = NULL; - struct stat sb; - - s = sizeof (struct man_node); - for (p = pathv; *p; ) { - - if (flags & BMP_ISPATH) { - if ((mand = path_to_manpath(*p)) == NULL) { - goto next; - } - free(*p); - *p = mand; - } - q = split(*p, ','); - if (stat(q[0], &sb) != 0 || (sb.st_mode & S_IFDIR) == 0) { - freev(q); - goto next; - } - - if (access(q[0], R_OK|X_OK) != 0) { - if (catmando) { - (void) fprintf(stderr, - gettext("%s is not accessible.\n"), - q[0]); - (void) fflush(stderr); - } - } else { - - /* - * Some element exists. Do not append MANDIR as a - * fallback. - */ - flags &= ~BMP_FALLBACK_MANDIR; - - if ((currp = (struct man_node *)calloc(1, s)) == NULL) { - malloc_error(); - } - - currp->frompath = (flags & BMP_ISPATH); - - if (manpage == NULL) { - lastp = manpage = currp; - } - - getpath(currp, p); - getsect(currp, p); - - /* - * If there are no new elements in this path, - * do not add it to the manpage list - */ - if (dupcheck(currp, &didup) != 0) { - freev(currp->secv); - free(currp); - } else { - currp->next = NULL; - if (currp != manpage) { - lastp->next = currp; - } - lastp = currp; - } - } - freev(q); -next: - /* - * Special handling of appending MANDIR. - * After all pathv elements have been processed, append MANDIR - * if needed. - */ - if (p == &mandir) { - break; - } - p++; - if (*p != NULL) { - continue; - } - if (flags & (BMP_APPEND_MANDIR|BMP_FALLBACK_MANDIR)) { - p = &mandir; - flags &= ~BMP_ISPATH; - } - } - - free_dupnode(didup); - - return (manpage); -} - -/* - * Stores the mandir path into the manp structure. 
- */ - -static void -getpath(struct man_node *manp, char **pv) -{ - char *s; - int i = 0; - - s = *pv; - - while (*s != NULL && *s != ',') - i++, s++; - - manp->path = (char *)malloc(i+1); - if (manp->path == NULL) - malloc_error(); - (void) strncpy(manp->path, *pv, i); - *(manp->path + i) = '\0'; -} - -/* - * Stores the mandir's corresponding sections (submandir - * directories) into the manp structure. - */ - -static void -getsect(struct man_node *manp, char **pv) -{ - char *sections; - char **sectp; - - if (sargs) { - manp->secv = split(mansec, ','); - - for (sectp = manp->secv; *sectp; sectp++) - lower(*sectp); - } else if ((sections = strchr(*pv, ',')) != NULL) { - if (debug) { - if (manp->frompath != 0) { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. /usr/share/man: derived from PATH, MANSECTS=,1b - */ - (void) printf(gettext( - "%s: derived from PATH, MANSECTS=%s\n"), - manp->path, sections); - } else { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. /usr/share/man: from -M option, MANSECTS=,1,2,3c - */ - (void) fprintf(stdout, gettext( - "%s: from -M option, MANSECTS=%s\n"), - manp->path, sections); - } - } - manp->secv = split(++sections, ','); - for (sectp = manp->secv; *sectp; sectp++) - lower(*sectp); - - if (*manp->secv == NULL) - get_all_sect(manp); - } else if ((sections = check_config(*pv)) != NULL) { - manp->defsrch = 1; -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. /usr/share/man: from man.cf, MANSECTS=1,1m,1c - */ - if (debug) - (void) fprintf(stdout, gettext( - "%s: from %s, MANSECTS=%s\n"), - manp->path, CONFIG, sections); - manp->secv = split(sections, ','); - - for (sectp = manp->secv; *sectp; sectp++) - lower(*sectp); - - if (*manp->secv == NULL) - get_all_sect(manp); - } else { - manp->defsrch = 1; -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * if man.cf has not been found or sections has not been specified - * man/catman searches the sections lexicographically. 
- */ - if (debug) - (void) fprintf(stdout, gettext( - "%s: search the sections lexicographically\n"), - manp->path); - manp->secv = NULL; - get_all_sect(manp); - } -} - -/* - * Get suffices of all sub-mandir directories in a mandir. - */ - -static void -get_all_sect(struct man_node *manp) -{ - DIR *dp; - char **dirv; - char **dv; - char **p; - char *prev = NULL; - char *tmp = NULL; - int plen; - int maxentries = MAXTOKENS; - int entries = 0; - - if ((dp = opendir(manp->path)) == 0) - return; - - /* - * sortdir() allocates memory for dirv and dirv[]. - */ - sortdir(dp, &dirv); - - (void) closedir(dp); - - if (manp->secv == NULL) { - /* - * allocates memory for manp->secv only if it's NULL - */ - manp->secv = (char **)malloc(maxentries * sizeof (char *)); - if (manp->secv == NULL) - malloc_error(); - } - - for (dv = dirv, p = manp->secv; *dv; dv++) { - plen = PLEN; - if (match(*dv, SGMLDIR, PLEN+1)) - ++plen; - - if (strcmp(*dv, CONFIG) == 0) { - /* release memory allocated by sortdir */ - free(*dv); - continue; - } - - if (tmp != NULL) - free(tmp); - tmp = strdup(*dv + plen); - if (tmp == NULL) - malloc_error(); - (void) sprintf(tmp, "%s", *dv + plen); - - if (prev != NULL) { - if (strcmp(prev, tmp) == 0) { - /* release memory allocated by sortdir */ - free(*dv); - continue; - } - } - - if (prev != NULL) - free(prev); - prev = strdup(*dv + plen); - if (prev == NULL) - malloc_error(); - (void) sprintf(prev, "%s", *dv + plen); - /* - * copy the string in (*dv + plen) to *p - */ - *p = strdup(*dv + plen); - if (*p == NULL) - malloc_error(); - p++; - entries++; - if (entries == maxentries) { - maxentries += MAXTOKENS; - manp->secv = (char **)realloc(manp->secv, - sizeof (char *) * maxentries); - if (manp->secv == NULL) - malloc_error(); - p = manp->secv + entries; - } - /* release memory allocated by sortdir */ - free(*dv); - } - *p = 0; - /* release memory allocated by sortdir */ - free(dirv); -} - -/* - * Format man pages (build cat pages); if no - * sections are 
specified, build all of them. - * When building cat pages: - * catman() tries to build cat pages for locale specific - * man dirs first. Then, catman() tries to build cat pages - * for the default man dir (for C locale like /usr/share/man) - * regardless of the locale. - * When building windex file: - * catman() tries to build windex file for locale specific - * man dirs first. Then, catman() tries to build windex file - * for the default man dir (for C locale like /usr/share/man) - * regardless of the locale. - */ - -static void -catman(struct man_node *manp, char **argv, int argc) -{ - char cmdbuf[BUFSIZ]; - char **dv; - int changed; - struct man_node *p; - int ndirs = 0; - char *ldir; - int i; - struct dupnode *dnp = NULL; - char **realsecv; - /* - * May be overwritten in dupcheck() so must be kept out of .rodata. - */ - char fakename[] = " catman "; - char *fakesecv[2]; - - fakesecv[0] = fakename; - fakesecv[1] = NULL; - - for (p = manp; p != NULL; p = p->next) { - /* - * prevent catman from doing very heavy lifting multiple - * times on some directory - */ - realsecv = p->secv; - p->secv = fakesecv; - if (dupcheck(p, &dnp) != 0) { - p->secv = realsecv; - continue; - } - -/* - * TRANSLATION_NOTE - message for catman -p - * ex. mandir path = /usr/share/man - */ - if (debug) - (void) fprintf(stdout, gettext( - "\nmandir path = %s\n"), p->path); - ndirs = 0; - - /* - * Build cat pages - * addlocale() allocates memory and returns it - */ - ldir = addlocale(p->path); - if (!whatonly) { - if (*localedir != '\0') { - if (defaultmandir) - defaultmandir = 0; - /* getdirs allocate memory for dv */ - ndirs = getdirs(ldir, &dv, 1); - if (ndirs != 0) { - changed = argc ? - makecat(ldir, argv, argc) : - makecat(ldir, dv, ndirs); - /* release memory by getdirs */ - for (i = 0; i < ndirs; i++) { - free(dv[i]); - } - free(dv); - } - } - - /* default man dir is always processed */ - defaultmandir = 1; - ndirs = getdirs(p->path, &dv, 1); - changed = argc ? 
- makecat(p->path, argv, argc) : - makecat(p->path, dv, ndirs); - /* release memory allocated by getdirs */ - for (i = 0; i < ndirs; i++) { - free(dv[i]); - } - free(dv); - } - /* - * Build whatis database - * print error message if locale is set and man dir not found - * won't build it at all if -c option is on - */ - if (!compargs && (whatonly || (!nowhatis && changed))) { - if (*localedir != '\0') { - /* just count the number of ndirs */ - if ((ndirs = getdirs(ldir, NULL, 0)) != 0) { - (void) sprintf(cmdbuf, - "/usr/bin/sh %s %s", - MAKEWHATIS, ldir); - (void) sys(cmdbuf); - } - } - /* whatis database of the default man dir */ - /* will be always built in C locale. */ - (void) sprintf(cmdbuf, - "/usr/bin/sh %s %s", - MAKEWHATIS, p->path); - (void) sys(cmdbuf); - } - /* release memory allocated by addlocale() */ - free(ldir); - } - free_dupnode(dnp); -} - -/* - * Build cat pages for given sections - */ - -static int -makecat(char *path, char **dv, int ndirs) -{ - DIR *dp, *sdp; - struct dirent *d; - struct stat sbuf; - char mandir[MAXPATHLEN+1]; - char smandir[MAXPATHLEN+1]; - char catdir[MAXPATHLEN+1]; - char *dirp, *sdirp; - int i, fmt; - int manflag, smanflag; - - for (i = fmt = 0; i < ndirs; i++) { - (void) snprintf(mandir, MAXPATHLEN, "%s/%s%s", - path, MANDIRNAME, dv[i]); - (void) snprintf(smandir, MAXPATHLEN, "%s/%s%s", - path, SGMLDIR, dv[i]); - (void) snprintf(catdir, MAXPATHLEN, "%s/%s%s", - path, subdirs[1], dv[i]); - dirp = strrchr(mandir, '/') + 1; - sdirp = strrchr(smandir, '/') + 1; - - manflag = smanflag = 0; - - if ((dp = opendir(mandir)) != NULL) - manflag = 1; - - if (!no_sroff && (sdp = opendir(smandir)) != NULL) - smanflag = 1; - - if (dp == 0 && sdp == 0) { - if (strcmp(mandir, CONFIG) == 0) - perror(mandir); - continue; - } -/* - * TRANSLATION_NOTE - message for catman -p - * ex. 
Building cat pages for mandir = /usr/share/man/ja - */ - if (debug) - (void) fprintf(stdout, gettext( - "Building cat pages for mandir = %s\n"), path); - - if (!compargs && stat(catdir, &sbuf) < 0) { - (void) umask(02); -/* - * TRANSLATION_NOTE - message for catman -p - * ex. mkdir /usr/share/man/ja/cat3c - */ - if (debug) - (void) fprintf(stdout, gettext("mkdir %s\n"), - catdir); - else { - if (mkdir(catdir, 0755) < 0) { - perror(catdir); - continue; - } - (void) chmod(catdir, 0755); - } - } - - /* - * if it is -c option of catman, if there is no - * coresponding man dir for sman files to go to, - * make the man dir - */ - - if (compargs && !manflag) { - if (mkdir(mandir, 0755) < 0) { - perror(mandir); - continue; - } - (void) chmod(mandir, 0755); - } - - if (smanflag) { - while ((d = readdir(sdp))) { - if (eq(".", d->d_name) || eq("..", d->d_name)) - continue; - - if (format(path, sdirp, (char *)0, d->d_name) - > 0) - fmt++; - } - } - - if (manflag && !compargs) { - while ((d = readdir(dp))) { - if (eq(".", d->d_name) || eq("..", d->d_name)) - continue; - - if (format(path, dirp, (char *)0, d->d_name) - > 0) - fmt++; - } - } - - if (manflag) - (void) closedir(dp); - - if (smanflag) - (void) closedir(sdp); - - } - return (fmt); -} - - -/* - * Get all "man" and "sman" dirs under a given manpath - * and return the number found - * If -c option is on, only count sman dirs - */ - -static int -getdirs(char *path, char ***dirv, short flag) -{ - DIR *dp; - struct dirent *d; - int n = 0; - int plen, sgml_flag, man_flag; - int i = 0; - int maxentries = MAXDIRS; - char **dv; - - if ((dp = opendir(path)) == 0) { - if (debug) { - if (*localedir != '\0') - (void) printf(gettext("\ -locale is %s, search in %s\n"), localedir, path); - perror(path); - } - return (0); - } - - if (flag) { - /* allocate memory for dirv */ - *dirv = (char **)malloc(sizeof (char *) * - maxentries); - if (*dirv == NULL) - malloc_error(); - dv = *dirv; - } - while ((d = readdir(dp))) { - plen = PLEN; - 
man_flag = sgml_flag = 0; - if (match(d->d_name, SGMLDIR, PLEN+1)) { - plen = PLEN + 1; - sgml_flag = 1; - i++; - } - - if (match(subdirs[0], d->d_name, PLEN)) - man_flag = 1; - - if (compargs && sgml_flag) { - if (flag) { - *dv = strdup(d->d_name+plen); - if (*dv == NULL) - malloc_error(); - dv++; - n = i; - } - } else if (!compargs && (sgml_flag || man_flag)) { - if (flag) { - *dv = strdup(d->d_name+plen); - if (*dv == NULL) - malloc_error(); - dv++; - } - n++; - } - if (flag) { - if ((dv - *dirv) == maxentries) { - int entries = maxentries; - maxentries += MAXTOKENS; - *dirv = (char **)realloc(*dirv, - sizeof (char *) * maxentries); - if (*dirv == NULL) - malloc_error(); - dv = *dirv + entries; - } - } - } - - (void) closedir(dp); - return (n); -} - - -/* - * Find matching whatis or apropos entries - * whatapro() tries to handle the windex file of the locale specific - * man dirs first, then tries to handle the windex file of the default - * man dir (of C locale like /usr/share/man). - */ - -static void -whatapro(struct man_node *manp, char *word, int apropos) -{ - char whatpath[MAXPATHLEN+1]; - char *p; - struct man_node *b; - int ndirs = 0; - char *ldir; - - -/* - * TRANSLATION_NOTE - message for man -d - * %s takes a parameter to -k option. - */ - DPRINTF(gettext("word = %s \n"), word); - - /* - * get base part of name - */ - if (!apropos) { - if ((p = strrchr(word, '/')) == NULL) - p = word; - else - p++; - } else { - p = word; - } - - for (b = manp; b != NULL; b = b->next) { - - if (*localedir != '\0') { - /* addlocale() allocates memory and returns it */ - ldir = addlocale(b->path); - if (defaultmandir) - defaultmandir = 0; - ndirs = getdirs(ldir, NULL, 0); - if (ndirs != 0) { - (void) sprintf(whatpath, "%s/%s", ldir, WHATIS); -/* - * TRANSLATION_NOTE - message for man -d - * ex. 
mandir path = /usr/share/man/ja - */ - DPRINTF(gettext("\nmandir path = %s\n"), ldir); - lookup_windex(whatpath, p, b->secv); - } - /* release memory allocated by addlocale() */ - free(ldir); - } - - defaultmandir = 1; - (void) sprintf(whatpath, "%s/%s", b->path, WHATIS); -/* - * TRANSLATION_NOTE - message for man -d - * ex. mandir path = /usr/share/man - */ - DPRINTF(gettext("\nmandir path = %s\n"), b->path); - - lookup_windex(whatpath, p, b->secv); - } -} - - -static void -lookup_windex(char *whatpath, char *word, char **secv) -{ - FILE *fp; - char *matches[MAXPAGES]; - char **pp; - wchar_t wbuf[BUFSIZ]; - wchar_t *word_wchar = NULL; - wchar_t *ws; - size_t word_len, ret; - - if ((fp = fopen(whatpath, "r")) == NULL) { - perror(whatpath); - return; - } - - if (apropos) { - word_len = strlen(word) + 1; - if ((word_wchar = (wchar_t *)malloc(sizeof (wchar_t) * - word_len)) == NULL) { - malloc_error(); - } - ret = mbstowcs(word_wchar, (const char *)word, word_len); - if (ret == (size_t)-1) { - (void) fprintf(stderr, gettext( - "Invalid character in keyword\n")); - exit(1); - } - while (fgetws(wbuf, BUFSIZ, fp) != NULL) - for (ws = wbuf; *ws; ws++) - if (icmp(word_wchar, ws) == 0) { - (void) printf("%ws", wbuf); - break; - } - } else { - if (bfsearch(fp, matches, word, secv)) - for (pp = matches; *pp; pp++) { - (void) printf("%s", *pp); - /* - * release memory allocated by - * strdup() in bfsearch() - */ - free(*pp); - } - } - (void) fclose(fp); - if (word_wchar) - free(word_wchar); - -} - - -/* - * case-insensitive compare unless upper case is used - * ie) "mount" matches mount, Mount, MOUNT - * "Mount" matches Mount, MOUNT - * "MOUNT" matches MOUNT only - * If matched return 0. Otherwise, return 1. - */ - -static int -icmp(wchar_t *ws, wchar_t *wt) -{ - for (; (*ws == 0) || - (*ws == (iswupper(*ws) ? 
*wt: towlower(*wt))); - ws++, wt++) - if (*ws == 0) - return (0); - - return (1); -} - - -/* - * Invoke PAGER with all matching man pages - */ - -static void -more(char **pages, int plain) -{ - char cmdbuf[BUFSIZ]; - char **vp; - - /* - * Dont bother. - */ - if (list || (*pages == 0)) - return; - - if (plain && troffit) { - cleanup(pages); - return; - } - (void) sprintf(cmdbuf, "%s", troffit ? troffcat : - plain ? CAT : pager); - - /* - * Build arg list - */ - for (vp = pages; vp < endp; vp++) { - (void) strcat(cmdbuf, " "); - (void) strcat(cmdbuf, *vp); - } - (void) sys(cmdbuf); - cleanup(pages); -} - - -/* - * Get rid of dregs. - */ - -static void -cleanup(char **pages) -{ - char **vp; - - for (vp = pages; vp < endp; vp++) { - if (match(TEMPLATE, *vp, TMPLEN)) - (void) unlink(*vp); - free(*vp); - } - - endp = pages; /* reset */ -} - - -/* - * Clean things up after receiving a signal. - */ - -/*ARGSUSED*/ -static void -bye(int sig) -{ - cleanup(pages); - exit(1); - /*NOTREACHED*/ -} - - -/* - * Split a string by specified separator. - * ignore empty components/adjacent separators. 
- * returns vector to all tokens - */ - -static char ** -split(char *s1, char sep) -{ - char **tokv, **vp; - char *mp, *tp; - int maxentries = MAXTOKENS; - int entries = 0; - - tokv = vp = (char **)malloc(maxentries * sizeof (char *)); - if (tokv == NULL) - malloc_error(); - mp = s1; - for (; mp && *mp; mp = tp) { - tp = strchr(mp, sep); - if (mp == tp) { /* empty component */ - tp++; /* ignore */ - continue; - } - if (tp) { - /* a component found */ - size_t len; - - len = tp - mp; - *vp = (char *)malloc(sizeof (char) * len + 1); - if (*vp == NULL) - malloc_error(); - (void) strncpy(*vp, mp, len); - *(*vp + len) = '\0'; - tp++; - vp++; - } else { - /* the last component */ - *vp = strdup(mp); - if (*vp == NULL) - malloc_error(); - vp++; - } - entries++; - if (entries == maxentries) { - maxentries += MAXTOKENS; - tokv = (char **)realloc(tokv, - maxentries * sizeof (char *)); - if (tokv == NULL) - malloc_error(); - vp = tokv + entries; - } - } - *vp = 0; - return (tokv); -} - -/* - * Free a vector allocated by split(); - */ -static void -freev(char **v) -{ - int i; - if (v != NULL) { - for (i = 0; v[i] != NULL; i++) { - free(v[i]); - } - free(v); - } -} - -/* - * Convert paths to full paths if necessary - * - */ - -static void -fullpaths(struct man_node **manp_head) -{ - char *cwd = NULL; - char *p; - char cwd_gotten = 0; - struct man_node *manp = *manp_head; - struct man_node *b; - struct man_node *prev = NULL; - - for (b = manp; b != NULL; b = b->next) { - if (*(b->path) == '/') { - prev = b; - continue; - } - - /* try to get cwd if haven't already */ - if (!cwd_gotten) { - cwd = getcwd(NULL, MAXPATHLEN+1); - cwd_gotten = 1; - } - - if (cwd) { - /* case: relative manpath with cwd: make absolute */ - if ((p = malloc(strlen(b->path)+strlen(cwd)+2)) == - NULL) { - malloc_error(); - } - (void) sprintf(p, "%s/%s", cwd, b->path); - /* - * resetting b->path - */ - free(b->path); - b->path = p; - } else { - /* case: relative manpath but no cwd: omit path entry */ - if 
(prev) - prev->next = b->next; - else - *manp_head = b->next; - - free_manp(b); - } - } - /* - * release memory allocated by getcwd() - */ - free(cwd); -} - -/* - * Free a man_node structure and its contents - */ - -static void -free_manp(struct man_node *manp) -{ - char **p; - - free(manp->path); - p = manp->secv; - while ((p != NULL) && (*p != NULL)) { - free(*p); - p++; - } - free(manp->secv); - free(manp); -} - - -/* - * Map (in place) to lower case - */ - -static void -lower(char *s) -{ - if (s == 0) - return; - while (*s) { - if (isupper(*s)) - *s = tolower(*s); - s++; - } -} - - -/* - * compare for sort() - * sort first by section-spec, then by prefix {sman, man, cat, fmt} - * note: prefix is reverse sorted so that "sman" and "man" always - * comes before {cat, fmt} - */ - -static int -cmp(const void *arg1, const void *arg2) -{ - int n; - char **p1 = (char **)arg1; - char **p2 = (char **)arg2; - - - /* by section; sman always before man dirs */ - if ((n = strcmp(*p1 + PLEN + (**p1 == 's' ? 1 : 0), - *p2 + PLEN + (**p2 == 's' ? 1 : 0)))) - return (n); - - /* by prefix reversed */ - return (strncmp(*p2, *p1, PLEN)); -} - - -/* - * Find a man page ... - * Loop through each path specified, - * first try the lookup method (whatis database), - * and if it doesn't exist, do the hard way. - */ - -static int -manual(struct man_node *manp, char *name) -{ - struct man_node *p; - struct man_node *local; - int ndirs = 0; - char *ldir; - char *ldirs[2]; - char *fullname = name; - char *slash; - - if ((slash = strrchr(name, '/')) != NULL) { - name = slash + 1; - } - - /* - * for each path in MANPATH - */ - found = 0; - - for (p = manp; p != NULL; p = p->next) { -/* - * TRANSLATION_NOTE - message for man -d - * ex. 
mandir path = /usr/share/man - */ - DPRINTF(gettext("\nmandir path = %s\n"), p->path); - - if (*localedir != '\0') { - /* addlocale() allocates memory and returns it */ - ldir = addlocale(p->path); - if (defaultmandir) - defaultmandir = 0; -/* - * TRANSLATION_NOTE - message for man -d - * ex. localedir = ja, ldir = /usr/share/man/ja - */ - if (debug) - (void) printf(gettext( - "localedir = %s, ldir = %s\n"), - localedir, ldir); - ndirs = getdirs(ldir, NULL, 0); - if (ndirs != 0) { - ldirs[0] = ldir; - ldirs[1] = NULL; - local = build_manpath(ldirs, 0); - if (force || - windex(local->secv, ldir, name) < 0) - mandir(local->secv, ldir, name); - free_manp(local); - } - /* release memory allocated by addlocale() */ - free(ldir); - } - - defaultmandir = 1; - /* - * locale mandir not valid, man page in locale - * mandir not found, or -a option present - */ - if (ndirs == 0 || !found || all) { - if (force || windex(p->secv, p->path, name) < 0) - mandir(p->secv, p->path, name); - } - - if (found && !all) - break; - } - - if (found) { - more(pages, nomore); - } else { - if (sargs) { - (void) fprintf(stderr, gettext("No entry for %s in " - "section(s) %s of the manual.\n"), - fullname, mansec); - } else { - (void) fprintf(stderr, gettext( - "No manual entry for %s.\n"), fullname, mansec); - } - - if (sman_no_man_no_sroff) - (void) fprintf(stderr, gettext("(An SGML manpage was " - "found for '%s' but it cannot be displayed.)\n"), - fullname, mansec); - } - sman_no_man_no_sroff = 0; - return (!found); -} - - -/* - * For a specified manual directory, - * read, store, & sort section subdirs, - * for each section specified - * find and search matching subdirs - */ - -static void -mandir(char **secv, char *path, char *name) -{ - DIR *dp; - char **dirv; - char **dv, **pdv; - int len, dslen, plen = PLEN; - - if ((dp = opendir(path)) == 0) { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * opendir(%s) returned 0 - */ - if (debug) - (void) fprintf(stdout, gettext( - " 
opendir on %s failed\n"), path); - return; - } - -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. mandir path = /usr/share/man/ja - */ - if (debug) - (void) printf(gettext("mandir path = %s\n"), path); - - /* - * sordir() allocates memory for dirv and dirv[]. - */ - sortdir(dp, &dirv); - /* - * Search in the order specified by MANSECTS - */ - for (; *secv; secv++) { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. section = 3c - */ - DPRINTF(gettext(" section = %s\n"), *secv); - len = strlen(*secv); - for (dv = dirv; *dv; dv++) { - plen = PLEN; - if (*dv[0] == 's') - plen++; - dslen = strlen(*dv+plen); - if (dslen > len) - len = dslen; - if (**secv == '\\') { - if (!eq(*secv + 1, *dv+plen)) - continue; - } else if (strncasecmp(*secv, *dv+plen, len) != 0) { - /* check to see if directory name changed */ - if (!all && - (newsection = map_section(*secv, path)) - == NULL) { - continue; - } - if (newsection == NULL) - newsection = ""; - if (!match(newsection, *dv+plen, len)) { - continue; - } - } - - if (searchdir(path, *dv, name) == 0) - continue; - - if (!all) { - /* release memory allocated by sortdir() */ - pdv = dirv; - while (*pdv) { - free(*pdv); - pdv++; - } - (void) closedir(dp); - /* release memory allocated by sortdir() */ - free(dirv); - return; - } - /* - * if we found a match in the man dir skip - * the corresponding cat dir if it exists - */ - if (all && **dv == 'm' && *(dv+1) && - eq(*(dv+1)+plen, *dv+plen)) - dv++; - } - } - /* release memory allocated by sortdir() */ - pdv = dirv; - while (*pdv) { - free(*pdv); - pdv++; - } - free(dirv); - (void) closedir(dp); -} - -/* - * Sort directories. 
- */ - -static void -sortdir(DIR *dp, char ***dirv) -{ - struct dirent *d; - char **dv; - int maxentries = MAXDIRS; - int entries = 0; - - *dirv = (char **)malloc(sizeof (char *) * maxentries); - dv = *dirv; - while ((d = readdir(dp))) { /* store dirs */ - if (eq(d->d_name, ".") || eq(d->d_name, "..")) /* ignore */ - continue; - - /* check if it matches sman, man, cat format */ - if (match(d->d_name, SGMLDIR, PLEN+1) || - match(d->d_name, subdirs[0], PLEN) || - match(d->d_name, subdirs[1], PLEN)) { - *dv = malloc(strlen(d->d_name) + 1); - if (*dv == NULL) - malloc_error(); - (void) strcpy(*dv, d->d_name); - dv++; - entries++; - if (entries == maxentries) { - maxentries += MAXDIRS; - *dirv = (char **)realloc(*dirv, - sizeof (char *) * maxentries); - if (*dirv == NULL) - malloc_error(); - dv = *dirv + entries; - } - } - } - *dv = 0; - - qsort((void *)*dirv, dv - *dirv, sizeof (char *), cmp); - -} - - -/* - * Search a section subdirectory for a - * given man page, return 1 for success - */ - -static int -searchdir(char *path, char *dir, char *name) -{ - DIR *sdp; - struct dirent *sd; - char sectpath[MAXPATHLEN+1]; - char file[MAXNAMLEN+1]; - char dname[MAXPATHLEN+1]; - char *last; - int nlen; - -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. 
scanning = man3c - */ - DPRINTF(gettext(" scanning = %s\n"), dir); - (void) sprintf(sectpath, "%s/%s", path, dir); - (void) snprintf(file, MAXPATHLEN, "%s.", name); - - if ((sdp = opendir(sectpath)) == 0) { - if (errno != ENOTDIR) /* ignore matching cruft */ - perror(sectpath); - return (0); - } - while ((sd = readdir(sdp))) { - last = strrchr(sd->d_name, '.'); - nlen = last - sd->d_name; - (void) sprintf(dname, "%.*s.", nlen, sd->d_name); - if (eq(dname, file) || eq(sd->d_name, name)) { - if (no_sroff && *dir == 's') { - sman_no_man_no_sroff = 1; - return (0); - } - (void) format(path, dir, name, sd->d_name); - (void) closedir(sdp); - return (1); - } - } - (void) closedir(sdp); - return (0); -} - -/* - * Check the hash table of old directory names to see if there is a - * new directory name. - * Returns new directory name if a match; after checking to be sure - * directory exists. - * Otherwise returns NULL - */ - -static char * -map_section(char *section, char *path) -{ - int i; - int len; - char fullpath[MAXPATHLEN]; - - if (list) /* -l option fall through */ - return (NULL); - - for (i = 0; i <= ((sizeof (map)/sizeof (map[0]) - 1)); i++) { - if (strlen(section) > strlen(map[i].new_name)) { - len = strlen(section); - } else { - len = strlen(map[i].new_name); - } - if (match(section, map[i].old_name, len)) { - (void) sprintf(fullpath, - "%s/sman%s", path, map[i].new_name); - if (!access(fullpath, R_OK | X_OK)) { - return (map[i].new_name); - } else { - return (NULL); - } - } - } - - return (NULL); -} - - -/* - * Use windex database for quick lookup of man pages - * instead of mandir() (brute force search) - */ - -static int -windex(char **secv, char *path, char *name) -{ - FILE *fp; - struct stat sbuf; - struct suffix *sp; - struct suffix psecs[MAXPAGES+1]; - char whatfile[MAXPATHLEN+1]; - char page[MAXPATHLEN+1]; - char *matches[MAXPAGES]; - char *file, *dir; - char **sv, **vp; - int len, dslen, exist, i; - int found_in_windex = 0; - char *tmp[] = {0, 0, 0, 0}; 
- - - (void) sprintf(whatfile, "%s/%s", path, WHATIS); - if ((fp = fopen(whatfile, "r")) == NULL) { - if (errno == ENOENT) - return (-1); - return (0); - } - -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. search in = /usr/share/man/ja/windex file - */ - if (debug) - (void) fprintf(stdout, gettext( - " search in = %s file\n"), whatfile); - - if (bfsearch(fp, matches, name, NULL) == 0) { - (void) fclose(fp); - return (-1); /* force search in mandir */ - } - - (void) fclose(fp); - - /* - * Save and split sections - * section() allocates memory for sp->ds - */ - for (sp = psecs, vp = matches; *vp; vp++, sp++) { - if ((sp - psecs) < MAXPAGES) { - section(sp, *vp); - } else { - if (debug) - (void) fprintf(stderr, gettext( - "too many sections in %s windex entry\n"), - name); - - /* Setting sp->ds to NULL signifies end-of-data. */ - sp->ds = 0; - goto finish; - } - } - - sp->ds = 0; - - /* - * Search in the order specified - * by MANSECTS - */ - for (; *secv; secv++) { - len = strlen(*secv); - -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. 
search an entry to match printf.3c - */ - if (debug) - (void) fprintf(stdout, gettext( - " search an entry to match %s.%s\n"), name, *secv); - /* - * For every whatis entry that - * was matched - */ - for (sp = psecs; sp->ds; sp++) { - dslen = strlen(sp->ds); - if (dslen > len) - len = dslen; - if (**secv == '\\') { - if (!eq(*secv + 1, sp->ds)) - continue; - } else if (!match(*secv, sp->ds, len)) { - /* check to see if directory name changed */ - if (!all && - (newsection = map_section(*secv, path)) - == NULL) { - continue; - } - if (newsection == NULL) - newsection = ""; - if (!match(newsection, sp->ds, len)) { - continue; - } - } - /* - * here to form "sman", "man", "cat"|"fmt" in - * order - */ - if (!no_sroff) { - tmp[0] = SGMLDIR; - for (i = 1; i < 4; i++) - tmp[i] = subdirs[i-1]; - } else { - for (i = 0; i < 3; i++) - tmp[i] = subdirs[i]; - } - - for (sv = tmp; *sv; sv++) { - (void) sprintf(page, - "%s/%s%s/%s%s%s", path, *sv, - sp->ds, name, *sp->fs ? "." : "", - sp->fs); - exist = (stat(page, &sbuf) == 0); - if (exist) - break; - } - if (!exist) { - (void) fprintf(stderr, gettext( - "%s entry incorrect: %s(%s) not found.\n"), - WHATIS, name, sp->ds); - continue; - } - - file = strrchr(page, '/'), *file = 0; - dir = strrchr(page, '/'); - - /* - * By now we have a match - */ - found_in_windex = 1; - (void) format(path, ++dir, name, ++file); - - if (!all) - goto finish; - } - } -finish: - /* - * release memory allocated by section() - */ - sp = psecs; - while (sp->ds) { - free(sp->ds); - sp->ds = NULL; - sp++; - } - - /* - * If we didn't find a match, return failure as if we didn't find - * the windex at all. Why? Well, if you create a windex, then upgrade - * to a later release that contains new man pages, and forget to - * recreate the windex (since we don't do that automatically), you - * won't see any new man pages since they aren't in the windex. 
- * Pretending we didn't see a windex at all if there are no matches - * forces a search of the underlying directory. After all, the - * goal of the windex is to enable searches (man -k) and speed things - * up, not to _prevent_ you from seeing new man pages, so this seems - * ok. The only problem is when there are multiple entries (different - * sections), and some are in and some are out. Say you do 'man ls', - * and ls(1) isn't in the windex, but ls(1B) is. In that case, we - * will find a match in ls(1B), and you'll see that man page. - * That doesn't seem bad since if you specify the section the search - * will be restricted too. So in the example above, if you do - * 'man -s 1 ls' you'll get ls(1). - */ - if (found_in_windex) - return (0); - else - return (-1); -} - - -/* - * Return pointers to the section-spec - * and file-suffix of a whatis entry - */ - -static void -section(struct suffix *sp, char *s) -{ - char *lp, *p; - - lp = strchr(s, '('); - p = strchr(s, ')'); - - if (++lp == 0 || p == 0 || lp == p) { - (void) fprintf(stderr, - gettext("mangled windex entry:\n\t%s\n"), s); - return; - } - *p = 0; - - /* - * copy the string pointed to by lp - */ - lp = strdup(lp); - if (lp == NULL) - malloc_error(); - /* - * release memory in s - * s has been allocated memory in bfsearch() - */ - free(s); - - lower(lp); - - /* - * split section-specifier if file-name - * suffix differs from section-suffix - */ - sp->ds = lp; - if ((p = strchr(lp, '/'))) { - *p++ = 0; - sp->fs = p; - } else - sp->fs = lp; -} - - -/* - * Binary file search to find matching man - * pages in whatis database. 
- */ - -static int -bfsearch(FILE *fp, char **matchv, char *key, char **secv) -{ - char entry[BUFSIZ]; - char **vp; - long top, bot, mid; - int c; - - vp = matchv; - bot = 0; - (void) fseek(fp, 0L, 2); - top = ftell(fp); - for (;;) { - mid = (top+bot)/2; - (void) fseek(fp, mid, 0); - do { - c = getc(fp); - mid++; - } while (c != EOF && c != '\n'); - if (fgets(entry, sizeof (entry), fp) == NULL) - break; - switch (compare(key, entry, secv)) { - case -2: - case -1: - case 0: - if (top <= mid) - break; - top = mid; - continue; - case 1: - case 2: - bot = mid; - continue; - } - break; - } - (void) fseek(fp, bot, 0); - while (ftell(fp) < top) { - if (fgets(entry, sizeof (entry), fp) == NULL) { - *matchv = 0; - return (matchv - vp); - } - switch (compare(key, entry, secv)) { - case -2: - *matchv = 0; - return (matchv - vp); - case -1: - case 0: - *matchv = strdup(entry); - if (*matchv == NULL) - malloc_error(); - else - matchv++; - break; - case 1: - case 2: - continue; - } - break; - } - while (fgets(entry, sizeof (entry), fp)) { - switch (compare(key, entry, secv)) { - case -1: - case 0: - *matchv = strdup(entry); - if (*matchv == NULL) - malloc_error(); - else - matchv++; - continue; - } - break; - } - *matchv = 0; - return (matchv - vp); -} - -static int -compare(char *key, char *entry, char **secv) -{ - char *entbuf; - char *s; - int comp, mlen; - int mbcurmax = MB_CUR_MAX; - char *secp = NULL; - int rv; - int eblen; - - entbuf = strdup(entry); - if (entbuf == NULL) { - malloc_error(); - } - eblen = strlen(entbuf); - - s = entbuf; - while (*s) { - if (*s == '\t' || *s == ' ') { - *s = '\0'; - break; - } - mlen = mblen(s, mbcurmax); - if (mlen == -1) { - (void) fprintf(stderr, gettext( - "Invalid character in windex file.\n")); - exit(1); - } - s += mlen; - } - /* - * Find the section within parantheses - */ - if (secv != NULL && (s - entbuf) < eblen) { - if ((secp = strchr(s + 1, ')')) != NULL) { - *secp = '\0'; - if ((secp = strchr(s + 1, '(')) != NULL) { - secp++; 
- } - } - } - - comp = strcmp(key, entbuf); - if (comp == 0) { - if (secp == NULL) { - rv = 0; - } else { - while (*secv != NULL) { - if ((strcmp(*secv, secp)) == 0) { - rv = 0; - break; - } - secv++; - } - } - } else if (comp < 0) { - rv = -2; - } else { - rv = 2; - } - free(entbuf); - return (rv); -} - - -/* - * Format a man page and follow .so references - * if necessary. - */ - -static int -format(char *path, char *dir, char *name, char *pg) -{ - char manpname[MAXPATHLEN+1], catpname[MAXPATHLEN+1]; - char manpname_sgml[MAXPATHLEN+1], smantmpname[MAXPATHLEN+1]; - char soed[MAXPATHLEN+1], soref[MAXPATHLEN+1]; - char manbuf[BUFSIZ], cmdbuf[BUFSIZ], tmpbuf[BUFSIZ]; - char tmpdir[MAXPATHLEN+1]; - int socount, updatedcat, regencat; - struct stat mansb, catsb, smansb; - char *tmpname; - int catonly = 0; - struct stat statb; - int plen = PLEN; - FILE *md; - int tempfd; - ssize_t count; - int temp, sgml_flag = 0, check_flag = 0; - char prntbuf[BUFSIZ + 1]; - char *ptr; - char *new_m; - char *tmpsubdir; - - found++; - - if (*dir != 'm' && *dir != 's') - catonly++; - - - if (*dir == 's') { - tmpsubdir = SGMLDIR; - ++plen; - (void) sprintf(manpname_sgml, "%s/man%s/%s", - path, dir+plen, pg); - } else - tmpsubdir = MANDIRNAME; - - if (list) { - (void) printf(gettext("%s (%s)\t-M %s\n"), - name, dir+plen, path); - return (-1); - } - - (void) sprintf(manpname, "%s/%s%s/%s", path, tmpsubdir, dir+plen, pg); - (void) sprintf(catpname, "%s/%s%s/%s", path, subdirs[1], dir+plen, pg); - - (void) sprintf(smantmpname, "%s/%s%s/%s", path, SGMLDIR, dir+plen, pg); - -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. unformatted = /usr/share/man/ja/man3s/printf.3s - */ - DPRINTF(gettext( - " unformatted = %s\n"), catonly ? "" : manpname); -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. 
formatted = /usr/share/man/ja/cat3s/printf.3s - */ - DPRINTF(gettext( - " formatted = %s\n"), catpname); - - /* - * Take care of indirect references to other man pages; - * i.e., resolve files containing only ".so manx/file.x". - * We follow .so chains, replacing title with the .so'ed - * file at each stage, and keeping track of how many times - * we've done so, so that we can avoid looping. - */ - *soed = 0; - socount = 0; - for (;;) { - FILE *md; - char *cp; - char *s; - char *new_s; - - if (catonly) - break; - /* - * Grab manpname's first line, stashing it in manbuf. - */ - - - if ((md = fopen(manpname, "r")) == NULL) { - if (*soed && errno == ENOENT) { - (void) fprintf(stderr, - gettext("Can't find referent of " - ".so in %s\n"), soed); - (void) fflush(stderr); - return (-1); - } - perror(manpname); - return (-1); - } - - /* - * If this is a directory, just ignore it. - */ - if (fstat(fileno(md), &statb) == NULL) { - if (S_ISDIR(statb.st_mode)) { - if (debug) { - (void) fprintf(stderr, - "\tignoring directory %s\n", - manpname); - (void) fflush(stderr); - } - (void) fclose(md); - return (-1); - } - } - - if (fgets(manbuf, BUFSIZ-1, md) == NULL) { - (void) fclose(md); - (void) fprintf(stderr, gettext("%s: null file\n"), - manpname); - (void) fflush(stderr); - return (-1); - } - (void) fclose(md); - - if (strncmp(manbuf, DOT_SO, sizeof (DOT_SO) - 1)) - break; -so_again: if (++socount > SOLIMIT) { - (void) fprintf(stderr, gettext(".so chain too long\n")); - (void) fflush(stderr); - return (-1); - } - s = manbuf + sizeof (DOT_SO) - 1; - if ((check_flag == 1) && ((new_s = strrchr(s, '/')) != NULL)) { - new_s++; - (void) sprintf(s, "%s%s/%s", - tmpsubdir, dir+plen, new_s); - } - - cp = strrchr(s, '\n'); - if (cp) - *cp = '\0'; - /* - * Compensate for sloppy typists by stripping - * trailing white space. - */ - cp = s + strlen(s); - while (--cp >= s && (*cp == ' ' || *cp == '\t')) - *cp = '\0'; - - /* - * Go off and find the next link in the chain. 
- */ - (void) strcpy(soed, manpname); - (void) strcpy(soref, s); - (void) sprintf(manpname, "%s/%s", path, s); -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. .so ref = man3c/string.3c - */ - DPRINTF(gettext(".so ref = %s\n"), s); - } - - /* - * Make symlinks if so'ed and cattin' - */ - if (socount && catmando) { - (void) sprintf(cmdbuf, "cd %s; rm -f %s; ln -s ../%s%s %s", - path, catpname, subdirs[1], soref+plen, catpname); - (void) sys(cmdbuf); - return (1); - } - - /* - * Obtain the cat page that corresponds to the man page. - * If it already exists, is up to date, and if we haven't - * been told not to use it, use it as it stands. - */ - regencat = updatedcat = 0; - if (compargs || (!catonly && stat(manpname, &mansb) >= 0 && - (stat(catpname, &catsb) < 0 || catsb.st_mtime < mansb.st_mtime)) || - (access(catpname, R_OK) != 0)) { - /* - * Construct a shell command line for formatting manpname. - * The resulting file goes initially into /tmp. If possible, - * it will later be moved to catpname. - */ - - int pipestage = 0; - int needcol = 0; - char *cbp = cmdbuf; - - regencat = updatedcat = 1; - - if (!catmando && !debug && !check_flag) { - (void) fprintf(stderr, gettext( - "Reformatting page. 
Please Wait...")); - if (sargs && (newsection != NULL) && - (*newsection != '\0')) { - (void) fprintf(stderr, gettext( - "\nThe directory name has been changed " - "to %s\n"), newsection); - } - (void) fflush(stderr); - } - - /* - * in catman command, if the file exists in sman dir already, - * don't need to convert the file in man dir to cat dir - */ - - if (!no_sroff && catmando && - match(tmpsubdir, MANDIRNAME, PLEN) && - stat(smantmpname, &smansb) >= 0) - return (1); - - /* - * cd to path so that relative .so commands will work - * correctly - */ - (void) sprintf(cbp, "cd %s; ", path); - cbp += strlen(cbp); - - - /* - * check to see whether it is a sgml file - * assume sgml symbol(>!DOCTYPE) can be found in the first - * BUFSIZ bytes - */ - - if ((temp = open(manpname, 0)) == -1) { - perror(manpname); - return (-1); - } - - if ((count = read(temp, prntbuf, BUFSIZ)) <= 0) { - perror(manpname); - return (-1); - } - - prntbuf[count] = '\0'; /* null terminate */ - ptr = prntbuf; - if (sgmlcheck((const char *)ptr) == 1) { - sgml_flag = 1; - if (defaultmandir && *localedir) { - (void) sprintf(cbp, "LC_MESSAGES=C %s %s ", - SROFF_CMD, manpname); - } else { - (void) sprintf(cbp, "%s %s ", - SROFF_CMD, manpname); - } - cbp += strlen(cbp); - } else if (*dir == 's') { - (void) close(temp); - return (-1); - } - (void) close(temp); - - /* - * Check for special formatting requirements by examining - * manpname's first line preprocessor specifications. - */ - - if (strncmp(manbuf, PREPROC_SPEC, - sizeof (PREPROC_SPEC) - 1) == 0) { - char *ptp; - - ptp = manbuf + sizeof (PREPROC_SPEC) - 1; - while (*ptp && *ptp != '\n') { - const struct preprocessor *pp; - - /* - * Check for a preprocessor we know about. 
- */ - for (pp = preprocessors; pp->p_tag; pp++) { - if (pp->p_tag == *ptp) - break; - } - if (pp->p_tag == 0) { - (void) fprintf(stderr, - gettext("unknown preprocessor " - "specifier %c\n"), *ptp); - (void) fflush(stderr); - return (-1); - } - - /* - * Add it to the pipeline. - */ - (void) sprintf(cbp, "%s %s |", - troffit ? pp->p_troff : pp->p_nroff, - pipestage++ == 0 ? manpname : - pp->p_stdin_char); - cbp += strlen(cbp); - - /* - * Special treatment: if tbl is among the - * preprocessors and we'll process with - * nroff, we have to pass things through - * col at the end of the pipeline. - */ - if (pp->p_tag == 't' && !troffit) - needcol++; - - ptp++; - } - } - - /* - * if catman, use the cat page name - * otherwise, dup template and create another - * (needed for multiple pages) - */ - if (catmando) - tmpname = catpname; - else { - tmpname = strdup(TEMPLATE); - if (tmpname == NULL) - malloc_error(); - (void) close(mkstemp(tmpname)); - } - - if (! Tflag) { - if (*localedir != '\0') { - (void) sprintf(macros, "%s/%s", path, MACROF); -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. locale macros = /usr/share/man/ja/tmac.an - */ - if (debug) - (void) printf(gettext( - "\nlocale macros = %s "), - macros); - if (stat(macros, &statb) < 0) - (void) strcpy(macros, TMAC_AN); -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * ex. 
macros = /usr/share/man/ja/tman.an - */ - if (debug) - (void) printf(gettext( - "\nmacros = %s\n"), - macros); - } - } - - tmpdir[0] = '\0'; - if (sgml_flag == 1) { - if (check_flag == 0) { - strcpy(tmpdir, "/tmp/sman_XXXXXX"); - if ((tempfd = mkstemp(tmpdir)) == -1) { - (void) fprintf(stderr, gettext( - "%s: null file\n"), tmpdir); - (void) fflush(stderr); - return (-1); - } - - if (debug) - close(tempfd); - - (void) sprintf(tmpbuf, "%s > %s", - cmdbuf, tmpdir); - if (sys(tmpbuf)) { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * Error message if sys(%s) failed - */ - (void) fprintf(stderr, gettext( - "sys(%s) fail!\n"), tmpbuf); - (void) fprintf(stderr, - gettext(" aborted (sorry)\n")); - (void) fflush(stderr); - /* release memory for tmpname */ - if (!catmando) { - (void) unlink(tmpdir); - (void) unlink(tmpname); - free(tmpname); - } - return (-1); - } else if (debug == 0) { - if ((md = fdopen(tempfd, "r")) - == NULL) { - (void) fprintf(stderr, gettext( - "%s: null file\n"), tmpdir); - (void) fflush(stderr); - close(tempfd); - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - return (-1); - } - - /* if the file is empty, */ - /* it's a fragment, do nothing */ - if (fgets(manbuf, BUFSIZ-1, md) - == NULL) { - (void) fclose(md); - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - return (1); - } - (void) fclose(md); - - if (strncmp(manbuf, DOT_SO, - sizeof (DOT_SO) - 1) == 0) { - if (!compargs) { - check_flag = 1; - (void) unlink(tmpdir); - (void) unlink(tmpname); - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - goto so_again; - } else { - (void) unlink(tmpdir); - strcpy(tmpdir, - "/tmp/sman_XXXXXX"); - tempfd = mkstemp(tmpdir); - if ((tempfd == -1) || - (md = fdopen(tempfd, "w")) - == NULL) { - (void) fprintf(stderr, - gettext( - "%s: null file\n"), - tmpdir); - (void) fflush(stderr); - if (tempfd != -1) - close(tempfd); - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - 
return (-1); - } - if ((new_m = strrchr(manbuf, '/')) != NULL) { - (void) fprintf(md, ".so man%s%s\n", dir+plen, new_m); - } else { -/* - * TRANSLATION_NOTE - message for catman -c - * Error message if unable to get file name - */ - (void) fprintf(stderr, - gettext("file not found\n")); - (void) fflush(stderr); - return (-1); - } - (void) fclose(md); - } - } - } - if (catmando && compargs) - (void) sprintf(cmdbuf, "cat %s > %s", - tmpdir, manpname_sgml); - else - (void) sprintf(cmdbuf, " cat %s | tbl | eqn | %s %s - %s > %s", - tmpdir, troffit ? troffcmd : "nroff -u0 -Tlp", - macros, troffit ? "" : " | col -x", tmpname); - } else - if (catmando && compargs) - (void) sprintf(cbp, " > %s", - manpname_sgml); - else - (void) sprintf(cbp, " | tbl | eqn | %s %s - %s > %s", - troffit ? troffcmd : "nroff -u0 -Tlp", - macros, troffit ? "" : " | col -x", tmpname); - - } else - (void) sprintf(cbp, "%s %s %s%s > %s", - troffit ? troffcmd : "nroff -u0 -Tlp", - macros, pipestage == 0 ? manpname : "-", - troffit ? "" : " | col -x", tmpname); - - /* Reformat the page. */ - if (sys(cmdbuf)) { -/* - * TRANSLATION_NOTE - message for man -d or catman -p - * Error message if sys(%s) failed - */ - (void) fprintf(stderr, gettext( - "sys(%s) fail!\n"), cmdbuf); - (void) fprintf(stderr, gettext(" aborted (sorry)\n")); - (void) fflush(stderr); - (void) unlink(tmpname); - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - return (-1); - } - - if (tmpdir[0] != '\0') - (void) unlink(tmpdir); - - if (catmando) - return (1); - - /* - * Attempt to move the cat page to its proper home. 
- */ - (void) sprintf(cmdbuf, - "trap '' 1 15; /usr/bin/mv -f %s %s 2> /dev/null", - tmpname, - catpname); - if (sys(cmdbuf)) - updatedcat = 0; - else if (debug == 0) - (void) chmod(catpname, 0644); - - if (debug) { - /* release memory for tmpname */ - if (!catmando) - free(tmpname); - (void) unlink(tmpname); - return (1); - } - - (void) fprintf(stderr, gettext(" done\n")); - (void) fflush(stderr); - } - - /* - * Save file name (dup if necessary) - * to view later - * fix for 1123802 - don't save names if we are invoked as catman - */ - if (!catmando) { - char **tmpp; - int dup; - char *newpage; - - if (regencat && !updatedcat) - newpage = tmpname; - else { - newpage = strdup(catpname); - if (newpage == NULL) - malloc_error(); - } - /* make sure we don't add a dup */ - dup = 0; - for (tmpp = pages; tmpp < endp; tmpp++) { - if (strcmp(*tmpp, newpage) == 0) { - dup = 1; - break; - } - } - if (!dup) - *endp++ = newpage; - if (endp >= &pages[MAXPAGES]) { - fprintf(stderr, - gettext("Internal pages array overflow!\n")); - exit(1); - } - } - - return (regencat); -} - -/* - * Add <localedir> to the path. - */ - -static char * -addlocale(char *path) -{ - - char *tmp; - - tmp = malloc(strlen(path) + strlen(localedir) + 2); - if (tmp == NULL) - malloc_error(); - (void) sprintf(tmp, "%s/%s", path, localedir); - return (tmp); - -} - -/* - * From the configuration file "man.cf", get the order of suffices of - * sub-mandirs to be used in the search path for a given mandir. 
- */ - -static char * -check_config(char *path) -{ - FILE *fp; - static char submandir[BUFSIZ]; - char *sect; - char fname[MAXPATHLEN]; - - (void) sprintf(fname, "%s/%s", path, CONFIG); - - if ((fp = fopen(fname, "r")) == NULL) - return (NULL); - else { - if (get_manconfig(fp, submandir) == -1) { - (void) fclose(fp); - return (NULL); - } - - (void) fclose(fp); - - sect = strchr(submandir, '='); - if (sect != NULL) - return (++sect); - else - return (NULL); - } -} - -/* - * This routine is for getting the MANSECTS entry from man.cf. - * It sets submandir to the line in man.cf that contains - * MANSECTS=sections[,sections]... - */ - -static int -get_manconfig(FILE *fp, char *submandir) -{ - char *s, *t, *rc; - char buf[BUFSIZ]; - - while ((rc = fgets(buf, sizeof (buf), fp)) != NULL) { - - /* - * skip leading blanks - */ - for (t = buf; *t != '\0'; t++) { - if (!isspace(*t)) - break; - } - /* - * skip line that starts with '#' or empty line - */ - if (*t == '#' || *t == '\0') - continue; - - if (strstr(buf, "MANSECTS") != NULL) - break; - } - - /* - * the man.cf file doesn't have a MANSECTS entry - */ - if (rc == NULL) - return (-1); - - s = strchr(buf, '\n'); - *s = '\0'; /* replace '\n' with '\0' */ - - (void) strcpy(submandir, buf); - return (0); -} - -static void -malloc_error(void) -{ - (void) fprintf(stderr, gettext( - "Memory allocation failed.\n")); - exit(1); -} - -static int -sgmlcheck(const char *s1) -{ - const char *s2 = SGML_SYMBOL; - int len; - - while (*s1) { - /* - * Assume the first character of SGML_SYMBOL(*s2) is '<'. - * Therefore, not necessary to do toupper(*s1) here. - */ - if (*s1 == *s2) { - /* - * *s1 is '<'. Check the following substring matches - * with "!DOCTYPE". 
- */ - s1++; - if (strncasecmp(s1, s2 + 1, SGML_SYMBOL_LEN - 1) - == 0) { - /* - * SGML_SYMBOL found - */ - return (1); - } - continue; - } else if (isascii(*s1)) { - /* - * *s1 is an ASCII char - * Skip one character - */ - s1++; - continue; - } else { - /* - * *s1 is a non-ASCII char or - * the first byte of the multibyte char. - * Skip one character - */ - len = mblen(s1, MB_CUR_MAX); - if (len == -1) - len = 1; - s1 += len; - continue; - } - } - /* - * SGML_SYMBOL not found - */ - return (0); -} - -/* - * Initializes the bintoman array with appropriate device and inode info - */ - -static void -init_bintoman(void) -{ - int i; - struct stat sb; - - for (i = 0; bintoman[i].bindir != NULL; i++) { - if (stat(bintoman[i].bindir, &sb) == 0) { - bintoman[i].dev = sb.st_dev; - bintoman[i].ino = sb.st_ino; - } else { - bintoman[i].dev = NODEV; - } - } -} - -/* - * If a duplicate is found, return 1 - * If a duplicate is not found, add it to the dupnode list and return 0 - */ -static int -dupcheck(struct man_node *mnp, struct dupnode **dnp) -{ - struct dupnode *curdnp; - struct secnode *cursnp; - struct stat sb; - int i; - int rv = 1; - int dupfound; - - /* - * If the path doesn't exist, treat it as a duplicate - */ - if (stat(mnp->path, &sb) != 0) { - return (1); - } - - /* - * If no sections were found in the man dir, treat it as duplicate - */ - if (mnp->secv == NULL) { - return (1); - } - - /* - * Find the dupnode structure for the previous time this directory - * was looked at. Device and inode numbers are compared so that - * directories that are reached via different paths (e.g. /usr/man vs. - * /usr/share/man) are treated as equivalent. - */ - for (curdnp = *dnp; curdnp != NULL; curdnp = curdnp->next) { - if (curdnp->dev == sb.st_dev && curdnp->ino == sb.st_ino) { - break; - } - } - - /* - * First time this directory has been seen. Add a new node to the - * head of the list. Since all entries are guaranteed to be unique - * copy all sections to new node. 
- */ - if (curdnp == NULL) { - if ((curdnp = calloc(1, sizeof (struct dupnode))) == NULL) { - malloc_error(); - } - for (i = 0; mnp->secv[i] != NULL; i++) { - if ((cursnp = calloc(1, sizeof (struct secnode))) - == NULL) { - malloc_error(); - } - cursnp->next = curdnp->secl; - curdnp->secl = cursnp; - if ((cursnp->secp = strdup(mnp->secv[i])) == NULL) { - malloc_error(); - } - } - curdnp->dev = sb.st_dev; - curdnp->ino = sb.st_ino; - curdnp->next = *dnp; - *dnp = curdnp; - return (0); - } - - /* - * Traverse the section vector in the man_node and the section list - * in dupnode cache to eliminate all duplicates from man_node - */ - for (i = 0; mnp->secv[i] != NULL; i++) { - dupfound = 0; - for (cursnp = curdnp->secl; cursnp != NULL; - cursnp = cursnp->next) { - if (strcmp(mnp->secv[i], cursnp->secp) == 0) { - dupfound = 1; - break; - } - } - if (dupfound) { - mnp->secv[i][0] = '\0'; - continue; - } - - - /* - * Update curdnp and set return value to indicate that this - * was not all duplicates. - */ - if ((cursnp = calloc(1, sizeof (struct secnode))) == NULL) { - malloc_error(); - } - cursnp->next = curdnp->secl; - curdnp->secl = cursnp; - if ((cursnp->secp = strdup(mnp->secv[i])) == NULL) { - malloc_error(); - } - rv = 0; - } - - return (rv); -} - -/* - * Given a bin directory, return the corresponding man directory. - * Return string must be free()d by the caller. - * - * NULL will be returned if no matching man directory can be found. 
- */ - -static char * -path_to_manpath(char *bindir) -{ - char *mand, *p; - int i; - struct stat sb; - - /* - * First look for known translations for specific bin paths - */ - if (stat(bindir, &sb) != 0) { - return (NULL); - } - for (i = 0; bintoman[i].bindir != NULL; i++) { - if (sb.st_dev == bintoman[i].dev && - sb.st_ino == bintoman[i].ino) { - if ((mand = strdup(bintoman[i].mandir)) == NULL) { - malloc_error(); - } - if ((p = strchr(mand, ',')) != NULL) { - *p = '\0'; - } - if (stat(mand, &sb) != 0) { - free(mand); - return (NULL); - } - if (p != NULL) { - *p = ','; - } - return (mand); - } - } - - /* - * No specific translation found. Try `dirname $bindir`/man - * and `dirname $bindir`/share/man - */ - if ((mand = malloc(PATH_MAX)) == NULL) { - malloc_error(); - } - - if (strlcpy(mand, bindir, PATH_MAX) >= PATH_MAX) { - free(mand); - return (NULL); - } - - /* - * Advance to end of buffer, strip trailing /'s then remove last - * directory component. - */ - for (p = mand; *p != '\0'; p++) - ; - for (; p > mand && *p == '/'; p--) - ; - for (; p > mand && *p != '/'; p--) - ; - if (p == mand && *p == '.') { - if (realpath("..", mand) == NULL) { - free(mand); - return (NULL); - } - for (; *p != '\0'; p++) - ; - } else { - *p = '\0'; - } - - if (strlcat(mand, "/man", PATH_MAX) >= PATH_MAX) { - free(mand); - return (NULL); - } - - if ((stat(mand, &sb) == 0) && S_ISDIR(sb.st_mode)) { - return (mand); - } - - /* - * Strip the /man off and try /share/man - */ - *p = '\0'; - if (strlcat(mand, "/share/man", PATH_MAX) >= PATH_MAX) { - free(mand); - return (NULL); - } - if ((stat(mand, &sb) == 0) && S_ISDIR(sb.st_mode)) { - return (mand); - } - - /* - * No man or share/man directory found - */ - free(mand); - return (NULL); -} - -/* - * Free a linked list of dupnode structs - */ -void -free_dupnode(struct dupnode *dnp) { - struct dupnode *dnp2; - struct secnode *snp; - - while (dnp != NULL) { - dnp2 = dnp; - dnp = dnp->next; - while (dnp2->secl != NULL) { - snp = dnp2->secl; 
- dnp2->secl = dnp2->secl->next; - free(snp->secp); - free(snp); - } - free(dnp2); - } -} - -/* - * prints manp linked list to stdout. - * - * If namep is NULL, output can be used for setting MANPATH. - * - * If namep is not NULL output is two columns. First column is the string - * pointed to by namep. Second column is a MANPATH-compatible representation - * of manp linked list. - */ -void -print_manpath(struct man_node *manp, char *namep) -{ - char colon[2]; - char **secp; - - if (namep != NULL) { - (void) printf("%s ", namep); - } - - colon[0] = '\0'; - colon[1] = '\0'; - - for (; manp != NULL; manp = manp->next) { - (void) printf("%s%s", colon, manp->path); - colon[0] = ':'; - - /* - * If man.cf or a directory scan was used to create section - * list, do not print section list again. If the output of - * man -p is used to set MANPATH, subsequent runs of man - * will re-read man.cf and/or scan man directories as - * required. - */ - if (manp->defsrch != 0) { - continue; - } - - for (secp = manp->secv; *secp != NULL; secp++) { - /* - * Section deduplication may have eliminated some - * sections from the vector. Avoid displaying this - * detail which would appear as ",," in output - */ - if ((*secp)[0] != '\0') { - (void) printf(",%s", *secp); - } - } - } - (void) printf("\n"); -} diff --git a/usr/src/cmd/man/stringlist.c b/usr/src/cmd/man/stringlist.c new file mode 100644 index 0000000000..e9f6035358 --- /dev/null +++ b/usr/src/cmd/man/stringlist.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1994 Christos Zoulas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+ */ + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "stringlist.h" + +#define _SL_CHUNKSIZE 20 + +stringlist * +sl_init(void) +{ + stringlist *sl; + + if ((sl = malloc(sizeof (stringlist))) == NULL) + err(1, "malloc"); + + sl->sl_cur = 0; + sl->sl_max = _SL_CHUNKSIZE; + sl->sl_str = malloc(sl->sl_max * sizeof (char *)); + if (sl->sl_str == NULL) + err(1, "malloc"); + + return (sl); +} + +int +sl_add(stringlist *sl, char *name) +{ + + if (sl->sl_cur == sl->sl_max - 1) { + sl->sl_max += _SL_CHUNKSIZE; + sl->sl_str = realloc(sl->sl_str, sl->sl_max * sizeof (char *)); + if (sl->sl_str == NULL) + return (-1); + } + sl->sl_str[sl->sl_cur++] = name; + + return (0); +} + + +void +sl_free(stringlist *sl, int all) +{ + size_t i; + + if (sl == NULL) + return; + if (sl->sl_str) { + if (all) + for (i = 0; i < sl->sl_cur; i++) + free(sl->sl_str[i]); + free(sl->sl_str); + } + free(sl); +} + + +char * +sl_find(stringlist *sl, char *name) +{ + size_t i; + + for (i = 0; i < sl->sl_cur; i++) + if (strcmp(sl->sl_str[i], name) == 0) + return (sl->sl_str[i]); + + return (NULL); +} diff --git a/usr/src/cmd/man/stringlist.h b/usr/src/cmd/man/stringlist.h new file mode 100644 index 0000000000..2813a102b0 --- /dev/null +++ b/usr/src/cmd/man/stringlist.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 1994 Christos Zoulas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christos Zoulas. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + */ + +#ifndef _STRINGLIST_H_ +#define _STRINGLIST_H_ + +#include <sys/types.h> + +typedef struct _stringlist { + char **sl_str; + size_t sl_max; + size_t sl_cur; +} stringlist; + +stringlist *sl_init(void); +int sl_add(stringlist *, char *); +void sl_free(stringlist *, int); +char *sl_find(stringlist *, char *); + +#endif /* _STRINGLIST_H_ */ diff --git a/usr/src/cmd/mandoc/Makefile b/usr/src/cmd/mandoc/Makefile new file mode 100644 index 0000000000..6d2e3491d9 --- /dev/null +++ b/usr/src/cmd/mandoc/Makefile @@ -0,0 +1,47 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2014 Nexenta Systems, Inc. All rights reserved. +# Copyright 2014 Garrett D'Amore <garrett@damore.org> +# + +PROGS= mandoc mandoc_preconv + +# We place preconv in /usr/lib. This is done to avoid conflicting with +# GNU groff, which puts it into /usr/bin. We also rename it so that it +# will only be seen by mandoc -- it isn't intended for general end-user use. + +ROOTPROGS = $(ROOTBIN)/mandoc $(ROOTLIB)/mandoc_preconv + +OBJS= $(preconv_OBJS) $(mandoc_OBJS) + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/cmd/mandoc/Makefile.common + +.KEEP_STATE: + +all: $(PROGS) + +mandoc_preconv: $(preconv_OBJS) + $(LINK.c) $(preconv_OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +mandoc: $(mandoc_OBJS) + $(LINK.c) $(mandoc_OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +clean: + $(RM) $(OBJS) + +install: all $(ROOTPROGS) + +include $(SRC)/cmd/Makefile.targ diff --git a/usr/src/cmd/mandoc/Makefile.common b/usr/src/cmd/mandoc/Makefile.common new file mode 100644 index 0000000000..0969995b72 --- /dev/null +++ b/usr/src/cmd/mandoc/Makefile.common @@ -0,0 +1,33 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+# Copyright 2014 Garrett D'Amore <garrett@damore.org> +# + +PROGS= mandoc mandoc_preconv +mandoc_OBJS = arch.o att.o chars.o eqn.o eqn_html.o eqn_term.o \ + html.o lib.o main.o man.o man_hash.o man_html.o \ + man_macro.o man_term.o man_validate.o mandoc.o mdoc.o \ + mdoc_argv.o mdoc_hash.o mdoc_html.o mdoc_macro.o \ + mdoc_man.o mdoc_term.o mdoc_validate.o msec.o out.o \ + read.o roff.o st.o tbl.o tbl_data.o tbl_html.o \ + tbl_layout.o tbl_opts.o tbl_term.o term.o term_ascii.o \ + term_ps.o tree.o vol.o + +preconv_OBJS = preconv.o + +CFLAGS += $(CC_VERBOSE) + +CPPFLAGS += -DHAVE_CONFIG_H -DUSE_WCHAR \ + -DOSNAME="\"illumos\"" \ + -DVERSION="\"1.12.1\"" diff --git a/usr/src/cmd/mandoc/THIRDPARTYLICENSE b/usr/src/cmd/mandoc/THIRDPARTYLICENSE new file mode 100644 index 0000000000..0fae1fd4b2 --- /dev/null +++ b/usr/src/cmd/mandoc/THIRDPARTYLICENSE @@ -0,0 +1,14 @@ +Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> +Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
diff --git a/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip b/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip new file mode 100644 index 0000000000..ecb8c678a0 --- /dev/null +++ b/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip @@ -0,0 +1 @@ +MANDOC - FORMAT AND DISPLAY UNIX MANUALS diff --git a/usr/src/cmd/mandoc/arch.c b/usr/src/cmd/mandoc/arch.c new file mode 100644 index 0000000000..e764bfe993 --- /dev/null +++ b/usr/src/cmd/mandoc/arch.c @@ -0,0 +1,39 @@ +/* $Id: arch.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2arch(const char *p) +{ + +#include "arch.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/arch.in b/usr/src/cmd/mandoc/arch.in new file mode 100644 index 0000000000..5113446e46 --- /dev/null +++ b/usr/src/cmd/mandoc/arch.in @@ -0,0 +1,111 @@ +/* $Id: arch.in,v 1.12 2012/01/28 14:02:17 joerg Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the architecture token of the .Dt prologue macro. + * All architectures that your system supports (or the manuals of your + * system) should be included here. The right-hand-side is the + * formatted output. + * + * Be sure to escape strings. + * + * REMEMBER TO ADD NEW ARCHITECTURES TO MDOC.7! 
+ */ + +LINE("acorn26", "Acorn26") +LINE("acorn32", "Acorn32") +LINE("algor", "Algor") +LINE("alpha", "Alpha") +LINE("amd64", "AMD64") +LINE("amiga", "Amiga") +LINE("amigappc", "AmigaPPC") +LINE("arc", "ARC") +LINE("arm", "ARM") +LINE("arm26", "ARM26") +LINE("arm32", "ARM32") +LINE("armish", "ARMISH") +LINE("aviion", "AViiON") +LINE("atari", "ATARI") +LINE("beagle", "Beagle") +LINE("bebox", "BeBox") +LINE("cats", "cats") +LINE("cesfic", "CESFIC") +LINE("cobalt", "Cobalt") +LINE("dreamcast", "Dreamcast") +LINE("emips", "EMIPS") +LINE("evbarm", "evbARM") +LINE("evbmips", "evbMIPS") +LINE("evbppc", "evbPPC") +LINE("evbsh3", "evbSH3") +LINE("ews4800mips", "EWS4800MIPS") +LINE("hp300", "HP300") +LINE("hp700", "HP700") +LINE("hpcarm", "HPCARM") +LINE("hpcmips", "HPCMIPS") +LINE("hpcsh", "HPCSH") +LINE("hppa", "HPPA") +LINE("hppa64", "HPPA64") +LINE("ia64", "ia64") +LINE("i386", "i386") +LINE("ibmnws", "IBMNWS") +LINE("iyonix", "Iyonix") +LINE("landisk", "LANDISK") +LINE("loongson", "Loongson") +LINE("luna68k", "Luna68k") +LINE("luna88k", "Luna88k") +LINE("m68k", "m68k") +LINE("mac68k", "Mac68k") +LINE("macppc", "MacPPC") +LINE("mips", "MIPS") +LINE("mips64", "MIPS64") +LINE("mipsco", "MIPSCo") +LINE("mmeye", "mmEye") +LINE("mvme68k", "MVME68k") +LINE("mvme88k", "MVME88k") +LINE("mvmeppc", "MVMEPPC") +LINE("netwinder", "NetWinder") +LINE("news68k", "NeWS68k") +LINE("newsmips", "NeWSMIPS") +LINE("next68k", "NeXT68k") +LINE("ofppc", "OFPPC") +LINE("palm", "Palm") +LINE("pc532", "PC532") +LINE("playstation2", "PlayStation2") +LINE("pmax", "PMAX") +LINE("pmppc", "pmPPC") +LINE("powerpc", "PowerPC") +LINE("prep", "PReP") +LINE("rs6000", "RS6000") +LINE("sandpoint", "Sandpoint") +LINE("sbmips", "SBMIPS") +LINE("sgi", "SGI") +LINE("sgimips", "SGIMIPS") +LINE("sh3", "SH3") +LINE("shark", "Shark") +LINE("socppc", "SOCPPC") +LINE("solbourne", "Solbourne") +LINE("sparc", "SPARC") +LINE("sparc64", "SPARC64") +LINE("sun2", "Sun2") +LINE("sun3", "Sun3") +LINE("tahoe", "Tahoe") 
+LINE("vax", "VAX") +LINE("x68k", "X68k") +LINE("x86", "x86") +LINE("x86_64", "x86_64") +LINE("xen", "Xen") +LINE("zaurus", "Zaurus") diff --git a/usr/src/cmd/mandoc/att.c b/usr/src/cmd/mandoc/att.c new file mode 100644 index 0000000000..24d757ddf7 --- /dev/null +++ b/usr/src/cmd/mandoc/att.c @@ -0,0 +1,39 @@ +/* $Id: att.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2att(const char *p) +{ + +#include "att.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/att.in b/usr/src/cmd/mandoc/att.in new file mode 100644 index 0000000000..b4ef822158 --- /dev/null +++ b/usr/src/cmd/mandoc/att.in @@ -0,0 +1,40 @@ +/* $Id: att.in,v 1.8 2011/07/31 17:30:33 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the AT&T versions of the .At macro. This probably + * isn't going to change. The right-hand side is the formatted string. + * + * Be sure to escape strings. + * The non-breaking blanks prevent ending an output line right before + * a number. Groff prevent line breaks at the same places. 
+ */ + +LINE("v1", "Version\\~1 AT&T UNIX") +LINE("v2", "Version\\~2 AT&T UNIX") +LINE("v3", "Version\\~3 AT&T UNIX") +LINE("v4", "Version\\~4 AT&T UNIX") +LINE("v5", "Version\\~5 AT&T UNIX") +LINE("v6", "Version\\~6 AT&T UNIX") +LINE("v7", "Version\\~7 AT&T UNIX") +LINE("32v", "Version\\~32V AT&T UNIX") +LINE("III", "AT&T System\\~III UNIX") +LINE("V", "AT&T System\\~V UNIX") +LINE("V.1", "AT&T System\\~V Release\\~1 UNIX") +LINE("V.2", "AT&T System\\~V Release\\~2 UNIX") +LINE("V.3", "AT&T System\\~V Release\\~3 UNIX") +LINE("V.4", "AT&T System\\~V Release\\~4 UNIX") diff --git a/usr/src/cmd/mandoc/chars.c b/usr/src/cmd/mandoc/chars.c new file mode 100644 index 0000000000..ce03347b5d --- /dev/null +++ b/usr/src/cmd/mandoc/chars.c @@ -0,0 +1,167 @@ +/* $Id: chars.c,v 1.52 2011/11/08 00:15:23 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "libmandoc.h" + +#define PRINT_HI 126 +#define PRINT_LO 32 + +struct ln { + struct ln *next; + const char *code; + const char *ascii; + int unicode; +}; + +#define LINES_MAX 328 + +#define CHAR(in, ch, code) \ + { NULL, (in), (ch), (code) }, + +#define CHAR_TBL_START static struct ln lines[LINES_MAX] = { +#define CHAR_TBL_END }; + +#include "chars.in" + +struct mchars { + struct ln **htab; +}; + +static const struct ln *find(const struct mchars *, + const char *, size_t); + +void +mchars_free(struct mchars *arg) +{ + + free(arg->htab); + free(arg); +} + +struct mchars * +mchars_alloc(void) +{ + struct mchars *tab; + struct ln **htab; + struct ln *pp; + int i, hash; + + /* + * Constructs a very basic chaining hashtable. The hash routine + * is simply the integral value of the first character. + * Subsequent entries are chained in the order they're processed. + */ + + tab = mandoc_malloc(sizeof(struct mchars)); + htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); + + for (i = 0; i < LINES_MAX; i++) { + hash = (int)lines[i].code[0] - PRINT_LO; + + if (NULL == (pp = htab[hash])) { + htab[hash] = &lines[i]; + continue; + } + + for ( ; pp->next; pp = pp->next) + /* Scan ahead. */ ; + pp->next = &lines[i]; + } + + tab->htab = htab; + return(tab); +} + +int +mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) +{ + const struct ln *ln; + + ln = find(arg, p, sz); + if (NULL == ln) + return(-1); + return(ln->unicode); +} + +char +mchars_num2char(const char *p, size_t sz) +{ + int i; + + if ((i = mandoc_strntoi(p, sz, 10)) < 0) + return('\0'); + return(i > 0 && i < 256 && isprint(i) ? 
+ /* LINTED */ i : '\0'); +} + +int +mchars_num2uc(const char *p, size_t sz) +{ + int i; + + if ((i = mandoc_strntoi(p, sz, 16)) < 0) + return('\0'); + /* FIXME: make sure we're not in a bogus range. */ + return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); +} + +const char * +mchars_spec2str(const struct mchars *arg, + const char *p, size_t sz, size_t *rsz) +{ + const struct ln *ln; + + ln = find(arg, p, sz); + if (NULL == ln) { + *rsz = 1; + return(NULL); + } + + *rsz = strlen(ln->ascii); + return(ln->ascii); +} + +static const struct ln * +find(const struct mchars *tab, const char *p, size_t sz) +{ + const struct ln *pp; + int hash; + + assert(p); + + if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) + return(NULL); + + hash = (int)p[0] - PRINT_LO; + + for (pp = tab->htab[hash]; pp; pp = pp->next) + if (0 == strncmp(pp->code, p, sz) && + '\0' == pp->code[(int)sz]) + return(pp); + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/chars.in b/usr/src/cmd/mandoc/chars.in new file mode 100644 index 0000000000..a4c45b3c43 --- /dev/null +++ b/usr/src/cmd/mandoc/chars.in @@ -0,0 +1,397 @@ +/* $Id: chars.in,v 1.42 2011/10/02 10:02:26 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * The ASCII translation tables. + * + * The left-hand side corresponds to the input sequence (\x, \(xx, \*(xx + * and so on) whose length is listed second element. The right-hand + * side is what's produced by the front-end, with the fourth element + * being its length. + * + * XXX - C-escape strings! + * XXX - update LINES_MAX if adding more! + */ + +/* Non-breaking, non-collapsing space uses unit separator. */ +static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' }; + +CHAR_TBL_START + +/* Spacing. */ +CHAR("c", "", 0) +CHAR("0", " ", 8194) +CHAR(" ", ascii_nbrsp, 160) +CHAR("~", ascii_nbrsp, 160) +CHAR("%", "", 0) +CHAR("&", "", 0) +CHAR("^", "", 0) +CHAR("|", "", 0) +CHAR("}", "", 0) + +/* Accents. */ +CHAR("a\"", "\"", 779) +CHAR("a-", "-", 175) +CHAR("a.", ".", 729) +CHAR("a^", "^", 770) +CHAR("\'", "\'", 769) +CHAR("aa", "\'", 769) +CHAR("ga", "`", 768) +CHAR("`", "`", 768) +CHAR("ab", "`", 774) +CHAR("ac", ",", 807) +CHAR("ad", "\"", 776) +CHAR("ah", "v", 711) +CHAR("ao", "o", 730) +CHAR("a~", "~", 771) +CHAR("ho", ",", 808) +CHAR("ha", "^", 94) +CHAR("ti", "~", 126) + +/* Quotes. */ +CHAR("Bq", ",,", 8222) +CHAR("bq", ",", 8218) +CHAR("lq", "``", 8220) +CHAR("rq", "\'\'", 8221) +CHAR("oq", "`", 8216) +CHAR("cq", "\'", 8217) +CHAR("aq", "\'", 39) +CHAR("dq", "\"", 34) +CHAR("Fo", "<<", 171) +CHAR("Fc", ">>", 187) +CHAR("fo", "<", 8249) +CHAR("fc", ">", 8250) + +/* Brackets. 
*/ +CHAR("lB", "[", 91) +CHAR("rB", "]", 93) +CHAR("lC", "{", 123) +CHAR("rC", "}", 125) +CHAR("la", "<", 60) +CHAR("ra", ">", 62) +CHAR("bv", "|", 9130) +CHAR("braceex", "|", 9130) +CHAR("bracketlefttp", "|", 9121) +CHAR("bracketleftbp", "|", 9123) +CHAR("bracketleftex", "|", 9122) +CHAR("bracketrighttp", "|", 9124) +CHAR("bracketrightbp", "|", 9126) +CHAR("bracketrightex", "|", 9125) +CHAR("lt", ",-", 9127) +CHAR("bracelefttp", ",-", 9127) +CHAR("lk", "{", 9128) +CHAR("braceleftmid", "{", 9128) +CHAR("lb", ",-", 9129) +CHAR("braceleftbp", "`-", 9129) +CHAR("braceleftex", "|", 9130) +CHAR("rt", "-.", 9131) +CHAR("bracerighttp", "-.", 9131) +CHAR("rk", "}", 9132) +CHAR("bracerightmid", "}", 9132) +CHAR("rb", "-\'", 9133) +CHAR("bracerightbp", "-\'", 9133) +CHAR("bracerightex", "|", 9130) +CHAR("parenlefttp", "/", 9115) +CHAR("parenleftbp", "\\", 9117) +CHAR("parenleftex", "|", 9116) +CHAR("parenrighttp", "\\", 9118) +CHAR("parenrightbp", "/", 9120) +CHAR("parenrightex", "|", 9119) + +/* Greek characters. 
*/ +CHAR("*A", "A", 913) +CHAR("*B", "B", 914) +CHAR("*G", "|", 915) +CHAR("*D", "/\\", 916) +CHAR("*E", "E", 917) +CHAR("*Z", "Z", 918) +CHAR("*Y", "H", 919) +CHAR("*H", "O", 920) +CHAR("*I", "I", 921) +CHAR("*K", "K", 922) +CHAR("*L", "/\\", 923) +CHAR("*M", "M", 924) +CHAR("*N", "N", 925) +CHAR("*C", "H", 926) +CHAR("*O", "O", 927) +CHAR("*P", "TT", 928) +CHAR("*R", "P", 929) +CHAR("*S", ">", 931) +CHAR("*T", "T", 932) +CHAR("*U", "Y", 933) +CHAR("*F", "O_", 934) +CHAR("*X", "X", 935) +CHAR("*Q", "Y", 936) +CHAR("*W", "O", 937) +CHAR("*a", "a", 945) +CHAR("*b", "B", 946) +CHAR("*g", "y", 947) +CHAR("*d", "d", 948) +CHAR("*e", "e", 949) +CHAR("*z", "C", 950) +CHAR("*y", "n", 951) +CHAR("*h", "0", 952) +CHAR("*i", "i", 953) +CHAR("*k", "k", 954) +CHAR("*l", "\\", 955) +CHAR("*m", "u", 956) +CHAR("*n", "v", 957) +CHAR("*c", "E", 958) +CHAR("*o", "o", 959) +CHAR("*p", "n", 960) +CHAR("*r", "p", 961) +CHAR("*s", "o", 963) +CHAR("*t", "t", 964) +CHAR("*u", "u", 965) +CHAR("*f", "o", 981) +CHAR("*x", "x", 967) +CHAR("*q", "u", 968) +CHAR("*w", "w", 969) +CHAR("+h", "0", 977) +CHAR("+f", "o", 966) +CHAR("+p", "w", 982) +CHAR("+e", "e", 1013) +CHAR("ts", "s", 962) + +/* Accented letters. 
*/ +CHAR(",C", "C", 199) +CHAR(",c", "c", 231) +CHAR("/L", "L", 321) +CHAR("/O", "O", 216) +CHAR("/l", "l", 322) +CHAR("/o", "o", 248) +CHAR("oA", "A", 197) +CHAR("oa", "a", 229) +CHAR(":A", "A", 196) +CHAR(":E", "E", 203) +CHAR(":I", "I", 207) +CHAR(":O", "O", 214) +CHAR(":U", "U", 220) +CHAR(":a", "a", 228) +CHAR(":e", "e", 235) +CHAR(":i", "i", 239) +CHAR(":o", "o", 246) +CHAR(":u", "u", 252) +CHAR(":y", "y", 255) +CHAR("\'A", "A", 193) +CHAR("\'E", "E", 201) +CHAR("\'I", "I", 205) +CHAR("\'O", "O", 211) +CHAR("\'U", "U", 218) +CHAR("\'a", "a", 225) +CHAR("\'e", "e", 233) +CHAR("\'i", "i", 237) +CHAR("\'o", "o", 243) +CHAR("\'u", "u", 250) +CHAR("^A", "A", 194) +CHAR("^E", "E", 202) +CHAR("^I", "I", 206) +CHAR("^O", "O", 212) +CHAR("^U", "U", 219) +CHAR("^a", "a", 226) +CHAR("^e", "e", 234) +CHAR("^i", "i", 238) +CHAR("^o", "o", 244) +CHAR("^u", "u", 251) +CHAR("`A", "A", 192) +CHAR("`E", "E", 200) +CHAR("`I", "I", 204) +CHAR("`O", "O", 210) +CHAR("`U", "U", 217) +CHAR("`a", "a", 224) +CHAR("`e", "e", 232) +CHAR("`i", "i", 236) +CHAR("`o", "o", 242) +CHAR("`u", "u", 249) +CHAR("~A", "A", 195) +CHAR("~N", "N", 209) +CHAR("~O", "O", 213) +CHAR("~a", "a", 227) +CHAR("~n", "n", 241) +CHAR("~o", "o", 245) + +/* Arrows and lines. */ +CHAR("<-", "<-", 8592) +CHAR("->", "->", 8594) +CHAR("<>", "<>", 8596) +CHAR("da", "v", 8595) +CHAR("ua", "^", 8593) +CHAR("va", "^v", 8597) +CHAR("lA", "<=", 8656) +CHAR("rA", "=>", 8658) +CHAR("hA", "<=>", 8660) +CHAR("dA", "v", 8659) +CHAR("uA", "^", 8657) +CHAR("vA", "^=v", 8661) + +/* Logic. */ +CHAR("AN", "^", 8743) +CHAR("OR", "v", 8744) +CHAR("no", "~", 172) +CHAR("tno", "~", 172) +CHAR("te", "3", 8707) +CHAR("fa", "V", 8704) +CHAR("st", "-)", 8715) +CHAR("tf", ".:.", 8756) +CHAR("3d", ".:.", 8756) +CHAR("or", "|", 124) + +/* Mathematicals. 
*/ +CHAR("pl", "+", 43) +CHAR("mi", "-", 8722) +CHAR("-", "-", 45) +CHAR("-+", "-+", 8723) +CHAR("+-", "+-", 177) +CHAR("t+-", "+-", 177) +CHAR("pc", ".", 183) +CHAR("md", ".", 8901) +CHAR("mu", "x", 215) +CHAR("tmu", "x", 215) +CHAR("c*", "x", 8855) +CHAR("c+", "+", 8853) +CHAR("di", "-:-", 247) +CHAR("tdi", "-:-", 247) +CHAR("f/", "/", 8260) +CHAR("**", "*", 8727) +CHAR("<=", "<=", 8804) +CHAR(">=", ">=", 8805) +CHAR("<<", "<<", 8810) +CHAR(">>", ">>", 8811) +CHAR("eq", "=", 61) +CHAR("!=", "!=", 8800) +CHAR("==", "==", 8801) +CHAR("ne", "!==", 8802) +CHAR("=~", "=~", 8773) +CHAR("-~", "-~", 8771) +CHAR("ap", "~", 8764) +CHAR("~~", "~~", 8776) +CHAR("~=", "~=", 8780) +CHAR("pt", "oc", 8733) +CHAR("es", "{}", 8709) +CHAR("mo", "E", 8712) +CHAR("nm", "!E", 8713) +CHAR("sb", "(=", 8834) +CHAR("nb", "(!=", 8836) +CHAR("sp", "=)", 8835) +CHAR("nc", "!=)", 8837) +CHAR("ib", "(=", 8838) +CHAR("ip", "=)", 8839) +CHAR("ca", "(^)", 8745) +CHAR("cu", "U", 8746) +CHAR("/_", "/_", 8736) +CHAR("pp", "_|_", 8869) +CHAR("is", "I", 8747) +CHAR("integral", "I", 8747) +CHAR("sum", "E", 8721) +CHAR("product", "TT", 8719) +CHAR("coproduct", "U", 8720) +CHAR("gr", "V", 8711) +CHAR("sr", "\\/", 8730) +CHAR("sqrt", "\\/", 8730) +CHAR("lc", "|~", 8968) +CHAR("rc", "~|", 8969) +CHAR("lf", "|_", 8970) +CHAR("rf", "_|", 8971) +CHAR("if", "oo", 8734) +CHAR("Ah", "N", 8501) +CHAR("Im", "I", 8465) +CHAR("Re", "R", 8476) +CHAR("pd", "a", 8706) +CHAR("-h", "/h", 8463) +CHAR("12", "1/2", 189) +CHAR("14", "1/4", 188) +CHAR("34", "3/4", 190) + +/* Ligatures. */ +CHAR("ff", "ff", 64256) +CHAR("fi", "fi", 64257) +CHAR("fl", "fl", 64258) +CHAR("Fi", "ffi", 64259) +CHAR("Fl", "ffl", 64260) +CHAR("AE", "AE", 198) +CHAR("ae", "ae", 230) +CHAR("OE", "OE", 338) +CHAR("oe", "oe", 339) +CHAR("ss", "ss", 223) +CHAR("IJ", "IJ", 306) +CHAR("ij", "ij", 307) + +/* Special letters. 
*/ +CHAR("-D", "D", 208) +CHAR("Sd", "o", 240) +CHAR("TP", "b", 222) +CHAR("Tp", "b", 254) +CHAR(".i", "i", 305) +CHAR(".j", "j", 567) + +/* Currency. */ +CHAR("Do", "$", 36) +CHAR("ct", "c", 162) +CHAR("Eu", "EUR", 8364) +CHAR("eu", "EUR", 8364) +CHAR("Ye", "Y", 165) +CHAR("Po", "L", 163) +CHAR("Cs", "x", 164) +CHAR("Fn", "f", 402) + +/* Lines. */ +CHAR("ba", "|", 124) +CHAR("br", "|", 9474) +CHAR("ul", "_", 95) +CHAR("rl", "-", 8254) +CHAR("bb", "|", 166) +CHAR("sl", "/", 47) +CHAR("rs", "\\", 92) + +/* Text markers. */ +CHAR("ci", "o", 9675) +CHAR("bu", "o", 8226) +CHAR("dd", "=", 8225) +CHAR("dg", "-", 8224) +CHAR("lz", "<>", 9674) +CHAR("sq", "[]", 9633) +CHAR("ps", "9|", 182) +CHAR("sc", "S", 167) +CHAR("lh", "<=", 9756) +CHAR("rh", "=>", 9758) +CHAR("at", "@", 64) +CHAR("sh", "#", 35) +CHAR("CR", "_|", 8629) +CHAR("OK", "\\/", 10003) + +/* Legal symbols. */ +CHAR("co", "(C)", 169) +CHAR("rg", "(R)", 174) +CHAR("tm", "tm", 8482) + +/* Punctuation. */ +CHAR(".", ".", 46) +CHAR("r!", "i", 161) +CHAR("r?", "c", 191) +CHAR("em", "--", 8212) +CHAR("en", "-", 8211) +CHAR("hy", "-", 8208) +CHAR("e", "\\", 92) + +/* Units. 
*/ +CHAR("de", "o", 176) +CHAR("%0", "%o", 8240) +CHAR("fm", "\'", 8242) +CHAR("sd", "\"", 8243) +CHAR("mc", "mu", 181) + +CHAR_TBL_END diff --git a/usr/src/cmd/mandoc/config.h b/usr/src/cmd/mandoc/config.h new file mode 100644 index 0000000000..969e1b49bf --- /dev/null +++ b/usr/src/cmd/mandoc/config.h @@ -0,0 +1,56 @@ +#ifndef MANDOC_CONFIG_H +#define MANDOC_CONFIG_H + +#if defined(__linux__) || defined(__MINT__) +# define _GNU_SOURCE /* strptime(), getsubopt() */ +#endif + +#include <stdio.h> + +#define HAVE_STRPTIME +#define HAVE_GETSUBOPT +#define HAVE_STRLCAT +#define HAVE_STRLCPY + +#include <sys/types.h> + +#if !defined(__BEGIN_DECLS) +# ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# else +# define __BEGIN_DECLS +# endif +#endif +#if !defined(__END_DECLS) +# ifdef __cplusplus +# define __END_DECLS } +# else +# define __END_DECLS +# endif +#endif + +#if defined(__APPLE__) +# define htobe32(x) OSSwapHostToBigInt32(x) +# define betoh32(x) OSSwapBigToHostInt32(x) +# define htobe64(x) OSSwapHostToBigInt64(x) +# define betoh64(x) OSSwapBigToHostInt64(x) +#elif defined(__linux__) +# define betoh32(x) be32toh(x) +# define betoh64(x) be64toh(x) +#endif + +#ifndef HAVE_STRLCAT +extern size_t strlcat(char *, const char *, size_t); +#endif +#ifndef HAVE_STRLCPY +extern size_t strlcpy(char *, const char *, size_t); +#endif +#ifndef HAVE_GETSUBOPT +extern int getsubopt(char **, char * const *, char **); +extern char *suboptarg; +#endif +#ifndef HAVE_FGETLN +extern char *fgetln(FILE *, size_t *); +#endif + +#endif /* MANDOC_CONFIG_H */ diff --git a/usr/src/cmd/mandoc/eqn.c b/usr/src/cmd/mandoc/eqn.c new file mode 100644 index 0000000000..37f01bcb5b --- /dev/null +++ b/usr/src/cmd/mandoc/eqn.c @@ -0,0 +1,949 @@ +/* $Id: eqn.c,v 1.38 2011/07/25 15:37:00 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, 
provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +#define EQN_NEST_MAX 128 /* maximum nesting of defines */ +#define EQN_MSG(t, x) mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL) + +enum eqn_rest { + EQN_DESCOPE, + EQN_ERR, + EQN_OK, + EQN_EOF +}; + +enum eqn_symt { + EQNSYM_alpha, + EQNSYM_beta, + EQNSYM_chi, + EQNSYM_delta, + EQNSYM_epsilon, + EQNSYM_eta, + EQNSYM_gamma, + EQNSYM_iota, + EQNSYM_kappa, + EQNSYM_lambda, + EQNSYM_mu, + EQNSYM_nu, + EQNSYM_omega, + EQNSYM_omicron, + EQNSYM_phi, + EQNSYM_pi, + EQNSYM_ps, + EQNSYM_rho, + EQNSYM_sigma, + EQNSYM_tau, + EQNSYM_theta, + EQNSYM_upsilon, + EQNSYM_xi, + EQNSYM_zeta, + EQNSYM_DELTA, + EQNSYM_GAMMA, + EQNSYM_LAMBDA, + EQNSYM_OMEGA, + EQNSYM_PHI, + EQNSYM_PI, + EQNSYM_PSI, + EQNSYM_SIGMA, + EQNSYM_THETA, + EQNSYM_UPSILON, + EQNSYM_XI, + EQNSYM_inter, + EQNSYM_union, + EQNSYM_prod, + EQNSYM_int, + EQNSYM_sum, + EQNSYM_grad, + EQNSYM_del, + EQNSYM_times, + EQNSYM_cdot, + EQNSYM_nothing, + EQNSYM_approx, + EQNSYM_prime, + EQNSYM_half, + EQNSYM_partial, + EQNSYM_inf, + EQNSYM_muchgreat, + EQNSYM_muchless, + EQNSYM_larrow, + EQNSYM_rarrow, + EQNSYM_pm, + EQNSYM_nequal, + EQNSYM_equiv, + EQNSYM_lessequal, 
+ EQNSYM_moreequal, + EQNSYM__MAX +}; + +enum eqnpartt { + EQN_DEFINE = 0, + EQN_NDEFINE, + EQN_TDEFINE, + EQN_SET, + EQN_UNDEF, + EQN_GFONT, + EQN_GSIZE, + EQN_BACK, + EQN_FWD, + EQN_UP, + EQN_DOWN, + EQN__MAX +}; + +struct eqnstr { + const char *name; + size_t sz; +}; + +#define STRNEQ(p1, sz1, p2, sz2) \ + ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) +#define EQNSTREQ(x, p, sz) \ + STRNEQ((x)->name, (x)->sz, (p), (sz)) + +struct eqnpart { + struct eqnstr str; + int (*fp)(struct eqn_node *); +}; + +struct eqnsym { + struct eqnstr str; + const char *sym; +}; + + +static enum eqn_rest eqn_box(struct eqn_node *, struct eqn_box *); +static struct eqn_box *eqn_box_alloc(struct eqn_node *, + struct eqn_box *); +static void eqn_box_free(struct eqn_box *); +static struct eqn_def *eqn_def_find(struct eqn_node *, + const char *, size_t); +static int eqn_do_gfont(struct eqn_node *); +static int eqn_do_gsize(struct eqn_node *); +static int eqn_do_define(struct eqn_node *); +static int eqn_do_ign1(struct eqn_node *); +static int eqn_do_ign2(struct eqn_node *); +static int eqn_do_tdefine(struct eqn_node *); +static int eqn_do_undef(struct eqn_node *); +static enum eqn_rest eqn_eqn(struct eqn_node *, struct eqn_box *); +static enum eqn_rest eqn_list(struct eqn_node *, struct eqn_box *); +static enum eqn_rest eqn_matrix(struct eqn_node *, struct eqn_box *); +static const char *eqn_nexttok(struct eqn_node *, size_t *); +static const char *eqn_nextrawtok(struct eqn_node *, size_t *); +static const char *eqn_next(struct eqn_node *, + char, size_t *, int); +static void eqn_rewind(struct eqn_node *); + +static const struct eqnpart eqnparts[EQN__MAX] = { + { { "define", 6 }, eqn_do_define }, /* EQN_DEFINE */ + { { "ndefine", 7 }, eqn_do_define }, /* EQN_NDEFINE */ + { { "tdefine", 7 }, eqn_do_tdefine }, /* EQN_TDEFINE */ + { { "set", 3 }, eqn_do_ign2 }, /* EQN_SET */ + { { "undef", 5 }, eqn_do_undef }, /* EQN_UNDEF */ + { { "gfont", 5 }, eqn_do_gfont }, /* EQN_GFONT */ + { { 
"gsize", 5 }, eqn_do_gsize }, /* EQN_GSIZE */ + { { "back", 4 }, eqn_do_ign1 }, /* EQN_BACK */ + { { "fwd", 3 }, eqn_do_ign1 }, /* EQN_FWD */ + { { "up", 2 }, eqn_do_ign1 }, /* EQN_UP */ + { { "down", 4 }, eqn_do_ign1 }, /* EQN_DOWN */ +}; + +static const struct eqnstr eqnmarks[EQNMARK__MAX] = { + { "", 0 }, /* EQNMARK_NONE */ + { "dot", 3 }, /* EQNMARK_DOT */ + { "dotdot", 6 }, /* EQNMARK_DOTDOT */ + { "hat", 3 }, /* EQNMARK_HAT */ + { "tilde", 5 }, /* EQNMARK_TILDE */ + { "vec", 3 }, /* EQNMARK_VEC */ + { "dyad", 4 }, /* EQNMARK_DYAD */ + { "bar", 3 }, /* EQNMARK_BAR */ + { "under", 5 }, /* EQNMARK_UNDER */ +}; + +static const struct eqnstr eqnfonts[EQNFONT__MAX] = { + { "", 0 }, /* EQNFONT_NONE */ + { "roman", 5 }, /* EQNFONT_ROMAN */ + { "bold", 4 }, /* EQNFONT_BOLD */ + { "fat", 3 }, /* EQNFONT_FAT */ + { "italic", 6 }, /* EQNFONT_ITALIC */ +}; + +static const struct eqnstr eqnposs[EQNPOS__MAX] = { + { "", 0 }, /* EQNPOS_NONE */ + { "over", 4 }, /* EQNPOS_OVER */ + { "sup", 3 }, /* EQNPOS_SUP */ + { "sub", 3 }, /* EQNPOS_SUB */ + { "to", 2 }, /* EQNPOS_TO */ + { "from", 4 }, /* EQNPOS_FROM */ +}; + +static const struct eqnstr eqnpiles[EQNPILE__MAX] = { + { "", 0 }, /* EQNPILE_NONE */ + { "pile", 4 }, /* EQNPILE_PILE */ + { "cpile", 5 }, /* EQNPILE_CPILE */ + { "rpile", 5 }, /* EQNPILE_RPILE */ + { "lpile", 5 }, /* EQNPILE_LPILE */ + { "col", 3 }, /* EQNPILE_COL */ + { "ccol", 4 }, /* EQNPILE_CCOL */ + { "rcol", 4 }, /* EQNPILE_RCOL */ + { "lcol", 4 }, /* EQNPILE_LCOL */ +}; + +static const struct eqnsym eqnsyms[EQNSYM__MAX] = { + { { "alpha", 5 }, "*a" }, /* EQNSYM_alpha */ + { { "beta", 4 }, "*b" }, /* EQNSYM_beta */ + { { "chi", 3 }, "*x" }, /* EQNSYM_chi */ + { { "delta", 5 }, "*d" }, /* EQNSYM_delta */ + { { "epsilon", 7 }, "*e" }, /* EQNSYM_epsilon */ + { { "eta", 3 }, "*y" }, /* EQNSYM_eta */ + { { "gamma", 5 }, "*g" }, /* EQNSYM_gamma */ + { { "iota", 4 }, "*i" }, /* EQNSYM_iota */ + { { "kappa", 5 }, "*k" }, /* EQNSYM_kappa */ + { { "lambda", 6 }, "*l" 
}, /* EQNSYM_lambda */ + { { "mu", 2 }, "*m" }, /* EQNSYM_mu */ + { { "nu", 2 }, "*n" }, /* EQNSYM_nu */ + { { "omega", 5 }, "*w" }, /* EQNSYM_omega */ + { { "omicron", 7 }, "*o" }, /* EQNSYM_omicron */ + { { "phi", 3 }, "*f" }, /* EQNSYM_phi */ + { { "pi", 2 }, "*p" }, /* EQNSYM_pi */ + { { "psi", 3 }, "*q" }, /* EQNSYM_psi */ + { { "rho", 3 }, "*r" }, /* EQNSYM_rho */ + { { "sigma", 5 }, "*s" }, /* EQNSYM_sigma */ + { { "tau", 3 }, "*t" }, /* EQNSYM_tau */ + { { "theta", 5 }, "*h" }, /* EQNSYM_theta */ + { { "upsilon", 7 }, "*u" }, /* EQNSYM_upsilon */ + { { "xi", 2 }, "*c" }, /* EQNSYM_xi */ + { { "zeta", 4 }, "*z" }, /* EQNSYM_zeta */ + { { "DELTA", 5 }, "*D" }, /* EQNSYM_DELTA */ + { { "GAMMA", 5 }, "*G" }, /* EQNSYM_GAMMA */ + { { "LAMBDA", 6 }, "*L" }, /* EQNSYM_LAMBDA */ + { { "OMEGA", 5 }, "*W" }, /* EQNSYM_OMEGA */ + { { "PHI", 3 }, "*F" }, /* EQNSYM_PHI */ + { { "PI", 2 }, "*P" }, /* EQNSYM_PI */ + { { "PSI", 3 }, "*Q" }, /* EQNSYM_PSI */ + { { "SIGMA", 5 }, "*S" }, /* EQNSYM_SIGMA */ + { { "THETA", 5 }, "*H" }, /* EQNSYM_THETA */ + { { "UPSILON", 7 }, "*U" }, /* EQNSYM_UPSILON */ + { { "XI", 2 }, "*C" }, /* EQNSYM_XI */ + { { "inter", 5 }, "ca" }, /* EQNSYM_inter */ + { { "union", 5 }, "cu" }, /* EQNSYM_union */ + { { "prod", 4 }, "product" }, /* EQNSYM_prod */ + { { "int", 3 }, "integral" }, /* EQNSYM_int */ + { { "sum", 3 }, "sum" }, /* EQNSYM_sum */ + { { "grad", 4 }, "gr" }, /* EQNSYM_grad */ + { { "del", 3 }, "gr" }, /* EQNSYM_del */ + { { "times", 5 }, "mu" }, /* EQNSYM_times */ + { { "cdot", 4 }, "pc" }, /* EQNSYM_cdot */ + { { "nothing", 7 }, "&" }, /* EQNSYM_nothing */ + { { "approx", 6 }, "~~" }, /* EQNSYM_approx */ + { { "prime", 5 }, "aq" }, /* EQNSYM_prime */ + { { "half", 4 }, "12" }, /* EQNSYM_half */ + { { "partial", 7 }, "pd" }, /* EQNSYM_partial */ + { { "inf", 3 }, "if" }, /* EQNSYM_inf */ + { { ">>", 2 }, ">>" }, /* EQNSYM_muchgreat */ + { { "<<", 2 }, "<<" }, /* EQNSYM_muchless */ + { { "<-", 2 }, "<-" }, /* EQNSYM_larrow */ + { { 
"->", 2 }, "->" }, /* EQNSYM_rarrow */ + { { "+-", 2 }, "+-" }, /* EQNSYM_pm */ + { { "!=", 2 }, "!=" }, /* EQNSYM_nequal */ + { { "==", 2 }, "==" }, /* EQNSYM_equiv */ + { { "<=", 2 }, "<=" }, /* EQNSYM_lessequal */ + { { ">=", 2 }, ">=" }, /* EQNSYM_moreequal */ +}; + +/* ARGSUSED */ +enum rofferr +eqn_read(struct eqn_node **epp, int ln, + const char *p, int pos, int *offs) +{ + size_t sz; + struct eqn_node *ep; + enum rofferr er; + + ep = *epp; + + /* + * If we're the terminating mark, unset our equation status and + * validate the full equation. + */ + + if (0 == strncmp(p, ".EN", 3)) { + er = eqn_end(epp); + p += 3; + while (' ' == *p || '\t' == *p) + p++; + if ('\0' == *p) + return(er); + mandoc_msg(MANDOCERR_ARGSLOST, ep->parse, ln, pos, NULL); + return(er); + } + + /* + * Build up the full string, replacing all newlines with regular + * whitespace. + */ + + sz = strlen(p + pos) + 1; + ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); + + /* First invocation: nil terminate the string. 
*/ + + if (0 == ep->sz) + *ep->data = '\0'; + + ep->sz += sz; + strlcat(ep->data, p + pos, ep->sz + 1); + strlcat(ep->data, " ", ep->sz + 1); + return(ROFF_IGN); +} + +struct eqn_node * +eqn_alloc(const char *name, int pos, int line, struct mparse *parse) +{ + struct eqn_node *p; + size_t sz; + const char *end; + + p = mandoc_calloc(1, sizeof(struct eqn_node)); + + if (name && '\0' != *name) { + sz = strlen(name); + assert(sz); + do { + sz--; + end = name + (int)sz; + } while (' ' == *end || '\t' == *end); + p->eqn.name = mandoc_strndup(name, sz + 1); + } + + p->parse = parse; + p->eqn.ln = line; + p->eqn.pos = pos; + p->gsize = EQN_DEFSIZE; + + return(p); +} + +enum rofferr +eqn_end(struct eqn_node **epp) +{ + struct eqn_node *ep; + struct eqn_box *root; + enum eqn_rest c; + + ep = *epp; + *epp = NULL; + + ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); + + root = ep->eqn.root; + root->type = EQN_ROOT; + + if (0 == ep->sz) + return(ROFF_IGN); + + if (EQN_DESCOPE == (c = eqn_eqn(ep, root))) { + EQN_MSG(MANDOCERR_EQNNSCOPE, ep); + c = EQN_ERR; + } + + return(EQN_EOF == c ? ROFF_EQN : ROFF_IGN); +} + +static enum eqn_rest +eqn_eqn(struct eqn_node *ep, struct eqn_box *last) +{ + struct eqn_box *bp; + enum eqn_rest c; + + bp = eqn_box_alloc(ep, last); + bp->type = EQN_SUBEXPR; + + while (EQN_OK == (c = eqn_box(ep, bp))) + /* Spin! */ ; + + return(c); +} + +static enum eqn_rest +eqn_matrix(struct eqn_node *ep, struct eqn_box *last) +{ + struct eqn_box *bp; + const char *start; + size_t sz; + enum eqn_rest c; + + bp = eqn_box_alloc(ep, last); + bp->type = EQN_MATRIX; + + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + if ( ! 
STRNEQ(start, sz, "{", 1)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + + while (EQN_OK == (c = eqn_box(ep, bp))) + switch (bp->last->pile) { + case (EQNPILE_LCOL): + /* FALLTHROUGH */ + case (EQNPILE_CCOL): + /* FALLTHROUGH */ + case (EQNPILE_RCOL): + continue; + default: + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + }; + + if (EQN_DESCOPE != c) { + if (EQN_EOF == c) + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (STRNEQ(start, sz, "}", 1)) + return(EQN_OK); + + EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); + return(EQN_ERR); +} + +static enum eqn_rest +eqn_list(struct eqn_node *ep, struct eqn_box *last) +{ + struct eqn_box *bp; + const char *start; + size_t sz; + enum eqn_rest c; + + bp = eqn_box_alloc(ep, last); + bp->type = EQN_LIST; + + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + if ( ! STRNEQ(start, sz, "{", 1)) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + + while (EQN_DESCOPE == (c = eqn_eqn(ep, bp))) { + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if ( ! 
STRNEQ(start, sz, "above", 5)) + break; + } + + if (EQN_DESCOPE != c) { + if (EQN_ERR != c) + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + return(EQN_ERR); + } + + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (STRNEQ(start, sz, "}", 1)) + return(EQN_OK); + + EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); + return(EQN_ERR); +} + +static enum eqn_rest +eqn_box(struct eqn_node *ep, struct eqn_box *last) +{ + size_t sz; + const char *start; + char *left; + char sym[64]; + enum eqn_rest c; + int i, size; + struct eqn_box *bp; + + if (NULL == (start = eqn_nexttok(ep, &sz))) + return(EQN_EOF); + + if (STRNEQ(start, sz, "}", 1)) + return(EQN_DESCOPE); + else if (STRNEQ(start, sz, "right", 5)) + return(EQN_DESCOPE); + else if (STRNEQ(start, sz, "above", 5)) + return(EQN_DESCOPE); + else if (STRNEQ(start, sz, "mark", 4)) + return(EQN_OK); + else if (STRNEQ(start, sz, "lineup", 6)) + return(EQN_OK); + + for (i = 0; i < (int)EQN__MAX; i++) { + if ( ! EQNSTREQ(&eqnparts[i].str, start, sz)) + continue; + return((*eqnparts[i].fp)(ep) ? + EQN_OK : EQN_ERR); + } + + if (STRNEQ(start, sz, "{", 1)) { + if (EQN_DESCOPE != (c = eqn_eqn(ep, last))) { + if (EQN_ERR != c) + EQN_MSG(MANDOCERR_EQNSCOPE, ep); + return(EQN_ERR); + } + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if (STRNEQ(start, sz, "}", 1)) + return(EQN_OK); + EQN_MSG(MANDOCERR_EQNBADSCOPE, ep); + return(EQN_ERR); + } + + for (i = 0; i < (int)EQNPILE__MAX; i++) { + if ( ! 
EQNSTREQ(&eqnpiles[i], start, sz)) + continue; + if (EQN_OK == (c = eqn_list(ep, last))) + last->last->pile = (enum eqn_pilet)i; + return(c); + } + + if (STRNEQ(start, sz, "matrix", 6)) + return(eqn_matrix(ep, last)); + + if (STRNEQ(start, sz, "left", 4)) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + left = mandoc_strndup(start, sz); + c = eqn_eqn(ep, last); + if (last->last) + last->last->left = left; + else + free(left); + if (EQN_DESCOPE != c) + return(c); + assert(last->last); + eqn_rewind(ep); + start = eqn_nexttok(ep, &sz); + assert(start); + if ( ! STRNEQ(start, sz, "right", 5)) + return(EQN_DESCOPE); + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + last->last->right = mandoc_strndup(start, sz); + return(EQN_OK); + } + + for (i = 0; i < (int)EQNPOS__MAX; i++) { + if ( ! EQNSTREQ(&eqnposs[i], start, sz)) + continue; + if (NULL == last->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + last->last->pos = (enum eqn_post)i; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + return(c); + } + + for (i = 0; i < (int)EQNMARK__MAX; i++) { + if ( ! EQNSTREQ(&eqnmarks[i], start, sz)) + continue; + if (NULL == last->last) { + EQN_MSG(MANDOCERR_EQNSYNT, ep); + return(EQN_ERR); + } + last->last->mark = (enum eqn_markt)i; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + return(c); + } + + for (i = 0; i < (int)EQNFONT__MAX; i++) { + if ( ! 
EQNSTREQ(&eqnfonts[i], start, sz)) + continue; + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } else if (EQN_OK == c) + last->last->font = (enum eqn_fontt)i; + return(c); + } + + if (STRNEQ(start, sz, "size", 4)) { + if (NULL == (start = eqn_nexttok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } + size = mandoc_strntoi(start, sz, 10); + if (EQN_EOF == (c = eqn_box(ep, last))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(EQN_ERR); + } else if (EQN_OK != c) + return(c); + last->last->size = size; + } + + bp = eqn_box_alloc(ep, last); + bp->type = EQN_TEXT; + for (i = 0; i < (int)EQNSYM__MAX; i++) + if (EQNSTREQ(&eqnsyms[i].str, start, sz)) { + sym[63] = '\0'; + snprintf(sym, 62, "\\[%s]", eqnsyms[i].sym); + bp->text = mandoc_strdup(sym); + return(EQN_OK); + } + + bp->text = mandoc_strndup(start, sz); + return(EQN_OK); +} + +void +eqn_free(struct eqn_node *p) +{ + int i; + + eqn_box_free(p->eqn.root); + + for (i = 0; i < (int)p->defsz; i++) { + free(p->defs[i].key); + free(p->defs[i].val); + } + + free(p->eqn.name); + free(p->data); + free(p->defs); + free(p); +} + +static struct eqn_box * +eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) +{ + struct eqn_box *bp; + + bp = mandoc_calloc(1, sizeof(struct eqn_box)); + bp->parent = parent; + bp->size = ep->gsize; + + if (NULL == parent->first) + parent->first = bp; + else + parent->last->next = bp; + + parent->last = bp; + return(bp); +} + +static void +eqn_box_free(struct eqn_box *bp) +{ + + if (bp->first) + eqn_box_free(bp->first); + if (bp->next) + eqn_box_free(bp->next); + + free(bp->text); + free(bp->left); + free(bp->right); + free(bp); +} + +static const char * +eqn_nextrawtok(struct eqn_node *ep, size_t *sz) +{ + + return(eqn_next(ep, '"', sz, 0)); +} + +static const char * +eqn_nexttok(struct eqn_node *ep, size_t *sz) +{ + + return(eqn_next(ep, '"', sz, 1)); +} + +static void +eqn_rewind(struct eqn_node *ep) +{ + + ep->cur = 
ep->rew; +} + +static const char * +eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) +{ + char *start, *next; + int q, diff, lim; + size_t ssz, dummy; + struct eqn_def *def; + + if (NULL == sz) + sz = &dummy; + + lim = 0; + ep->rew = ep->cur; +again: + /* Prevent self-definitions. */ + + if (lim >= EQN_NEST_MAX) { + EQN_MSG(MANDOCERR_ROFFLOOP, ep); + return(NULL); + } + + ep->cur = ep->rew; + start = &ep->data[(int)ep->cur]; + q = 0; + + if ('\0' == *start) + return(NULL); + + if (quote == *start) { + ep->cur++; + q = 1; + } + + start = &ep->data[(int)ep->cur]; + + if ( ! q) { + if ('{' == *start || '}' == *start) + ssz = 1; + else + ssz = strcspn(start + 1, " ^~\"{}\t") + 1; + next = start + (int)ssz; + if ('\0' == *next) + next = NULL; + } else + next = strchr(start, quote); + + if (NULL != next) { + *sz = (size_t)(next - start); + ep->cur += *sz; + if (q) + ep->cur++; + while (' ' == ep->data[(int)ep->cur] || + '\t' == ep->data[(int)ep->cur] || + '^' == ep->data[(int)ep->cur] || + '~' == ep->data[(int)ep->cur]) + ep->cur++; + } else { + if (q) + EQN_MSG(MANDOCERR_BADQUOTE, ep); + next = strchr(start, '\0'); + *sz = (size_t)(next - start); + ep->cur += *sz; + } + + /* Quotes aren't expanded for values. */ + + if (q || ! 
repl) + return(start); + + if (NULL != (def = eqn_def_find(ep, start, *sz))) { + diff = def->valsz - *sz; + + if (def->valsz > *sz) { + ep->sz += diff; + ep->data = mandoc_realloc(ep->data, ep->sz + 1); + ep->data[ep->sz] = '\0'; + start = &ep->data[(int)ep->rew]; + } + + diff = def->valsz - *sz; + memmove(start + *sz + diff, start + *sz, + (strlen(start) - *sz) + 1); + memcpy(start, def->val, def->valsz); + /* Count this expansion so the EQN_NEST_MAX check after "again:" can stop a definition that keeps expanding to itself. */ + lim++; + goto again; + } + + return(start); +} + +static int +eqn_do_ign1(struct eqn_node *ep) +{ + + if (NULL == eqn_nextrawtok(ep, NULL)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else + return(1); + + return(0); +} + +static int +eqn_do_ign2(struct eqn_node *ep) +{ + + if (NULL == eqn_nextrawtok(ep, NULL)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else if (NULL == eqn_nextrawtok(ep, NULL)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else + return(1); + + return(0); +} + +static int +eqn_do_tdefine(struct eqn_node *ep) +{ + + if (NULL == eqn_nextrawtok(ep, NULL)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else if (NULL == eqn_next(ep, ep->data[(int)ep->cur], NULL, 0)) + EQN_MSG(MANDOCERR_EQNEOF, ep); + else + return(1); + + return(0); +} + +static int +eqn_do_define(struct eqn_node *ep) +{ + const char *start; + size_t sz; + struct eqn_def *def; + int i; + + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } + + /* + * Search for a key that already exists. + * Create a new key if none is found. + */ + + if (NULL == (def = eqn_def_find(ep, start, sz))) { + /* Find holes in string array. 
*/ + for (i = 0; i < (int)ep->defsz; i++) + if (0 == ep->defs[i].keysz) + break; + + if (i == (int)ep->defsz) { + ep->defsz++; + ep->defs = mandoc_realloc + (ep->defs, ep->defsz * + sizeof(struct eqn_def)); + ep->defs[i].key = ep->defs[i].val = NULL; + } + + ep->defs[i].keysz = sz; + ep->defs[i].key = mandoc_realloc + (ep->defs[i].key, sz + 1); + + memcpy(ep->defs[i].key, start, sz); + ep->defs[i].key[(int)sz] = '\0'; + def = &ep->defs[i]; + } + + start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); + + if (NULL == start) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } + + def->valsz = sz; + def->val = mandoc_realloc(def->val, sz + 1); + memcpy(def->val, start, sz); + def->val[(int)sz] = '\0'; + return(1); +} + +static int +eqn_do_gfont(struct eqn_node *ep) +{ + + if (NULL == eqn_nextrawtok(ep, NULL)) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } + return(1); +} + +static int +eqn_do_gsize(struct eqn_node *ep) +{ + const char *start; + size_t sz; + + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } + ep->gsize = mandoc_strntoi(start, sz, 10); + return(1); +} + +static int +eqn_do_undef(struct eqn_node *ep) +{ + const char *start; + struct eqn_def *def; + size_t sz; + + if (NULL == (start = eqn_nextrawtok(ep, &sz))) { + EQN_MSG(MANDOCERR_EQNEOF, ep); + return(0); + } else if (NULL != (def = eqn_def_find(ep, start, sz))) + def->keysz = 0; + + return(1); +} + +static struct eqn_def * +eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) +{ + int i; + + for (i = 0; i < (int)ep->defsz; i++) + if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, + ep->defs[i].keysz, key, sz)) + return(&ep->defs[i]); + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/eqn_html.c b/usr/src/cmd/mandoc/eqn_html.c new file mode 100644 index 0000000000..80c82f1de5 --- /dev/null +++ b/usr/src/cmd/mandoc/eqn_html.c @@ -0,0 +1,81 @@ +/* $Id: eqn_html.c,v 1.2 2011/07/24 10:09:03 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps 
Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" + +static const enum htmltag fontmap[EQNFONT__MAX] = { + TAG_SPAN, /* EQNFONT_NONE */ + TAG_SPAN, /* EQNFONT_ROMAN */ + TAG_B, /* EQNFONT_BOLD */ + TAG_B, /* EQNFONT_FAT */ + TAG_I /* EQNFONT_ITALIC */ +}; + + +static void eqn_box(struct html *, const struct eqn_box *); + +void +print_eqn(struct html *p, const struct eqn *ep) +{ + struct htmlpair tag; + struct tag *t; + + PAIR_CLASS_INIT(&tag, "eqn"); + t = print_otag(p, TAG_SPAN, 1, &tag); + + p->flags |= HTML_NONOSPACE; + eqn_box(p, ep->root); + p->flags &= ~HTML_NONOSPACE; + + print_tagq(p, t); +} + +static void +eqn_box(struct html *p, const struct eqn_box *bp) +{ + struct tag *t; + + t = EQNFONT_NONE == bp->font ? 
NULL : + print_otag(p, fontmap[(int)bp->font], 0, NULL); + + if (bp->left) + print_text(p, bp->left); + + if (bp->text) + print_text(p, bp->text); + + if (bp->first) + eqn_box(p, bp->first); + + if (NULL != t) + print_tagq(p, t); + if (bp->right) + print_text(p, bp->right); + + if (bp->next) + eqn_box(p, bp->next); +} diff --git a/usr/src/cmd/mandoc/eqn_term.c b/usr/src/cmd/mandoc/eqn_term.c new file mode 100644 index 0000000000..cfbd8d48f8 --- /dev/null +++ b/usr/src/cmd/mandoc/eqn_term.c @@ -0,0 +1,76 @@ +/* $Id: eqn_term.c,v 1.4 2011/07/24 10:09:03 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" + +static const enum termfont fontmap[EQNFONT__MAX] = { + TERMFONT_NONE, /* EQNFONT_NONE */ + TERMFONT_NONE, /* EQNFONT_ROMAN */ + TERMFONT_BOLD, /* EQNFONT_BOLD */ + TERMFONT_BOLD, /* EQNFONT_FAT */ + TERMFONT_UNDER /* EQNFONT_ITALIC */ +}; + +static void eqn_box(struct termp *, const struct eqn_box *); + +void +term_eqn(struct termp *p, const struct eqn *ep) +{ + + p->flags |= TERMP_NONOSPACE; + eqn_box(p, ep->root); + term_word(p, " "); + p->flags &= ~TERMP_NONOSPACE; +} + +static void +eqn_box(struct termp *p, const struct eqn_box *bp) +{ + + if (EQNFONT_NONE != bp->font) + term_fontpush(p, fontmap[(int)bp->font]); + if (bp->left) + term_word(p, bp->left); + if (EQN_SUBEXPR == bp->type) + term_word(p, "("); + + if (bp->text) + term_word(p, bp->text); + + if (bp->first) + eqn_box(p, bp->first); + + if (EQN_SUBEXPR == bp->type) + term_word(p, ")"); + if (bp->right) + term_word(p, bp->right); + if (EQNFONT_NONE != bp->font) + term_fontpop(p); + + if (bp->next) + eqn_box(p, bp->next); +} diff --git a/usr/src/cmd/mandoc/html.c b/usr/src/cmd/mandoc/html.c new file mode 100644 index 0000000000..326df035fc --- /dev/null +++ b/usr/src/cmd/mandoc/html.c @@ -0,0 +1,699 @@ +/* $Id: html.c,v 1.150 2011/10/05 21:35:17 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "out.h" +#include "html.h" +#include "main.h" + +struct htmldata { + const char *name; + int flags; +#define HTML_CLRLINE (1 << 0) +#define HTML_NOSTACK (1 << 1) +#define HTML_AUTOCLOSE (1 << 2) /* Tag has auto-closure. */ +}; + +static const struct htmldata htmltags[TAG_MAX] = { + {"html", HTML_CLRLINE}, /* TAG_HTML */ + {"head", HTML_CLRLINE}, /* TAG_HEAD */ + {"body", HTML_CLRLINE}, /* TAG_BODY */ + {"meta", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */ + {"title", HTML_CLRLINE}, /* TAG_TITLE */ + {"div", HTML_CLRLINE}, /* TAG_DIV */ + {"h1", 0}, /* TAG_H1 */ + {"h2", 0}, /* TAG_H2 */ + {"span", 0}, /* TAG_SPAN */ + {"link", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */ + {"br", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */ + {"a", 0}, /* TAG_A */ + {"table", HTML_CLRLINE}, /* TAG_TABLE */ + {"tbody", HTML_CLRLINE}, /* TAG_TBODY */ + {"col", HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */ + {"tr", HTML_CLRLINE}, /* TAG_TR */ + {"td", HTML_CLRLINE}, /* TAG_TD */ + {"li", HTML_CLRLINE}, /* TAG_LI */ + {"ul", HTML_CLRLINE}, /* TAG_UL */ + {"ol", HTML_CLRLINE}, /* TAG_OL */ + {"dl", HTML_CLRLINE}, /* TAG_DL */ + {"dt", HTML_CLRLINE}, /* TAG_DT */ + {"dd", HTML_CLRLINE}, /* TAG_DD */ + {"blockquote", HTML_CLRLINE}, /* TAG_BLOCKQUOTE */ + {"p", HTML_CLRLINE | HTML_NOSTACK | 
HTML_AUTOCLOSE}, /* TAG_P */ + {"pre", HTML_CLRLINE }, /* TAG_PRE */ + {"b", 0 }, /* TAG_B */ + {"i", 0 }, /* TAG_I */ + {"code", 0 }, /* TAG_CODE */ + {"small", 0 }, /* TAG_SMALL */ +}; + +static const char *const htmlattrs[ATTR_MAX] = { + "http-equiv", /* ATTR_HTTPEQUIV */ + "content", /* ATTR_CONTENT */ + "name", /* ATTR_NAME */ + "rel", /* ATTR_REL */ + "href", /* ATTR_HREF */ + "type", /* ATTR_TYPE */ + "media", /* ATTR_MEDIA */ + "class", /* ATTR_CLASS */ + "style", /* ATTR_STYLE */ + "width", /* ATTR_WIDTH */ + "id", /* ATTR_ID */ + "summary", /* ATTR_SUMMARY */ + "align", /* ATTR_ALIGN */ + "colspan", /* ATTR_COLSPAN */ +}; + +static const char *const roffscales[SCALE_MAX] = { + "cm", /* SCALE_CM */ + "in", /* SCALE_IN */ + "pc", /* SCALE_PC */ + "pt", /* SCALE_PT */ + "em", /* SCALE_EM */ + "em", /* SCALE_MM */ + "ex", /* SCALE_EN */ + "ex", /* SCALE_BU */ + "em", /* SCALE_VS */ + "ex", /* SCALE_FS */ +}; + +static void bufncat(struct html *, const char *, size_t); +static void print_ctag(struct html *, enum htmltag); +static int print_encode(struct html *, const char *, int); +static void print_metaf(struct html *, enum mandoc_esc); +static void print_attr(struct html *, const char *, const char *); +static void *ml_alloc(char *, enum htmltype); + +static void * +ml_alloc(char *outopts, enum htmltype type) +{ + struct html *h; + const char *toks[5]; + char *v; + + toks[0] = "style"; + toks[1] = "man"; + toks[2] = "includes"; + toks[3] = "fragment"; + toks[4] = NULL; + + h = mandoc_calloc(1, sizeof(struct html)); + + h->type = type; + h->tags.head = NULL; + h->symtab = mchars_alloc(); + + while (outopts && *outopts) + switch (getsubopt(&outopts, UNCONST(toks), &v)) { + case (0): + h->style = v; + break; + case (1): + h->base_man = v; + break; + case (2): + h->base_includes = v; + break; + case (3): + h->oflags |= HTML_FRAGMENT; + break; + default: + break; + } + + return(h); +} + +void * +html_alloc(char *outopts) +{ + + return(ml_alloc(outopts, 
HTML_HTML_4_01_STRICT)); +} + + +void * +xhtml_alloc(char *outopts) +{ + + return(ml_alloc(outopts, HTML_XHTML_1_0_STRICT)); +} + + +void +html_free(void *p) +{ + struct tag *tag; + struct html *h; + + h = (struct html *)p; + + while ((tag = h->tags.head) != NULL) { + h->tags.head = tag->next; + free(tag); + } + + if (h->symtab) + mchars_free(h->symtab); + + free(h); +} + + +void +print_gen_head(struct html *h) +{ + struct htmlpair tag[4]; + + tag[0].key = ATTR_HTTPEQUIV; + tag[0].val = "Content-Type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "text/html; charset=utf-8"; + print_otag(h, TAG_META, 2, tag); + + tag[0].key = ATTR_NAME; + tag[0].val = "resource-type"; + tag[1].key = ATTR_CONTENT; + tag[1].val = "document"; + print_otag(h, TAG_META, 2, tag); + + if (h->style) { + tag[0].key = ATTR_REL; + tag[0].val = "stylesheet"; + tag[1].key = ATTR_HREF; + tag[1].val = h->style; + tag[2].key = ATTR_TYPE; + tag[2].val = "text/css"; + tag[3].key = ATTR_MEDIA; + tag[3].val = "all"; + print_otag(h, TAG_LINK, 4, tag); + } +} + +static void +print_metaf(struct html *h, enum mandoc_esc deco) +{ + enum htmlfont font; + + switch (deco) { + case (ESCAPE_FONTPREV): + font = h->metal; + break; + case (ESCAPE_FONTITALIC): + font = HTMLFONT_ITALIC; + break; + case (ESCAPE_FONTBOLD): + font = HTMLFONT_BOLD; + break; + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): + font = HTMLFONT_NONE; + break; + default: + abort(); + /* NOTREACHED */ + } + + if (h->metaf) { + print_tagq(h, h->metaf); + h->metaf = NULL; + } + + h->metal = h->metac; + h->metac = font; + + if (HTMLFONT_NONE != font) + h->metaf = HTMLFONT_BOLD == font ? + print_otag(h, TAG_B, 0, NULL) : + print_otag(h, TAG_I, 0, NULL); +} + +int +html_strlen(const char *cp) +{ + int ssz, sz; + const char *seq, *p; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_strlen() as we + * must calculate the width of produced strings. 
+ * Assume that characters are always width of "1". This is + * hacky, but it gets the job done for approximation of widths. + */ + + sz = 0; + while (NULL != (p = strchr(cp, '\\'))) { + sz += (int)(p - cp); + /* Resume just past the backslash that strchr() found, which is where mandoc_escape() starts parsing. */ + cp = p + 1; + switch (mandoc_escape(&cp, &seq, &ssz)) { + case (ESCAPE_ERROR): + return(sz); + case (ESCAPE_UNICODE): + /* FALLTHROUGH */ + case (ESCAPE_NUMBERED): + /* FALLTHROUGH */ + case (ESCAPE_SPECIAL): + sz++; + break; + default: + break; + } + } + + assert(sz >= 0); + return(sz + strlen(cp)); +} + +static int +print_encode(struct html *h, const char *p, int norecurse) +{ + size_t sz; + int c, len, nospace; + const char *seq; + enum mandoc_esc esc; + static const char rejs[6] = { '\\', '<', '>', '&', ASCII_HYPH, '\0' }; + + nospace = 0; + + while ('\0' != *p) { + sz = strcspn(p, rejs); + + fwrite(p, 1, sz, stdout); + p += (int)sz; + + if ('\0' == *p) + break; + + /* Markup characters are written as entity references so that text cannot open or close tags. */ + switch (*p++) { + case ('<'): + printf("&lt;"); + continue; + case ('>'): + printf("&gt;"); + continue; + case ('&'): + printf("&amp;"); + continue; + case (ASCII_HYPH): + putchar('-'); + continue; + default: + break; + } + + esc = mandoc_escape(&p, &seq, &len); + if (ESCAPE_ERROR == esc) + break; + + switch (esc) { + case (ESCAPE_UNICODE): + /* Skip past "u" header. 
*/ + c = mchars_num2uc(seq + 1, len - 1); + if ('\0' != c) + printf("&#x%x;", c); + break; + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, len); + if ('\0' != c) + putchar(c); + break; + case (ESCAPE_SPECIAL): + c = mchars_spec2cp(h->symtab, seq, len); + if (c > 0) + printf("&#%d;", c); + else if (-1 == c && 1 == len) + putchar((int)*seq); + break; + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTPREV): + /* FALLTHROUGH */ + case (ESCAPE_FONTBOLD): + /* FALLTHROUGH */ + case (ESCAPE_FONTITALIC): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): + if (norecurse) + break; + print_metaf(h, esc); + break; + case (ESCAPE_NOSPACE): + if ('\0' == *p) + nospace = 1; + break; + default: + break; + } + } + + return(nospace); +} + + +static void +print_attr(struct html *h, const char *key, const char *val) +{ + printf(" %s=\"", key); + (void)print_encode(h, val, 1); + putchar('\"'); +} + + +struct tag * +print_otag(struct html *h, enum htmltag tag, + int sz, const struct htmlpair *p) +{ + int i; + struct tag *t; + + /* Push this tags onto the stack of open scopes. */ + + if ( ! (HTML_NOSTACK & htmltags[tag].flags)) { + t = mandoc_malloc(sizeof(struct tag)); + t->tag = tag; + t->next = h->tags.head; + h->tags.head = t; + } else + t = NULL; + + if ( ! (HTML_NOSPACE & h->flags)) + if ( ! (HTML_CLRLINE & htmltags[tag].flags)) { + /* Manage keeps! */ + if ( ! (HTML_KEEP & h->flags)) { + if (HTML_PREKEEP & h->flags) + h->flags |= HTML_KEEP; + putchar(' '); + } else + printf(" "); + } + + if ( ! (h->flags & HTML_NONOSPACE)) + h->flags &= ~HTML_NOSPACE; + else + h->flags |= HTML_NOSPACE; + + /* Print out the tag name and attributes. */ + + printf("<%s", htmltags[tag].name); + for (i = 0; i < sz; i++) + print_attr(h, htmlattrs[p[i].key], p[i].val); + + /* Add non-overridable attributes. 
*/
+
+	if (TAG_HTML == tag && HTML_XHTML_1_0_STRICT == h->type) {
+		print_attr(h, "xmlns", "http://www.w3.org/1999/xhtml");
+		print_attr(h, "xml:lang", "en");
+		print_attr(h, "lang", "en");
+	}
+
+	/* Accommodate for XML "well-formed" singleton escaping. */
+
+	if (HTML_AUTOCLOSE & htmltags[tag].flags)
+		switch (h->type) {
+		case (HTML_XHTML_1_0_STRICT):
+			putchar('/');
+			break;
+		default:
+			break;
+		}
+
+	putchar('>');
+
+	h->flags |= HTML_NOSPACE;
+
+	if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
+		putchar('\n');
+
+	return(t);
+}
+
+
+/* Print the closing tag for "tag"; HTML_CLRLINE tags end the line. */
+static void
+print_ctag(struct html *h, enum htmltag tag)
+{
+
+	printf("</%s>", htmltags[tag].name);
+	if (HTML_CLRLINE & htmltags[tag].flags) {
+		h->flags |= HTML_NOSPACE;
+		putchar('\n');
+	}
+}
+
+/* Print the DOCTYPE line (preceded by an XML prologue unless HTML 4.01). */
+void
+print_gen_decls(struct html *h)
+{
+	const char	*doctype;
+	const char	*dtd;
+	const char	*name;
+
+	switch (h->type) {
+	case (HTML_HTML_4_01_STRICT):
+		name = "HTML";
+		doctype = "-//W3C//DTD HTML 4.01//EN";
+		dtd = "http://www.w3.org/TR/html4/strict.dtd";
+		break;
+	default:
+		puts("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+		name = "html";
+		doctype = "-//W3C//DTD XHTML 1.0 Strict//EN";
+		dtd = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+		break;
+	}
+
+	printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", 
+			name, doctype, dtd);
+}
+
+/*
+ * Print one word of text: the separating space (if not suppressed),
+ * the current meta-font scope, and the encoded word itself.
+ */
+void
+print_text(struct html *h, const char *word)
+{
+
+	if ( ! (HTML_NOSPACE & h->flags)) {
+		/* Manage keeps! */
+		if ( ! (HTML_KEEP & h->flags)) {
+			if (HTML_PREKEEP & h->flags)
+				h->flags |= HTML_KEEP;
+			putchar(' ');
+		} else
+			/* Inside a keep: non-breaking space. */
+			printf("&#160;");
+	}
+
+	assert(NULL == h->metaf);
+	if (HTMLFONT_NONE != h->metac)
+		h->metaf = HTMLFONT_BOLD == h->metac ?
+			print_otag(h, TAG_B, 0, NULL) :
+			print_otag(h, TAG_I, 0, NULL);
+
+	assert(word);
+	if ( ! print_encode(h, word, 0)) {
+		if ( ! (h->flags & HTML_NONOSPACE))
+			h->flags &= ~HTML_NOSPACE;
+	} else
+		h->flags |= HTML_NOSPACE;
+
+	if (h->metaf) {
+		print_tagq(h, h->metaf);
+		h->metaf = NULL;
+	}
+
+	h->flags &= ~HTML_IGNDELIM;
+}
+
+
+/*
+ * Close and free open scopes from the top of the stack, up to and
+ * including "until" (or all of them if "until" is NULL).
+ */
+void
+print_tagq(struct html *h, const struct tag *until)
+{
+	struct tag	*tag;
+	int		 last;
+
+	while ((tag = h->tags.head) != NULL) {
+		/* 
+		 * Remember to close out and nullify the current
+		 * meta-font and table, if applicable.
+		 */
+		if (tag == h->metaf)
+			h->metaf = NULL;
+		if (tag == h->tblt)
+			h->tblt = NULL;
+		print_ctag(h, tag->tag);
+		h->tags.head = tag->next;
+		/* Compare before free(): the pointer is indeterminate after. */
+		last = (until && tag == until);
+		free(tag);
+		if (last)
+			return;
+	}
+}
+
+
+/*
+ * Close and free open scopes from the top of the stack, stopping
+ * before "suntil", which stays open.
+ */
+void
+print_stagq(struct html *h, const struct tag *suntil)
+{
+	struct tag	*tag;
+
+	while ((tag = h->tags.head) != NULL) {
+		if (suntil && tag == suntil)
+			return;
+		/* 
+		 * Remember to close out and nullify the current
+		 * meta-font and table, if applicable.
+		 */
+		if (tag == h->metaf)
+			h->metaf = NULL;
+		if (tag == h->tblt)
+			h->tblt = NULL;
+		print_ctag(h, tag->tag);
+		h->tags.head = tag->next;
+		free(tag);
+	}
+}
+
+/* Reset the scratch buffer to the empty string. */
+void
+bufinit(struct html *h)
+{
+
+	h->buf[0] = '\0';
+	h->buflen = 0;
+}
+
+/* Append "key:val;" to the scratch buffer. */
+void
+bufcat_style(struct html *h, const char *key, const char *val)
+{
+
+	bufcat(h, key);
+	bufcat(h, ":");
+	bufcat(h, val);
+	bufcat(h, ";");
+}
+
+/* Append "p" to the scratch buffer; overflowing it is a program error. */
+void
+bufcat(struct html *h, const char *p)
+{
+
+	h->buflen = strlcat(h->buf, p, BUFSIZ);
+	assert(h->buflen < BUFSIZ);
+	/* With asserts compiled out, keep buflen at what is stored. */
+	if (h->buflen >= BUFSIZ)
+		h->buflen = BUFSIZ - 1;
+}
+
+void
+bufcat_fmt(struct html *h, const char *fmt, ...)
+{
+	va_list		 ap;
+
+	va_start(ap, fmt);
+	(void)vsnprintf(h->buf + (int)h->buflen, 
+			BUFSIZ - h->buflen - 1, fmt, ap);
+	va_end(ap);
+	h->buflen = strlen(h->buf);
+}
+
+/* Append at most "sz" bytes of "p" to the scratch buffer. */
+static void
+bufncat(struct html *h, const char *p, size_t sz)
+{
+
+	assert(h->buflen + sz + 1 < BUFSIZ);
+	strncat(h->buf, p, sz);
+	h->buflen += sz;
+}
+
+/*
+ * Fill the scratch buffer from h->base_includes, replacing each
+ * "%I" with "name".  Other "%x" pairs are copied through verbatim.
+ */
+void
+buffmt_includes(struct html *h, const char *name)
+{
+	const char	*p, *pp;
+
+	pp = h->base_includes;
+
+	bufinit(h);
+	while (NULL != (p = strchr(pp, '%'))) {
+		bufncat(h, pp, (size_t)(p - pp));
+		switch (*(p + 1)) {
+		case('I'):
+			bufcat(h, name);
+			break;
+		case('\0'):
+			/*
+			 * Lone '%' at the end of the template: copy it
+			 * and stop on the terminator instead of
+			 * stepping past it.
+			 */
+			bufncat(h, p, 1);
+			pp = p + 1;
+			continue;
+		default:
+			bufncat(h, p, 2);
+			break;
+		}
+		pp = p + 2;
+	}
+	if (pp)
+		bufcat(h, pp);
+}
+
+/*
+ * Fill the scratch buffer from h->base_man, replacing "%N" with
+ * "name" and "%S" with "sec" ("1" if sec is NULL).  Other "%x"
+ * pairs are copied through verbatim.
+ */
+void
+buffmt_man(struct html *h, 
+		const char *name, const char *sec)
+{
+	const char	*p, *pp;
+
+	pp = h->base_man;
+
+	bufinit(h);
+	while (NULL != (p = strchr(pp, '%'))) {
+		bufncat(h, pp, (size_t)(p - pp));
+		switch (*(p + 1)) {
+		case('S'):
+			bufcat(h, sec ? sec : "1");
+			break;
+		case('N'):
+			/* "name" is data, never a format string. */
+			bufcat_fmt(h, "%s", name);
+			break;
+		case('\0'):
+			/*
+			 * Lone '%' at the end of the template: copy it
+			 * and stop on the terminator instead of
+			 * stepping past it.
+			 */
+			bufncat(h, p, 1);
+			pp = p + 1;
+			continue;
+		default:
+			bufncat(h, p, 2);
+			break;
+		}
+		pp = p + 2;
+	}
+	if (pp)
+		bufcat(h, pp);
+}
+
+/* Append "p: <value><unit>;" for the scaling unit "su". */
+void
+bufcat_su(struct html *h, const char *p, const struct roffsu *su)
+{
+	double		 v;
+
+	v = su->scale;
+	if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
+		v = 1.0;
+
+	bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
+}
+
+/* Append "src" as two hex digits per byte, for use in an id. */
+void
+bufcat_id(struct html *h, const char *src)
+{
+
+	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */
+
+	/* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
+	while ('\0' != *src)
+		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src++);
+}
diff --git a/usr/src/cmd/mandoc/html.h b/usr/src/cmd/mandoc/html.h
new file mode 100644
index 0000000000..60960702f1
--- /dev/null
+++ b/usr/src/cmd/mandoc/html.h
@@ -0,0 +1,164 @@
+/*	$Id: html.h,v 1.47 2011/10/05 21:35:17 kristaps Exp $ */
+/*
+ * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */ +#ifndef HTML_H +#define HTML_H + +__BEGIN_DECLS + +enum htmltag { + TAG_HTML, + TAG_HEAD, + TAG_BODY, + TAG_META, + TAG_TITLE, + TAG_DIV, + TAG_H1, + TAG_H2, + TAG_SPAN, + TAG_LINK, + TAG_BR, + TAG_A, + TAG_TABLE, + TAG_TBODY, + TAG_COL, + TAG_TR, + TAG_TD, + TAG_LI, + TAG_UL, + TAG_OL, + TAG_DL, + TAG_DT, + TAG_DD, + TAG_BLOCKQUOTE, + TAG_P, + TAG_PRE, + TAG_B, + TAG_I, + TAG_CODE, + TAG_SMALL, + TAG_MAX +}; + +enum htmlattr { + ATTR_HTTPEQUIV, + ATTR_CONTENT, + ATTR_NAME, + ATTR_REL, + ATTR_HREF, + ATTR_TYPE, + ATTR_MEDIA, + ATTR_CLASS, + ATTR_STYLE, + ATTR_WIDTH, + ATTR_ID, + ATTR_SUMMARY, + ATTR_ALIGN, + ATTR_COLSPAN, + ATTR_MAX +}; + +enum htmlfont { + HTMLFONT_NONE = 0, + HTMLFONT_BOLD, + HTMLFONT_ITALIC, + HTMLFONT_MAX +}; + +struct tag { + struct tag *next; + enum htmltag tag; +}; + +struct tagq { + struct tag *head; +}; + +struct htmlpair { + enum htmlattr key; + const char *val; +}; + +#define PAIR_INIT(p, t, v) \ + do { \ + (p)->key = (t); \ + (p)->val = (v); \ + } while (/* CONSTCOND */ 0) + +#define PAIR_ID_INIT(p, v) PAIR_INIT(p, ATTR_ID, v) +#define PAIR_CLASS_INIT(p, v) PAIR_INIT(p, ATTR_CLASS, v) +#define PAIR_HREF_INIT(p, v) PAIR_INIT(p, ATTR_HREF, v) +#define PAIR_STYLE_INIT(p, h) PAIR_INIT(p, ATTR_STYLE, (h)->buf) +#define PAIR_SUMMARY_INIT(p, v) PAIR_INIT(p, ATTR_SUMMARY, v) + +enum htmltype { + HTML_HTML_4_01_STRICT, + HTML_XHTML_1_0_STRICT +}; + +struct html { + int flags; +#define HTML_NOSPACE (1 << 0) /* suppress next space */ +#define HTML_IGNDELIM (1 << 1) +#define HTML_KEEP (1 << 2) +#define HTML_PREKEEP (1 << 3) +#define HTML_NONOSPACE (1 << 4) /* never add spaces */ +#define HTML_LITERAL (1 << 5) /* literal (e.g., <PRE>) context */ + struct tagq tags; /* stack of open tags */ + struct rofftbl tbl; /* current table */ + struct tag *tblt; /* current open table scope */ + struct mchars *symtab; /* character-escapes */ + char *base_man; /* base for manpage href */ + char *base_includes; /* base for include href */ + char *style; /* 
style-sheet URI */ + char buf[BUFSIZ]; /* see bufcat and friends */ + size_t buflen; + struct tag *metaf; /* current open font scope */ + enum htmlfont metal; /* last used font */ + enum htmlfont metac; /* current font mode */ + enum htmltype type; /* output media type */ + int oflags; /* output options */ +#define HTML_FRAGMENT (1 << 0) /* don't emit HTML/HEAD/BODY */ +}; + +void print_gen_decls(struct html *); +void print_gen_head(struct html *); +struct tag *print_otag(struct html *, enum htmltag, + int, const struct htmlpair *); +void print_tagq(struct html *, const struct tag *); +void print_stagq(struct html *, const struct tag *); +void print_text(struct html *, const char *); +void print_tblclose(struct html *); +void print_tbl(struct html *, const struct tbl_span *); +void print_eqn(struct html *, const struct eqn *); + +void bufcat_fmt(struct html *, const char *, ...); +void bufcat(struct html *, const char *); +void bufcat_id(struct html *, const char *); +void bufcat_style(struct html *, + const char *, const char *); +void bufcat_su(struct html *, const char *, + const struct roffsu *); +void bufinit(struct html *); +void buffmt_man(struct html *, + const char *, const char *); +void buffmt_includes(struct html *, const char *); + +int html_strlen(const char *); + +__END_DECLS + +#endif /*!HTML_H*/ diff --git a/usr/src/cmd/mandoc/lib.c b/usr/src/cmd/mandoc/lib.c new file mode 100644 index 0000000000..7a18a5dd4f --- /dev/null +++ b/usr/src/cmd/mandoc/lib.c @@ -0,0 +1,39 @@ +/* $Id: lib.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2lib(const char *p) +{ + +#include "lib.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/lib.in b/usr/src/cmd/mandoc/lib.in new file mode 100644 index 0000000000..1e4567714b --- /dev/null +++ b/usr/src/cmd/mandoc/lib.in @@ -0,0 +1,18 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+ */ + +/* + * TBD + */ diff --git a/usr/src/cmd/mandoc/libman.h b/usr/src/cmd/mandoc/libman.h new file mode 100644 index 0000000000..4bc5128204 --- /dev/null +++ b/usr/src/cmd/mandoc/libman.h @@ -0,0 +1,85 @@ +/* $Id: libman.h,v 1.55 2011/11/07 01:24:40 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LIBMAN_H +#define LIBMAN_H + +enum man_next { + MAN_NEXT_SIBLING = 0, + MAN_NEXT_CHILD +}; + +struct man { + struct mparse *parse; /* parse pointer */ + int flags; /* parse flags */ +#define MAN_HALT (1 << 0) /* badness happened: die */ +#define MAN_ELINE (1 << 1) /* Next-line element scope. */ +#define MAN_BLINE (1 << 2) /* Next-line block scope. */ +#define MAN_ILINE (1 << 3) /* Ignored in next-line scope. */ +#define MAN_LITERAL (1 << 4) /* Literal input. 
*/ +#define MAN_BPLINE (1 << 5) +#define MAN_NEWLINE (1 << 6) /* first macro/text in a line */ + enum man_next next; /* where to put the next node */ + struct man_node *last; /* the last parsed node */ + struct man_node *first; /* the first parsed node */ + struct man_meta meta; /* document meta-data */ + struct roff *roff; +}; + +#define MACRO_PROT_ARGS struct man *m, \ + enum mant tok, \ + int line, \ + int ppos, \ + int *pos, \ + char *buf + +struct man_macro { + int (*fp)(MACRO_PROT_ARGS); + int flags; +#define MAN_SCOPED (1 << 0) +#define MAN_EXPLICIT (1 << 1) /* See blk_imp(). */ +#define MAN_FSCOPED (1 << 2) /* See blk_imp(). */ +#define MAN_NSCOPED (1 << 3) /* See in_line_eoln(). */ +#define MAN_NOCLOSE (1 << 4) /* See blk_exp(). */ +#define MAN_BSCOPE (1 << 5) /* Break BLINE scope. */ +}; + +extern const struct man_macro *const man_macros; + +__BEGIN_DECLS + +#define man_pmsg(m, l, p, t) \ + mandoc_msg((t), (m)->parse, (l), (p), NULL) +#define man_nmsg(m, n, t) \ + mandoc_msg((t), (m)->parse, (n)->line, (n)->pos, NULL) +int man_word_alloc(struct man *, int, int, const char *); +int man_block_alloc(struct man *, int, int, enum mant); +int man_head_alloc(struct man *, int, int, enum mant); +int man_tail_alloc(struct man *, int, int, enum mant); +int man_body_alloc(struct man *, int, int, enum mant); +int man_elem_alloc(struct man *, int, int, enum mant); +void man_node_delete(struct man *, struct man_node *); +void man_hash_init(void); +enum mant man_hash_find(const char *); +int man_macroend(struct man *); +int man_valid_post(struct man *); +int man_valid_pre(struct man *, struct man_node *); +int man_unscope(struct man *, + const struct man_node *, enum mandocerr); + +__END_DECLS + +#endif /*!LIBMAN_H*/ diff --git a/usr/src/cmd/mandoc/libmandoc.h b/usr/src/cmd/mandoc/libmandoc.h new file mode 100644 index 0000000000..de422884a2 --- /dev/null +++ b/usr/src/cmd/mandoc/libmandoc.h @@ -0,0 +1,92 @@ +/* $Id: libmandoc.h,v 1.29 2011/12/02 01:37:14 schwarze Exp $ 
*/ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef LIBMANDOC_H +#define LIBMANDOC_H + +enum rofferr { + ROFF_CONT, /* continue processing line */ + ROFF_RERUN, /* re-run roff interpreter with offset */ + ROFF_APPEND, /* re-run main parser, appending next line */ + ROFF_REPARSE, /* re-run main parser on the result */ + ROFF_SO, /* include another file */ + ROFF_IGN, /* ignore current line */ + ROFF_TBL, /* a table row was successfully parsed */ + ROFF_EQN, /* an equation was successfully parsed */ + ROFF_ERR /* badness: puke and stop */ +}; + +enum regs { + REG_nS = 0, /* nS register */ + REG__MAX +}; + +__BEGIN_DECLS + +struct roff; +struct mdoc; +struct man; + +void mandoc_msg(enum mandocerr, struct mparse *, + int, int, const char *); +void mandoc_vmsg(enum mandocerr, struct mparse *, + int, int, const char *, ...); +char *mandoc_getarg(struct mparse *, char **, int, int *); +char *mandoc_normdate(struct mparse *, char *, int, int); +int mandoc_eos(const char *, size_t, int); +int mandoc_getcontrol(const char *, int *); +int mandoc_strntoi(const char *, size_t, int); +const char *mandoc_a2msec(const char*); + +void mdoc_free(struct mdoc *); +struct mdoc *mdoc_alloc(struct roff *, struct mparse 
*); +void mdoc_reset(struct mdoc *); +int mdoc_parseln(struct mdoc *, int, char *, int); +int mdoc_endparse(struct mdoc *); +int mdoc_addspan(struct mdoc *, const struct tbl_span *); +int mdoc_addeqn(struct mdoc *, const struct eqn *); + +void man_free(struct man *); +struct man *man_alloc(struct roff *, struct mparse *); +void man_reset(struct man *); +int man_parseln(struct man *, int, char *, int); +int man_endparse(struct man *); +int man_addspan(struct man *, const struct tbl_span *); +int man_addeqn(struct man *, const struct eqn *); + +void roff_free(struct roff *); +struct roff *roff_alloc(struct mparse *); +void roff_reset(struct roff *); +enum rofferr roff_parseln(struct roff *, int, + char **, size_t *, int, int *); +void roff_endparse(struct roff *); +int roff_regisset(const struct roff *, enum regs); +unsigned int roff_regget(const struct roff *, enum regs); +void roff_regunset(struct roff *, enum regs); +char *roff_strdup(const struct roff *, const char *); +#if 0 +char roff_eqndelim(const struct roff *); +void roff_openeqn(struct roff *, const char *, + int, int, const char *); +int roff_closeeqn(struct roff *); +#endif + +const struct tbl_span *roff_span(const struct roff *); +const struct eqn *roff_eqn(const struct roff *); + +__END_DECLS + +#endif /*!LIBMANDOC_H*/ diff --git a/usr/src/cmd/mandoc/libmdoc.h b/usr/src/cmd/mandoc/libmdoc.h new file mode 100644 index 0000000000..af1729268a --- /dev/null +++ b/usr/src/cmd/mandoc/libmdoc.h @@ -0,0 +1,141 @@ +/* $Id: libmdoc.h,v 1.78 2011/12/02 01:37:14 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef LIBMDOC_H
+#define LIBMDOC_H
+
+/* Where the next node is placed; see the "next" member of struct mdoc. */
+enum	mdoc_next {
+	MDOC_NEXT_SIBLING = 0,
+	MDOC_NEXT_CHILD
+};
+
+/* State of one mdoc parse. */
+struct	mdoc {
+	struct mparse	 *parse; /* parse pointer */
+	int		  flags; /* parse flags */
+#define	MDOC_HALT	 (1 << 0) /* error in parse: halt */
+#define	MDOC_LITERAL	 (1 << 1) /* in a literal scope */
+#define	MDOC_PBODY	 (1 << 2) /* in the document body */
+#define	MDOC_NEWLINE	 (1 << 3) /* first macro/text in a line */
+#define	MDOC_PHRASELIT	 (1 << 4) /* literal within a partial phrase */
+#define	MDOC_PPHRASE	 (1 << 5) /* within a partial phrase */
+#define	MDOC_FREECOL	 (1 << 6) /* `It' invocation should close */
+#define	MDOC_SYNOPSIS	 (1 << 7) /* SYNOPSIS-style formatting */
+	enum mdoc_next	  next; /* where to put the next node */
+	struct mdoc_node *last; /* the last node parsed */
+	struct mdoc_node *first; /* the first node parsed */
+	struct mdoc_meta  meta; /* document meta-data */
+	enum mdoc_sec	  lastnamed;
+	enum mdoc_sec	  lastsec;
+	struct roff	 *roff;
+};
+
+/* Parameter list shared by every macro handler (struct mdoc_macro.fp). */
+#define	MACRO_PROT_ARGS	struct mdoc *m, \
+			enum mdoct tok, \
+			int line, \
+			int ppos, \
+			int *pos, \
+			char *buf
+
+/* One macro: its handler function and its MDOC_* property flags. */
+struct	mdoc_macro {
+	int		(*fp)(MACRO_PROT_ARGS);
+	int		  flags;
+#define	MDOC_CALLABLE	 (1 << 0)
+#define	MDOC_PARSED	 (1 << 1)
+#define	MDOC_EXPLICIT	 (1 << 2)
+#define	MDOC_PROLOGUE	 (1 << 3)
+#define	MDOC_IGNDELIM	 (1 << 4)
+	/* Reserved words in arguments treated as text. 
*/ +}; + +enum margserr { + ARGS_ERROR, + ARGS_EOLN, /* end-of-line */ + ARGS_WORD, /* normal word */ + ARGS_PUNCT, /* series of punctuation */ + ARGS_QWORD, /* quoted word */ + ARGS_PHRASE, /* Ta'd phrase (-column) */ + ARGS_PPHRASE, /* tabbed phrase (-column) */ + ARGS_PEND /* last phrase (-column) */ +}; + +enum margverr { + ARGV_ERROR, + ARGV_EOLN, /* end of line */ + ARGV_ARG, /* valid argument */ + ARGV_WORD /* normal word (or bad argument---same thing) */ +}; + +/* + * A punctuation delimiter is opening, closing, or "middle mark" + * punctuation. These govern spacing. + * Opening punctuation (e.g., the opening parenthesis) suppresses the + * following space; closing punctuation (e.g., the closing parenthesis) + * suppresses the leading space; middle punctuation (e.g., the vertical + * bar) can do either. The middle punctuation delimiter bends the rules + * depending on usage. + */ +enum mdelim { + DELIM_NONE = 0, + DELIM_OPEN, + DELIM_MIDDLE, + DELIM_CLOSE, + DELIM_MAX +}; + +extern const struct mdoc_macro *const mdoc_macros; + +__BEGIN_DECLS + +#define mdoc_pmsg(m, l, p, t) \ + mandoc_msg((t), (m)->parse, (l), (p), NULL) +#define mdoc_nmsg(m, n, t) \ + mandoc_msg((t), (m)->parse, (n)->line, (n)->pos, NULL) +int mdoc_macro(MACRO_PROT_ARGS); +int mdoc_word_alloc(struct mdoc *, + int, int, const char *); +int mdoc_elem_alloc(struct mdoc *, int, int, + enum mdoct, struct mdoc_arg *); +int mdoc_block_alloc(struct mdoc *, int, int, + enum mdoct, struct mdoc_arg *); +int mdoc_head_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_tail_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_body_alloc(struct mdoc *, int, int, enum mdoct); +int mdoc_endbody_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_node *body, + enum mdoc_endbody end); +void mdoc_node_delete(struct mdoc *, struct mdoc_node *); +void mdoc_hash_init(void); +enum mdoct mdoc_hash_find(const char *); +const char *mdoc_a2att(const char *); +const char *mdoc_a2lib(const char 
*); +const char *mdoc_a2st(const char *); +const char *mdoc_a2arch(const char *); +const char *mdoc_a2vol(const char *); +int mdoc_valid_pre(struct mdoc *, struct mdoc_node *); +int mdoc_valid_post(struct mdoc *); +enum margverr mdoc_argv(struct mdoc *, int, enum mdoct, + struct mdoc_arg **, int *, char *); +void mdoc_argv_free(struct mdoc_arg *); +enum margserr mdoc_args(struct mdoc *, int, + int *, char *, enum mdoct, char **); +enum margserr mdoc_zargs(struct mdoc *, int, + int *, char *, char **); +int mdoc_macroend(struct mdoc *); +enum mdelim mdoc_isdelim(const char *); + +__END_DECLS + +#endif /*!LIBMDOC_H*/ diff --git a/usr/src/cmd/mandoc/libroff.h b/usr/src/cmd/mandoc/libroff.h new file mode 100644 index 0000000000..0bdd5a3604 --- /dev/null +++ b/usr/src/cmd/mandoc/libroff.h @@ -0,0 +1,84 @@ +/* $Id: libroff.h,v 1.27 2011/07/25 15:37:00 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifndef LIBROFF_H +#define LIBROFF_H + +__BEGIN_DECLS + +enum tbl_part { + TBL_PART_OPTS, /* in options (first line) */ + TBL_PART_LAYOUT, /* describing layout */ + TBL_PART_DATA, /* creating data rows */ + TBL_PART_CDATA /* continue previous row */ +}; + +struct tbl_node { + struct mparse *parse; /* parse point */ + int pos; /* invocation column */ + int line; /* invocation line */ + enum tbl_part part; + struct tbl opts; + struct tbl_row *first_row; + struct tbl_row *last_row; + struct tbl_span *first_span; + struct tbl_span *current_span; + struct tbl_span *last_span; + struct tbl_head *first_head; + struct tbl_head *last_head; + struct tbl_node *next; +}; + +struct eqn_node { + struct eqn_def *defs; + size_t defsz; + char *data; + size_t rew; + size_t cur; + size_t sz; + int gsize; + struct eqn eqn; + struct mparse *parse; + struct eqn_node *next; +}; + +struct eqn_def { + char *key; + size_t keysz; + char *val; + size_t valsz; +}; + +struct tbl_node *tbl_alloc(int, int, struct mparse *); +void tbl_restart(int, int, struct tbl_node *); +void tbl_free(struct tbl_node *); +void tbl_reset(struct tbl_node *); +enum rofferr tbl_read(struct tbl_node *, int, const char *, int); +int tbl_option(struct tbl_node *, int, const char *); +int tbl_layout(struct tbl_node *, int, const char *); +int tbl_data(struct tbl_node *, int, const char *); +int tbl_cdata(struct tbl_node *, int, const char *); +const struct tbl_span *tbl_span(struct tbl_node *); +void tbl_end(struct tbl_node **); +struct eqn_node *eqn_alloc(const char *, int, int, struct mparse *); +enum rofferr eqn_end(struct eqn_node **); +void eqn_free(struct eqn_node *); +enum rofferr eqn_read(struct eqn_node **, int, + const char *, int, int *); + +__END_DECLS + +#endif /*LIBROFF_H*/ diff --git a/usr/src/cmd/mandoc/main.c b/usr/src/cmd/mandoc/main.c new file mode 100644 index 0000000000..fec83fba51 --- /dev/null +++ b/usr/src/cmd/mandoc/main.c @@ -0,0 +1,401 @@ +/* $Id: main.c,v 1.165 2011/10/06 22:29:12 
kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "main.h" +#include "mdoc.h" +#include "man.h" + +#if !defined(__GNUC__) || (__GNUC__ < 2) +# if !defined(lint) +# define __attribute__(x) +# endif +#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */ + +typedef void (*out_mdoc)(void *, const struct mdoc *); +typedef void (*out_man)(void *, const struct man *); +typedef void (*out_free)(void *); + +enum outt { + OUTT_ASCII = 0, /* -Tascii */ + OUTT_LOCALE, /* -Tlocale */ + OUTT_UTF8, /* -Tutf8 */ + OUTT_TREE, /* -Ttree */ + OUTT_MAN, /* -Tman */ + OUTT_HTML, /* -Thtml */ + OUTT_XHTML, /* -Txhtml */ + OUTT_LINT, /* -Tlint */ + OUTT_PS, /* -Tps */ + OUTT_PDF /* -Tpdf */ +}; + +struct curparse { + struct mparse *mp; + enum mandoclevel wlevel; /* ignore messages below this */ + int wstop; /* stop after a file with a warning */ + enum outt outtype; /* which output to use */ + out_mdoc outmdoc; /* mdoc output ptr */ + 
out_man outman; /* man output ptr */ + out_free outfree; /* free output ptr */ + void *outdata; /* data for output */ + char outopts[BUFSIZ]; /* buf of output opts */ +}; + +static int moptions(enum mparset *, char *); +static void mmsg(enum mandocerr, enum mandoclevel, + const char *, int, int, const char *); +static void parse(struct curparse *, int, + const char *, enum mandoclevel *); +static int toptions(struct curparse *, char *); +static void usage(void) __attribute__((noreturn)); +static void version(void) __attribute__((noreturn)); +static int woptions(struct curparse *, char *); + +static const char *progname; + +int +main(int argc, char *argv[]) +{ + int c; + struct curparse curp; + enum mparset type; + enum mandoclevel rc; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + memset(&curp, 0, sizeof(struct curparse)); + + type = MPARSE_AUTO; + curp.outtype = OUTT_ASCII; + curp.wlevel = MANDOCLEVEL_FATAL; + + /* LINTED */ + while (-1 != (c = getopt(argc, argv, "m:O:T:VW:"))) + switch (c) { + case ('m'): + if ( ! moptions(&type, optarg)) + return((int)MANDOCLEVEL_BADARG); + break; + case ('O'): + (void)strlcat(curp.outopts, optarg, BUFSIZ); + (void)strlcat(curp.outopts, ",", BUFSIZ); + break; + case ('T'): + if ( ! toptions(&curp, optarg)) + return((int)MANDOCLEVEL_BADARG); + break; + case ('W'): + if ( ! woptions(&curp, optarg)) + return((int)MANDOCLEVEL_BADARG); + break; + case ('V'): + version(); + /* NOTREACHED */ + default: + usage(); + /* NOTREACHED */ + } + + curp.mp = mparse_alloc(type, curp.wlevel, mmsg, &curp); + + /* + * Conditionally start up the lookaside buffer before parsing. 
+	 */
+	if (OUTT_MAN == curp.outtype)
+		mparse_keep(curp.mp);
+
+	argc -= optind;
+	argv += optind;
+
+	rc = MANDOCLEVEL_OK;
+
+	if (NULL == *argv)
+		parse(&curp, STDIN_FILENO, "<stdin>", &rc);
+
+	while (*argv) {
+		parse(&curp, -1, *argv, &rc);
+		if (MANDOCLEVEL_OK != rc && curp.wstop)
+			break;
+		++argv;
+	}
+
+	if (curp.outfree)
+		(*curp.outfree)(curp.outdata);
+	if (curp.mp)
+		mparse_free(curp.mp);
+
+	return((int)rc);
+}
+
+/* Print the program name and version, then exit successfully. */
+static void
+version(void)
+{
+
+	printf("%s %s\n", progname, VERSION);
+	exit((int)MANDOCLEVEL_OK);
+}
+
+/*
+ * Print the synopsis to stderr and exit with MANDOCLEVEL_BADARG.
+ * The options listed must match the getopt() string in main().
+ */
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: %s "
+			"[-V] "
+			"[-mformat] "
+			"[-Ooption] "
+			"[-Toutput] "
+			"[-Wlevel] "
+			"[file...]\n", 
+			progname);
+
+	exit((int)MANDOCLEVEL_BADARG);
+}
+
+static void
+parse(struct curparse *curp, int fd, 
+		const char *file, enum mandoclevel *level)
+{
+	enum mandoclevel  rc;
+	struct mdoc	 *mdoc;
+	struct man	 *man;
+
+	/* Begin by parsing the file itself. */
+
+	assert(file);
+	assert(fd >= -1);
+
+	rc = mparse_readfd(curp->mp, fd, file);
+
+	/* Stop immediately if the parse has failed. */
+
+	if (MANDOCLEVEL_FATAL <= rc)
+		goto cleanup;
+
+	/*
+	 * With -Wstop and warnings or errors of at least the requested
+	 * level, do not produce output.
+	 */
+
+	if (MANDOCLEVEL_OK != rc && curp->wstop)
+		goto cleanup;
+
+	/* If unset, allocate output dev now (if applicable). */
+
+	if ( ! 
(curp->outman && curp->outmdoc)) { + switch (curp->outtype) { + case (OUTT_XHTML): + curp->outdata = xhtml_alloc(curp->outopts); + curp->outfree = html_free; + break; + case (OUTT_HTML): + curp->outdata = html_alloc(curp->outopts); + curp->outfree = html_free; + break; + case (OUTT_UTF8): + curp->outdata = utf8_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_LOCALE): + curp->outdata = locale_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_ASCII): + curp->outdata = ascii_alloc(curp->outopts); + curp->outfree = ascii_free; + break; + case (OUTT_PDF): + curp->outdata = pdf_alloc(curp->outopts); + curp->outfree = pspdf_free; + break; + case (OUTT_PS): + curp->outdata = ps_alloc(curp->outopts); + curp->outfree = pspdf_free; + break; + default: + break; + } + + switch (curp->outtype) { + case (OUTT_HTML): + /* FALLTHROUGH */ + case (OUTT_XHTML): + curp->outman = html_man; + curp->outmdoc = html_mdoc; + break; + case (OUTT_TREE): + curp->outman = tree_man; + curp->outmdoc = tree_mdoc; + break; + case (OUTT_MAN): + curp->outmdoc = man_mdoc; + curp->outman = man_man; + break; + case (OUTT_PDF): + /* FALLTHROUGH */ + case (OUTT_ASCII): + /* FALLTHROUGH */ + case (OUTT_UTF8): + /* FALLTHROUGH */ + case (OUTT_LOCALE): + /* FALLTHROUGH */ + case (OUTT_PS): + curp->outman = terminal_man; + curp->outmdoc = terminal_mdoc; + break; + default: + break; + } + } + + mparse_result(curp->mp, &mdoc, &man); + + /* Execute the out device, if it exists. 
*/ + + if (man && curp->outman) + (*curp->outman)(curp->outdata, man); + if (mdoc && curp->outmdoc) + (*curp->outmdoc)(curp->outdata, mdoc); + + cleanup: + + mparse_reset(curp->mp); + + if (*level < rc) + *level = rc; +} + +static int +moptions(enum mparset *tflags, char *arg) +{ + + if (0 == strcmp(arg, "doc")) + *tflags = MPARSE_MDOC; + else if (0 == strcmp(arg, "andoc")) + *tflags = MPARSE_AUTO; + else if (0 == strcmp(arg, "an")) + *tflags = MPARSE_MAN; + else { + fprintf(stderr, "%s: Bad argument\n", arg); + return(0); + } + + return(1); +} + +static int +toptions(struct curparse *curp, char *arg) +{ + + if (0 == strcmp(arg, "ascii")) + curp->outtype = OUTT_ASCII; + else if (0 == strcmp(arg, "lint")) { + curp->outtype = OUTT_LINT; + curp->wlevel = MANDOCLEVEL_WARNING; + } else if (0 == strcmp(arg, "tree")) + curp->outtype = OUTT_TREE; + else if (0 == strcmp(arg, "man")) + curp->outtype = OUTT_MAN; + else if (0 == strcmp(arg, "html")) + curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "utf8")) + curp->outtype = OUTT_UTF8; + else if (0 == strcmp(arg, "locale")) + curp->outtype = OUTT_LOCALE; + else if (0 == strcmp(arg, "xhtml")) + curp->outtype = OUTT_XHTML; + else if (0 == strcmp(arg, "ps")) + curp->outtype = OUTT_PS; + else if (0 == strcmp(arg, "pdf")) + curp->outtype = OUTT_PDF; + else { + fprintf(stderr, "%s: Bad argument\n", arg); + return(0); + } + + return(1); +} + +static int +woptions(struct curparse *curp, char *arg) +{ + char *v, *o; + const char *toks[6]; + + toks[0] = "stop"; + toks[1] = "all"; + toks[2] = "warning"; + toks[3] = "error"; + toks[4] = "fatal"; + toks[5] = NULL; + + while (*arg) { + o = arg; + switch (getsubopt(&arg, UNCONST(toks), &v)) { + case (0): + curp->wstop = 1; + break; + case (1): + /* FALLTHROUGH */ + case (2): + curp->wlevel = MANDOCLEVEL_WARNING; + break; + case (3): + curp->wlevel = MANDOCLEVEL_ERROR; + break; + case (4): + curp->wlevel = MANDOCLEVEL_FATAL; + break; + default: + fprintf(stderr, "-W%s: Bad argument\n", 
o); + return(0); + } + } + + return(1); +} + +static void +mmsg(enum mandocerr t, enum mandoclevel lvl, + const char *file, int line, int col, const char *msg) +{ + + fprintf(stderr, "%s:%d:%d: %s: %s", + file, line, col + 1, + mparse_strlevel(lvl), + mparse_strerror(t)); + + if (msg) + fprintf(stderr, ": %s", msg); + + fputc('\n', stderr); +} diff --git a/usr/src/cmd/mandoc/main.h b/usr/src/cmd/mandoc/main.h new file mode 100644 index 0000000000..79dcf489ae --- /dev/null +++ b/usr/src/cmd/mandoc/main.h @@ -0,0 +1,61 @@ +/* $Id: main.h,v 1.15 2011/10/06 22:29:12 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MAIN_H +#define MAIN_H + +__BEGIN_DECLS + +struct mdoc; +struct man; + +#define UNCONST(a) ((void *)(uintptr_t)(const void *)(a)) + + +/* + * Definitions for main.c-visible output device functions, e.g., -Thtml + * and -Tascii. Note that ascii_alloc() is named as such in + * anticipation of latin1_alloc() and so on, all of which map into the + * terminal output routines with different character settings. 
+ */ + +void *html_alloc(char *); +void *xhtml_alloc(char *); +void html_mdoc(void *, const struct mdoc *); +void html_man(void *, const struct man *); +void html_free(void *); + +void tree_mdoc(void *, const struct mdoc *); +void tree_man(void *, const struct man *); + +void man_mdoc(void *, const struct mdoc *); +void man_man(void *, const struct man *); + +void *locale_alloc(char *); +void *utf8_alloc(char *); +void *ascii_alloc(char *); +void ascii_free(void *); + +void *pdf_alloc(char *); +void *ps_alloc(char *); +void pspdf_free(void *); + +void terminal_mdoc(void *, const struct mdoc *); +void terminal_man(void *, const struct man *); + +__END_DECLS + +#endif /*!MAIN_H*/ diff --git a/usr/src/cmd/mandoc/man.c b/usr/src/cmd/mandoc/man.c new file mode 100644 index 0000000000..1bea5610e3 --- /dev/null +++ b/usr/src/cmd/mandoc/man.c @@ -0,0 +1,690 @@ +/* $Id: man.c,v 1.115 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "man.h" +#include "mandoc.h" +#include "libman.h" +#include "libmandoc.h" + +const char *const __man_macronames[MAN_MAX] = { + "br", "TH", "SH", "SS", + "TP", "LP", "PP", "P", + "IP", "HP", "SM", "SB", + "BI", "IB", "BR", "RB", + "R", "B", "I", "IR", + "RI", "na", "sp", "nf", + "fi", "RE", "RS", "DT", + "UC", "PD", "AT", "in", + "ft", "OP" + }; + +const char * const *man_macronames = __man_macronames; + +static struct man_node *man_node_alloc(struct man *, int, int, + enum man_type, enum mant); +static int man_node_append(struct man *, + struct man_node *); +static void man_node_free(struct man_node *); +static void man_node_unlink(struct man *, + struct man_node *); +static int man_ptext(struct man *, int, char *, int); +static int man_pmacro(struct man *, int, char *, int); +static void man_free1(struct man *); +static void man_alloc1(struct man *); +static int man_descope(struct man *, int, int); + + +const struct man_node * +man_node(const struct man *m) +{ + + assert( ! (MAN_HALT & m->flags)); + return(m->first); +} + + +const struct man_meta * +man_meta(const struct man *m) +{ + + assert( ! (MAN_HALT & m->flags)); + return(&m->meta); +} + + +void +man_reset(struct man *man) +{ + + man_free1(man); + man_alloc1(man); +} + + +void +man_free(struct man *man) +{ + + man_free1(man); + free(man); +} + + +struct man * +man_alloc(struct roff *roff, struct mparse *parse) +{ + struct man *p; + + p = mandoc_calloc(1, sizeof(struct man)); + + man_hash_init(); + p->parse = parse; + p->roff = roff; + + man_alloc1(p); + return(p); +} + + +int +man_endparse(struct man *m) +{ + + assert( ! 
(MAN_HALT & m->flags)); + if (man_macroend(m)) + return(1); + m->flags |= MAN_HALT; + return(0); +} + + +int +man_parseln(struct man *m, int ln, char *buf, int offs) +{ + + m->flags |= MAN_NEWLINE; + + assert( ! (MAN_HALT & m->flags)); + + return (mandoc_getcontrol(buf, &offs) ? + man_pmacro(m, ln, buf, offs) : + man_ptext(m, ln, buf, offs)); +} + + +static void +man_free1(struct man *man) +{ + + if (man->first) + man_node_delete(man, man->first); + if (man->meta.title) + free(man->meta.title); + if (man->meta.source) + free(man->meta.source); + if (man->meta.date) + free(man->meta.date); + if (man->meta.vol) + free(man->meta.vol); + if (man->meta.msec) + free(man->meta.msec); +} + + +static void +man_alloc1(struct man *m) +{ + + memset(&m->meta, 0, sizeof(struct man_meta)); + m->flags = 0; + m->last = mandoc_calloc(1, sizeof(struct man_node)); + m->first = m->last; + m->last->type = MAN_ROOT; + m->last->tok = MAN_MAX; + m->next = MAN_NEXT_CHILD; +} + + +static int +man_node_append(struct man *man, struct man_node *p) +{ + + assert(man->last); + assert(man->first); + assert(MAN_ROOT != p->type); + + switch (man->next) { + case (MAN_NEXT_SIBLING): + man->last->next = p; + p->prev = man->last; + p->parent = man->last->parent; + break; + case (MAN_NEXT_CHILD): + man->last->child = p; + p->parent = man->last; + break; + default: + abort(); + /* NOTREACHED */ + } + + assert(p->parent); + p->parent->nchild++; + + if ( ! man_valid_pre(man, p)) + return(0); + + switch (p->type) { + case (MAN_HEAD): + assert(MAN_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MAN_TAIL): + assert(MAN_BLOCK == p->parent->type); + p->parent->tail = p; + break; + case (MAN_BODY): + assert(MAN_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + + man->last = p; + + switch (p->type) { + case (MAN_TBL): + /* FALLTHROUGH */ + case (MAN_TEXT): + if ( ! 
man_valid_post(man)) + return(0); + break; + default: + break; + } + + return(1); +} + + +static struct man_node * +man_node_alloc(struct man *m, int line, int pos, + enum man_type type, enum mant tok) +{ + struct man_node *p; + + p = mandoc_calloc(1, sizeof(struct man_node)); + p->line = line; + p->pos = pos; + p->type = type; + p->tok = tok; + + if (MAN_NEWLINE & m->flags) + p->flags |= MAN_LINE; + m->flags &= ~MAN_NEWLINE; + return(p); +} + + +int +man_elem_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(m, line, pos, MAN_ELEM, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_tail_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(m, line, pos, MAN_TAIL, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_head_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(m, line, pos, MAN_HEAD, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_body_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(m, line, pos, MAN_BODY, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + + +int +man_block_alloc(struct man *m, int line, int pos, enum mant tok) +{ + struct man_node *p; + + p = man_node_alloc(m, line, pos, MAN_BLOCK, tok); + if ( ! man_node_append(m, p)) + return(0); + m->next = MAN_NEXT_CHILD; + return(1); +} + +int +man_word_alloc(struct man *m, int line, int pos, const char *word) +{ + struct man_node *n; + + n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX); + n->string = roff_strdup(m->roff, word); + + if ( ! 
man_node_append(m, n)) + return(0); + + m->next = MAN_NEXT_SIBLING; + return(1); +} + + +/* + * Free all of the resources held by a node. This does NOT unlink a + * node from its context; for that, see man_node_unlink(). + */ +static void +man_node_free(struct man_node *p) +{ + + if (p->string) + free(p->string); + free(p); +} + + +void +man_node_delete(struct man *m, struct man_node *p) +{ + + while (p->child) + man_node_delete(m, p->child); + + man_node_unlink(m, p); + man_node_free(p); +} + +int +man_addeqn(struct man *m, const struct eqn *ep) +{ + struct man_node *n; + + assert( ! (MAN_HALT & m->flags)); + + n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX); + n->eqn = ep; + + if ( ! man_node_append(m, n)) + return(0); + + m->next = MAN_NEXT_SIBLING; + return(man_descope(m, ep->ln, ep->pos)); +} + +int +man_addspan(struct man *m, const struct tbl_span *sp) +{ + struct man_node *n; + + assert( ! (MAN_HALT & m->flags)); + + n = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX); + n->span = sp; + + if ( ! man_node_append(m, n)) + return(0); + + m->next = MAN_NEXT_SIBLING; + return(man_descope(m, sp->line, 0)); +} + +static int +man_descope(struct man *m, int line, int offs) +{ + /* + * Co-ordinate what happens with having a next-line scope open: + * first close out the element scope (if applicable), then close + * out the block scope (also if applicable). + */ + + if (MAN_ELINE & m->flags) { + m->flags &= ~MAN_ELINE; + if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + } + + if ( ! (MAN_BLINE & m->flags)) + return(1); + m->flags &= ~MAN_BLINE; + + if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + return(man_body_alloc(m, line, offs, m->last->tok)); +} + +static int +man_ptext(struct man *m, int line, char *buf, int offs) +{ + int i; + + /* Literal free-form text whitespace is preserved. */ + + if (MAN_LITERAL & m->flags) { + if ( ! 
man_word_alloc(m, line, offs, buf + offs)) + return(0); + return(man_descope(m, line, offs)); + } + + /* Pump blank lines directly into the backend. */ + + for (i = offs; ' ' == buf[i]; i++) + /* Skip leading whitespace. */ ; + + if ('\0' == buf[i]) { + /* Allocate a blank entry. */ + if ( ! man_word_alloc(m, line, offs, "")) + return(0); + return(man_descope(m, line, offs)); + } + + /* + * Warn if the last un-escaped character is whitespace. Then + * strip away the remaining spaces (tabs stay!). + */ + + i = (int)strlen(buf); + assert(i); + + if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { + if (i > 1 && '\\' != buf[i - 2]) + man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE); + + for (--i; i && ' ' == buf[i]; i--) + /* Spin back to non-space. */ ; + + /* Jump ahead of escaped whitespace. */ + i += '\\' == buf[i] ? 2 : 1; + + buf[i] = '\0'; + } + + if ( ! man_word_alloc(m, line, offs, buf + offs)) + return(0); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(i); + if (mandoc_eos(buf, (size_t)i, 0)) + m->last->flags |= MAN_EOS; + + return(man_descope(m, line, offs)); +} + +static int +man_pmacro(struct man *m, int ln, char *buf, int offs) +{ + int i, ppos; + enum mant tok; + char mac[5]; + struct man_node *n; + + if ('"' == buf[offs]) { + man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); + return(1); + } else if ('\0' == buf[offs]) + return(1); + + ppos = offs; + + /* + * Copy the first word into a nil-terminated buffer. + * Stop copying when a tab, space, or eoln is encountered. + */ + + i = 0; + while (i < 4 && '\0' != buf[offs] && + ' ' != buf[offs] && '\t' != buf[offs]) + mac[i++] = buf[offs++]; + + mac[i] = '\0'; + + tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; + + if (MAN_MAX == tok) { + mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln, + ppos, "%s", buf + ppos - 1); + return(1); + } + + /* The macro is sane. 
Jump to the next word. */ + + while (buf[offs] && ' ' == buf[offs]) + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[offs] && ' ' == buf[offs - 1]) + man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); + + /* + * Remove prior ELINE macro, as it's being clobbered by a new + * macro. Note that NSCOPED macros do not close out ELINE + * macros---they don't print text---so we let those slip by. + */ + + if ( ! (MAN_NSCOPED & man_macros[tok].flags) && + m->flags & MAN_ELINE) { + n = m->last; + assert(MAN_TEXT != n->type); + + /* Remove repeated NSCOPED macros causing ELINE. */ + + if (MAN_NSCOPED & man_macros[n->tok].flags) + n = n->parent; + + mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, + n->pos, "%s breaks %s", man_macronames[tok], + man_macronames[n->tok]); + + man_node_delete(m, n); + m->flags &= ~MAN_ELINE; + } + + /* + * Remove prior BLINE macro that is being clobbered. + */ + if ((m->flags & MAN_BLINE) && + (MAN_BSCOPE & man_macros[tok].flags)) { + n = m->last; + + /* Might be a text node like 8 in + * .TP 8 + * .SH foo + */ + if (MAN_TEXT == n->type) + n = n->parent; + + /* Remove element that didn't end BLINE, if any. */ + if ( ! (MAN_BSCOPE & man_macros[n->tok].flags)) + n = n->parent; + + assert(MAN_HEAD == n->type); + n = n->parent; + assert(MAN_BLOCK == n->type); + assert(MAN_SCOPED & man_macros[n->tok].flags); + + mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line, + n->pos, "%s breaks %s", man_macronames[tok], + man_macronames[n->tok]); + + man_node_delete(m, n); + m->flags &= ~MAN_BLINE; + } + + /* + * Save the fact that we're in the next-line for a block. In + * this way, embedded roff instructions can "remember" state + * when they exit. + */ + + if (MAN_BLINE & m->flags) + m->flags |= MAN_BPLINE; + + /* Call to handler... */ + + assert(man_macros[tok].fp); + if ( ! 
(*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf)) + goto err; + + /* + * We weren't in a block-line scope when entering the + * above-parsed macro, so return. + */ + + if ( ! (MAN_BPLINE & m->flags)) { + m->flags &= ~MAN_ILINE; + return(1); + } + m->flags &= ~MAN_BPLINE; + + /* + * If we're in a block scope, then allow this macro to slip by + * without closing scope around it. + */ + + if (MAN_ILINE & m->flags) { + m->flags &= ~MAN_ILINE; + return(1); + } + + /* + * If we've opened a new next-line element scope, then return + * now, as the next line will close out the block scope. + */ + + if (MAN_ELINE & m->flags) + return(1); + + /* Close out the block scope opened in the prior line. */ + + assert(MAN_BLINE & m->flags); + m->flags &= ~MAN_BLINE; + + if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX)) + return(0); + return(man_body_alloc(m, ln, ppos, m->last->tok)); + +err: /* Error out. */ + + m->flags |= MAN_HALT; + return(0); +} + +/* + * Unlink a node from its context. If "m" is provided, the last parse + * point will also be adjusted accordingly. + */ +static void +man_node_unlink(struct man *m, struct man_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent) { + n->parent->nchild--; + if (n->parent->child == n) + n->parent->child = n->prev ? n->prev : n->next; + } + + /* Adjust parse point, if applicable. */ + + if (m && m->last == n) { + /*XXX: this can occur when bailing from validation. 
*/ + /*assert(NULL == n->next);*/ + if (n->prev) { + m->last = n->prev; + m->next = MAN_NEXT_SIBLING; + } else { + m->last = n->parent; + m->next = MAN_NEXT_CHILD; + } + } + + if (m && m->first == n) + m->first = NULL; +} + +const struct mparse * +man_mparse(const struct man *m) +{ + + assert(m && m->parse); + return(m->parse); +} diff --git a/usr/src/cmd/mandoc/man.h b/usr/src/cmd/mandoc/man.h new file mode 100644 index 0000000000..4fc3934e6f --- /dev/null +++ b/usr/src/cmd/mandoc/man.h @@ -0,0 +1,113 @@ +/* $Id: man.h,v 1.60 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MAN_H +#define MAN_H + +enum mant { + MAN_br = 0, + MAN_TH, + MAN_SH, + MAN_SS, + MAN_TP, + MAN_LP, + MAN_PP, + MAN_P, + MAN_IP, + MAN_HP, + MAN_SM, + MAN_SB, + MAN_BI, + MAN_IB, + MAN_BR, + MAN_RB, + MAN_R, + MAN_B, + MAN_I, + MAN_IR, + MAN_RI, + MAN_na, + MAN_sp, + MAN_nf, + MAN_fi, + MAN_RE, + MAN_RS, + MAN_DT, + MAN_UC, + MAN_PD, + MAN_AT, + MAN_in, + MAN_ft, + MAN_OP, + MAN_MAX +}; + +enum man_type { + MAN_TEXT, + MAN_ELEM, + MAN_ROOT, + MAN_BLOCK, + MAN_HEAD, + MAN_BODY, + MAN_TAIL, + MAN_TBL, + MAN_EQN +}; + +struct man_meta { + char *msec; /* `TH' section (1, 3p, etc.) 
*/ + char *date; /* `TH' normalised date */ + char *vol; /* `TH' volume */ + char *title; /* `TH' title (e.g., FOO) */ + char *source; /* `TH' source (e.g., GNU) */ +}; + +struct man_node { + struct man_node *parent; /* parent AST node */ + struct man_node *child; /* first child AST node */ + struct man_node *next; /* sibling AST node */ + struct man_node *prev; /* prior sibling AST node */ + int nchild; /* number of children */ + int line; + int pos; + enum mant tok; /* tok or MAN_MAX if none */ + int flags; +#define MAN_VALID (1 << 0) /* has been validated */ +#define MAN_EOS (1 << 2) /* at sentence boundary */ +#define MAN_LINE (1 << 3) /* first macro/text on line */ + enum man_type type; /* AST node type */ + char *string; /* TEXT node argument */ + struct man_node *head; /* BLOCK node HEAD ptr */ + struct man_node *tail; /* BLOCK node TAIL ptr */ + struct man_node *body; /* BLOCK node BODY ptr */ + const struct tbl_span *span; /* TBL */ + const struct eqn *eqn; /* EQN */ +}; + +/* Names of macros. Index is enum mant. */ +extern const char *const *man_macronames; + +__BEGIN_DECLS + +struct man; + +const struct man_node *man_node(const struct man *); +const struct man_meta *man_meta(const struct man *); +const struct mparse *man_mparse(const struct man *); + +__END_DECLS + +#endif /*!MAN_H*/ diff --git a/usr/src/cmd/mandoc/man_hash.c b/usr/src/cmd/mandoc/man_hash.c new file mode 100644 index 0000000000..86c5c40a19 --- /dev/null +++ b/usr/src/cmd/mandoc/man_hash.c @@ -0,0 +1,107 @@ +/* $Id: man_hash.c,v 1.25 2011/07/24 18:15:14 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#include "man.h" +#include "mandoc.h" +#include "libman.h" + +#define HASH_DEPTH 6 + +#define HASH_ROW(x) do { \ + if (isupper((unsigned char)(x))) \ + (x) -= 65; \ + else \ + (x) -= 97; \ + (x) *= HASH_DEPTH; \ + } while (/* CONSTCOND */ 0) + +/* + * Lookup table is indexed first by the first letter of the macro name, + * with upper and lower case sharing one row per letter (26 rows), and + * then by bucket within the row (HASH_DEPTH buckets, filled in order). + * Buckets hold the index of the macro (the integer value of the enum + * stored as a char to save a bit of space); UCHAR_MAX marks a free one. + */ +static unsigned char table[26 * HASH_DEPTH]; + +/* + * XXX - this hash has global scope, so if intended for use as a library + * with multiple callers, it will need re-invocation protection. 
+ */ +void +man_hash_init(void) +{ + int i, j, x; + + memset(table, UCHAR_MAX, sizeof(table)); + + assert(/* LINTED */ + MAN_MAX < UCHAR_MAX); + + for (i = 0; i < (int)MAN_MAX; i++) { + x = man_macronames[i][0]; + + assert(isalpha((unsigned char)x)); + + HASH_ROW(x); + + for (j = 0; j < HASH_DEPTH; j++) + if (UCHAR_MAX == table[x + j]) { + table[x + j] = (unsigned char)i; + break; + } + + assert(j < HASH_DEPTH); + } +} + + +enum mant +man_hash_find(const char *tmp) +{ + int x, y, i; + enum mant tok; + + if ('\0' == (x = tmp[0])) + return(MAN_MAX); + if ( ! (isalpha((unsigned char)x))) + return(MAN_MAX); + + HASH_ROW(x); + + for (i = 0; i < HASH_DEPTH; i++) { + if (UCHAR_MAX == (y = table[x + i])) + return(MAN_MAX); + + tok = (enum mant)y; + if (0 == strcmp(tmp, man_macronames[tok])) + return(tok); + } + + return(MAN_MAX); +} diff --git a/usr/src/cmd/mandoc/man_html.c b/usr/src/cmd/mandoc/man_html.c new file mode 100644 index 0000000000..a76ea2d707 --- /dev/null +++ b/usr/src/cmd/mandoc/man_html.c @@ -0,0 +1,688 @@ +/* $Id: man_html.c,v 1.86 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" +#include "man.h" +#include "main.h" + +/* TODO: preserve indent widths. */ +/* FIXME: have PD set the default vspace width. */ + +#define INDENT 5 + +#define MAN_ARGS const struct man_meta *m, \ + const struct man_node *n, \ + struct mhtml *mh, \ + struct html *h + +struct mhtml { + int fl; +#define MANH_LITERAL (1 << 0) /* literal context */ +}; + +struct htmlman { + int (*pre)(MAN_ARGS); + int (*post)(MAN_ARGS); +}; + +static void print_bvspace(struct html *, + const struct man_node *); +static void print_man(MAN_ARGS); +static void print_man_head(MAN_ARGS); +static void print_man_nodelist(MAN_ARGS); +static void print_man_node(MAN_ARGS); +static int a2width(const struct man_node *, + struct roffsu *); +static int man_B_pre(MAN_ARGS); +static int man_HP_pre(MAN_ARGS); +static int man_IP_pre(MAN_ARGS); +static int man_I_pre(MAN_ARGS); +static int man_OP_pre(MAN_ARGS); +static int man_PP_pre(MAN_ARGS); +static int man_RS_pre(MAN_ARGS); +static int man_SH_pre(MAN_ARGS); +static int man_SM_pre(MAN_ARGS); +static int man_SS_pre(MAN_ARGS); +static int man_alt_pre(MAN_ARGS); +static int man_br_pre(MAN_ARGS); +static int man_ign_pre(MAN_ARGS); +static int man_in_pre(MAN_ARGS); +static int man_literal_pre(MAN_ARGS); +static void man_root_post(MAN_ARGS); +static void man_root_pre(MAN_ARGS); + +static const struct htmlman mans[MAN_MAX] = { + { man_br_pre, NULL }, /* br */ + { NULL, NULL }, /* TH */ + { man_SH_pre, NULL }, /* SH */ + { man_SS_pre, NULL }, /* SS */ + { man_IP_pre, NULL }, /* TP */ + { man_PP_pre, NULL }, /* LP */ + { man_PP_pre, NULL }, /* PP */ + { man_PP_pre, NULL }, /* P */ + { man_IP_pre, NULL }, /* IP */ + { man_HP_pre, NULL }, /* HP */ + { man_SM_pre, NULL }, /* SM */ + { man_SM_pre, NULL }, /* SB */ + { 
man_alt_pre, NULL }, /* BI */ + { man_alt_pre, NULL }, /* IB */ + { man_alt_pre, NULL }, /* BR */ + { man_alt_pre, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { man_B_pre, NULL }, /* B */ + { man_I_pre, NULL }, /* I */ + { man_alt_pre, NULL }, /* IR */ + { man_alt_pre, NULL }, /* RI */ + { man_ign_pre, NULL }, /* na */ + { man_br_pre, NULL }, /* sp */ + { man_literal_pre, NULL }, /* nf */ + { man_literal_pre, NULL }, /* fi */ + { NULL, NULL }, /* RE */ + { man_RS_pre, NULL }, /* RS */ + { man_ign_pre, NULL }, /* DT */ + { man_ign_pre, NULL }, /* UC */ + { man_ign_pre, NULL }, /* PD */ + { man_ign_pre, NULL }, /* AT */ + { man_in_pre, NULL }, /* in */ + { man_ign_pre, NULL }, /* ft */ + { man_OP_pre, NULL }, /* OP */ +}; + +/* + * Print leading vertical space before a block. + * This is used for the paragraph macros. + * The rules are pretty simple, since there's very little nesting going + * on here. Basically, if we're the first within another block (SS/SH), + * then don't emit vertical space. If we're the first within an RS, then + * do. If not the first, print it. + */ +static void +print_bvspace(struct html *h, const struct man_node *n) +{ + + if (n->body && n->body->child) + if (MAN_TBL == n->body->child->type) + return; + + if (MAN_ROOT == n->parent->type || MAN_RS != n->parent->tok) + if (NULL == n->prev) + return; + + print_otag(h, TAG_P, 0, NULL); +} + +void +html_man(void *arg, const struct man *m) +{ + struct mhtml mh; + + memset(&mh, 0, sizeof(struct mhtml)); + print_man(man_meta(m), man_node(m), &mh, (struct html *)arg); + putchar('\n'); +} + +static void +print_man(MAN_ARGS) +{ + struct tag *t, *tt; + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "mandoc"); + + if ( ! 
(HTML_FRAGMENT & h->oflags)) { + print_gen_decls(h); + t = print_otag(h, TAG_HTML, 0, NULL); + tt = print_otag(h, TAG_HEAD, 0, NULL); + print_man_head(m, n, mh, h); + print_tagq(h, tt); + print_otag(h, TAG_BODY, 0, NULL); + print_otag(h, TAG_DIV, 1, &tag); + } else + t = print_otag(h, TAG_DIV, 1, &tag); + + print_man_nodelist(m, n, mh, h); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static void +print_man_head(MAN_ARGS) +{ + + print_gen_head(h); + assert(m->title); + assert(m->msec); + bufcat_fmt(h, "%s(%s)", m->title, m->msec); + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + + +static void +print_man_nodelist(MAN_ARGS) +{ + + print_man_node(m, n, mh, h); + if (n->next) + print_man_nodelist(m, n->next, mh, h); +} + + +static void +print_man_node(MAN_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + + switch (n->type) { + case (MAN_ROOT): + man_root_pre(m, n, mh, h); + break; + case (MAN_TEXT): + /* + * If we have a blank line, output a vertical space. + * If we have a space as the first character, break + * before printing the line's data. + */ + if ('\0' == *n->string) { + print_otag(h, TAG_P, 0, NULL); + return; + } + + if (' ' == *n->string && MAN_LINE & n->flags) + print_otag(h, TAG_BR, 0, NULL); + else if (MANH_LITERAL & mh->fl && n->prev) + print_otag(h, TAG_BR, 0, NULL); + + print_text(h, n->string); + return; + case (MAN_EQN): + print_eqn(h, n->eqn); + break; + case (MAN_TBL): + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out scope of font prior to opening a macro + * scope. + */ + if (HTMLFONT_NONE != h->metac) { + h->metal = h->metac; + h->metac = HTMLFONT_NONE; + } + + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. This will be reopened on the + * next table element. 
+ */ + if (h->tblt) { + print_tblclose(h); + t = h->tags.head; + } + if (mans[n->tok].pre) + child = (*mans[n->tok].pre)(m, n, mh, h); + break; + } + + if (child && n->child) + print_man_nodelist(m, n->child, mh, h); + + /* This will automatically close out any font scope. */ + print_stagq(h, t); + + switch (n->type) { + case (MAN_ROOT): + man_root_post(m, n, mh, h); + break; + case (MAN_EQN): + break; + default: + if (mans[n->tok].post) + (*mans[n->tok].post)(m, n, mh, h); + break; + } +} + + +static int +a2width(const struct man_node *n, struct roffsu *su) +{ + + if (MAN_TEXT != n->type) + return(0); + if (a2roffsu(n->string, su, SCALE_BU)) + return(1); + + return(0); +} + + +/* ARGSUSED */ +static void +man_root_pre(MAN_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[BUFSIZ], title[BUFSIZ]; + + b[0] = 0; + if (m->vol) + (void)strlcat(b, m->vol, BUFSIZ); + + assert(m->title); + assert(m->msec); + snprintf(title, BUFSIZ - 1, "%s(%s)", m->title, m->msec); + + PAIR_SUMMARY_INIT(&tag[0], "Document Header"); + PAIR_CLASS_INIT(&tag[1], "head"); + PAIR_INIT(&tag[2], ATTR_WIDTH, "100%"); + t = print_otag(h, TAG_TABLE, 3, tag); + PAIR_INIT(&tag[0], ATTR_WIDTH, "30%"); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag[0], "head-ltitle"); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "head-vol"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "center"); + print_otag(h, TAG_TD, 2, tag); + print_text(h, b); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "head-rtitle"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "right"); + print_otag(h, TAG_TD, 2, tag); + print_text(h, title); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static void +man_root_post(MAN_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + + PAIR_SUMMARY_INIT(&tag[0], "Document Footer"); + 
PAIR_CLASS_INIT(&tag[1], "foot"); + PAIR_INIT(&tag[2], ATTR_WIDTH, "100%"); + t = print_otag(h, TAG_TABLE, 3, tag); + PAIR_INIT(&tag[0], ATTR_WIDTH, "50%"); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag[0], "foot-date"); + print_otag(h, TAG_TD, 1, tag); + + assert(m->date); + print_text(h, m->date); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "foot-os"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "right"); + print_otag(h, TAG_TD, 2, tag); + + if (m->source) + print_text(h, m->source); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static int +man_br_pre(MAN_ARGS) +{ + struct roffsu su; + struct htmlpair tag; + + SCALE_VS_INIT(&su, 1); + + if (MAN_sp == n->tok) { + if (NULL != (n = n->child)) + if ( ! a2roffsu(n->string, &su, SCALE_VS)) + SCALE_VS_INIT(&su, atoi(n->string)); + } else + su.scale = 0; + + bufinit(h); + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return(0); +} + +/* ARGSUSED */ +static int +man_SH_pre(MAN_ARGS) +{ + struct htmlpair tag; + + if (MAN_BLOCK == n->type) { + mh->fl &= ~MANH_LITERAL; + PAIR_CLASS_INIT(&tag, "section"); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } else if (MAN_BODY == n->type) + return(1); + + print_otag(h, TAG_H1, 0, NULL); + return(1); +} + +/* ARGSUSED */ +static int +man_alt_pre(MAN_ARGS) +{ + const struct man_node *nn; + int i, savelit; + enum htmltag fp; + struct tag *t; + + if ((savelit = mh->fl & MANH_LITERAL)) + print_otag(h, TAG_BR, 0, NULL); + + mh->fl &= ~MANH_LITERAL; + + for (i = 0, nn = n->child; nn; nn = nn->next, i++) { + t = NULL; + switch (n->tok) { + case (MAN_BI): + fp = i % 2 ? TAG_I : TAG_B; + break; + case (MAN_IB): + fp = i % 2 ? TAG_B : TAG_I; + break; + case (MAN_RI): + fp = i % 2 ? TAG_I : TAG_MAX; + break; + case (MAN_IR): + fp = i % 2 ? TAG_MAX : TAG_I; + break; + case (MAN_BR): + fp = i % 2 ? 
TAG_MAX : TAG_B; + break; + case (MAN_RB): + fp = i % 2 ? TAG_B : TAG_MAX; + break; + default: + abort(); + /* NOTREACHED */ + } + + if (i) + h->flags |= HTML_NOSPACE; + + if (TAG_MAX != fp) + t = print_otag(h, fp, 0, NULL); + + print_man_node(m, nn, mh, h); + + if (t) + print_tagq(h, t); + } + + if (savelit) + mh->fl |= MANH_LITERAL; + + return(0); +} + +/* ARGSUSED */ +static int +man_SM_pre(MAN_ARGS) +{ + + print_otag(h, TAG_SMALL, 0, NULL); + if (MAN_SB == n->tok) + print_otag(h, TAG_B, 0, NULL); + return(1); +} + +/* ARGSUSED */ +static int +man_SS_pre(MAN_ARGS) +{ + struct htmlpair tag; + + if (MAN_BLOCK == n->type) { + mh->fl &= ~MANH_LITERAL; + PAIR_CLASS_INIT(&tag, "subsection"); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } else if (MAN_BODY == n->type) + return(1); + + print_otag(h, TAG_H2, 0, NULL); + return(1); +} + +/* ARGSUSED */ +static int +man_PP_pre(MAN_ARGS) +{ + + if (MAN_HEAD == n->type) + return(0); + else if (MAN_BLOCK == n->type) + print_bvspace(h, n); + + return(1); +} + +/* ARGSUSED */ +static int +man_IP_pre(MAN_ARGS) +{ + const struct man_node *nn; + + if (MAN_BODY == n->type) { + print_otag(h, TAG_DD, 0, NULL); + return(1); + } else if (MAN_HEAD != n->type) { + print_otag(h, TAG_DL, 0, NULL); + return(1); + } + + /* FIXME: width specification. */ + + print_otag(h, TAG_DT, 0, NULL); + + /* For IP, only print the first header element. */ + + if (MAN_IP == n->tok && n->child) + print_man_node(m, n->child, mh, h); + + /* For TP, only print next-line header elements. */ + + if (MAN_TP == n->tok) + for (nn = n->child; nn; nn = nn->next) + if (nn->line > n->line) + print_man_node(m, nn, mh, h); + + return(0); +} + +/* ARGSUSED */ +static int +man_HP_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + const struct man_node *np; + + if (MAN_HEAD == n->type) + return(0); + else if (MAN_BLOCK != n->type) + return(1); + + np = n->head->child; + + if (NULL == np || ! 
a2width(np, &su)) + SCALE_HS_INIT(&su, INDENT); + + bufinit(h); + + print_bvspace(h, n); + bufcat_su(h, "margin-left", &su); + su.scale = -su.scale; + bufcat_su(h, "text-indent", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_P, 1, &tag); + return(1); +} + +/* ARGSUSED */ +static int +man_OP_pre(MAN_ARGS) +{ + struct tag *tt; + struct htmlpair tag; + + print_text(h, "["); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "opt"); + tt = print_otag(h, TAG_SPAN, 1, &tag); + + if (NULL != (n = n->child)) { + print_otag(h, TAG_B, 0, NULL); + print_text(h, n->string); + } + + print_stagq(h, tt); + + if (NULL != n && NULL != n->next) { + print_otag(h, TAG_I, 0, NULL); + print_text(h, n->next->string); + } + + print_stagq(h, tt); + h->flags |= HTML_NOSPACE; + print_text(h, "]"); + return(0); +} + + +/* ARGSUSED */ +static int +man_B_pre(MAN_ARGS) +{ + + print_otag(h, TAG_B, 0, NULL); + return(1); +} + +/* ARGSUSED */ +static int +man_I_pre(MAN_ARGS) +{ + + print_otag(h, TAG_I, 0, NULL); + return(1); +} + +/* ARGSUSED */ +static int +man_literal_pre(MAN_ARGS) +{ + + if (MAN_nf != n->tok) { + print_otag(h, TAG_BR, 0, NULL); + mh->fl &= ~MANH_LITERAL; + } else + mh->fl |= MANH_LITERAL; + + return(0); +} + +/* ARGSUSED */ +static int +man_in_pre(MAN_ARGS) +{ + + print_otag(h, TAG_BR, 0, NULL); + return(0); +} + +/* ARGSUSED */ +static int +man_ign_pre(MAN_ARGS) +{ + + return(0); +} + +/* ARGSUSED */ +static int +man_RS_pre(MAN_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + + if (MAN_HEAD == n->type) + return(0); + else if (MAN_BODY == n->type) + return(1); + + SCALE_HS_INIT(&su, INDENT); + if (n->head->child) + a2width(n->head->child, &su); + + bufinit(h); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + return(1); +} diff --git a/usr/src/cmd/mandoc/man_macro.c b/usr/src/cmd/mandoc/man_macro.c new file mode 100644 index 0000000000..4bbbc4fa7f --- /dev/null +++ b/usr/src/cmd/mandoc/man_macro.c @@ -0,0 +1,484 @@ 
+/* $Id: man_macro.c,v 1.71 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "man.h" +#include "mandoc.h" +#include "libmandoc.h" +#include "libman.h" + +enum rew { + REW_REWIND, + REW_NOHALT, + REW_HALT +}; + +static int blk_close(MACRO_PROT_ARGS); +static int blk_exp(MACRO_PROT_ARGS); +static int blk_imp(MACRO_PROT_ARGS); +static int in_line_eoln(MACRO_PROT_ARGS); +static int man_args(struct man *, int, + int *, char *, char **); + +static int rew_scope(enum man_type, + struct man *, enum mant); +static enum rew rew_dohalt(enum mant, enum man_type, + const struct man_node *); +static enum rew rew_block(enum mant, enum man_type, + const struct man_node *); +static void rew_warn(struct man *, + struct man_node *, enum mandocerr); + +const struct man_macro __man_macros[MAN_MAX] = { + { in_line_eoln, MAN_NSCOPED }, /* br */ + { in_line_eoln, MAN_BSCOPE }, /* TH */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */ + { blk_imp, MAN_BSCOPE | MAN_SCOPED | MAN_FSCOPED }, /* TP */ 
+ { blk_imp, MAN_BSCOPE }, /* LP */ + { blk_imp, MAN_BSCOPE }, /* PP */ + { blk_imp, MAN_BSCOPE }, /* P */ + { blk_imp, MAN_BSCOPE }, /* IP */ + { blk_imp, MAN_BSCOPE }, /* HP */ + { in_line_eoln, MAN_SCOPED }, /* SM */ + { in_line_eoln, MAN_SCOPED }, /* SB */ + { in_line_eoln, 0 }, /* BI */ + { in_line_eoln, 0 }, /* IB */ + { in_line_eoln, 0 }, /* BR */ + { in_line_eoln, 0 }, /* RB */ + { in_line_eoln, MAN_SCOPED }, /* R */ + { in_line_eoln, MAN_SCOPED }, /* B */ + { in_line_eoln, MAN_SCOPED }, /* I */ + { in_line_eoln, 0 }, /* IR */ + { in_line_eoln, 0 }, /* RI */ + { in_line_eoln, MAN_NSCOPED }, /* na */ + { in_line_eoln, MAN_NSCOPED }, /* sp */ + { in_line_eoln, MAN_BSCOPE }, /* nf */ + { in_line_eoln, MAN_BSCOPE }, /* fi */ + { blk_close, 0 }, /* RE */ + { blk_exp, MAN_EXPLICIT }, /* RS */ + { in_line_eoln, 0 }, /* DT */ + { in_line_eoln, 0 }, /* UC */ + { in_line_eoln, 0 }, /* PD */ + { in_line_eoln, 0 }, /* AT */ + { in_line_eoln, 0 }, /* in */ + { in_line_eoln, 0 }, /* ft */ + { in_line_eoln, 0 }, /* OP */ +}; + +const struct man_macro * const man_macros = __man_macros; + + +/* + * Warn when "n" is an explicit non-roff macro. + */ +static void +rew_warn(struct man *m, struct man_node *n, enum mandocerr er) +{ + + if (er == MANDOCERR_MAX || MAN_BLOCK != n->type) + return; + if (MAN_VALID & n->flags) + return; + if ( ! (MAN_EXPLICIT & man_macros[n->tok].flags)) + return; + + assert(er < MANDOCERR_FATAL); + man_nmsg(m, n, er); +} + + +/* + * Rewind scope. If a code "er" != MANDOCERR_MAX has been provided, it + * will be used if an explicit block scope is being closed out. + */ +int +man_unscope(struct man *m, const struct man_node *to, + enum mandocerr er) +{ + struct man_node *n; + + assert(to); + + m->next = MAN_NEXT_SIBLING; + + /* LINTED */ + while (m->last != to) { + /* + * Save the parent here, because we may delete the + * m->last node in the post-validation phase and reset + * it to m->last->parent, causing a step in the closing + * out to be lost. 
+ */ + n = m->last->parent; + rew_warn(m, m->last, er); + if ( ! man_valid_post(m)) + return(0); + m->last = n; + assert(m->last); + } + + rew_warn(m, m->last, er); + if ( ! man_valid_post(m)) + return(0); + + return(1); +} + + +static enum rew +rew_block(enum mant ntok, enum man_type type, const struct man_node *n) +{ + + if (MAN_BLOCK == type && ntok == n->parent->tok && + MAN_BODY == n->parent->type) + return(REW_REWIND); + return(ntok == n->tok ? REW_HALT : REW_NOHALT); +} + + +/* + * There are three scope levels: scoped to the root (all), scoped to the + * section (all less sections), and scoped to subsections (all less + * sections and subsections). + */ +static enum rew +rew_dohalt(enum mant tok, enum man_type type, const struct man_node *n) +{ + enum rew c; + + /* We cannot progress beyond the root ever. */ + if (MAN_ROOT == n->type) + return(REW_HALT); + + assert(n->parent); + + /* Normal nodes shouldn't go to the level of the root. */ + if (MAN_ROOT == n->parent->type) + return(REW_REWIND); + + /* Already-validated nodes should be closed out. */ + if (MAN_VALID & n->flags) + return(REW_NOHALT); + + /* First: rewind to ourselves. */ + if (type == n->type && tok == n->tok) + return(REW_REWIND); + + /* + * Next follow the implicit scope-smashings as defined by man.7: + * section, sub-section, etc. + */ + + switch (tok) { + case (MAN_SH): + break; + case (MAN_SS): + /* Rewind to a section, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + case (MAN_RS): + /* Rewind to a subsection, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) + return(c); + /* Rewind to a section, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + default: + /* Rewind to an offsetter, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_RS, type, n))) + return(c); + /* Rewind to a subsection, if a block. 
*/ + if (REW_NOHALT != (c = rew_block(MAN_SS, type, n))) + return(c); + /* Rewind to a section, if a block. */ + if (REW_NOHALT != (c = rew_block(MAN_SH, type, n))) + return(c); + break; + } + + return(REW_NOHALT); +} + + +/* + * Rewinding entails ascending the parse tree until a coherent point, + * for example, the `SH' macro will close out any intervening `SS' + * scopes. When a scope is closed, it must be validated and actioned. + */ +static int +rew_scope(enum man_type type, struct man *m, enum mant tok) +{ + struct man_node *n; + enum rew c; + + /* LINTED */ + for (n = m->last; n; n = n->parent) { + /* + * Whether we should stop immediately (REW_HALT), stop + * and rewind until this point (REW_REWIND), or keep + * rewinding (REW_NOHALT). + */ + c = rew_dohalt(tok, type, n); + if (REW_HALT == c) + return(1); + if (REW_REWIND == c) + break; + } + + /* + * Rewind until the current point. Warn if we're a roff + * instruction that's mowing over explicit scopes. + */ + assert(n); + + return(man_unscope(m, n, MANDOCERR_MAX)); +} + + +/* + * Close out a generic explicit macro. + */ +/* ARGSUSED */ +int +blk_close(MACRO_PROT_ARGS) +{ + enum mant ntok; + const struct man_node *nn; + + switch (tok) { + case (MAN_RE): + ntok = MAN_RS; + break; + default: + abort(); + /* NOTREACHED */ + } + + for (nn = m->last->parent; nn; nn = nn->parent) + if (ntok == nn->tok) + break; + + if (NULL == nn) + man_pmsg(m, line, ppos, MANDOCERR_NOSCOPE); + + if ( ! rew_scope(MAN_BODY, m, ntok)) + return(0); + if ( ! rew_scope(MAN_BLOCK, m, ntok)) + return(0); + + return(1); +} + + +/* ARGSUSED */ +int +blk_exp(MACRO_PROT_ARGS) +{ + int la; + char *p; + + /* + * Close out prior scopes. "Regular" explicit macros cannot be + * nested, but we allow roff macros to be placed just about + * anywhere. + */ + + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! man_head_alloc(m, line, ppos, tok)) + return(0); + + for (;;) { + la = *pos; + if ( ! 
man_args(m, line, pos, buf, &p)) + break; + if ( ! man_word_alloc(m, line, la, p)) + return(0); + } + + assert(m); + assert(tok != MAN_MAX); + + if ( ! rew_scope(MAN_HEAD, m, tok)) + return(0); + return(man_body_alloc(m, line, ppos, tok)); +} + + + +/* + * Parse an implicit-block macro. These contain a MAN_HEAD and a + * MAN_BODY contained within a MAN_BLOCK. Rules for closing out other + * scopes, such as `SH' closing out an `SS', are defined in the rew + * routines. + */ +/* ARGSUSED */ +int +blk_imp(MACRO_PROT_ARGS) +{ + int la; + char *p; + struct man_node *n; + + /* Close out prior scopes. */ + + if ( ! rew_scope(MAN_BODY, m, tok)) + return(0); + if ( ! rew_scope(MAN_BLOCK, m, tok)) + return(0); + + /* Allocate new block & head scope. */ + + if ( ! man_block_alloc(m, line, ppos, tok)) + return(0); + if ( ! man_head_alloc(m, line, ppos, tok)) + return(0); + + n = m->last; + + /* Add line arguments. */ + + for (;;) { + la = *pos; + if ( ! man_args(m, line, pos, buf, &p)) + break; + if ( ! man_word_alloc(m, line, la, p)) + return(0); + } + + /* Close out head and open body (unless MAN_SCOPED). */ + + if (MAN_SCOPED & man_macros[tok].flags) { + /* If we're forcing scope (`TP'), keep it open. */ + if (MAN_FSCOPED & man_macros[tok].flags) { + m->flags |= MAN_BLINE; + return(1); + } else if (n == m->last) { + m->flags |= MAN_BLINE; + return(1); + } + } + + if ( ! rew_scope(MAN_HEAD, m, tok)) + return(0); + return(man_body_alloc(m, line, ppos, tok)); +} + + +/* ARGSUSED */ +int +in_line_eoln(MACRO_PROT_ARGS) +{ + int la; + char *p; + struct man_node *n; + + if ( ! man_elem_alloc(m, line, ppos, tok)) + return(0); + + n = m->last; + + for (;;) { + la = *pos; + if ( ! man_args(m, line, pos, buf, &p)) + break; + if ( ! man_word_alloc(m, line, la, p)) + return(0); + } + + /* + * If no arguments are specified and this is MAN_SCOPED (i.e., + * next-line scoped), then set our mode to indicate that we're + * waiting for terms to load into our context. 
+ */ + + if (n == m->last && MAN_SCOPED & man_macros[tok].flags) { + assert( ! (MAN_NSCOPED & man_macros[tok].flags)); + m->flags |= MAN_ELINE; + return(1); + } + + /* Set ignorable context, if applicable. */ + + if (MAN_NSCOPED & man_macros[tok].flags) { + assert( ! (MAN_SCOPED & man_macros[tok].flags)); + m->flags |= MAN_ILINE; + } + + assert(MAN_ROOT != m->last->type); + m->next = MAN_NEXT_SIBLING; + + /* + * Rewind our element scope. Note that when TH is pruned, we'll + * be back at the root, so make sure that we don't clobber as + * its sibling. + */ + + for ( ; m->last; m->last = m->last->parent) { + if (m->last == n) + break; + if (m->last->type == MAN_ROOT) + break; + if ( ! man_valid_post(m)) + return(0); + } + + assert(m->last); + + /* + * Same here regarding whether we're back at the root. + */ + + if (m->last->type != MAN_ROOT && ! man_valid_post(m)) + return(0); + + return(1); +} + + +int +man_macroend(struct man *m) +{ + + return(man_unscope(m, m->first, MANDOCERR_SCOPEEXIT)); +} + +static int +man_args(struct man *m, int line, int *pos, char *buf, char **v) +{ + char *start; + + assert(*pos); + *v = start = buf + *pos; + assert(' ' != *start); + + if ('\0' == *start) + return(0); + + *v = mandoc_getarg(m->parse, v, line, pos); + return(1); +} diff --git a/usr/src/cmd/mandoc/man_term.c b/usr/src/cmd/mandoc/man_term.c new file mode 100644 index 0000000000..69c5c95e44 --- /dev/null +++ b/usr/src/cmd/mandoc/man_term.c @@ -0,0 +1,1117 @@ +/* $Id: man_term.c,v 1.127 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "man.h" +#include "term.h" +#include "main.h" + +#define MAXMARGINS 64 /* maximum number of indented scopes */ + +/* FIXME: have PD set the default vspace width. */ + +struct mtermp { + int fl; +#define MANT_LITERAL (1 << 0) + size_t lmargin[MAXMARGINS]; /* margins (incl. visible page) */ + int lmargincur; /* index of current margin */ + int lmarginsz; /* actual number of nested margins */ + size_t offset; /* default offset to visible page */ +}; + +#define DECL_ARGS struct termp *p, \ + struct mtermp *mt, \ + const struct man_node *n, \ + const struct man_meta *m + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); + int flags; +#define MAN_NOTEXT (1 << 0) /* Never has text children. 
*/ +}; + +static int a2width(const struct termp *, const char *); +static size_t a2height(const struct termp *, const char *); + +static void print_man_nodelist(DECL_ARGS); +static void print_man_node(DECL_ARGS); +static void print_man_head(struct termp *, const void *); +static void print_man_foot(struct termp *, const void *); +static void print_bvspace(struct termp *, + const struct man_node *); + +static int pre_B(DECL_ARGS); +static int pre_HP(DECL_ARGS); +static int pre_I(DECL_ARGS); +static int pre_IP(DECL_ARGS); +static int pre_OP(DECL_ARGS); +static int pre_PP(DECL_ARGS); +static int pre_RS(DECL_ARGS); +static int pre_SH(DECL_ARGS); +static int pre_SS(DECL_ARGS); +static int pre_TP(DECL_ARGS); +static int pre_alternate(DECL_ARGS); +static int pre_ft(DECL_ARGS); +static int pre_ign(DECL_ARGS); +static int pre_in(DECL_ARGS); +static int pre_literal(DECL_ARGS); +static int pre_sp(DECL_ARGS); + +static void post_IP(DECL_ARGS); +static void post_HP(DECL_ARGS); +static void post_RS(DECL_ARGS); +static void post_SH(DECL_ARGS); +static void post_SS(DECL_ARGS); +static void post_TP(DECL_ARGS); + +static const struct termact termacts[MAN_MAX] = { + { pre_sp, NULL, MAN_NOTEXT }, /* br */ + { NULL, NULL, 0 }, /* TH */ + { pre_SH, post_SH, 0 }, /* SH */ + { pre_SS, post_SS, 0 }, /* SS */ + { pre_TP, post_TP, 0 }, /* TP */ + { pre_PP, NULL, 0 }, /* LP */ + { pre_PP, NULL, 0 }, /* PP */ + { pre_PP, NULL, 0 }, /* P */ + { pre_IP, post_IP, 0 }, /* IP */ + { pre_HP, post_HP, 0 }, /* HP */ + { NULL, NULL, 0 }, /* SM */ + { pre_B, NULL, 0 }, /* SB */ + { pre_alternate, NULL, 0 }, /* BI */ + { pre_alternate, NULL, 0 }, /* IB */ + { pre_alternate, NULL, 0 }, /* BR */ + { pre_alternate, NULL, 0 }, /* RB */ + { NULL, NULL, 0 }, /* R */ + { pre_B, NULL, 0 }, /* B */ + { pre_I, NULL, 0 }, /* I */ + { pre_alternate, NULL, 0 }, /* IR */ + { pre_alternate, NULL, 0 }, /* RI */ + { pre_ign, NULL, MAN_NOTEXT }, /* na */ + { pre_sp, NULL, MAN_NOTEXT }, /* sp */ + { pre_literal, NULL, 0 }, 
/* nf */ + { pre_literal, NULL, 0 }, /* fi */ + { NULL, NULL, 0 }, /* RE */ + { pre_RS, post_RS, 0 }, /* RS */ + { pre_ign, NULL, 0 }, /* DT */ + { pre_ign, NULL, 0 }, /* UC */ + { pre_ign, NULL, 0 }, /* PD */ + { pre_ign, NULL, 0 }, /* AT */ + { pre_in, NULL, MAN_NOTEXT }, /* in */ + { pre_ft, NULL, MAN_NOTEXT }, /* ft */ + { pre_OP, NULL, 0 }, /* OP */ +}; + + + +void +terminal_man(void *arg, const struct man *man) +{ + struct termp *p; + const struct man_node *n; + const struct man_meta *m; + struct mtermp mt; + + p = (struct termp *)arg; + + if (0 == p->defindent) + p->defindent = 7; + + p->overstep = 0; + p->maxrmargin = p->defrmargin; + p->tabwidth = term_len(p, 5); + + if (NULL == p->symtab) + p->symtab = mchars_alloc(); + + n = man_node(man); + m = man_meta(man); + + term_begin(p, print_man_head, print_man_foot, m); + p->flags |= TERMP_NOSPACE; + + memset(&mt, 0, sizeof(struct mtermp)); + + mt.lmargin[mt.lmargincur] = term_len(p, p->defindent); + mt.offset = term_len(p, p->defindent); + + if (n->child) + print_man_nodelist(p, &mt, n->child, m); + + term_end(p); +} + + +static size_t +a2height(const struct termp *p, const char *cp) +{ + struct roffsu su; + + if ( ! a2roffsu(cp, &su, SCALE_VS)) + SCALE_VS_INIT(&su, atoi(cp)); + + return(term_vspan(p, &su)); +} + + +static int +a2width(const struct termp *p, const char *cp) +{ + struct roffsu su; + + if ( ! a2roffsu(cp, &su, SCALE_BU)) + return(-1); + + return((int)term_hspan(p, &su)); +} + +/* + * Printing leading vertical space before a block. + * This is used for the paragraph macros. + * The rules are pretty simple, since there's very little nesting going + * on here. Basically, if we're the first within another block (SS/SH), + * then don't emit vertical space. If we are (RS), then do. If not the + * first, print it. 
+ */ +static void +print_bvspace(struct termp *p, const struct man_node *n) +{ + + term_newln(p); + + if (n->body && n->body->child) + if (MAN_TBL == n->body->child->type) + return; + + if (MAN_ROOT == n->parent->type || MAN_RS != n->parent->tok) + if (NULL == n->prev) + return; + + term_vspace(p); +} + +/* ARGSUSED */ +static int +pre_ign(DECL_ARGS) +{ + + return(0); +} + + +/* ARGSUSED */ +static int +pre_I(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_UNDER); + return(1); +} + + +/* ARGSUSED */ +static int +pre_literal(DECL_ARGS) +{ + + term_newln(p); + + if (MAN_nf == n->tok) + mt->fl |= MANT_LITERAL; + else + mt->fl &= ~MANT_LITERAL; + + /* + * Unlike .IP and .TP, .HP does not have a HEAD. + * So in case a second call to term_flushln() is needed, + * indentation has to be set up explicitly. + */ + if (MAN_HP == n->parent->tok && p->rmargin < p->maxrmargin) { + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->flags &= ~(TERMP_NOBREAK | TERMP_TWOSPACE); + p->flags |= TERMP_NOSPACE; + } + + return(0); +} + +/* ARGSUSED */ +static int +pre_alternate(DECL_ARGS) +{ + enum termfont font[2]; + const struct man_node *nn; + int savelit, i; + + switch (n->tok) { + case (MAN_RB): + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_BOLD; + break; + case (MAN_RI): + font[0] = TERMFONT_NONE; + font[1] = TERMFONT_UNDER; + break; + case (MAN_BR): + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_NONE; + break; + case (MAN_BI): + font[0] = TERMFONT_BOLD; + font[1] = TERMFONT_UNDER; + break; + case (MAN_IR): + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_NONE; + break; + case (MAN_IB): + font[0] = TERMFONT_UNDER; + font[1] = TERMFONT_BOLD; + break; + default: + abort(); + } + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + for (i = 0, nn = n->child; nn; nn = nn->next, i = 1 - i) { + term_fontrepl(p, font[i]); + if (savelit && NULL == nn->next) + mt->fl |= MANT_LITERAL; + print_man_node(p, mt, nn, m); + if (nn->next) + p->flags |= TERMP_NOSPACE; + } + + 
return(0); +} + +/* ARGSUSED */ +static int +pre_B(DECL_ARGS) +{ + + term_fontrepl(p, TERMFONT_BOLD); + return(1); +} + +/* ARGSUSED */ +static int +pre_OP(DECL_ARGS) +{ + + term_word(p, "["); + p->flags |= TERMP_NOSPACE; + + if (NULL != (n = n->child)) { + term_fontrepl(p, TERMFONT_BOLD); + term_word(p, n->string); + } + if (NULL != n && NULL != n->next) { + term_fontrepl(p, TERMFONT_UNDER); + term_word(p, n->next->string); + } + + term_fontrepl(p, TERMFONT_NONE); + p->flags |= TERMP_NOSPACE; + term_word(p, "]"); + return(0); +} + +/* ARGSUSED */ +static int +pre_ft(DECL_ARGS) +{ + const char *cp; + + if (NULL == n->child) { + term_fontlast(p); + return(0); + } + + cp = n->child->string; + switch (*cp) { + case ('4'): + /* FALLTHROUGH */ + case ('3'): + /* FALLTHROUGH */ + case ('B'): + term_fontrepl(p, TERMFONT_BOLD); + break; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + term_fontrepl(p, TERMFONT_UNDER); + break; + case ('P'): + term_fontlast(p); + break; + case ('1'): + /* FALLTHROUGH */ + case ('C'): + /* FALLTHROUGH */ + case ('R'): + term_fontrepl(p, TERMFONT_NONE); + break; + default: + break; + } + return(0); +} + +/* ARGSUSED */ +static int +pre_in(DECL_ARGS) +{ + int len, less; + size_t v; + const char *cp; + + term_newln(p); + + if (NULL == n->child) { + p->offset = mt->offset; + return(0); + } + + cp = n->child->string; + less = 0; + + if ('-' == *cp) + less = -1; + else if ('+' == *cp) + less = 1; + else + cp--; + + if ((len = a2width(p, ++cp)) < 0) + return(0); + + v = (size_t)len; + + if (less < 0) + p->offset -= p->offset > v ? v : p->offset; + else if (less > 0) + p->offset += v; + else + p->offset = v; + + /* Don't let this creep beyond the right margin. 
*/ + + if (p->offset > p->rmargin) + p->offset = p->rmargin; + + return(0); +} + + +/* ARGSUSED */ +static int +pre_sp(DECL_ARGS) +{ + size_t i, len; + + if ((NULL == n->prev && n->parent)) { + if (MAN_SS == n->parent->tok) + return(0); + if (MAN_SH == n->parent->tok) + return(0); + } + + switch (n->tok) { + case (MAN_br): + len = 0; + break; + default: + len = n->child ? a2height(p, n->child->string) : 1; + break; + } + + if (0 == len) + term_newln(p); + for (i = 0; i < len; i++) + term_vspace(p); + + return(0); +} + + +/* ARGSUSED */ +static int +pre_HP(DECL_ARGS) +{ + size_t len, one; + int ival; + const struct man_node *nn; + + switch (n->type) { + case (MAN_BLOCK): + print_bvspace(p, n); + return(1); + case (MAN_BODY): + p->flags |= TERMP_NOBREAK; + p->flags |= TERMP_TWOSPACE; + break; + default: + return(0); + } + + len = mt->lmargin[mt->lmargincur]; + ival = -1; + + /* Calculate offset. */ + + if (NULL != (nn = n->parent->head->child)) + if ((ival = a2width(p, nn->string)) >= 0) + len = (size_t)ival; + + one = term_len(p, 1); + if (len < one) + len = one; + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + if (ival >= 0) + mt->lmargin[mt->lmargincur] = (size_t)ival; + + return(1); +} + + +/* ARGSUSED */ +static void +post_HP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + term_flushln(p); + break; + case (MAN_BODY): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->offset = mt->offset; + p->rmargin = p->maxrmargin; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_PP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + print_bvspace(p, n); + break; + default: + p->offset = mt->offset; + break; + } + + return(MAN_HEAD != n->type); +} + + +/* ARGSUSED */ +static int +pre_IP(DECL_ARGS) +{ + const struct man_node *nn; + size_t len; + int savelit, ival; + + switch (n->type) { + case (MAN_BODY): + p->flags |= 
TERMP_NOSPACE; + break; + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + break; + case (MAN_BLOCK): + print_bvspace(p, n); + /* FALLTHROUGH */ + default: + return(1); + } + + len = mt->lmargin[mt->lmargincur]; + ival = -1; + + /* Calculate the offset from the optional second argument. */ + if (NULL != (nn = n->parent->head->child)) + if (NULL != (nn = nn->next)) + if ((ival = a2width(p, nn->string)) >= 0) + len = (size_t)ival; + + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-width lengths. */ + if (0 == len) + len = term_len(p, 1); + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + if (ival < 0) + break; + + /* Set the saved left-margin. */ + mt->lmargin[mt->lmargincur] = (size_t)ival; + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + if (n->child) + print_man_node(p, mt, n->child, m); + + if (savelit) + mt->fl |= MANT_LITERAL; + + return(0); + case (MAN_BODY): + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_IP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_TP(DECL_ARGS) +{ + const struct man_node *nn; + size_t len; + int savelit, ival; + + switch (n->type) { + case (MAN_HEAD): + p->flags |= TERMP_NOBREAK; + break; + case (MAN_BODY): + p->flags |= TERMP_NOSPACE; + break; + case (MAN_BLOCK): + print_bvspace(p, n); + /* FALLTHROUGH */ + default: + return(1); + } + + len = (size_t)mt->lmargin[mt->lmargincur]; + ival = -1; + + /* Calculate offset. */ + + if (NULL != (nn = n->parent->head->child)) + if (nn->string && nn->parent->line == nn->line) + if ((ival = a2width(p, nn->string)) >= 0) + len = (size_t)ival; + + switch (n->type) { + case (MAN_HEAD): + /* Handle zero-length properly. 
*/ + if (0 == len) + len = term_len(p, 1); + + p->offset = mt->offset; + p->rmargin = mt->offset + len; + + savelit = MANT_LITERAL & mt->fl; + mt->fl &= ~MANT_LITERAL; + + /* Don't print same-line elements. */ + for (nn = n->child; nn; nn = nn->next) + if (nn->line > n->line) + print_man_node(p, mt, nn, m); + + if (savelit) + mt->fl |= MANT_LITERAL; + if (ival >= 0) + mt->lmargin[mt->lmargincur] = (size_t)ival; + + return(0); + case (MAN_BODY): + p->offset = mt->offset + len; + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_TP(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_flushln(p); + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->rmargin = p->maxrmargin; + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_SS(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->fl &= ~MANT_LITERAL; + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + /* If following a prior empty `SS', no vspace. */ + if (n->prev && MAN_SS == n->prev->tok) + if (NULL == n->prev->body->child) + break; + if (NULL == n->prev) + break; + term_vspace(p); + break; + case (MAN_HEAD): + term_fontrepl(p, TERMFONT_BOLD); + p->offset = term_len(p, p->defindent/2); + break; + case (MAN_BODY): + p->offset = mt->offset; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_SS(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +pre_SH(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + mt->fl &= ~MANT_LITERAL; + mt->lmargin[mt->lmargincur] = term_len(p, p->defindent); + mt->offset = term_len(p, p->defindent); + /* If following a prior empty `SH', no vspace. 
*/ + if (n->prev && MAN_SH == n->prev->tok) + if (NULL == n->prev->body->child) + break; + /* If the first macro, no vspae. */ + if (NULL == n->prev) + break; + term_vspace(p); + break; + case (MAN_HEAD): + term_fontrepl(p, TERMFONT_BOLD); + p->offset = 0; + break; + case (MAN_BODY): + p->offset = mt->offset; + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +post_SH(DECL_ARGS) +{ + + switch (n->type) { + case (MAN_HEAD): + term_newln(p); + break; + case (MAN_BODY): + term_newln(p); + break; + default: + break; + } +} + +/* ARGSUSED */ +static int +pre_RS(DECL_ARGS) +{ + int ival; + size_t sz; + + switch (n->type) { + case (MAN_BLOCK): + term_newln(p); + return(1); + case (MAN_HEAD): + return(0); + default: + break; + } + + sz = term_len(p, p->defindent); + + if (NULL != (n = n->parent->head->child)) + if ((ival = a2width(p, n->string)) >= 0) + sz = (size_t)ival; + + mt->offset += sz; + p->rmargin = p->maxrmargin; + p->offset = mt->offset < p->rmargin ? mt->offset : p->rmargin; + + if (++mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; + + mt->lmargin[mt->lmargincur] = mt->lmargin[mt->lmargincur - 1]; + return(1); +} + +/* ARGSUSED */ +static void +post_RS(DECL_ARGS) +{ + int ival; + size_t sz; + + switch (n->type) { + case (MAN_BLOCK): + return; + case (MAN_HEAD): + return; + default: + term_newln(p); + break; + } + + sz = term_len(p, p->defindent); + + if (NULL != (n = n->parent->head->child)) + if ((ival = a2width(p, n->string)) >= 0) + sz = (size_t)ival; + + mt->offset = mt->offset < sz ? 0 : mt->offset - sz; + p->offset = mt->offset; + + if (--mt->lmarginsz < MAXMARGINS) + mt->lmargincur = mt->lmarginsz; +} + +static void +print_man_node(DECL_ARGS) +{ + size_t rm, rmax; + int c; + + switch (n->type) { + case(MAN_TEXT): + /* + * If we have a blank line, output a vertical space. + * If we have a space as the first character, break + * before printing the line's data. 
+ */ + if ('\0' == *n->string) { + term_vspace(p); + return; + } else if (' ' == *n->string && MAN_LINE & n->flags) + term_newln(p); + + term_word(p, n->string); + + /* + * If we're in a literal context, make sure that words + * togehter on the same line stay together. This is a + * POST-printing call, so we check the NEXT word. Since + * -man doesn't have nested macros, we don't need to be + * more specific than this. + */ + if (MANT_LITERAL & mt->fl && ! (TERMP_NOBREAK & p->flags) && + (NULL == n->next || + n->next->line > n->line)) { + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + p->flags |= TERMP_NOSPACE; + term_flushln(p); + p->rmargin = rm; + p->maxrmargin = rmax; + } + + if (MAN_EOS & n->flags) + p->flags |= TERMP_SENTENCE; + return; + case (MAN_EQN): + term_eqn(p, n->eqn); + return; + case (MAN_TBL): + /* + * Tables are preceded by a newline. Then process a + * table line, which will cause line termination, + */ + if (TBL_SPAN_FIRST & n->span->flags) + term_newln(p); + term_tbl(p, n->span); + return; + default: + break; + } + + if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + + c = 1; + if (termacts[n->tok].pre) + c = (*termacts[n->tok].pre)(p, mt, n, m); + + if (c && n->child) + print_man_nodelist(p, mt, n->child, m); + + if (termacts[n->tok].post) + (*termacts[n->tok].post)(p, mt, n, m); + if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) + term_fontrepl(p, TERMFONT_NONE); + + if (MAN_EOS & n->flags) + p->flags |= TERMP_SENTENCE; +} + + +static void +print_man_nodelist(DECL_ARGS) +{ + + print_man_node(p, mt, n, m); + if ( ! 
n->next) + return; + print_man_nodelist(p, mt, n->next, m); +} + + +static void +print_man_foot(struct termp *p, const void *arg) +{ + char title[BUFSIZ]; + size_t datelen; + const struct man_meta *meta; + + meta = (const struct man_meta *)arg; + assert(meta->title); + assert(meta->msec); + assert(meta->date); + + term_fontrepl(p, TERMFONT_NONE); + + term_vspace(p); + + /* + * Temporary, undocumented option to imitate mdoc(7) output. + * In the bottom right corner, use the source instead of + * the title. + */ + + if ( ! p->mdocstyle) { + term_vspace(p); + term_vspace(p); + snprintf(title, BUFSIZ, "%s(%s)", meta->title, meta->msec); + } else if (meta->source) { + strlcpy(title, meta->source, BUFSIZ); + } else { + title[0] = '\0'; + } + datelen = term_strlen(p, meta->date); + + /* Bottom left corner: manual source. */ + + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + p->offset = 0; + p->rmargin = (p->maxrmargin - datelen + term_len(p, 1)) / 2; + + if (meta->source) + term_word(p, meta->source); + term_flushln(p); + + /* At the bottom in the middle: manual date. */ + + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin - term_strlen(p, title); + if (p->offset + datelen >= p->rmargin) + p->rmargin = p->offset + datelen; + + term_word(p, meta->date); + term_flushln(p); + + /* Bottom right corner: manual title and section. */ + + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + + term_word(p, title); + term_flushln(p); +} + + +static void +print_man_head(struct termp *p, const void *arg) +{ + char buf[BUFSIZ], title[BUFSIZ]; + size_t buflen, titlen; + const struct man_meta *m; + + m = (const struct man_meta *)arg; + assert(m->title); + assert(m->msec); + + if (m->vol) + strlcpy(buf, m->vol, BUFSIZ); + else + buf[0] = '\0'; + buflen = term_strlen(p, buf); + + /* Top left corner: manual title and section. 
*/ + + snprintf(title, BUFSIZ, "%s(%s)", m->title, m->msec); + titlen = term_strlen(p, title); + + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->offset = 0; + p->rmargin = 2 * (titlen+1) + buflen < p->maxrmargin ? + (p->maxrmargin - + term_strlen(p, buf) + term_len(p, 1)) / 2 : + p->maxrmargin - buflen; + + term_word(p, title); + term_flushln(p); + + /* At the top in the middle: manual volume. */ + + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->offset + buflen + titlen < p->maxrmargin ? + p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, buf); + term_flushln(p); + + /* Top right corner: title and section, again. */ + + p->flags &= ~TERMP_NOBREAK; + if (p->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->offset = 0; + p->rmargin = p->maxrmargin; + + /* + * Groff prints three blank lines before the content. + * Do the same, except in the temporary, undocumented + * mode imitating mdoc(7) output. + */ + + term_vspace(p); + if ( ! p->mdocstyle) { + term_vspace(p); + term_vspace(p); + } +} diff --git a/usr/src/cmd/mandoc/man_validate.c b/usr/src/cmd/mandoc/man_validate.c new file mode 100644 index 0000000000..e40b089f53 --- /dev/null +++ b/usr/src/cmd/mandoc/man_validate.c @@ -0,0 +1,550 @@ +/* $Id: man_validate.c,v 1.80 2012/01/03 15:16:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "man.h" +#include "mandoc.h" +#include "libman.h" +#include "libmandoc.h" + +#define CHKARGS struct man *m, struct man_node *n + +typedef int (*v_check)(CHKARGS); + +struct man_valid { + v_check *pres; + v_check *posts; +}; + +static int check_eq0(CHKARGS); +static int check_eq2(CHKARGS); +static int check_le1(CHKARGS); +static int check_ge2(CHKARGS); +static int check_le5(CHKARGS); +static int check_par(CHKARGS); +static int check_part(CHKARGS); +static int check_root(CHKARGS); +static void check_text(CHKARGS); + +static int post_AT(CHKARGS); +static int post_vs(CHKARGS); +static int post_fi(CHKARGS); +static int post_ft(CHKARGS); +static int post_nf(CHKARGS); +static int post_sec(CHKARGS); +static int post_TH(CHKARGS); +static int post_UC(CHKARGS); +static int pre_sec(CHKARGS); + +static v_check posts_at[] = { post_AT, NULL }; +static v_check posts_br[] = { post_vs, check_eq0, NULL }; +static v_check posts_eq0[] = { check_eq0, NULL }; +static v_check posts_eq2[] = { check_eq2, NULL }; +static v_check posts_fi[] = { check_eq0, post_fi, NULL }; +static v_check posts_ft[] = { post_ft, NULL }; +static v_check posts_nf[] = { check_eq0, post_nf, NULL }; +static v_check posts_par[] = { check_par, NULL }; 
+static v_check posts_part[] = { check_part, NULL }; +static v_check posts_sec[] = { post_sec, NULL }; +static v_check posts_sp[] = { post_vs, check_le1, NULL }; +static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL }; +static v_check posts_uc[] = { post_UC, NULL }; +static v_check pres_sec[] = { pre_sec, NULL }; + +static const struct man_valid man_valids[MAN_MAX] = { + { NULL, posts_br }, /* br */ + { NULL, posts_th }, /* TH */ + { pres_sec, posts_sec }, /* SH */ + { pres_sec, posts_sec }, /* SS */ + { NULL, NULL }, /* TP */ + { NULL, posts_par }, /* LP */ + { NULL, posts_par }, /* PP */ + { NULL, posts_par }, /* P */ + { NULL, NULL }, /* IP */ + { NULL, NULL }, /* HP */ + { NULL, NULL }, /* SM */ + { NULL, NULL }, /* SB */ + { NULL, NULL }, /* BI */ + { NULL, NULL }, /* IB */ + { NULL, NULL }, /* BR */ + { NULL, NULL }, /* RB */ + { NULL, NULL }, /* R */ + { NULL, NULL }, /* B */ + { NULL, NULL }, /* I */ + { NULL, NULL }, /* IR */ + { NULL, NULL }, /* RI */ + { NULL, posts_eq0 }, /* na */ + { NULL, posts_sp }, /* sp */ + { NULL, posts_nf }, /* nf */ + { NULL, posts_fi }, /* fi */ + { NULL, NULL }, /* RE */ + { NULL, posts_part }, /* RS */ + { NULL, NULL }, /* DT */ + { NULL, posts_uc }, /* UC */ + { NULL, NULL }, /* PD */ + { NULL, posts_at }, /* AT */ + { NULL, NULL }, /* in */ + { NULL, posts_ft }, /* ft */ + { NULL, posts_eq2 }, /* OP */ +}; + + +int +man_valid_pre(struct man *m, struct man_node *n) +{ + v_check *cp; + + switch (n->type) { + case (MAN_TEXT): + /* FALLTHROUGH */ + case (MAN_ROOT): + /* FALLTHROUGH */ + case (MAN_EQN): + /* FALLTHROUGH */ + case (MAN_TBL): + return(1); + default: + break; + } + + if (NULL == (cp = man_valids[n->tok].pres)) + return(1); + for ( ; *cp; cp++) + if ( ! 
(*cp)(m, n)) + return(0); + return(1); +} + + +int +man_valid_post(struct man *m) +{ + v_check *cp; + + if (MAN_VALID & m->last->flags) + return(1); + m->last->flags |= MAN_VALID; + + switch (m->last->type) { + case (MAN_TEXT): + check_text(m, m->last); + return(1); + case (MAN_ROOT): + return(check_root(m, m->last)); + case (MAN_EQN): + /* FALLTHROUGH */ + case (MAN_TBL): + return(1); + default: + break; + } + + if (NULL == (cp = man_valids[m->last->tok].posts)) + return(1); + for ( ; *cp; cp++) + if ( ! (*cp)(m, m->last)) + return(0); + + return(1); +} + + +static int +check_root(CHKARGS) +{ + + if (MAN_BLINE & m->flags) + man_nmsg(m, n, MANDOCERR_SCOPEEXIT); + else if (MAN_ELINE & m->flags) + man_nmsg(m, n, MANDOCERR_SCOPEEXIT); + + m->flags &= ~MAN_BLINE; + m->flags &= ~MAN_ELINE; + + if (NULL == m->first->child) { + man_nmsg(m, n, MANDOCERR_NODOCBODY); + return(0); + } else if (NULL == m->meta.title) { + man_nmsg(m, n, MANDOCERR_NOTITLE); + + /* + * If a title hasn't been set, do so now (by + * implication, date and section also aren't set). 
+ */ + + m->meta.title = mandoc_strdup("unknown"); + m->meta.msec = mandoc_strdup("1"); + m->meta.date = mandoc_normdate + (m->parse, NULL, n->line, n->pos); + } + + return(1); +} + +static void +check_text(CHKARGS) +{ + char *cp, *p; + + if (MAN_LITERAL & m->flags) + return; + + cp = n->string; + for (p = cp; NULL != (p = strchr(p, '\t')); p++) + man_pmsg(m, n->line, (int)(p - cp), MANDOCERR_BADTAB); +} + +#define INEQ_DEFINE(x, ineq, name) \ +static int \ +check_##name(CHKARGS) \ +{ \ + if (n->nchild ineq (x)) \ + return(1); \ + mandoc_vmsg(MANDOCERR_ARGCOUNT, m->parse, n->line, n->pos, \ + "line arguments %s %d (have %d)", \ + #ineq, (x), n->nchild); \ + return(1); \ +} + +INEQ_DEFINE(0, ==, eq0) +INEQ_DEFINE(2, ==, eq2) +INEQ_DEFINE(1, <=, le1) +INEQ_DEFINE(2, >=, ge2) +INEQ_DEFINE(5, <=, le5) + +static int +post_ft(CHKARGS) +{ + char *cp; + int ok; + + if (0 == n->nchild) + return(1); + + ok = 0; + cp = n->child->string; + switch (*cp) { + case ('1'): + /* FALLTHROUGH */ + case ('2'): + /* FALLTHROUGH */ + case ('3'): + /* FALLTHROUGH */ + case ('4'): + /* FALLTHROUGH */ + case ('I'): + /* FALLTHROUGH */ + case ('P'): + /* FALLTHROUGH */ + case ('R'): + if ('\0' == cp[1]) + ok = 1; + break; + case ('B'): + if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) + ok = 1; + break; + case ('C'): + if ('W' == cp[1] && '\0' == cp[2]) + ok = 1; + break; + default: + break; + } + + if (0 == ok) { + mandoc_vmsg + (MANDOCERR_BADFONT, m->parse, + n->line, n->pos, "%s", cp); + *cp = '\0'; + } + + if (1 < n->nchild) + mandoc_vmsg + (MANDOCERR_ARGCOUNT, m->parse, n->line, + n->pos, "want one child (have %d)", + n->nchild); + + return(1); +} + +static int +pre_sec(CHKARGS) +{ + + if (MAN_BLOCK == n->type) + m->flags &= ~MAN_LITERAL; + return(1); +} + +static int +post_sec(CHKARGS) +{ + + if ( ! 
(MAN_HEAD == n->type && 0 == n->nchild)) + return(1); + + man_nmsg(m, n, MANDOCERR_SYNTARGCOUNT); + return(0); +} + +static int +check_part(CHKARGS) +{ + + if (MAN_BODY == n->type && 0 == n->nchild) + mandoc_msg(MANDOCERR_ARGCWARN, m->parse, n->line, + n->pos, "want children (have none)"); + + return(1); +} + + +static int +check_par(CHKARGS) +{ + + switch (n->type) { + case (MAN_BLOCK): + if (0 == n->body->nchild) + man_node_delete(m, n); + break; + case (MAN_BODY): + if (0 == n->nchild) + man_nmsg(m, n, MANDOCERR_IGNPAR); + break; + case (MAN_HEAD): + if (n->nchild) + man_nmsg(m, n, MANDOCERR_ARGSLOST); + break; + default: + break; + } + + return(1); +} + + +static int +post_TH(CHKARGS) +{ + const char *p; + int line, pos; + + if (m->meta.title) + free(m->meta.title); + if (m->meta.vol) + free(m->meta.vol); + if (m->meta.source) + free(m->meta.source); + if (m->meta.msec) + free(m->meta.msec); + if (m->meta.date) + free(m->meta.date); + + line = n->line; + pos = n->pos; + m->meta.title = m->meta.vol = m->meta.date = + m->meta.msec = m->meta.source = NULL; + + /* ->TITLE<- MSEC DATE SOURCE VOL */ + + n = n->child; + if (n && n->string) { + for (p = n->string; '\0' != *p; p++) { + /* Only warn about this once... */ + if (isalpha((unsigned char)*p) && + ! 
isupper((unsigned char)*p)) { + man_nmsg(m, n, MANDOCERR_UPPERCASE); + break; + } + } + m->meta.title = mandoc_strdup(n->string); + } else + m->meta.title = mandoc_strdup(""); + + /* TITLE ->MSEC<- DATE SOURCE VOL */ + + if (n) + n = n->next; + if (n && n->string) + m->meta.msec = mandoc_strdup(n->string); + else + m->meta.msec = mandoc_strdup(""); + + /* TITLE MSEC ->DATE<- SOURCE VOL */ + + if (n) + n = n->next; + if (n && n->string && '\0' != n->string[0]) { + pos = n->pos; + m->meta.date = mandoc_normdate + (m->parse, n->string, line, pos); + } else + m->meta.date = mandoc_strdup(""); + + /* TITLE MSEC DATE ->SOURCE<- VOL */ + + if (n && (n = n->next)) + m->meta.source = mandoc_strdup(n->string); + + /* TITLE MSEC DATE SOURCE ->VOL<- */ + /* If missing, use the default VOL name for MSEC. */ + + if (n && (n = n->next)) + m->meta.vol = mandoc_strdup(n->string); + else if ('\0' != m->meta.msec[0] && + (NULL != (p = mandoc_a2msec(m->meta.msec)))) + m->meta.vol = mandoc_strdup(p); + + /* + * Remove the `TH' node after we've processed it for our + * meta-data. + */ + man_node_delete(m, m->last); + return(1); +} + +static int +post_nf(CHKARGS) +{ + + if (MAN_LITERAL & m->flags) + man_nmsg(m, n, MANDOCERR_SCOPEREP); + + m->flags |= MAN_LITERAL; + return(1); +} + +static int +post_fi(CHKARGS) +{ + + if ( ! 
(MAN_LITERAL & m->flags)) + man_nmsg(m, n, MANDOCERR_WNOSCOPE); + + m->flags &= ~MAN_LITERAL; + return(1); +} + +static int +post_UC(CHKARGS) +{ + static const char * const bsd_versions[] = { + "3rd Berkeley Distribution", + "4th Berkeley Distribution", + "4.2 Berkeley Distribution", + "4.3 Berkeley Distribution", + "4.4 Berkeley Distribution", + }; + + const char *p, *s; + + n = n->child; + + if (NULL == n || MAN_TEXT != n->type) + p = bsd_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = bsd_versions[0]; + else if (0 == strcmp(s, "4")) + p = bsd_versions[1]; + else if (0 == strcmp(s, "5")) + p = bsd_versions[2]; + else if (0 == strcmp(s, "6")) + p = bsd_versions[3]; + else if (0 == strcmp(s, "7")) + p = bsd_versions[4]; + else + p = bsd_versions[0]; + } + + if (m->meta.source) + free(m->meta.source); + + m->meta.source = mandoc_strdup(p); + return(1); +} + +static int +post_AT(CHKARGS) +{ + static const char * const unix_versions[] = { + "7th Edition", + "System III", + "System V", + "System V Release 2", + }; + + const char *p, *s; + struct man_node *nn; + + n = n->child; + + if (NULL == n || MAN_TEXT != n->type) + p = unix_versions[0]; + else { + s = n->string; + if (0 == strcmp(s, "3")) + p = unix_versions[0]; + else if (0 == strcmp(s, "4")) + p = unix_versions[1]; + else if (0 == strcmp(s, "5")) { + nn = n->next; + if (nn && MAN_TEXT == nn->type && nn->string[0]) + p = unix_versions[3]; + else + p = unix_versions[2]; + } else + p = unix_versions[0]; + } + + if (m->meta.source) + free(m->meta.source); + + m->meta.source = mandoc_strdup(p); + return(1); +} + +static int +post_vs(CHKARGS) +{ + + /* + * Don't warn about this because it occurs in pod2man and would + * cause considerable (unfixable) warnage. 
+ */ + if (NULL == n->prev && MAN_ROOT == n->parent->type) + man_node_delete(m, n); + + return(1); +} diff --git a/usr/src/cmd/mandoc/mandoc.c b/usr/src/cmd/mandoc/mandoc.c new file mode 100644 index 0000000000..604bb67e6a --- /dev/null +++ b/usr/src/cmd/mandoc/mandoc.c @@ -0,0 +1,735 @@ +/* $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" + +#define DATESIZE 32 + +static int a2time(time_t *, const char *, const char *); +static char *time2a(time_t); +static int numescape(const char *); + +/* + * Pass over recursive numerical expressions. This context of this + * function is important: it's only called within character-terminating + * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial + * recursion: we don't care about what's in these blocks. 
+ * This returns the number of characters skipped or -1 if an error + * occurs (the caller should bail). + */ +static int +numescape(const char *start) +{ + int i; + size_t sz; + const char *cp; + + i = 0; + + /* The expression consists of a subexpression. */ + + if ('\\' == start[i]) { + cp = &start[++i]; + /* + * Read past the end of the subexpression. + * Bail immediately on errors. + */ + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + return(i + cp - &start[i]); + } + + if ('(' != start[i++]) + return(0); + + /* + * A parenthesised subexpression. Read until the closing + * parenthesis, making sure to handle any nested subexpressions + * that might ruin our parse. + */ + + while (')' != start[i]) { + sz = strcspn(&start[i], ")\\"); + i += (int)sz; + + if ('\0' == start[i]) + return(-1); + else if ('\\' != start[i]) + continue; + + cp = &start[++i]; + if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) + return(-1); + i += cp - &start[i]; + } + + /* Read past the terminating ')'. */ + return(++i); +} + +enum mandoc_esc +mandoc_escape(const char **end, const char **start, int *sz) +{ + char c, term, numeric; + int i, lim, ssz, rlim; + const char *cp, *rstart; + enum mandoc_esc gly; + + cp = *end; + rstart = cp; + if (start) + *start = rstart; + i = lim = 0; + gly = ESCAPE_ERROR; + term = numeric = '\0'; + + switch ((c = cp[i++])) { + /* + * First the glyphs. There are several different forms of + * these, but each eventually returns a substring of the glyph + * name. + */ + case ('('): + gly = ESCAPE_SPECIAL; + lim = 2; + break; + case ('['): + gly = ESCAPE_SPECIAL; + /* + * Unicode escapes are defined in groff as \[uXXXX] to + * \[u10FFFF], where the contained value must be a valid + * Unicode codepoint. Here, however, only check whether + * it's not a zero-width escape. 
+ */ + if ('u' == cp[i] && ']' != cp[i + 1]) + gly = ESCAPE_UNICODE; + term = ']'; + break; + case ('C'): + if ('\'' != cp[i]) + return(ESCAPE_ERROR); + gly = ESCAPE_SPECIAL; + term = '\''; + break; + + /* + * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where + * 'X' is the trigger. These have opaque sub-strings. + */ + case ('F'): + /* FALLTHROUGH */ + case ('g'): + /* FALLTHROUGH */ + case ('k'): + /* FALLTHROUGH */ + case ('M'): + /* FALLTHROUGH */ + case ('m'): + /* FALLTHROUGH */ + case ('n'): + /* FALLTHROUGH */ + case ('V'): + /* FALLTHROUGH */ + case ('Y'): + gly = ESCAPE_IGNORE; + /* FALLTHROUGH */ + case ('f'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_FONT; + + rstart= &cp[i]; + if (start) + *start = rstart; + + switch (cp[i++]) { + case ('('): + lim = 2; + break; + case ('['): + term = ']'; + break; + default: + lim = 1; + i--; + break; + } + break; + + /* + * These escapes are of the form \X'Y', where 'X' is the trigger + * and 'Y' is any string. These have opaque sub-strings. + */ + case ('A'): + /* FALLTHROUGH */ + case ('b'): + /* FALLTHROUGH */ + case ('D'): + /* FALLTHROUGH */ + case ('o'): + /* FALLTHROUGH */ + case ('R'): + /* FALLTHROUGH */ + case ('X'): + /* FALLTHROUGH */ + case ('Z'): + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + gly = ESCAPE_IGNORE; + term = '\''; + break; + + /* + * These escapes are of the form \X'N', where 'X' is the trigger + * and 'N' resolves to a numerical expression. + */ + case ('B'): + /* FALLTHROUGH */ + case ('h'): + /* FALLTHROUGH */ + case ('H'): + /* FALLTHROUGH */ + case ('L'): + /* FALLTHROUGH */ + case ('l'): + gly = ESCAPE_NUMBERED; + /* FALLTHROUGH */ + case ('S'): + /* FALLTHROUGH */ + case ('v'): + /* FALLTHROUGH */ + case ('w'): + /* FALLTHROUGH */ + case ('x'): + if (ESCAPE_ERROR == gly) + gly = ESCAPE_IGNORE; + if ('\'' != cp[i++]) + return(ESCAPE_ERROR); + term = numeric = '\''; + break; + + /* + * Special handling for the numbered character escape. 
+ * XXX Do any other escapes need similar handling? + */ + case ('N'): + if ('\0' == cp[i]) + return(ESCAPE_ERROR); + *end = &cp[++i]; + if (isdigit((unsigned char)cp[i-1])) + return(ESCAPE_IGNORE); + while (isdigit((unsigned char)**end)) + (*end)++; + if (start) + *start = &cp[i]; + if (sz) + *sz = *end - &cp[i]; + if ('\0' != **end) + (*end)++; + return(ESCAPE_NUMBERED); + + /* + * Sizes get a special category of their own. + */ + case ('s'): + gly = ESCAPE_IGNORE; + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + switch (cp[i++]) { + case ('('): + lim = 2; + break; + case ('['): + term = numeric = ']'; + break; + case ('\''): + term = numeric = '\''; + break; + default: + lim = 1; + i--; + break; + } + + /* See +/- counts as a sign. */ + c = cp[i]; + if ('+' == c || '-' == c || ASCII_HYPH == c) + ++i; + + break; + + /* + * Anything else is assumed to be a glyph. + */ + default: + gly = ESCAPE_SPECIAL; + lim = 1; + i--; + break; + } + + assert(ESCAPE_ERROR != gly); + + rstart = &cp[i]; + if (start) + *start = rstart; + + /* + * If a terminating block has been specified, we need to + * handle the case of recursion, which could have their + * own terminating blocks that mess up our parse. This, by the + * way, means that the "start" and "size" values will be + * effectively meaningless. + */ + + ssz = 0; + if (numeric && -1 == (ssz = numescape(&cp[i]))) + return(ESCAPE_ERROR); + + i += ssz; + rlim = -1; + + /* + * We have a character terminator. Try to read up to that + * character. If we can't (i.e., we hit the nil), then return + * an error; if we can, calculate our length, read past the + * terminating character, and exit. 
+ */ + + if ('\0' != term) { + *end = strchr(&cp[i], term); + if ('\0' == *end) + return(ESCAPE_ERROR); + + rlim = *end - &cp[i]; + if (sz) + *sz = rlim; + (*end)++; + goto out; + } + + assert(lim > 0); + + /* + * We have a numeric limit. If the string is shorter than that, + * stop and return an error. Else adjust our endpoint, length, + * and return the current glyph. + */ + + if ((size_t)lim > strlen(&cp[i])) + return(ESCAPE_ERROR); + + rlim = lim; + if (sz) + *sz = rlim; + + *end = &cp[i] + lim; + +out: + assert(rlim >= 0 && rstart); + + /* Run post-processors. */ + + switch (gly) { + case (ESCAPE_FONT): + /* + * Pretend that the constant-width font modes are the + * same as the regular font modes. + */ + if (2 == rlim && 'C' == *rstart) + rstart++; + else if (1 != rlim) + break; + + switch (*rstart) { + case ('3'): + /* FALLTHROUGH */ + case ('B'): + gly = ESCAPE_FONTBOLD; + break; + case ('2'): + /* FALLTHROUGH */ + case ('I'): + gly = ESCAPE_FONTITALIC; + break; + case ('P'): + gly = ESCAPE_FONTPREV; + break; + case ('1'): + /* FALLTHROUGH */ + case ('R'): + gly = ESCAPE_FONTROMAN; + break; + } + break; + case (ESCAPE_SPECIAL): + if (1 != rlim) + break; + if ('c' == *rstart) + gly = ESCAPE_NOSPACE; + break; + default: + break; + } + + return(gly); +} + +void * +mandoc_calloc(size_t num, size_t size) +{ + void *ptr; + + ptr = calloc(num, size); + if (NULL == ptr) { + perror(NULL); + exit((int)MANDOCLEVEL_SYSERR); + } + + return(ptr); +} + + +void * +mandoc_malloc(size_t size) +{ + void *ptr; + + ptr = malloc(size); + if (NULL == ptr) { + perror(NULL); + exit((int)MANDOCLEVEL_SYSERR); + } + + return(ptr); +} + + +void * +mandoc_realloc(void *ptr, size_t size) +{ + + ptr = realloc(ptr, size); + if (NULL == ptr) { + perror(NULL); + exit((int)MANDOCLEVEL_SYSERR); + } + + return(ptr); +} + +char * +mandoc_strndup(const char *ptr, size_t sz) +{ + char *p; + + p = mandoc_malloc(sz + 1); + memcpy(p, ptr, sz); + p[(int)sz] = '\0'; + return(p); +} + +char * 
+mandoc_strdup(const char *ptr) +{ + char *p; + + p = strdup(ptr); + if (NULL == p) { + perror(NULL); + exit((int)MANDOCLEVEL_SYSERR); + } + + return(p); +} + +/* + * Parse a quoted or unquoted roff-style request or macro argument. + * Return a pointer to the parsed argument, which is either the original + * pointer or advanced by one byte in case the argument is quoted. + * Null-terminate the argument in place. + * Collapse pairs of quotes inside quoted arguments. + * Advance the argument pointer to the next argument, + * or to the null byte terminating the argument line. + */ +char * +mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) +{ + char *start, *cp; + int quoted, pairs, white; + + /* Quoting can only start with a new word. */ + start = *cpp; + quoted = 0; + if ('"' == *start) { + quoted = 1; + start++; + } + + pairs = 0; + white = 0; + for (cp = start; '\0' != *cp; cp++) { + /* Move left after quoted quotes and escaped backslashes. */ + if (pairs) + cp[-pairs] = cp[0]; + if ('\\' == cp[0]) { + if ('\\' == cp[1]) { + /* Poor man's copy mode. */ + pairs++; + cp++; + } else if (0 == quoted && ' ' == cp[1]) + /* Skip escaped blanks. */ + cp++; + } else if (0 == quoted) { + if (' ' == cp[0]) { + /* Unescaped blanks end unquoted args. */ + white = 1; + break; + } + } else if ('"' == cp[0]) { + if ('"' == cp[1]) { + /* Quoted quotes collapse. */ + pairs++; + cp++; + } else { + /* Unquoted quotes end quoted args. */ + quoted = 2; + break; + } + } + } + + /* Quoted argument without a closing quote. */ + if (1 == quoted) + mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); + + /* Null-terminate this argument and move to the next one. */ + if (pairs) + cp[-pairs] = '\0'; + if ('\0' != *cp) { + *cp++ = '\0'; + while (' ' == *cp) + cp++; + } + *pos += (int)(cp - start) + (quoted ? 
1 : 0); + *cpp = cp; + + if ('\0' == *cp && (white || ' ' == cp[-1])) + mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); + + return(start); +} + +static int +a2time(time_t *t, const char *fmt, const char *p) +{ + struct tm tm; + char *pp; + + memset(&tm, 0, sizeof(struct tm)); + + pp = NULL; +#ifdef HAVE_STRPTIME + pp = strptime(p, fmt, &tm); +#endif + if (NULL != pp && '\0' == *pp) { + *t = mktime(&tm); + return(1); + } + + return(0); +} + +static char * +time2a(time_t t) +{ + struct tm *tm; + char *buf, *p; + size_t ssz; + int isz; + + tm = localtime(&t); + + /* + * Reserve space: + * up to 9 characters for the month (September) + blank + * up to 2 characters for the day + comma + blank + * 4 characters for the year and a terminating '\0' + */ + p = buf = mandoc_malloc(10 + 4 + 4 + 1); + + if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) + goto fail; + p += (int)ssz; + + if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) + goto fail; + p += isz; + + if (0 == strftime(p, 4 + 1, "%Y", tm)) + goto fail; + return(buf); + +fail: + free(buf); + return(NULL); +} + +char * +mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) +{ + char *out; + time_t t; + + if (NULL == in || '\0' == *in || + 0 == strcmp(in, "$" "Mdocdate$")) { + mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); + time(&t); + } + else if (a2time(&t, "%Y-%m-%d", in)) + t = 0; + else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && + !a2time(&t, "%b %d, %Y", in)) { + mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); + t = 0; + } + out = t ? time2a(t) : NULL; + return(out ? out : mandoc_strdup(in)); +} + +int +mandoc_eos(const char *p, size_t sz, int enclosed) +{ + const char *q; + int found; + + if (0 == sz) + return(0); + + /* + * End-of-sentence recognition must include situations where + * some symbols, such as `)', allow prior EOS punctuation to + * propagate outward. 
+ */ + + found = 0; + for (q = p + (int)sz - 1; q >= p; q--) { + switch (*q) { + case ('\"'): + /* FALLTHROUGH */ + case ('\''): + /* FALLTHROUGH */ + case (']'): + /* FALLTHROUGH */ + case (')'): + if (0 == found) + enclosed = 1; + break; + case ('.'): + /* FALLTHROUGH */ + case ('!'): + /* FALLTHROUGH */ + case ('?'): + found = 1; + break; + default: + return(found && (!enclosed || isalnum((unsigned char)*q))); + } + } + + return(found && !enclosed); +} + +/* + * Find out whether a line is a macro line or not. If it is, adjust the + * current position and return one; if it isn't, return zero and don't + * change the current position. + */ +int +mandoc_getcontrol(const char *cp, int *ppos) +{ + int pos; + + pos = *ppos; + + if ('\\' == cp[pos] && '.' == cp[pos + 1]) + pos += 2; + else if ('.' == cp[pos] || '\'' == cp[pos]) + pos++; + else + return(0); + + while (' ' == cp[pos] || '\t' == cp[pos]) + pos++; + + *ppos = pos; + return(1); +} + +/* + * Convert a string to a long that may not be <0. + * If the string is invalid, or is less than 0, return -1. + */ +int +mandoc_strntoi(const char *p, size_t sz, int base) +{ + char buf[32]; + char *ep; + long v; + + if (sz > 31) + return(-1); + + memcpy(buf, p, sz); + buf[(int)sz] = '\0'; + + errno = 0; + v = strtol(buf, &ep, base); + + if (buf[0] == '\0' || *ep != '\0') + return(-1); + + if (v > INT_MAX) + v = INT_MAX; + if (v < INT_MIN) + v = INT_MIN; + + return((int)v); +} diff --git a/usr/src/cmd/mandoc/mandoc.h b/usr/src/cmd/mandoc/mandoc.h new file mode 100644 index 0000000000..a37effc5f5 --- /dev/null +++ b/usr/src/cmd/mandoc/mandoc.h @@ -0,0 +1,432 @@ +/* $Id: mandoc.h,v 1.99 2012/02/16 20:51:31 joerg Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MANDOC_H +#define MANDOC_H + +#define ASCII_NBRSP 31 /* non-breaking space */ +#define ASCII_HYPH 30 /* breakable hyphen */ + +/* + * Status level. This refers to both internal status (i.e., whilst + * running, when warnings/errors are reported) and an indicator of a + * threshold of when to halt (when said internal state exceeds the + * threshold). + */ +enum mandoclevel { + MANDOCLEVEL_OK = 0, + MANDOCLEVEL_RESERVED, + MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */ + MANDOCLEVEL_ERROR, /* input has been thrown away */ + MANDOCLEVEL_FATAL, /* input is borked */ + MANDOCLEVEL_BADARG, /* bad argument in invocation */ + MANDOCLEVEL_SYSERR, /* system error */ + MANDOCLEVEL_MAX +}; + +/* + * All possible things that can go wrong within a parse, be it libroff, + * libmdoc, or libman. 
+ */ +enum mandocerr { + MANDOCERR_OK, + + MANDOCERR_WARNING, /* ===== start of warnings ===== */ + + /* related to the prologue */ + MANDOCERR_NOTITLE, /* no title in document */ + MANDOCERR_UPPERCASE, /* document title should be all caps */ + MANDOCERR_BADMSEC, /* unknown manual section */ + MANDOCERR_NODATE, /* date missing, using today's date */ + MANDOCERR_BADDATE, /* cannot parse date, using it verbatim */ + MANDOCERR_PROLOGOOO, /* prologue macros out of order */ + MANDOCERR_PROLOGREP, /* duplicate prologue macro */ + MANDOCERR_BADPROLOG, /* macro not allowed in prologue */ + MANDOCERR_BADBODY, /* macro not allowed in body */ + + /* related to document structure */ + MANDOCERR_SO, /* .so is fragile, better use ln(1) */ + MANDOCERR_NAMESECFIRST, /* NAME section must come first */ + MANDOCERR_BADNAMESEC, /* bad NAME section contents */ + MANDOCERR_NONAME, /* manual name not yet set */ + MANDOCERR_SECOOO, /* sections out of conventional order */ + MANDOCERR_SECREP, /* duplicate section name */ + MANDOCERR_SECMSEC, /* section not in conventional manual section */ + + /* related to macros and nesting */ + MANDOCERR_MACROOBS, /* skipping obsolete macro */ + MANDOCERR_IGNPAR, /* skipping paragraph macro */ + MANDOCERR_IGNNS, /* skipping no-space macro */ + MANDOCERR_SCOPENEST, /* blocks badly nested */ + MANDOCERR_CHILD, /* child violates parent syntax */ + MANDOCERR_NESTEDDISP, /* nested displays are not portable */ + MANDOCERR_SCOPEREP, /* already in literal mode */ + MANDOCERR_LINESCOPE, /* line scope broken */ + + /* related to missing macro arguments */ + MANDOCERR_MACROEMPTY, /* skipping empty macro */ + MANDOCERR_ARGCWARN, /* argument count wrong */ + MANDOCERR_DISPTYPE, /* missing display type */ + MANDOCERR_LISTFIRST, /* list type must come first */ + MANDOCERR_NOWIDTHARG, /* tag lists require a width argument */ + MANDOCERR_FONTTYPE, /* missing font type */ + MANDOCERR_WNOSCOPE, /* skipping end of block that is not open */ + + /* related to bad macro 
arguments */ + MANDOCERR_IGNARGV, /* skipping argument */ + MANDOCERR_ARGVREP, /* duplicate argument */ + MANDOCERR_DISPREP, /* duplicate display type */ + MANDOCERR_LISTREP, /* duplicate list type */ + MANDOCERR_BADATT, /* unknown AT&T UNIX version */ + MANDOCERR_BADBOOL, /* bad Boolean value */ + MANDOCERR_BADFONT, /* unknown font */ + MANDOCERR_BADSTANDARD, /* unknown standard specifier */ + MANDOCERR_BADWIDTH, /* bad width argument */ + + /* related to plain text */ + MANDOCERR_NOBLANKLN, /* blank line in non-literal context */ + MANDOCERR_BADTAB, /* tab in non-literal context */ + MANDOCERR_EOLNSPACE, /* end of line whitespace */ + MANDOCERR_BADCOMMENT, /* bad comment style */ + MANDOCERR_BADESCAPE, /* unknown escape sequence */ + MANDOCERR_BADQUOTE, /* unterminated quoted string */ + + /* related to equations */ + MANDOCERR_EQNQUOTE, /* unexpected literal in equation */ + + MANDOCERR_ERROR, /* ===== start of errors ===== */ + + /* related to equations */ + MANDOCERR_EQNNSCOPE, /* unexpected equation scope closure*/ + MANDOCERR_EQNSCOPE, /* equation scope open on exit */ + MANDOCERR_EQNBADSCOPE, /* overlapping equation scopes */ + MANDOCERR_EQNEOF, /* unexpected end of equation */ + MANDOCERR_EQNSYNT, /* equation syntax error */ + + /* related to tables */ + MANDOCERR_TBL, /* bad table syntax */ + MANDOCERR_TBLOPT, /* bad table option */ + MANDOCERR_TBLLAYOUT, /* bad table layout */ + MANDOCERR_TBLNOLAYOUT, /* no table layout cells specified */ + MANDOCERR_TBLNODATA, /* no table data cells specified */ + MANDOCERR_TBLIGNDATA, /* ignore data in cell */ + MANDOCERR_TBLBLOCK, /* data block still open */ + MANDOCERR_TBLEXTRADAT, /* ignoring extra data cells */ + + MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? 
*/ + MANDOCERR_BADCHAR, /* skipping bad character */ + MANDOCERR_NAMESC, /* escaped character not allowed in a name */ + MANDOCERR_NOTEXT, /* skipping text before the first section header */ + MANDOCERR_MACRO, /* skipping unknown macro */ + MANDOCERR_REQUEST, /* NOT IMPLEMENTED: skipping request */ + MANDOCERR_ARGCOUNT, /* argument count wrong */ + MANDOCERR_NOSCOPE, /* skipping end of block that is not open */ + MANDOCERR_SCOPEBROKEN, /* missing end of block */ + MANDOCERR_SCOPEEXIT, /* scope open on exit */ + MANDOCERR_UNAME, /* uname(3) system call failed */ + /* FIXME: merge following with MANDOCERR_ARGCOUNT */ + MANDOCERR_NOARGS, /* macro requires line argument(s) */ + MANDOCERR_NOBODY, /* macro requires body argument(s) */ + MANDOCERR_NOARGV, /* macro requires argument(s) */ + MANDOCERR_LISTTYPE, /* missing list type */ + MANDOCERR_ARGSLOST, /* line argument(s) will be lost */ + MANDOCERR_BODYLOST, /* body argument(s) will be lost */ + + MANDOCERR_FATAL, /* ===== start of fatal errors ===== */ + + MANDOCERR_NOTMANUAL, /* manual isn't really a manual */ + MANDOCERR_COLUMNS, /* column syntax is inconsistent */ + MANDOCERR_BADDISP, /* NOT IMPLEMENTED: .Bd -file */ + MANDOCERR_SYNTARGVCOUNT, /* argument count wrong, violates syntax */ + MANDOCERR_SYNTCHILD, /* child violates parent syntax */ + MANDOCERR_SYNTARGCOUNT, /* argument count wrong, violates syntax */ + MANDOCERR_SOPATH, /* NOT IMPLEMENTED: .so with absolute path or ".." 
*/ + MANDOCERR_NODOCBODY, /* no document body */ + MANDOCERR_NODOCPROLOG, /* no document prologue */ + MANDOCERR_MEM, /* static buffer exhausted */ + MANDOCERR_MAX +}; + +struct tbl { + char tab; /* cell-separator */ + char decimal; /* decimal point */ + int linesize; + int opts; +#define TBL_OPT_CENTRE (1 << 0) +#define TBL_OPT_EXPAND (1 << 1) +#define TBL_OPT_BOX (1 << 2) +#define TBL_OPT_DBOX (1 << 3) +#define TBL_OPT_ALLBOX (1 << 4) +#define TBL_OPT_NOKEEP (1 << 5) +#define TBL_OPT_NOSPACE (1 << 6) + int cols; /* number of columns */ +}; + +enum tbl_headt { + TBL_HEAD_DATA, /* plug in data from tbl_dat */ + TBL_HEAD_VERT, /* vertical spacer */ + TBL_HEAD_DVERT /* double-vertical spacer */ +}; + +/* + * The head of a table specifies all of its columns. When formatting a + * tbl_span, iterate over these and plug in data from the tbl_span when + * appropriate, using tbl_cell as a guide to placement. + */ +struct tbl_head { + enum tbl_headt pos; + int ident; /* 0 <= unique id < cols */ + struct tbl_head *next; + struct tbl_head *prev; +}; + +enum tbl_cellt { + TBL_CELL_CENTRE, /* c, C */ + TBL_CELL_RIGHT, /* r, R */ + TBL_CELL_LEFT, /* l, L */ + TBL_CELL_NUMBER, /* n, N */ + TBL_CELL_SPAN, /* s, S */ + TBL_CELL_LONG, /* a, A */ + TBL_CELL_DOWN, /* ^ */ + TBL_CELL_HORIZ, /* _, - */ + TBL_CELL_DHORIZ, /* = */ + TBL_CELL_VERT, /* | */ + TBL_CELL_DVERT, /* || */ + TBL_CELL_MAX +}; + +/* + * A cell in a layout row. + */ +struct tbl_cell { + struct tbl_cell *next; + enum tbl_cellt pos; + size_t spacing; + int flags; +#define TBL_CELL_TALIGN (1 << 0) /* t, T */ +#define TBL_CELL_BALIGN (1 << 1) /* d, D */ +#define TBL_CELL_BOLD (1 << 2) /* fB, B, b */ +#define TBL_CELL_ITALIC (1 << 3) /* fI, I, i */ +#define TBL_CELL_EQUAL (1 << 4) /* e, E */ +#define TBL_CELL_UP (1 << 5) /* u, U */ +#define TBL_CELL_WIGN (1 << 6) /* z, Z */ + struct tbl_head *head; +}; + +/* + * A layout row. 
+ */ +struct tbl_row { + struct tbl_row *next; + struct tbl_cell *first; + struct tbl_cell *last; +}; + +enum tbl_datt { + TBL_DATA_NONE, /* has no data */ + TBL_DATA_DATA, /* consists of data/string */ + TBL_DATA_HORIZ, /* horizontal line */ + TBL_DATA_DHORIZ, /* double-horizontal line */ + TBL_DATA_NHORIZ, /* squeezed horizontal line */ + TBL_DATA_NDHORIZ /* squeezed double-horizontal line */ +}; + +/* + * A cell within a row of data. The "string" field contains the actual + * string value that's in the cell. The rest is layout. + */ +struct tbl_dat { + struct tbl_cell *layout; /* layout cell */ + int spans; /* how many spans follow */ + struct tbl_dat *next; + char *string; /* data (NULL if not TBL_DATA_DATA) */ + enum tbl_datt pos; +}; + +enum tbl_spant { + TBL_SPAN_DATA, /* span consists of data */ + TBL_SPAN_HORIZ, /* span is horizontal line */ + TBL_SPAN_DHORIZ /* span is double horizontal line */ +}; + +/* + * A row of data in a table. + */ +struct tbl_span { + struct tbl *tbl; + struct tbl_head *head; + struct tbl_row *layout; /* layout row */ + struct tbl_dat *first; + struct tbl_dat *last; + int line; /* parse line */ + int flags; +#define TBL_SPAN_FIRST (1 << 0) +#define TBL_SPAN_LAST (1 << 1) + enum tbl_spant pos; + struct tbl_span *next; +}; + +enum eqn_boxt { + EQN_ROOT, /* root of parse tree */ + EQN_TEXT, /* text (number, variable, whatever) */ + EQN_SUBEXPR, /* nested `eqn' subexpression */ + EQN_LIST, /* subexpressions list */ + EQN_MATRIX /* matrix subexpression */ +}; + +enum eqn_markt { + EQNMARK_NONE = 0, + EQNMARK_DOT, + EQNMARK_DOTDOT, + EQNMARK_HAT, + EQNMARK_TILDE, + EQNMARK_VEC, + EQNMARK_DYAD, + EQNMARK_BAR, + EQNMARK_UNDER, + EQNMARK__MAX +}; + +enum eqn_fontt { + EQNFONT_NONE = 0, + EQNFONT_ROMAN, + EQNFONT_BOLD, + EQNFONT_FAT, + EQNFONT_ITALIC, + EQNFONT__MAX +}; + +enum eqn_post { + EQNPOS_NONE = 0, + EQNPOS_OVER, + EQNPOS_SUP, + EQNPOS_SUB, + EQNPOS_TO, + EQNPOS_FROM, + EQNPOS__MAX +}; + +enum eqn_pilet { + EQNPILE_NONE = 0, + 
EQNPILE_PILE, + EQNPILE_CPILE, + EQNPILE_RPILE, + EQNPILE_LPILE, + EQNPILE_COL, + EQNPILE_CCOL, + EQNPILE_RCOL, + EQNPILE_LCOL, + EQNPILE__MAX +}; + + /* + * A "box" is a parsed mathematical expression as defined by the eqn.7 + * grammar. + */ +struct eqn_box { + int size; /* font size of expression */ +#define EQN_DEFSIZE INT_MIN + enum eqn_boxt type; /* type of node */ + struct eqn_box *first; /* first child node */ + struct eqn_box *last; /* last child node */ + struct eqn_box *next; /* node sibling */ + struct eqn_box *parent; /* node parent */ + char *text; /* text (or NULL) */ + char *left; + char *right; + enum eqn_post pos; /* position of next box */ + enum eqn_markt mark; /* a mark about the box */ + enum eqn_fontt font; /* font of box */ + enum eqn_pilet pile; /* equation piling */ +}; + +/* + * An equation consists of a tree of expressions starting at a given + * line and position. + */ +struct eqn { + char *name; /* identifier (or NULL) */ + struct eqn_box *root; /* root mathematical expression */ + int ln; /* invocation line */ + int pos; /* invocation position */ +}; + +/* + * The type of parse sequence. This value is usually passed via the + * mandoc(1) command line of -man and -mdoc. It's almost exclusively + * -mandoc but the others have been retained for compatibility. + */ +enum mparset { + MPARSE_AUTO, /* magically determine the document type */ + MPARSE_MDOC, /* assume -mdoc */ + MPARSE_MAN /* assume -man */ +}; + +enum mandoc_esc { + ESCAPE_ERROR = 0, /* bail! 
unparsable escape */ + ESCAPE_IGNORE, /* escape to be ignored */ + ESCAPE_SPECIAL, /* a regular special character */ + ESCAPE_FONT, /* a generic font mode */ + ESCAPE_FONTBOLD, /* bold font mode */ + ESCAPE_FONTITALIC, /* italic font mode */ + ESCAPE_FONTROMAN, /* roman font mode */ + ESCAPE_FONTPREV, /* previous font mode */ + ESCAPE_NUMBERED, /* a numbered glyph */ + ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_NOSPACE /* suppress space if the last on a line */ +}; + +typedef void (*mandocmsg)(enum mandocerr, enum mandoclevel, + const char *, int, int, const char *); + +struct mparse; +struct mchars; +struct mdoc; +struct man; + +__BEGIN_DECLS + +void *mandoc_calloc(size_t, size_t); +enum mandoc_esc mandoc_escape(const char **, const char **, int *); +void *mandoc_malloc(size_t); +void *mandoc_realloc(void *, size_t); +char *mandoc_strdup(const char *); +char *mandoc_strndup(const char *, size_t); +struct mchars *mchars_alloc(void); +void mchars_free(struct mchars *); +char mchars_num2char(const char *, size_t); +int mchars_num2uc(const char *, size_t); +int mchars_spec2cp(const struct mchars *, + const char *, size_t); +const char *mchars_spec2str(const struct mchars *, + const char *, size_t, size_t *); +struct mparse *mparse_alloc(enum mparset, + enum mandoclevel, mandocmsg, void *); +void mparse_free(struct mparse *); +void mparse_keep(struct mparse *); +enum mandoclevel mparse_readfd(struct mparse *, int, const char *); +enum mandoclevel mparse_readmem(struct mparse *, const void *, size_t, + const char *); +void mparse_reset(struct mparse *); +void mparse_result(struct mparse *, + struct mdoc **, struct man **); +const char *mparse_getkeep(const struct mparse *); +const char *mparse_strerror(enum mandocerr); +const char *mparse_strlevel(enum mandoclevel); + +__END_DECLS + +#endif /*!MANDOC_H*/ diff --git a/usr/src/cmd/mandoc/mdoc.c b/usr/src/cmd/mandoc/mdoc.c new file mode 100644 index 0000000000..81a4ffc96f --- /dev/null +++ 
b/usr/src/cmd/mandoc/mdoc.c @@ -0,0 +1,987 @@ +/* $Id: mdoc.c,v 1.196 2011/09/30 00:13:28 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +const char *const __mdoc_macronames[MDOC_MAX] = { + "Ap", "Dd", "Dt", "Os", + "Sh", "Ss", "Pp", "D1", + "Dl", "Bd", "Ed", "Bl", + "El", "It", "Ad", "An", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + /* LINTED */ + "Xr", "%A", "%B", "%D", + /* LINTED */ + "%I", "%J", "%N", "%O", + /* LINTED */ + "%P", "%R", "%T", "%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", 
"Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", "Lb", "Lp", + "Lk", "Mt", "Brq", "Bro", + /* LINTED */ + "Brc", "%C", "Es", "En", + /* LINTED */ + "Dx", "%Q", "br", "sp", + /* LINTED */ + "%U", "Ta" + }; + +const char *const __mdoc_argnames[MDOC_ARG_MAX] = { + "split", "nosplit", "ragged", + "unfilled", "literal", "file", + "offset", "bullet", "dash", + "hyphen", "item", "enum", + "tag", "diag", "hang", + "ohang", "inset", "column", + "width", "compact", "std", + "filled", "words", "emphasis", + "symbolic", "nested", "centered" + }; + +const char * const *mdoc_macronames = __mdoc_macronames; +const char * const *mdoc_argnames = __mdoc_argnames; + +static void mdoc_node_free(struct mdoc_node *); +static void mdoc_node_unlink(struct mdoc *, + struct mdoc_node *); +static void mdoc_free1(struct mdoc *); +static void mdoc_alloc1(struct mdoc *); +static struct mdoc_node *node_alloc(struct mdoc *, int, int, + enum mdoct, enum mdoc_type); +static int node_append(struct mdoc *, + struct mdoc_node *); +#if 0 +static int mdoc_preptext(struct mdoc *, int, char *, int); +#endif +static int mdoc_ptext(struct mdoc *, int, char *, int); +static int mdoc_pmacro(struct mdoc *, int, char *, int); + +const struct mdoc_node * +mdoc_node(const struct mdoc *m) +{ + + assert( ! (MDOC_HALT & m->flags)); + return(m->first); +} + + +const struct mdoc_meta * +mdoc_meta(const struct mdoc *m) +{ + + assert( ! (MDOC_HALT & m->flags)); + return(&m->meta); +} + + +/* + * Frees volatile resources (parse tree, meta-data, fields). 
+ */ +static void +mdoc_free1(struct mdoc *mdoc) +{ + + if (mdoc->first) + mdoc_node_delete(mdoc, mdoc->first); + if (mdoc->meta.title) + free(mdoc->meta.title); + if (mdoc->meta.os) + free(mdoc->meta.os); + if (mdoc->meta.name) + free(mdoc->meta.name); + if (mdoc->meta.arch) + free(mdoc->meta.arch); + if (mdoc->meta.vol) + free(mdoc->meta.vol); + if (mdoc->meta.msec) + free(mdoc->meta.msec); + if (mdoc->meta.date) + free(mdoc->meta.date); +} + + +/* + * Allocate all volatile resources (parse tree, meta-data, fields). + */ +static void +mdoc_alloc1(struct mdoc *mdoc) +{ + + memset(&mdoc->meta, 0, sizeof(struct mdoc_meta)); + mdoc->flags = 0; + mdoc->lastnamed = mdoc->lastsec = SEC_NONE; + mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node)); + mdoc->first = mdoc->last; + mdoc->last->type = MDOC_ROOT; + mdoc->last->tok = MDOC_MAX; + mdoc->next = MDOC_NEXT_CHILD; +} + + +/* + * Free up volatile resources (see mdoc_free1()), then re-initialise the + * data with mdoc_alloc1(). After invocation, parse data has been reset + * and the parser is ready for re-invocation on a new tree; however, + * cross-parse non-volatile data is kept intact. + */ +void +mdoc_reset(struct mdoc *mdoc) +{ + + mdoc_free1(mdoc); + mdoc_alloc1(mdoc); +} + + +/* + * Completely free up all volatile and non-volatile parse resources. + * After invocation, the pointer is no longer usable. + */ +void +mdoc_free(struct mdoc *mdoc) +{ + + mdoc_free1(mdoc); + free(mdoc); +} + + +/* + * Allocate volatile and non-volatile parse resources. + */ +struct mdoc * +mdoc_alloc(struct roff *roff, struct mparse *parse) +{ + struct mdoc *p; + + p = mandoc_calloc(1, sizeof(struct mdoc)); + + p->parse = parse; + p->roff = roff; + + mdoc_hash_init(); + mdoc_alloc1(p); + return(p); +} + + +/* + * Climb back up the parse tree, validating open scopes. Mostly calls + * through to mdoc_macroend(); returns 1 on success, or sets MDOC_HALT and returns 0. + */ +int +mdoc_endparse(struct mdoc *m) +{ + + assert( ! 
(MDOC_HALT & m->flags)); + if (mdoc_macroend(m)) + return(1); + m->flags |= MDOC_HALT; + return(0); +} + +int +mdoc_addeqn(struct mdoc *m, const struct eqn *ep) +{ + struct mdoc_node *n; + + assert( ! (MDOC_HALT & m->flags)); + + /* No text before an initial macro. */ + + if (SEC_NONE == m->lastnamed) { + mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT); + return(1); + } + + n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN); + n->eqn = ep; + + if ( ! node_append(m, n)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); +} + +int +mdoc_addspan(struct mdoc *m, const struct tbl_span *sp) +{ + struct mdoc_node *n; + + assert( ! (MDOC_HALT & m->flags)); + + /* No text before an initial macro. */ + + if (SEC_NONE == m->lastnamed) { + mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT); + return(1); + } + + n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL); + n->span = sp; + + if ( ! node_append(m, n)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); +} + + +/* + * Main parse routine. Parses a single line -- really just hands off to + * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). + */ +int +mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs) +{ + + assert( ! (MDOC_HALT & m->flags)); + + m->flags |= MDOC_NEWLINE; + + /* + * Let the roff nS register switch SYNOPSIS mode early, + * such that the parser knows at all times + * whether this mode is on or off. + * Note that this mode is also switched by the Sh macro. + */ + if (roff_regisset(m->roff, REG_nS)) { + if (roff_regget(m->roff, REG_nS)) + m->flags |= MDOC_SYNOPSIS; + else + m->flags &= ~MDOC_SYNOPSIS; + } + + return(mandoc_getcontrol(buf, &offs) ? + mdoc_pmacro(m, ln, buf, offs) : + mdoc_ptext(m, ln, buf, offs)); +} + +int +mdoc_macro(MACRO_PROT_ARGS) +{ + assert(tok < MDOC_MAX); + + /* If we're in the body, deny prologue calls. 
*/ + + if (MDOC_PROLOGUE & mdoc_macros[tok].flags && + MDOC_PBODY & m->flags) { + mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY); + return(1); + } + + /* If we're in the prologue, deny "body" macros. */ + + if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) && + ! (MDOC_PBODY & m->flags)) { + mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG); + if (NULL == m->meta.msec) + m->meta.msec = mandoc_strdup("1"); + if (NULL == m->meta.title) + m->meta.title = mandoc_strdup("UNKNOWN"); + if (NULL == m->meta.vol) + m->meta.vol = mandoc_strdup("LOCAL"); + if (NULL == m->meta.os) + m->meta.os = mandoc_strdup("LOCAL"); + if (NULL == m->meta.date) + m->meta.date = mandoc_normdate + (m->parse, NULL, line, ppos); + m->flags |= MDOC_PBODY; + } + + return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf)); +} + + +static int +node_append(struct mdoc *mdoc, struct mdoc_node *p) +{ + + assert(mdoc->last); + assert(mdoc->first); + assert(MDOC_ROOT != p->type); + + switch (mdoc->next) { + case (MDOC_NEXT_SIBLING): + mdoc->last->next = p; + p->prev = mdoc->last; + p->parent = mdoc->last->parent; + break; + case (MDOC_NEXT_CHILD): + mdoc->last->child = p; + p->parent = mdoc->last; + break; + default: + abort(); + /* NOTREACHED */ + } + + p->parent->nchild++; + + /* + * Copy over the normalised-data pointer of our parent. Not + * everybody has one, but copying a null pointer is fine. + */ + + switch (p->type) { + case (MDOC_BODY): + /* FALLTHROUGH */ + case (MDOC_TAIL): + /* FALLTHROUGH */ + case (MDOC_HEAD): + p->norm = p->parent->norm; + break; + default: + break; + } + + if ( ! 
mdoc_valid_pre(mdoc, p)) + return(0); + + switch (p->type) { + case (MDOC_HEAD): + assert(MDOC_BLOCK == p->parent->type); + p->parent->head = p; + break; + case (MDOC_TAIL): + assert(MDOC_BLOCK == p->parent->type); + p->parent->tail = p; + break; + case (MDOC_BODY): + if (p->end) + break; + assert(MDOC_BLOCK == p->parent->type); + p->parent->body = p; + break; + default: + break; + } + + mdoc->last = p; + + switch (p->type) { + case (MDOC_TBL): + /* FALLTHROUGH */ + case (MDOC_TEXT): + if ( ! mdoc_valid_post(mdoc)) + return(0); + break; + default: + break; + } + + return(1); +} + + +static struct mdoc_node * +node_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, enum mdoc_type type) +{ + struct mdoc_node *p; + + p = mandoc_calloc(1, sizeof(struct mdoc_node)); + p->sec = m->lastsec; + p->line = line; + p->pos = pos; + p->tok = tok; + p->type = type; + + /* Flag analysis. */ + + if (MDOC_SYNOPSIS & m->flags) + p->flags |= MDOC_SYNPRETTY; + else + p->flags &= ~MDOC_SYNPRETTY; + if (MDOC_NEWLINE & m->flags) + p->flags |= MDOC_LINE; + m->flags &= ~MDOC_NEWLINE; + + return(p); +} + + +int +mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_TAIL); + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + + +int +mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + assert(m->first); + assert(m->last); + + p = node_alloc(m, line, pos, tok, MDOC_HEAD); + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + + +int +mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BODY); + if ( ! 
node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + + +int +mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok, + struct mdoc_node *body, enum mdoc_endbody end) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BODY); + p->pending = body; + p->end = end; + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_SIBLING; + return(1); +} + + +int +mdoc_block_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_arg *args) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_BLOCK); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case (MDOC_Bd): + /* FALLTHROUGH */ + case (MDOC_Bf): + /* FALLTHROUGH */ + case (MDOC_Bl): + /* FALLTHROUGH */ + case (MDOC_Rs): + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + + +int +mdoc_elem_alloc(struct mdoc *m, int line, int pos, + enum mdoct tok, struct mdoc_arg *args) +{ + struct mdoc_node *p; + + p = node_alloc(m, line, pos, tok, MDOC_ELEM); + p->args = args; + if (p->args) + (args->refcnt)++; + + switch (tok) { + case (MDOC_An): + p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); + break; + default: + break; + } + + if ( ! node_append(m, p)) + return(0); + m->next = MDOC_NEXT_CHILD; + return(1); +} + +int +mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p) +{ + struct mdoc_node *n; + + n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT); + n->string = roff_strdup(m->roff, p); + + if ( ! 
node_append(m, n)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); +} + + +static void +mdoc_node_free(struct mdoc_node *p) +{ + + if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type) + free(p->norm); + if (p->string) + free(p->string); + if (p->args) + mdoc_argv_free(p->args); + free(p); +} + + +static void +mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n) +{ + + /* Adjust siblings. */ + + if (n->prev) + n->prev->next = n->next; + if (n->next) + n->next->prev = n->prev; + + /* Adjust parent. */ + + if (n->parent) { + n->parent->nchild--; + if (n->parent->child == n) + n->parent->child = n->prev ? n->prev : n->next; + if (n->parent->last == n) + n->parent->last = n->prev ? n->prev : NULL; + } + + /* Adjust parse point, if applicable. */ + + if (m && m->last == n) { + if (n->prev) { + m->last = n->prev; + m->next = MDOC_NEXT_SIBLING; + } else { + m->last = n->parent; + m->next = MDOC_NEXT_CHILD; + } + } + + if (m && m->first == n) + m->first = NULL; +} + + +void +mdoc_node_delete(struct mdoc *m, struct mdoc_node *p) +{ + + while (p->child) { + assert(p->nchild); + mdoc_node_delete(m, p->child); + } + assert(0 == p->nchild); + + mdoc_node_unlink(m, p); + mdoc_node_free(p); +} + +#if 0 +/* + * Pre-treat a text line. + * Text lines can consist of equations, which must be handled apart from + * the regular text. + * Thus, use this function to step through a line checking if it has any + * equations embedded in it. + * This must handle multiple equations AND equations that do not end at + * the end-of-line, i.e., will re-enter in the next roff parse. + */ +static int +mdoc_preptext(struct mdoc *m, int line, char *buf, int offs) +{ + char *start, *end; + char delim; + + while ('\0' != buf[offs]) { + /* Mark starting position if eqn is set. */ + start = NULL; + if ('\0' != (delim = roff_eqndelim(m->roff))) + if (NULL != (start = strchr(buf + offs, delim))) + *start++ = '\0'; + + /* Parse text as normal. */ + if ( ! 
mdoc_ptext(m, line, buf, offs)) + return(0); + + /* Continue only if an equation exists. */ + if (NULL == start) + break; + + /* Read past the end of the equation. */ + offs += start - (buf + offs); + assert(start == &buf[offs]); + if (NULL != (end = strchr(buf + offs, delim))) { + *end++ = '\0'; + while (' ' == *end) + end++; + } + + /* Parse the equation itself. */ + roff_openeqn(m->roff, NULL, line, offs, buf); + + /* Process a finished equation? */ + if (roff_closeeqn(m->roff)) + if ( ! mdoc_addeqn(m, roff_eqn(m->roff))) + return(0); + offs += (end - (buf + offs)); + } + + return(1); +} +#endif + +/* + * Parse free-form text, that is, a line that does not begin with the + * control character. + */ +static int +mdoc_ptext(struct mdoc *m, int line, char *buf, int offs) +{ + char *c, *ws, *end; + struct mdoc_node *n; + + /* No text before an initial macro. */ + + if (SEC_NONE == m->lastnamed) { + mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT); + return(1); + } + + assert(m->last); + n = m->last; + + /* + * Divert directly to list processing if we're encountering a + * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry + * (a MDOC_BODY means it's already open, in which case we should + * process within its context in the normal way). + */ + + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->norm->Bl.type) { + /* `Bl' is open without any children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } + + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->norm->Bl.type) { + /* `Bl' has block-level `It' children. */ + m->flags |= MDOC_FREECOL; + return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf)); + } + + /* + * Search for the beginning of unescaped trailing whitespace (ws) + * and for the first character not to be output (end). + */ + + /* FIXME: replace with strcspn(). 
*/ + ws = NULL; + for (c = end = buf + offs; *c; c++) { + switch (*c) { + case ' ': + if (NULL == ws) + ws = c; + continue; + case '\t': + /* + * Always warn about trailing tabs, + * even outside literal context, + * where they should be put on the next line. + */ + if (NULL == ws) + ws = c; + /* + * Strip trailing tabs in literal context only; + * outside, they affect the next line. + */ + if (MDOC_LITERAL & m->flags) + continue; + break; + case '\\': + /* Skip the escaped character, too, if any. */ + if (c[1]) + c++; + /* FALLTHROUGH */ + default: + ws = NULL; + break; + } + end = c + 1; + } + *end = '\0'; + + if (ws) + mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE); + + if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) { + mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN); + + /* + * Insert a `sp' in the case of a blank line. Technically, + * blank lines aren't allowed, but enough manuals assume this + * behaviour that we want to work around it. + */ + if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL)) + return(0); + + m->next = MDOC_NEXT_SIBLING; + return(1); + } + + if ( ! mdoc_word_alloc(m, line, offs, buf+offs)) + return(0); + + if (MDOC_LITERAL & m->flags) + return(1); + + /* + * End-of-sentence check. If the last character is an unescaped + * EOS character, then flag the node as being the end of a + * sentence. The front-end will know how to interpret this. + */ + + assert(buf < end); + + if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0)) + m->last->flags |= MDOC_EOS; + + return(1); +} + + +/* + * Parse a macro line, that is, a line beginning with the control + * character. + */ +static int +mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs) +{ + enum mdoct tok; + int i, sv; + char mac[5]; + struct mdoc_node *n; + + /* Empty post-control lines are ignored. 
*/ + + if ('"' == buf[offs]) { + mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT); + return(1); + } else if ('\0' == buf[offs]) + return(1); + + sv = offs; + + /* + * Copy the first word into a nil-terminated buffer. + * Stop copying after four characters or when a tab, space, or eoln is encountered. + */ + + i = 0; + while (i < 4 && '\0' != buf[offs] && + ' ' != buf[offs] && '\t' != buf[offs]) + mac[i++] = buf[offs++]; + + mac[i] = '\0'; + + tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : MDOC_MAX; /* macro names have two or three characters */ + + if (MDOC_MAX == tok) { + mandoc_vmsg(MANDOCERR_MACRO, m->parse, + ln, sv, "%s", buf + sv - 1); + return(1); + } + + /* Disregard the first trailing tab, if applicable. */ + + if ('\t' == buf[offs]) + offs++; + + /* Jump to the next non-whitespace word. */ + + while (buf[offs] && ' ' == buf[offs]) + offs++; + + /* + * Trailing whitespace. Note that tabs are allowed to be passed + * into the parser as "text", so we only warn about spaces here. + */ + + if ('\0' == buf[offs] && ' ' == buf[offs - 1]) + mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE); + + /* + * If an initial macro or a list invocation, divert directly + * into macro processing. + */ + + if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) { + if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) + goto err; + return(1); + } + + n = m->last; + assert(m->last); + + /* + * If the first macro of a `Bl -column', open an `It' block + * context around the parsed macro. + */ + + if (MDOC_Bl == n->tok && MDOC_BODY == n->type && + LIST_column == n->norm->Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* + * If we're following a block-level `It' within a `Bl -column' + * context (perhaps opened in the above block or in ptext()), + * then open an `It' block context around the parsed macro. 
+ */ + + if (MDOC_It == n->tok && MDOC_BLOCK == n->type && + NULL != n->parent && + MDOC_Bl == n->parent->tok && + LIST_column == n->parent->norm->Bl.type) { + m->flags |= MDOC_FREECOL; + if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf)) + goto err; + return(1); + } + + /* Normal processing of a macro. */ + + if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf)) + goto err; + + return(1); + +err: /* Error out. */ + + m->flags |= MDOC_HALT; + return(0); +} + +enum mdelim +mdoc_isdelim(const char *p) +{ + + if ('\0' == p[0]) + return(DELIM_NONE); + + if ('\0' == p[1]) + switch (p[0]) { + case('('): + /* FALLTHROUGH */ + case('['): + return(DELIM_OPEN); + case('|'): + return(DELIM_MIDDLE); + case('.'): + /* FALLTHROUGH */ + case(','): + /* FALLTHROUGH */ + case(';'): + /* FALLTHROUGH */ + case(':'): + /* FALLTHROUGH */ + case('?'): + /* FALLTHROUGH */ + case('!'): + /* FALLTHROUGH */ + case(')'): + /* FALLTHROUGH */ + case(']'): + return(DELIM_CLOSE); + default: + return(DELIM_NONE); + } + + if ('\\' != p[0]) + return(DELIM_NONE); + + if (0 == strcmp(p + 1, ".")) + return(DELIM_CLOSE); + if (0 == strcmp(p + 1, "*(Ba")) + return(DELIM_MIDDLE); + + return(DELIM_NONE); +} diff --git a/usr/src/cmd/mandoc/mdoc.h b/usr/src/cmd/mandoc/mdoc.h new file mode 100644 index 0000000000..9cee098e7f --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc.h @@ -0,0 +1,392 @@ +/* $Id: mdoc.h,v 1.122 2011/03/22 14:05:45 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef MDOC_H +#define MDOC_H + +enum mdoct { + MDOC_Ap = 0, + MDOC_Dd, + MDOC_Dt, + MDOC_Os, + MDOC_Sh, + MDOC_Ss, + MDOC_Pp, + MDOC_D1, + MDOC_Dl, + MDOC_Bd, + MDOC_Ed, + MDOC_Bl, + MDOC_El, + MDOC_It, + MDOC_Ad, + MDOC_An, + MDOC_Ar, + MDOC_Cd, + MDOC_Cm, + MDOC_Dv, + MDOC_Er, + MDOC_Ev, + MDOC_Ex, + MDOC_Fa, + MDOC_Fd, + MDOC_Fl, + MDOC_Fn, + MDOC_Ft, + MDOC_Ic, + MDOC_In, + MDOC_Li, + MDOC_Nd, + MDOC_Nm, + MDOC_Op, + MDOC_Ot, + MDOC_Pa, + MDOC_Rv, + MDOC_St, + MDOC_Va, + MDOC_Vt, + MDOC_Xr, + MDOC__A, + MDOC__B, + MDOC__D, + MDOC__I, + MDOC__J, + MDOC__N, + MDOC__O, + MDOC__P, + MDOC__R, + MDOC__T, + MDOC__V, + MDOC_Ac, + MDOC_Ao, + MDOC_Aq, + MDOC_At, + MDOC_Bc, + MDOC_Bf, + MDOC_Bo, + MDOC_Bq, + MDOC_Bsx, + MDOC_Bx, + MDOC_Db, + MDOC_Dc, + MDOC_Do, + MDOC_Dq, + MDOC_Ec, + MDOC_Ef, + MDOC_Em, + MDOC_Eo, + MDOC_Fx, + MDOC_Ms, + MDOC_No, + MDOC_Ns, + MDOC_Nx, + MDOC_Ox, + MDOC_Pc, + MDOC_Pf, + MDOC_Po, + MDOC_Pq, + MDOC_Qc, + MDOC_Ql, + MDOC_Qo, + MDOC_Qq, + MDOC_Re, + MDOC_Rs, + MDOC_Sc, + MDOC_So, + MDOC_Sq, + MDOC_Sm, + MDOC_Sx, + MDOC_Sy, + MDOC_Tn, + MDOC_Ux, + MDOC_Xc, + MDOC_Xo, + MDOC_Fo, + MDOC_Fc, + MDOC_Oo, + MDOC_Oc, + MDOC_Bk, + MDOC_Ek, + MDOC_Bt, + MDOC_Hf, + MDOC_Fr, + MDOC_Ud, + MDOC_Lb, + MDOC_Lp, + MDOC_Lk, + MDOC_Mt, + MDOC_Brq, + MDOC_Bro, + MDOC_Brc, + MDOC__C, + MDOC_Es, + MDOC_En, + MDOC_Dx, + MDOC__Q, + MDOC_br, + MDOC_sp, + MDOC__U, + MDOC_Ta, + MDOC_MAX +}; + +enum mdocargt { + MDOC_Split, /* -split */ + MDOC_Nosplit, /* -nosplit */ + MDOC_Ragged, /* -ragged */ + MDOC_Unfilled, /* -unfilled */ + MDOC_Literal, /* -literal */ + MDOC_File, /* -file */ + MDOC_Offset, /* -offset */ + MDOC_Bullet, /* 
-bullet */ + MDOC_Dash, /* -dash */ + MDOC_Hyphen, /* -hyphen */ + MDOC_Item, /* -item */ + MDOC_Enum, /* -enum */ + MDOC_Tag, /* -tag */ + MDOC_Diag, /* -diag */ + MDOC_Hang, /* -hang */ + MDOC_Ohang, /* -ohang */ + MDOC_Inset, /* -inset */ + MDOC_Column, /* -column */ + MDOC_Width, /* -width */ + MDOC_Compact, /* -compact */ + MDOC_Std, /* -std */ + MDOC_Filled, /* -filled */ + MDOC_Words, /* -words */ + MDOC_Emphasis, /* -emphasis */ + MDOC_Symbolic, /* -symbolic */ + MDOC_Nested, /* -nested */ + MDOC_Centred, /* -centered */ + MDOC_ARG_MAX +}; + +enum mdoc_type { + MDOC_TEXT, + MDOC_ELEM, + MDOC_HEAD, + MDOC_TAIL, + MDOC_BODY, + MDOC_BLOCK, + MDOC_TBL, + MDOC_EQN, + MDOC_ROOT +}; + +/* + * Section (named/unnamed) of `Sh'. Note that these appear in the + * conventional order imposed by mdoc.7. In the case of SEC_NONE, no + * section has been invoked (this shouldn't happen). SEC_CUSTOM refers + * to other sections. + */ +enum mdoc_sec { + SEC_NONE = 0, + SEC_NAME, /* NAME */ + SEC_LIBRARY, /* LIBRARY */ + SEC_SYNOPSIS, /* SYNOPSIS */ + SEC_DESCRIPTION, /* DESCRIPTION */ + SEC_IMPLEMENTATION, /* IMPLEMENTATION NOTES */ + SEC_RETURN_VALUES, /* RETURN VALUES */ + SEC_ENVIRONMENT, /* ENVIRONMENT */ + SEC_FILES, /* FILES */ + SEC_EXIT_STATUS, /* EXIT STATUS */ + SEC_EXAMPLES, /* EXAMPLES */ + SEC_DIAGNOSTICS, /* DIAGNOSTICS */ + SEC_COMPATIBILITY, /* COMPATIBILITY */ + SEC_ERRORS, /* ERRORS */ + SEC_SEE_ALSO, /* SEE ALSO */ + SEC_STANDARDS, /* STANDARDS */ + SEC_HISTORY, /* HISTORY */ + SEC_AUTHORS, /* AUTHORS */ + SEC_CAVEATS, /* CAVEATS */ + SEC_BUGS, /* BUGS */ + SEC_SECURITY, /* SECURITY */ + SEC_CUSTOM, + SEC__MAX +}; + +struct mdoc_meta { + char *msec; /* `Dt' section (1, 3p, etc.) */ + char *vol; /* `Dt' volume (implied) */ + char *arch; /* `Dt' arch (i386, etc.) */ + char *date; /* `Dd' normalised date */ + char *title; /* `Dt' title (FOO, etc.) */ + char *os; /* `Os' system (OpenBSD, etc.) 
*/ + char *name; /* leading `Nm' name */ +}; + +/* + * An argument to a macro (multiple values = `-column xxx yyy'). + */ +struct mdoc_argv { + enum mdocargt arg; /* type of argument */ + int line; + int pos; + size_t sz; /* elements in "value" */ + char **value; /* argument strings */ +}; + +/* + * Reference-counted macro arguments. These are refcounted because + * blocks have multiple instances of the same arguments spread across + * the HEAD, BODY, TAIL, and BLOCK node types. + */ +struct mdoc_arg { + size_t argc; + struct mdoc_argv *argv; + unsigned int refcnt; +}; + +/* + * Indicates that a BODY's formatting has ended, but the scope is still + * open. Used for syntax-broken blocks. + */ +enum mdoc_endbody { + ENDBODY_NOT = 0, + ENDBODY_SPACE, /* is broken: append a space */ + ENDBODY_NOSPACE /* is broken: don't append a space */ +}; + +enum mdoc_list { + LIST__NONE = 0, + LIST_bullet, /* -bullet */ + LIST_column, /* -column */ + LIST_dash, /* -dash */ + LIST_diag, /* -diag */ + LIST_enum, /* -enum */ + LIST_hang, /* -hang */ + LIST_hyphen, /* -hyphen */ + LIST_inset, /* -inset */ + LIST_item, /* -item */ + LIST_ohang, /* -ohang */ + LIST_tag, /* -tag */ + LIST_MAX +}; + +enum mdoc_disp { + DISP__NONE = 0, + DISP_centred, /* -centered */ + DISP_ragged, /* -ragged */ + DISP_unfilled, /* -unfilled */ + DISP_filled, /* -filled */ + DISP_literal /* -literal */ +}; + +enum mdoc_auth { + AUTH__NONE = 0, + AUTH_split, /* -split */ + AUTH_nosplit /* -nosplit */ +}; + +enum mdoc_font { + FONT__NONE = 0, + FONT_Em, /* Em, -emphasis */ + FONT_Li, /* Li, -literal */ + FONT_Sy /* Sy, -symbolic */ +}; + +struct mdoc_bd { + const char *offs; /* -offset */ + enum mdoc_disp type; /* -ragged, etc. */ + int comp; /* -compact */ +}; + +struct mdoc_bl { + const char *width; /* -width */ + const char *offs; /* -offset */ + enum mdoc_list type; /* -tag, -enum, etc. 
*/ + int comp; /* -compact */ + size_t ncols; /* -column arg count */ + const char **cols; /* -column val ptr */ +}; + +struct mdoc_bf { + enum mdoc_font font; /* font */ +}; + +struct mdoc_an { + enum mdoc_auth auth; /* -split, etc. */ +}; + +struct mdoc_rs { + int quote_T; /* whether to quote %T */ +}; + +/* + * Consists of normalised node arguments. These should be used instead + * of iterating through the mdoc_arg pointers of a node: defaults are + * provided, etc. + */ +union mdoc_data { + struct mdoc_an An; + struct mdoc_bd Bd; + struct mdoc_bf Bf; + struct mdoc_bl Bl; + struct mdoc_rs Rs; +}; + +/* + * Single node in tree-linked AST. + */ +struct mdoc_node { + struct mdoc_node *parent; /* parent AST node */ + struct mdoc_node *child; /* first child AST node */ + struct mdoc_node *last; /* last child AST node */ + struct mdoc_node *next; /* sibling AST node */ + struct mdoc_node *prev; /* prior sibling AST node */ + int nchild; /* number of children */ + int line; /* parse line */ + int pos; /* parse column */ + enum mdoct tok; /* tok or MDOC_MAX if none */ + int flags; +#define MDOC_VALID (1 << 0) /* has been validated */ +#define MDOC_EOS (1 << 2) /* at sentence boundary */ +#define MDOC_LINE (1 << 3) /* first macro/text on line */ +#define MDOC_SYNPRETTY (1 << 4) /* SYNOPSIS-style formatting */ +#define MDOC_ENDED (1 << 5) /* rendering has been ended */ +#define MDOC_DELIMO (1 << 6) +#define MDOC_DELIMC (1 << 7) + enum mdoc_type type; /* AST node type */ + enum mdoc_sec sec; /* current named section */ + union mdoc_data *norm; /* normalised args */ + /* FIXME: these can be union'd to shave a few bytes. 
*/ + struct mdoc_arg *args; /* BLOCK/ELEM */ + struct mdoc_node *pending; /* BLOCK */ + struct mdoc_node *head; /* BLOCK */ + struct mdoc_node *body; /* BLOCK */ + struct mdoc_node *tail; /* BLOCK */ + char *string; /* TEXT */ + const struct tbl_span *span; /* TBL */ + const struct eqn *eqn; /* EQN */ + enum mdoc_endbody end; /* BODY */ +}; + +/* Names of macros. Index is enum mdoct. */ +extern const char *const *mdoc_macronames; + +/* Names of macro args. Index is enum mdocargt. */ +extern const char *const *mdoc_argnames; + +__BEGIN_DECLS + +struct mdoc; + +const struct mdoc_node *mdoc_node(const struct mdoc *); +const struct mdoc_meta *mdoc_meta(const struct mdoc *); + +__END_DECLS + +#endif /*!MDOC_H*/ diff --git a/usr/src/cmd/mandoc/mdoc_argv.c b/usr/src/cmd/mandoc/mdoc_argv.c new file mode 100644 index 0000000000..08386e09b1 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_argv.c @@ -0,0 +1,716 @@ +/* $Id: mdoc_argv.c,v 1.82 2012/03/23 05:50:24 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +#define MULTI_STEP 5 /* pre-allocate argument values */ +#define DELIMSZ 6 /* max possible size of a delimiter */ + +enum argsflag { + ARGSFL_NONE = 0, + ARGSFL_DELIM, /* handle delimiters of [[::delim::][ ]+]+ */ + ARGSFL_TABSEP /* handle tab/`Ta' separated phrases */ +}; + +enum argvflag { + ARGV_NONE, /* no args to flag (e.g., -split) */ + ARGV_SINGLE, /* one arg to flag (e.g., -file xxx) */ + ARGV_MULTI, /* multiple args (e.g., -column xxx yyy) */ + ARGV_OPT_SINGLE /* optional arg (e.g., -offset [xxx]) */ +}; + +struct mdocarg { + enum argsflag flags; + const enum mdocargt *argvs; +}; + +static void argn_free(struct mdoc_arg *, int); +static enum margserr args(struct mdoc *, int, int *, + char *, enum argsflag, char **); +static int args_checkpunct(const char *, int); +static int argv_multi(struct mdoc *, int, + struct mdoc_argv *, int *, char *); +static int argv_opt_single(struct mdoc *, int, + struct mdoc_argv *, int *, char *); +static int argv_single(struct mdoc *, int, + struct mdoc_argv *, int *, char *); + +static const enum argvflag argvflags[MDOC_ARG_MAX] = { + ARGV_NONE, /* MDOC_Split */ + ARGV_NONE, /* MDOC_Nosplit */ + ARGV_NONE, /* MDOC_Ragged */ + ARGV_NONE, /* MDOC_Unfilled */ + ARGV_NONE, /* MDOC_Literal */ + ARGV_SINGLE, /* MDOC_File */ + ARGV_OPT_SINGLE, /* MDOC_Offset */ + ARGV_NONE, /* MDOC_Bullet */ + ARGV_NONE, /* MDOC_Dash */ + ARGV_NONE, /* MDOC_Hyphen */ + ARGV_NONE, /* MDOC_Item */ + ARGV_NONE, /* MDOC_Enum */ + ARGV_NONE, /* MDOC_Tag */ + ARGV_NONE, /* MDOC_Diag */ + ARGV_NONE, /* MDOC_Hang */ + ARGV_NONE, /* MDOC_Ohang */ + ARGV_NONE, /* MDOC_Inset */ + ARGV_MULTI, /* MDOC_Column */ + ARGV_OPT_SINGLE, /* MDOC_Width */ + ARGV_NONE, /* MDOC_Compact */ + ARGV_NONE, /* MDOC_Std */ + 
ARGV_NONE, /* MDOC_Filled */ + ARGV_NONE, /* MDOC_Words */ + ARGV_NONE, /* MDOC_Emphasis */ + ARGV_NONE, /* MDOC_Symbolic */ + ARGV_NONE /* MDOC_Nested; the MDOC_Centred slot has no initializer and is implicitly zero (ARGV_NONE) */ +}; + +static const enum mdocargt args_Ex[] = { + MDOC_Std, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_An[] = { + MDOC_Split, + MDOC_Nosplit, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bd[] = { + MDOC_Ragged, + MDOC_Unfilled, + MDOC_Filled, + MDOC_Literal, + MDOC_File, + MDOC_Offset, + MDOC_Compact, + MDOC_Centred, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bf[] = { + MDOC_Emphasis, + MDOC_Literal, + MDOC_Symbolic, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bk[] = { + MDOC_Words, + MDOC_ARG_MAX +}; + +static const enum mdocargt args_Bl[] = { + MDOC_Bullet, + MDOC_Dash, + MDOC_Hyphen, + MDOC_Item, + MDOC_Enum, + MDOC_Tag, + MDOC_Diag, + MDOC_Hang, + MDOC_Ohang, + MDOC_Inset, + MDOC_Column, + MDOC_Width, + MDOC_Offset, + MDOC_Compact, + MDOC_Nested, + MDOC_ARG_MAX +}; + +static const struct mdocarg mdocargs[MDOC_MAX] = { + { ARGSFL_NONE, NULL }, /* Ap */ + { ARGSFL_NONE, NULL }, /* Dd */ + { ARGSFL_NONE, NULL }, /* Dt */ + { ARGSFL_NONE, NULL }, /* Os */ + { ARGSFL_NONE, NULL }, /* Sh */ + { ARGSFL_NONE, NULL }, /* Ss */ + { ARGSFL_NONE, NULL }, /* Pp */ + { ARGSFL_DELIM, NULL }, /* D1 */ + { ARGSFL_DELIM, NULL }, /* Dl */ + { ARGSFL_NONE, args_Bd }, /* Bd */ + { ARGSFL_NONE, NULL }, /* Ed */ + { ARGSFL_NONE, args_Bl }, /* Bl */ + { ARGSFL_NONE, NULL }, /* El */ + { ARGSFL_NONE, NULL }, /* It */ + { ARGSFL_DELIM, NULL }, /* Ad */ + { ARGSFL_DELIM, args_An }, /* An */ + { ARGSFL_DELIM, NULL }, /* Ar */ + { ARGSFL_NONE, NULL }, /* Cd */ + { ARGSFL_DELIM, NULL }, /* Cm */ + { ARGSFL_DELIM, NULL }, /* Dv */ + { ARGSFL_DELIM, NULL }, /* Er */ + { ARGSFL_DELIM, NULL }, /* Ev */ + { ARGSFL_NONE, args_Ex }, /* Ex */ + { ARGSFL_DELIM, NULL }, /* Fa */ + { ARGSFL_NONE, NULL }, /* Fd */ + { ARGSFL_DELIM, NULL }, /* Fl */ + { ARGSFL_DELIM, NULL }, /* Fn */ + { ARGSFL_DELIM, NULL }, 
/* Ft */ + { ARGSFL_DELIM, NULL }, /* Ic */ + { ARGSFL_NONE, NULL }, /* In */ + { ARGSFL_DELIM, NULL }, /* Li */ + { ARGSFL_NONE, NULL }, /* Nd */ + { ARGSFL_DELIM, NULL }, /* Nm */ + { ARGSFL_DELIM, NULL }, /* Op */ + { ARGSFL_NONE, NULL }, /* Ot */ + { ARGSFL_DELIM, NULL }, /* Pa */ + { ARGSFL_NONE, args_Ex }, /* Rv */ + { ARGSFL_DELIM, NULL }, /* St */ + { ARGSFL_DELIM, NULL }, /* Va */ + { ARGSFL_DELIM, NULL }, /* Vt */ + { ARGSFL_DELIM, NULL }, /* Xr */ + { ARGSFL_NONE, NULL }, /* %A */ + { ARGSFL_NONE, NULL }, /* %B */ + { ARGSFL_NONE, NULL }, /* %D */ + { ARGSFL_NONE, NULL }, /* %I */ + { ARGSFL_NONE, NULL }, /* %J */ + { ARGSFL_NONE, NULL }, /* %N */ + { ARGSFL_NONE, NULL }, /* %O */ + { ARGSFL_NONE, NULL }, /* %P */ + { ARGSFL_NONE, NULL }, /* %R */ + { ARGSFL_NONE, NULL }, /* %T */ + { ARGSFL_NONE, NULL }, /* %V */ + { ARGSFL_DELIM, NULL }, /* Ac */ + { ARGSFL_NONE, NULL }, /* Ao */ + { ARGSFL_DELIM, NULL }, /* Aq */ + { ARGSFL_DELIM, NULL }, /* At */ + { ARGSFL_DELIM, NULL }, /* Bc */ + { ARGSFL_NONE, args_Bf }, /* Bf */ + { ARGSFL_NONE, NULL }, /* Bo */ + { ARGSFL_DELIM, NULL }, /* Bq */ + { ARGSFL_DELIM, NULL }, /* Bsx */ + { ARGSFL_DELIM, NULL }, /* Bx */ + { ARGSFL_NONE, NULL }, /* Db */ + { ARGSFL_DELIM, NULL }, /* Dc */ + { ARGSFL_NONE, NULL }, /* Do */ + { ARGSFL_DELIM, NULL }, /* Dq */ + { ARGSFL_DELIM, NULL }, /* Ec */ + { ARGSFL_NONE, NULL }, /* Ef */ + { ARGSFL_DELIM, NULL }, /* Em */ + { ARGSFL_NONE, NULL }, /* Eo */ + { ARGSFL_DELIM, NULL }, /* Fx */ + { ARGSFL_DELIM, NULL }, /* Ms */ + { ARGSFL_DELIM, NULL }, /* No */ + { ARGSFL_DELIM, NULL }, /* Ns */ + { ARGSFL_DELIM, NULL }, /* Nx */ + { ARGSFL_DELIM, NULL }, /* Ox */ + { ARGSFL_DELIM, NULL }, /* Pc */ + { ARGSFL_DELIM, NULL }, /* Pf */ + { ARGSFL_NONE, NULL }, /* Po */ + { ARGSFL_DELIM, NULL }, /* Pq */ + { ARGSFL_DELIM, NULL }, /* Qc */ + { ARGSFL_DELIM, NULL }, /* Ql */ + { ARGSFL_NONE, NULL }, /* Qo */ + { ARGSFL_DELIM, NULL }, /* Qq */ + { ARGSFL_NONE, NULL }, /* Re */ + { 
ARGSFL_NONE, NULL }, /* Rs */ + { ARGSFL_DELIM, NULL }, /* Sc */ + { ARGSFL_NONE, NULL }, /* So */ + { ARGSFL_DELIM, NULL }, /* Sq */ + { ARGSFL_NONE, NULL }, /* Sm */ + { ARGSFL_DELIM, NULL }, /* Sx */ + { ARGSFL_DELIM, NULL }, /* Sy */ + { ARGSFL_DELIM, NULL }, /* Tn */ + { ARGSFL_DELIM, NULL }, /* Ux */ + { ARGSFL_DELIM, NULL }, /* Xc */ + { ARGSFL_NONE, NULL }, /* Xo */ + { ARGSFL_NONE, NULL }, /* Fo */ + { ARGSFL_NONE, NULL }, /* Fc */ + { ARGSFL_NONE, NULL }, /* Oo */ + { ARGSFL_DELIM, NULL }, /* Oc */ + { ARGSFL_NONE, args_Bk }, /* Bk */ + { ARGSFL_NONE, NULL }, /* Ek */ + { ARGSFL_NONE, NULL }, /* Bt */ + { ARGSFL_NONE, NULL }, /* Hf */ + { ARGSFL_NONE, NULL }, /* Fr */ + { ARGSFL_NONE, NULL }, /* Ud */ + { ARGSFL_NONE, NULL }, /* Lb */ + { ARGSFL_NONE, NULL }, /* Lp */ + { ARGSFL_DELIM, NULL }, /* Lk */ + { ARGSFL_DELIM, NULL }, /* Mt */ + { ARGSFL_DELIM, NULL }, /* Brq */ + { ARGSFL_NONE, NULL }, /* Bro */ + { ARGSFL_DELIM, NULL }, /* Brc */ + { ARGSFL_NONE, NULL }, /* %C */ + { ARGSFL_NONE, NULL }, /* Es */ + { ARGSFL_NONE, NULL }, /* En */ + { ARGSFL_NONE, NULL }, /* Dx */ + { ARGSFL_NONE, NULL }, /* %Q */ + { ARGSFL_NONE, NULL }, /* br */ + { ARGSFL_NONE, NULL }, /* sp */ + { ARGSFL_NONE, NULL }, /* %U */ + { ARGSFL_NONE, NULL }, /* Ta */ +}; + + +/* + * Parse an argument from line text. This comes in the form of -key + * [value0...], which may either have a single mandatory value, at least + * one mandatory value, an optional single value, or no value. + */ +enum margverr +mdoc_argv(struct mdoc *m, int line, enum mdoct tok, + struct mdoc_arg **v, int *pos, char *buf) +{ + char *p, sv; + struct mdoc_argv tmp; + struct mdoc_arg *arg; + const enum mdocargt *ap; + + if ('\0' == buf[*pos]) + return(ARGV_EOLN); + else if (NULL == (ap = mdocargs[tok].argvs)) + return(ARGV_WORD); + else if ('-' != buf[*pos]) + return(ARGV_WORD); + + /* Seek to the first unescaped space. 
*/ + + p = &buf[++(*pos)]; + + assert(*pos > 0); + + for ( ; buf[*pos] ; (*pos)++) + if (' ' == buf[*pos] && '\\' != buf[*pos - 1]) + break; + + /* + * We want to nil-terminate the word to look it up (it's easier + * that way). But we may not have a flag, in which case we need + * to restore the line as-is. So keep around the stray byte, + * which we'll reset upon exiting (if necessary). + */ + + if ('\0' != (sv = buf[*pos])) + buf[(*pos)++] = '\0'; + + /* + * Now look up the word as a flag. Use temporary storage that + * we'll copy into the node's flags, if necessary. + */ + + memset(&tmp, 0, sizeof(struct mdoc_argv)); + + tmp.line = line; + tmp.pos = *pos; + tmp.arg = MDOC_ARG_MAX; + + while (MDOC_ARG_MAX != (tmp.arg = *ap++)) + if (0 == strcmp(p, mdoc_argnames[tmp.arg])) + break; + + if (MDOC_ARG_MAX == tmp.arg) { + /* + * The flag was not found. + * Restore saved zeroed byte and return as a word. + */ + if (sv) + buf[*pos - 1] = sv; + return(ARGV_WORD); + } + + /* Read to the next word (the argument). */ + + while (buf[*pos] && ' ' == buf[*pos]) + (*pos)++; + + switch (argvflags[tmp.arg]) { + case (ARGV_SINGLE): + if ( ! argv_single(m, line, &tmp, pos, buf)) + return(ARGV_ERROR); + break; + case (ARGV_MULTI): + if ( ! argv_multi(m, line, &tmp, pos, buf)) + return(ARGV_ERROR); + break; + case (ARGV_OPT_SINGLE): + if ( ! 
argv_opt_single(m, line, &tmp, pos, buf)) + return(ARGV_ERROR); + break; + case (ARGV_NONE): + break; + } + + if (NULL == (arg = *v)) + arg = *v = mandoc_calloc(1, sizeof(struct mdoc_arg)); + + arg->argc++; + arg->argv = mandoc_realloc + (arg->argv, arg->argc * sizeof(struct mdoc_argv)); + + memcpy(&arg->argv[(int)arg->argc - 1], + &tmp, sizeof(struct mdoc_argv)); + + return(ARGV_ARG); +} + +void +mdoc_argv_free(struct mdoc_arg *p) +{ + int i; + + if (NULL == p) + return; + + if (p->refcnt) { + --(p->refcnt); + if (p->refcnt) + return; + } + assert(p->argc); + + for (i = (int)p->argc - 1; i >= 0; i--) + argn_free(p, i); + + free(p->argv); + free(p); +} + +static void +argn_free(struct mdoc_arg *p, int iarg) +{ + struct mdoc_argv *arg; + int j; + + arg = &p->argv[iarg]; + + if (arg->sz && arg->value) { + for (j = (int)arg->sz - 1; j >= 0; j--) + free(arg->value[j]); + free(arg->value); + } + + for (--p->argc; iarg < (int)p->argc; iarg++) + p->argv[iarg] = p->argv[iarg+1]; +} + +enum margserr +mdoc_zargs(struct mdoc *m, int line, int *pos, char *buf, char **v) +{ + + return(args(m, line, pos, buf, ARGSFL_NONE, v)); +} + +enum margserr +mdoc_args(struct mdoc *m, int line, int *pos, + char *buf, enum mdoct tok, char **v) +{ + enum argsflag fl; + struct mdoc_node *n; + + fl = mdocargs[tok].flags; + + if (MDOC_It != tok) + return(args(m, line, pos, buf, fl, v)); + + /* + * We know that we're in an `It', so it's reasonable to expect + * us to be sitting in a `Bl'. Someday this may not be the case + * (if we allow random `It's sitting out there), so provide a + * safe fall-back into the default behaviour. 
+ */ + + for (n = m->last; n; n = n->parent) + if (MDOC_Bl == n->tok) + if (LIST_column == n->norm->Bl.type) { + fl = ARGSFL_TABSEP; + break; + } + + return(args(m, line, pos, buf, fl, v)); +} + +static enum margserr +args(struct mdoc *m, int line, int *pos, + char *buf, enum argsflag fl, char **v) +{ + char *p, *pp; + enum margserr rc; + + if ('\0' == buf[*pos]) { + if (MDOC_PPHRASE & m->flags) + return(ARGS_EOLN); + /* + * If we're not in a partial phrase and the flag for + * being a phrase literal is still set, the punctuation + * is unterminated. + */ + if (MDOC_PHRASELIT & m->flags) + mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); + + m->flags &= ~MDOC_PHRASELIT; + return(ARGS_EOLN); + } + + *v = &buf[*pos]; + + if (ARGSFL_DELIM == fl) + if (args_checkpunct(buf, *pos)) + return(ARGS_PUNCT); + + /* + * First handle TABSEP items, restricted to `Bl -column'. This + * ignores conventional token parsing and instead uses tabs or + * `Ta' macros to separate phrases. Phrases are parsed again + * for arguments at a later phase. + */ + + if (ARGSFL_TABSEP == fl) { + /* Scan ahead to tab (can't be escaped). */ + p = strchr(*v, '\t'); + pp = NULL; + + /* Scan ahead to unescaped `Ta'. */ + if ( ! (MDOC_PHRASELIT & m->flags)) + for (pp = *v; ; pp++) { + if (NULL == (pp = strstr(pp, "Ta"))) + break; + if (pp > *v && ' ' != *(pp - 1)) + continue; + if (' ' == *(pp + 2) || '\0' == *(pp + 2)) + break; + } + + /* By default, assume a phrase. */ + rc = ARGS_PHRASE; + + /* + * Adjust new-buffer position to be beyond delimiter + * mark (e.g., Ta -> end + 2). + */ + if (p && pp) { + *pos += pp < p ? 2 : 1; + rc = pp < p ? ARGS_PHRASE : ARGS_PPHRASE; + p = pp < p ? pp : p; + } else if (p && ! pp) { + rc = ARGS_PPHRASE; + *pos += 1; + } else if (pp && ! p) { + p = pp; + *pos += 2; + } else { + rc = ARGS_PEND; + p = strchr(*v, 0); + } + + /* Whitespace check for eoln case... 
*/ + if ('\0' == *p && ' ' == *(p - 1)) + mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); + + *pos += (int)(p - *v); + + /* Strip delimiter's preceding whitespace. */ + pp = p - 1; + while (pp > *v && ' ' == *pp) { + if (pp > *v && '\\' == *(pp - 1)) + break; + pp--; + } + *(pp + 1) = 0; + + /* Strip delimiter's following whitespace. */ + for (pp = &buf[*pos]; ' ' == *pp; pp++, (*pos)++) + /* Skip ahead. */ ; + + return(rc); + } + + /* + * Process a quoted literal. A quote begins with a double-quote + * and ends with a double-quote NOT followed by a double-quote. + * Whitespace is NOT involved in literal termination. + */ + + if (MDOC_PHRASELIT & m->flags || '\"' == buf[*pos]) { + if ( ! (MDOC_PHRASELIT & m->flags)) + *v = &buf[++(*pos)]; + + if (MDOC_PPHRASE & m->flags) + m->flags |= MDOC_PHRASELIT; + + for ( ; buf[*pos]; (*pos)++) { + if ('\"' != buf[*pos]) + continue; + if ('\"' != buf[*pos + 1]) + break; + (*pos)++; + } + + if ('\0' == buf[*pos]) { + if (MDOC_PPHRASE & m->flags) + return(ARGS_QWORD); + mdoc_pmsg(m, line, *pos, MANDOCERR_BADQUOTE); + return(ARGS_QWORD); + } + + m->flags &= ~MDOC_PHRASELIT; + buf[(*pos)++] = '\0'; + + if ('\0' == buf[*pos]) + return(ARGS_QWORD); + + while (' ' == buf[*pos]) + (*pos)++; + + if ('\0' == buf[*pos]) + mdoc_pmsg(m, line, *pos, MANDOCERR_EOLNSPACE); + + return(ARGS_QWORD); + } + + p = &buf[*pos]; + *v = mandoc_getarg(m->parse, &p, line, pos); + + return(ARGS_WORD); +} + +/* + * Check if the string consists only of space-separated closing + * delimiters. This is a bit of a dance: the first must be a close + * delimiter, but it may be followed by middle delimiters. Arbitrary + * whitespace may separate these tokens. + */ +static int +args_checkpunct(const char *buf, int i) +{ + int j; + char dbuf[DELIMSZ]; + enum mdelim d; + + /* First token must be a close-delimiter. 
*/ + + for (j = 0; buf[i] && ' ' != buf[i] && j < DELIMSZ; j++, i++) + dbuf[j] = buf[i]; + + if (DELIMSZ == j) + return(0); + + dbuf[j] = '\0'; + if (DELIM_CLOSE != mdoc_isdelim(dbuf)) + return(0); + + while (' ' == buf[i]) + i++; + + /* Remaining must NOT be open/none. */ + + while (buf[i]) { + j = 0; + while (buf[i] && ' ' != buf[i] && j < DELIMSZ) + dbuf[j++] = buf[i++]; + + if (DELIMSZ == j) + return(0); + + dbuf[j] = '\0'; + d = mdoc_isdelim(dbuf); + if (DELIM_NONE == d || DELIM_OPEN == d) + return(0); + + while (' ' == buf[i]) + i++; + } + + return('\0' == buf[i]); +} + +static int +argv_multi(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + for (v->sz = 0; ; v->sz++) { + if ('-' == buf[*pos]) + break; + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); + if (ARGS_ERROR == ac) + return(0); + else if (ARGS_EOLN == ac) + break; + + if (0 == v->sz % MULTI_STEP) + v->value = mandoc_realloc(v->value, + (v->sz + MULTI_STEP) * sizeof(char *)); + + v->value[(int)v->sz] = mandoc_strdup(p); + } + + return(1); +} + +static int +argv_opt_single(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + enum margserr ac; + char *p; + + if ('-' == buf[*pos]) + return(1); + + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + return(1); + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = mandoc_strdup(p); + + return(1); +} + +static int +argv_single(struct mdoc *m, int line, + struct mdoc_argv *v, int *pos, char *buf) +{ + int ppos; + enum margserr ac; + char *p; + + ppos = *pos; + + ac = args(m, line, pos, buf, ARGSFL_NONE, &p); + if (ARGS_EOLN == ac) { + mdoc_pmsg(m, line, ppos, MANDOCERR_SYNTARGVCOUNT); + return(0); + } else if (ARGS_ERROR == ac) + return(0); + + v->sz = 1; + v->value = mandoc_malloc(sizeof(char *)); + v->value[0] = mandoc_strdup(p); + + return(1); +} diff --git a/usr/src/cmd/mandoc/mdoc_hash.c 
b/usr/src/cmd/mandoc/mdoc_hash.c new file mode 100644 index 0000000000..59a8d26a88 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_hash.c @@ -0,0 +1,94 @@ +/* $Id: mdoc_hash.c,v 1.18 2011/07/24 18:15:14 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +static unsigned char table[27 * 12]; + +/* + * XXX - this hash has global scope, so if intended for use as a library + * with multiple callers, it will need re-invocation protection. 
+ */ +void +mdoc_hash_init(void) +{ + int i, j, major; + const char *p; + + memset(table, UCHAR_MAX, sizeof(table)); + + for (i = 0; i < (int)MDOC_MAX; i++) { + p = mdoc_macronames[i]; + + if (isalpha((unsigned char)p[1])) + major = 12 * (tolower((unsigned char)p[1]) - 97); + else + major = 12 * 26; + + for (j = 0; j < 12; j++) + if (UCHAR_MAX == table[major + j]) { + table[major + j] = (unsigned char)i; + break; + } + + assert(j < 12); + } +} + +enum mdoct +mdoc_hash_find(const char *p) +{ + int major, i, j; + + if (0 == p[0]) + return(MDOC_MAX); + if ( ! isalpha((unsigned char)p[0]) && '%' != p[0]) + return(MDOC_MAX); + + if (isalpha((unsigned char)p[1])) + major = 12 * (tolower((unsigned char)p[1]) - 97); + else if ('1' == p[1]) + major = 12 * 26; + else + return(MDOC_MAX); + + if (p[2] && p[3]) + return(MDOC_MAX); + + for (j = 0; j < 12; j++) { + if (UCHAR_MAX == (i = table[major + j])) + break; + if (0 == strcmp(p, mdoc_macronames[i])) + return((enum mdoct)i); + } + + return(MDOC_MAX); +} diff --git a/usr/src/cmd/mandoc/mdoc_html.c b/usr/src/cmd/mandoc/mdoc_html.c new file mode 100644 index 0000000000..60ea6dc738 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_html.c @@ -0,0 +1,2284 @@ +/* $Id: mdoc_html.c,v 1.182 2011/11/03 20:37:00 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" +#include "mdoc.h" +#include "main.h" + +#define INDENT 5 + +#define MDOC_ARGS const struct mdoc_meta *m, \ + const struct mdoc_node *n, \ + struct html *h + +#ifndef MIN +#define MIN(a,b) ((/*CONSTCOND*/(a)<(b))?(a):(b)) +#endif + +struct htmlmdoc { + int (*pre)(MDOC_ARGS); + void (*post)(MDOC_ARGS); +}; + +static void print_mdoc(MDOC_ARGS); +static void print_mdoc_head(MDOC_ARGS); +static void print_mdoc_node(MDOC_ARGS); +static void print_mdoc_nodelist(MDOC_ARGS); +static void synopsis_pre(struct html *, + const struct mdoc_node *); + +static void a2width(const char *, struct roffsu *); +static void a2offs(const char *, struct roffsu *); + +static void mdoc_root_post(MDOC_ARGS); +static int mdoc_root_pre(MDOC_ARGS); + +static void mdoc__x_post(MDOC_ARGS); +static int mdoc__x_pre(MDOC_ARGS); +static int mdoc_ad_pre(MDOC_ARGS); +static int mdoc_an_pre(MDOC_ARGS); +static int mdoc_ap_pre(MDOC_ARGS); +static int mdoc_ar_pre(MDOC_ARGS); +static int mdoc_bd_pre(MDOC_ARGS); +static int mdoc_bf_pre(MDOC_ARGS); +static void mdoc_bk_post(MDOC_ARGS); +static int mdoc_bk_pre(MDOC_ARGS); +static int mdoc_bl_pre(MDOC_ARGS); +static int mdoc_bt_pre(MDOC_ARGS); +static int mdoc_bx_pre(MDOC_ARGS); +static int mdoc_cd_pre(MDOC_ARGS); +static int mdoc_d1_pre(MDOC_ARGS); +static int mdoc_dv_pre(MDOC_ARGS); +static int mdoc_fa_pre(MDOC_ARGS); +static int mdoc_fd_pre(MDOC_ARGS); +static int 
mdoc_fl_pre(MDOC_ARGS); +static int mdoc_fn_pre(MDOC_ARGS); +static int mdoc_ft_pre(MDOC_ARGS); +static int mdoc_em_pre(MDOC_ARGS); +static int mdoc_er_pre(MDOC_ARGS); +static int mdoc_ev_pre(MDOC_ARGS); +static int mdoc_ex_pre(MDOC_ARGS); +static void mdoc_fo_post(MDOC_ARGS); +static int mdoc_fo_pre(MDOC_ARGS); +static int mdoc_ic_pre(MDOC_ARGS); +static int mdoc_igndelim_pre(MDOC_ARGS); +static int mdoc_in_pre(MDOC_ARGS); +static int mdoc_it_pre(MDOC_ARGS); +static int mdoc_lb_pre(MDOC_ARGS); +static int mdoc_li_pre(MDOC_ARGS); +static int mdoc_lk_pre(MDOC_ARGS); +static int mdoc_mt_pre(MDOC_ARGS); +static int mdoc_ms_pre(MDOC_ARGS); +static int mdoc_nd_pre(MDOC_ARGS); +static int mdoc_nm_pre(MDOC_ARGS); +static int mdoc_ns_pre(MDOC_ARGS); +static int mdoc_pa_pre(MDOC_ARGS); +static void mdoc_pf_post(MDOC_ARGS); +static int mdoc_pp_pre(MDOC_ARGS); +static void mdoc_quote_post(MDOC_ARGS); +static int mdoc_quote_pre(MDOC_ARGS); +static int mdoc_rs_pre(MDOC_ARGS); +static int mdoc_rv_pre(MDOC_ARGS); +static int mdoc_sh_pre(MDOC_ARGS); +static int mdoc_sm_pre(MDOC_ARGS); +static int mdoc_sp_pre(MDOC_ARGS); +static int mdoc_ss_pre(MDOC_ARGS); +static int mdoc_sx_pre(MDOC_ARGS); +static int mdoc_sy_pre(MDOC_ARGS); +static int mdoc_ud_pre(MDOC_ARGS); +static int mdoc_va_pre(MDOC_ARGS); +static int mdoc_vt_pre(MDOC_ARGS); +static int mdoc_xr_pre(MDOC_ARGS); +static int mdoc_xx_pre(MDOC_ARGS); + +static const struct htmlmdoc mdocs[MDOC_MAX] = { + {mdoc_ap_pre, NULL}, /* Ap */ + {NULL, NULL}, /* Dd */ + {NULL, NULL}, /* Dt */ + {NULL, NULL}, /* Os */ + {mdoc_sh_pre, NULL }, /* Sh */ + {mdoc_ss_pre, NULL }, /* Ss */ + {mdoc_pp_pre, NULL}, /* Pp */ + {mdoc_d1_pre, NULL}, /* D1 */ + {mdoc_d1_pre, NULL}, /* Dl */ + {mdoc_bd_pre, NULL}, /* Bd */ + {NULL, NULL}, /* Ed */ + {mdoc_bl_pre, NULL}, /* Bl */ + {NULL, NULL}, /* El */ + {mdoc_it_pre, NULL}, /* It */ + {mdoc_ad_pre, NULL}, /* Ad */ + {mdoc_an_pre, NULL}, /* An */ + {mdoc_ar_pre, NULL}, /* Ar */ + {mdoc_cd_pre, NULL}, /* 
Cd */ + {mdoc_fl_pre, NULL}, /* Cm */ + {mdoc_dv_pre, NULL}, /* Dv */ + {mdoc_er_pre, NULL}, /* Er */ + {mdoc_ev_pre, NULL}, /* Ev */ + {mdoc_ex_pre, NULL}, /* Ex */ + {mdoc_fa_pre, NULL}, /* Fa */ + {mdoc_fd_pre, NULL}, /* Fd */ + {mdoc_fl_pre, NULL}, /* Fl */ + {mdoc_fn_pre, NULL}, /* Fn */ + {mdoc_ft_pre, NULL}, /* Ft */ + {mdoc_ic_pre, NULL}, /* Ic */ + {mdoc_in_pre, NULL}, /* In */ + {mdoc_li_pre, NULL}, /* Li */ + {mdoc_nd_pre, NULL}, /* Nd */ + {mdoc_nm_pre, NULL}, /* Nm */ + {mdoc_quote_pre, mdoc_quote_post}, /* Op */ + {NULL, NULL}, /* Ot */ + {mdoc_pa_pre, NULL}, /* Pa */ + {mdoc_rv_pre, NULL}, /* Rv */ + {NULL, NULL}, /* St */ + {mdoc_va_pre, NULL}, /* Va */ + {mdoc_vt_pre, NULL}, /* Vt */ + {mdoc_xr_pre, NULL}, /* Xr */ + {mdoc__x_pre, mdoc__x_post}, /* %A */ + {mdoc__x_pre, mdoc__x_post}, /* %B */ + {mdoc__x_pre, mdoc__x_post}, /* %D */ + {mdoc__x_pre, mdoc__x_post}, /* %I */ + {mdoc__x_pre, mdoc__x_post}, /* %J */ + {mdoc__x_pre, mdoc__x_post}, /* %N */ + {mdoc__x_pre, mdoc__x_post}, /* %O */ + {mdoc__x_pre, mdoc__x_post}, /* %P */ + {mdoc__x_pre, mdoc__x_post}, /* %R */ + {mdoc__x_pre, mdoc__x_post}, /* %T */ + {mdoc__x_pre, mdoc__x_post}, /* %V */ + {NULL, NULL}, /* Ac */ + {mdoc_quote_pre, mdoc_quote_post}, /* Ao */ + {mdoc_quote_pre, mdoc_quote_post}, /* Aq */ + {NULL, NULL}, /* At */ + {NULL, NULL}, /* Bc */ + {mdoc_bf_pre, NULL}, /* Bf */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bo */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bq */ + {mdoc_xx_pre, NULL}, /* Bsx */ + {mdoc_bx_pre, NULL}, /* Bx */ + {NULL, NULL}, /* Db */ + {NULL, NULL}, /* Dc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Do */ + {mdoc_quote_pre, mdoc_quote_post}, /* Dq */ + {NULL, NULL}, /* Ec */ /* FIXME: no space */ + {NULL, NULL}, /* Ef */ + {mdoc_em_pre, NULL}, /* Em */ + {mdoc_quote_pre, mdoc_quote_post}, /* Eo */ + {mdoc_xx_pre, NULL}, /* Fx */ + {mdoc_ms_pre, NULL}, /* Ms */ + {mdoc_igndelim_pre, NULL}, /* No */ + {mdoc_ns_pre, NULL}, /* Ns */ + {mdoc_xx_pre, NULL}, /* Nx */ + 
{mdoc_xx_pre, NULL}, /* Ox */ + {NULL, NULL}, /* Pc */ + {mdoc_igndelim_pre, mdoc_pf_post}, /* Pf */ + {mdoc_quote_pre, mdoc_quote_post}, /* Po */ + {mdoc_quote_pre, mdoc_quote_post}, /* Pq */ + {NULL, NULL}, /* Qc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Ql */ + {mdoc_quote_pre, mdoc_quote_post}, /* Qo */ + {mdoc_quote_pre, mdoc_quote_post}, /* Qq */ + {NULL, NULL}, /* Re */ + {mdoc_rs_pre, NULL}, /* Rs */ + {NULL, NULL}, /* Sc */ + {mdoc_quote_pre, mdoc_quote_post}, /* So */ + {mdoc_quote_pre, mdoc_quote_post}, /* Sq */ + {mdoc_sm_pre, NULL}, /* Sm */ + {mdoc_sx_pre, NULL}, /* Sx */ + {mdoc_sy_pre, NULL}, /* Sy */ + {NULL, NULL}, /* Tn */ + {mdoc_xx_pre, NULL}, /* Ux */ + {NULL, NULL}, /* Xc */ + {NULL, NULL}, /* Xo */ + {mdoc_fo_pre, mdoc_fo_post}, /* Fo */ + {NULL, NULL}, /* Fc */ + {mdoc_quote_pre, mdoc_quote_post}, /* Oo */ + {NULL, NULL}, /* Oc */ + {mdoc_bk_pre, mdoc_bk_post}, /* Bk */ + {NULL, NULL}, /* Ek */ + {mdoc_bt_pre, NULL}, /* Bt */ + {NULL, NULL}, /* Hf */ + {NULL, NULL}, /* Fr */ + {mdoc_ud_pre, NULL}, /* Ud */ + {mdoc_lb_pre, NULL}, /* Lb */ + {mdoc_pp_pre, NULL}, /* Lp */ + {mdoc_lk_pre, NULL}, /* Lk */ + {mdoc_mt_pre, NULL}, /* Mt */ + {mdoc_quote_pre, mdoc_quote_post}, /* Brq */ + {mdoc_quote_pre, mdoc_quote_post}, /* Bro */ + {NULL, NULL}, /* Brc */ + {mdoc__x_pre, mdoc__x_post}, /* %C */ + {NULL, NULL}, /* Es */ /* TODO */ + {NULL, NULL}, /* En */ /* TODO */ + {mdoc_xx_pre, NULL}, /* Dx */ + {mdoc__x_pre, mdoc__x_post}, /* %Q */ + {mdoc_sp_pre, NULL}, /* br */ + {mdoc_sp_pre, NULL}, /* sp */ + {mdoc__x_pre, mdoc__x_post}, /* %U */ + {NULL, NULL}, /* Ta */ +}; + +static const char * const lists[LIST_MAX] = { + NULL, + "list-bul", + "list-col", + "list-dash", + "list-diag", + "list-enum", + "list-hang", + "list-hyph", + "list-inset", + "list-item", + "list-ohang", + "list-tag" +}; + +void +html_mdoc(void *arg, const struct mdoc *m) +{ + + print_mdoc(mdoc_meta(m), mdoc_node(m), (struct html *)arg); + putchar('\n'); +} + + +/* + * Calculate 
the scaling unit passed in a `-width' argument. This uses + * either a native scaling unit (e.g., 1i, 2m) or the string length of + * the value. + */ +static void +a2width(const char *p, struct roffsu *su) +{ + + if ( ! a2roffsu(p, su, SCALE_MAX)) { + su->unit = SCALE_BU; + su->scale = html_strlen(p); + } +} + + +/* + * See the same function in mdoc_term.c for documentation. + */ +static void +synopsis_pre(struct html *h, const struct mdoc_node *n) +{ + + if (NULL == n->prev || ! (MDOC_SYNPRETTY & n->flags)) + return; + + if (n->prev->tok == n->tok && + MDOC_Fo != n->tok && + MDOC_Ft != n->tok && + MDOC_Fn != n->tok) { + print_otag(h, TAG_BR, 0, NULL); + return; + } + + switch (n->prev->tok) { + case (MDOC_Fd): + /* FALLTHROUGH */ + case (MDOC_Fn): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_In): + /* FALLTHROUGH */ + case (MDOC_Vt): + print_otag(h, TAG_P, 0, NULL); + break; + case (MDOC_Ft): + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + print_otag(h, TAG_P, 0, NULL); + break; + } + /* FALLTHROUGH */ + default: + print_otag(h, TAG_BR, 0, NULL); + break; + } +} + + +/* + * Calculate the scaling unit passed in an `-offset' argument. This + * uses either a native scaling unit (e.g., 1i, 2m), one of a set of + * predefined strings (indent, etc.), or the string length of the value. + */ +static void +a2offs(const char *p, struct roffsu *su) +{ + + /* FIXME: "right"? */ + + if (0 == strcmp(p, "left")) + SCALE_HS_INIT(su, 0); + else if (0 == strcmp(p, "indent")) + SCALE_HS_INIT(su, INDENT); + else if (0 == strcmp(p, "indent-two")) + SCALE_HS_INIT(su, INDENT * 2); + else if ( ! a2roffsu(p, su, SCALE_MAX)) + SCALE_HS_INIT(su, html_strlen(p)); +} + + +static void +print_mdoc(MDOC_ARGS) +{ + struct tag *t, *tt; + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "mandoc"); + + if ( ! 
(HTML_FRAGMENT & h->oflags)) { + print_gen_decls(h); + t = print_otag(h, TAG_HTML, 0, NULL); + tt = print_otag(h, TAG_HEAD, 0, NULL); + print_mdoc_head(m, n, h); + print_tagq(h, tt); + print_otag(h, TAG_BODY, 0, NULL); + print_otag(h, TAG_DIV, 1, &tag); + } else + t = print_otag(h, TAG_DIV, 1, &tag); + + print_mdoc_nodelist(m, n, h); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static void +print_mdoc_head(MDOC_ARGS) +{ + + print_gen_head(h); + bufinit(h); + bufcat_fmt(h, "%s(%s)", m->title, m->msec); + + if (m->arch) + bufcat_fmt(h, " (%s)", m->arch); + + print_otag(h, TAG_TITLE, 0, NULL); + print_text(h, h->buf); +} + + +static void +print_mdoc_nodelist(MDOC_ARGS) +{ + + print_mdoc_node(m, n, h); + if (n->next) + print_mdoc_nodelist(m, n->next, h); +} + + +static void +print_mdoc_node(MDOC_ARGS) +{ + int child; + struct tag *t; + + child = 1; + t = h->tags.head; + + switch (n->type) { + case (MDOC_ROOT): + child = mdoc_root_pre(m, n, h); + break; + case (MDOC_TEXT): + /* No tables in this mode... */ + assert(NULL == h->tblt); + + /* + * Make sure that if we're in a literal mode already + * (i.e., within a <PRE>) don't print the newline. + */ + if (' ' == *n->string && MDOC_LINE & n->flags) + if ( ! (HTML_LITERAL & h->flags)) + print_otag(h, TAG_BR, 0, NULL); + if (MDOC_DELIMC & n->flags) + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + if (MDOC_DELIMO & n->flags) + h->flags |= HTML_NOSPACE; + return; + case (MDOC_EQN): + print_eqn(h, n->eqn); + break; + case (MDOC_TBL): + /* + * This will take care of initialising all of the table + * state data for the first table, then tearing it down + * for the last one. + */ + print_tbl(h, n->span); + return; + default: + /* + * Close out the current table, if it's open, and unset + * the "meta" table state. This will be reopened on the + * next table element. 
+ */ + if (h->tblt) { + print_tblclose(h); + t = h->tags.head; + } + + assert(NULL == h->tblt); + if (mdocs[n->tok].pre && ENDBODY_NOT == n->end) + child = (*mdocs[n->tok].pre)(m, n, h); + break; + } + + if (HTML_KEEP & h->flags) { + if (n->prev && n->prev->line != n->line) { + h->flags &= ~HTML_KEEP; + h->flags |= HTML_PREKEEP; + } else if (NULL == n->prev) { + if (n->parent && n->parent->line != n->line) { + h->flags &= ~HTML_KEEP; + h->flags |= HTML_PREKEEP; + } + } + } + + if (child && n->child) + print_mdoc_nodelist(m, n->child, h); + + print_stagq(h, t); + + switch (n->type) { + case (MDOC_ROOT): + mdoc_root_post(m, n, h); + break; + case (MDOC_EQN): + break; + default: + if (mdocs[n->tok].post && ENDBODY_NOT == n->end) + (*mdocs[n->tok].post)(m, n, h); + break; + } +} + +/* ARGSUSED */ +static void +mdoc_root_post(MDOC_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + + PAIR_SUMMARY_INIT(&tag[0], "Document Footer"); + PAIR_CLASS_INIT(&tag[1], "foot"); + PAIR_INIT(&tag[2], ATTR_WIDTH, "100%"); + t = print_otag(h, TAG_TABLE, 3, tag); + PAIR_INIT(&tag[0], ATTR_WIDTH, "50%"); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag[0], "foot-date"); + print_otag(h, TAG_TD, 1, tag); + print_text(h, m->date); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "foot-os"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "right"); + print_otag(h, TAG_TD, 2, tag); + print_text(h, m->os); + print_tagq(h, t); +} + + +/* ARGSUSED */ +static int +mdoc_root_pre(MDOC_ARGS) +{ + struct htmlpair tag[3]; + struct tag *t, *tt; + char b[BUFSIZ], title[BUFSIZ]; + + strlcpy(b, m->vol, BUFSIZ); + + if (m->arch) { + strlcat(b, " (", BUFSIZ); + strlcat(b, m->arch, BUFSIZ); + strlcat(b, ")", BUFSIZ); + } + + snprintf(title, BUFSIZ - 1, "%s(%s)", m->title, m->msec); + + PAIR_SUMMARY_INIT(&tag[0], "Document Header"); + PAIR_CLASS_INIT(&tag[1], "head"); + PAIR_INIT(&tag[2], 
ATTR_WIDTH, "100%"); + t = print_otag(h, TAG_TABLE, 3, tag); + PAIR_INIT(&tag[0], ATTR_WIDTH, "30%"); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + print_otag(h, TAG_COL, 1, tag); + + print_otag(h, TAG_TBODY, 0, NULL); + + tt = print_otag(h, TAG_TR, 0, NULL); + + PAIR_CLASS_INIT(&tag[0], "head-ltitle"); + print_otag(h, TAG_TD, 1, tag); + print_text(h, title); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "head-vol"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "center"); + print_otag(h, TAG_TD, 2, tag); + print_text(h, b); + print_stagq(h, tt); + + PAIR_CLASS_INIT(&tag[0], "head-rtitle"); + PAIR_INIT(&tag[1], ATTR_ALIGN, "right"); + print_otag(h, TAG_TD, 2, tag); + print_text(h, title); + print_tagq(h, t); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sh_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag, "section"); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } else if (MDOC_BODY == n->type) + return(1); + + bufinit(h); + bufcat(h, "x"); + + for (n = n->child; n && MDOC_TEXT == n->type; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + if (NULL == n) { + PAIR_ID_INIT(&tag, h->buf); + print_otag(h, TAG_H1, 1, &tag); + } else + print_otag(h, TAG_H1, 0, NULL); + + return(1); +} + +/* ARGSUSED */ +static int +mdoc_ss_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BLOCK == n->type) { + PAIR_CLASS_INIT(&tag, "subsection"); + print_otag(h, TAG_DIV, 1, &tag); + return(1); + } else if (MDOC_BODY == n->type) + return(1); + + bufinit(h); + bufcat(h, "x"); + + for (n = n->child; n && MDOC_TEXT == n->type; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + if (NULL == n) { + PAIR_ID_INIT(&tag, h->buf); + print_otag(h, TAG_H2, 1, &tag); + } else + print_otag(h, TAG_H2, 0, NULL); + + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fl_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "flag"); + 
print_otag(h, TAG_B, 1, &tag); + + /* `Cm' has no leading hyphen. */ + + if (MDOC_Cm == n->tok) + return(1); + + print_text(h, "\\-"); + + if (n->child) + h->flags |= HTML_NOSPACE; + else if (n->next && n->next->line == n->line) + h->flags |= HTML_NOSPACE; + + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_nd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BODY != n->type) + return(1); + + /* XXX: this tag in theory can contain block elements. */ + + print_text(h, "\\(em"); + PAIR_CLASS_INIT(&tag, "desc"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +static int +mdoc_nm_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct roffsu su; + int len; + + switch (n->type) { + case (MDOC_ELEM): + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "name"); + print_otag(h, TAG_B, 1, &tag); + if (NULL == n->child && m->name) + print_text(h, m->name); + return(1); + case (MDOC_HEAD): + print_otag(h, TAG_TD, 0, NULL); + if (NULL == n->child && m->name) + print_text(h, m->name); + return(1); + case (MDOC_BODY): + print_otag(h, TAG_TD, 0, NULL); + return(1); + default: + break; + } + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "synopsis"); + print_otag(h, TAG_TABLE, 1, &tag); + + for (len = 0, n = n->child; n; n = n->next) + if (MDOC_TEXT == n->type) + len += html_strlen(n->string); + + if (0 == len && m->name) + len = html_strlen(m->name); + + SCALE_HS_INIT(&su, (double)len); + bufinit(h); + bufcat_su(h, "width", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_COL, 1, &tag); + print_otag(h, TAG_COL, 0, NULL); + print_otag(h, TAG_TBODY, 0, NULL); + print_otag(h, TAG_TR, 0, NULL); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_xr_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + if (NULL == n->child) + return(0); + + PAIR_CLASS_INIT(&tag[0], "link-man"); + + if (h->base_man) { + buffmt_man(h, n->child->string, + n->child->next ? 
+ n->child->next->string : NULL); + PAIR_HREF_INIT(&tag[1], h->buf); + print_otag(h, TAG_A, 2, tag); + } else + print_otag(h, TAG_A, 1, tag); + + n = n->child; + print_text(h, n->string); + + if (NULL == (n = n->next)) + return(0); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ns_pre(MDOC_ARGS) +{ + + if ( ! (MDOC_LINE & n->flags)) + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ar_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "arg"); + print_otag(h, TAG_I, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_xx_pre(MDOC_ARGS) +{ + const char *pp; + struct htmlpair tag; + int flags; + + switch (n->tok) { + case (MDOC_Bsx): + pp = "BSD/OS"; + break; + case (MDOC_Dx): + pp = "DragonFly"; + break; + case (MDOC_Fx): + pp = "FreeBSD"; + break; + case (MDOC_Nx): + pp = "NetBSD"; + break; + case (MDOC_Ox): + pp = "OpenBSD"; + break; + case (MDOC_Ux): + pp = "UNIX"; + break; + default: + return(1); + } + + PAIR_CLASS_INIT(&tag, "unix"); + print_otag(h, TAG_SPAN, 1, &tag); + + print_text(h, pp); + if (n->child) { + flags = h->flags; + h->flags |= HTML_KEEP; + print_text(h, n->child->string); + h->flags = flags; + } + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_bx_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "unix"); + print_otag(h, TAG_SPAN, 1, &tag); + + if (NULL != (n = n->child)) { + print_text(h, n->string); + h->flags |= HTML_NOSPACE; + print_text(h, "BSD"); + } else { + print_text(h, "BSD"); + return(0); + } + + if (NULL != (n = n->next)) { + h->flags |= HTML_NOSPACE; + print_text(h, "-"); + h->flags |= HTML_NOSPACE; + print_text(h, n->string); + } + + return(0); +} + +/* ARGSUSED */ +static int +mdoc_it_pre(MDOC_ARGS) +{ + struct roffsu su; + enum mdoc_list type; + struct htmlpair tag[2]; + const struct 
mdoc_node *bl; + + bl = n->parent; + while (bl && MDOC_Bl != bl->tok) + bl = bl->parent; + + assert(bl); + + type = bl->norm->Bl.type; + + assert(lists[type]); + PAIR_CLASS_INIT(&tag[0], lists[type]); + + bufinit(h); + + if (MDOC_HEAD == n->type) { + switch (type) { + case(LIST_bullet): + /* FALLTHROUGH */ + case(LIST_dash): + /* FALLTHROUGH */ + case(LIST_item): + /* FALLTHROUGH */ + case(LIST_hyphen): + /* FALLTHROUGH */ + case(LIST_enum): + return(0); + case(LIST_diag): + /* FALLTHROUGH */ + case(LIST_hang): + /* FALLTHROUGH */ + case(LIST_inset): + /* FALLTHROUGH */ + case(LIST_ohang): + /* FALLTHROUGH */ + case(LIST_tag): + SCALE_VS_INIT(&su, ! bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DT, 2, tag); + if (LIST_diag != type) + break; + PAIR_CLASS_INIT(&tag[0], "diag"); + print_otag(h, TAG_B, 1, tag); + break; + case(LIST_column): + break; + default: + break; + } + } else if (MDOC_BODY == n->type) { + switch (type) { + case(LIST_bullet): + /* FALLTHROUGH */ + case(LIST_hyphen): + /* FALLTHROUGH */ + case(LIST_dash): + /* FALLTHROUGH */ + case(LIST_enum): + /* FALLTHROUGH */ + case(LIST_item): + SCALE_VS_INIT(&su, ! bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_LI, 2, tag); + break; + case(LIST_diag): + /* FALLTHROUGH */ + case(LIST_hang): + /* FALLTHROUGH */ + case(LIST_inset): + /* FALLTHROUGH */ + case(LIST_ohang): + /* FALLTHROUGH */ + case(LIST_tag): + if (NULL == bl->norm->Bl.width) { + print_otag(h, TAG_DD, 1, tag); + break; + } + a2width(bl->norm->Bl.width, &su); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DD, 2, tag); + break; + case(LIST_column): + SCALE_VS_INIT(&su, ! 
bl->norm->Bl.comp); + bufcat_su(h, "margin-top", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_TD, 2, tag); + break; + default: + break; + } + } else { + switch (type) { + case (LIST_column): + print_otag(h, TAG_TR, 1, tag); + break; + default: + break; + } + } + + return(1); +} + +/* ARGSUSED */ +static int +mdoc_bl_pre(MDOC_ARGS) +{ + int i; + struct htmlpair tag[3]; + struct roffsu su; + char buf[BUFSIZ]; + + bufinit(h); + + if (MDOC_BODY == n->type) { + if (LIST_column == n->norm->Bl.type) + print_otag(h, TAG_TBODY, 0, NULL); + return(1); + } + + if (MDOC_HEAD == n->type) { + if (LIST_column != n->norm->Bl.type) + return(0); + + /* + * For each column, print out the <COL> tag with our + * suggested width. The last column gets min-width, as + * in terminal mode it auto-sizes to the width of the + * screen and we want to preserve that behaviour. + */ + + for (i = 0; i < (int)n->norm->Bl.ncols; i++) { + a2width(n->norm->Bl.cols[i], &su); + if (i < (int)n->norm->Bl.ncols - 1) + bufcat_su(h, "width", &su); + else + bufcat_su(h, "min-width", &su); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_COL, 1, tag); + } + + return(0); + } + + SCALE_VS_INIT(&su, 0); + bufcat_su(h, "margin-top", &su); + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[0], h); + + assert(lists[n->norm->Bl.type]); + strlcpy(buf, "list ", BUFSIZ); + strlcat(buf, lists[n->norm->Bl.type], BUFSIZ); + PAIR_INIT(&tag[1], ATTR_CLASS, buf); + + /* Set the block's left-hand margin. 
*/ + + if (n->norm->Bl.offs) { + a2offs(n->norm->Bl.offs, &su); + bufcat_su(h, "margin-left", &su); + } + + switch (n->norm->Bl.type) { + case(LIST_bullet): + /* FALLTHROUGH */ + case(LIST_dash): + /* FALLTHROUGH */ + case(LIST_hyphen): + /* FALLTHROUGH */ + case(LIST_item): + print_otag(h, TAG_UL, 2, tag); + break; + case(LIST_enum): + print_otag(h, TAG_OL, 2, tag); + break; + case(LIST_diag): + /* FALLTHROUGH */ + case(LIST_hang): + /* FALLTHROUGH */ + case(LIST_inset): + /* FALLTHROUGH */ + case(LIST_ohang): + /* FALLTHROUGH */ + case(LIST_tag): + print_otag(h, TAG_DL, 2, tag); + break; + case(LIST_column): + print_otag(h, TAG_TABLE, 2, tag); + break; + default: + abort(); + /* NOTREACHED */ + } + + return(1); +} + +/* ARGSUSED */ +static int +mdoc_ex_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag; + int nchild; + + if (n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "utility"); + + print_text(h, "The"); + + nchild = n->nchild; + for (n = n->child; n; n = n->next) { + assert(MDOC_TEXT == n->type); + + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, n->string); + print_tagq(h, t); + + if (nchild > 2 && n->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + + if (n->next && NULL == n->next->next) + print_text(h, "and"); + } + + if (nchild > 1) + print_text(h, "utilities exit"); + else + print_text(h, "utility exits"); + + print_text(h, "0 on success, and >0 if an error occurs."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_em_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "emph"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_d1_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (MDOC_BLOCK != n->type) + return(1); + + SCALE_VS_INIT(&su, 0); + bufinit(h); + bufcat_su(h, "margin-top", &su); + bufcat_su(h, "margin-bottom", &su); + PAIR_STYLE_INIT(&tag[0], h); + print_otag(h, TAG_BLOCKQUOTE, 1, tag); + + /* BLOCKQUOTE needs a 
block body. */ + + PAIR_CLASS_INIT(&tag[0], "display"); + print_otag(h, TAG_DIV, 1, tag); + + if (MDOC_Dl == n->tok) { + PAIR_CLASS_INIT(&tag[0], "lit"); + print_otag(h, TAG_CODE, 1, tag); + } + + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sx_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + bufinit(h); + bufcat(h, "#x"); + + for (n = n->child; n; ) { + bufcat_id(h, n->string); + if (NULL != (n = n->next)) + bufcat_id(h, " "); + } + + PAIR_CLASS_INIT(&tag[0], "link-sec"); + PAIR_HREF_INIT(&tag[1], h->buf); + + print_otag(h, TAG_I, 1, tag); + print_otag(h, TAG_A, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bd_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + int comp, sv; + const struct mdoc_node *nn; + struct roffsu su; + + if (MDOC_HEAD == n->type) + return(0); + + if (MDOC_BLOCK == n->type) { + comp = n->norm->Bd.comp; + for (nn = n; nn && ! comp; nn = nn->parent) { + if (MDOC_BLOCK != nn->type) + continue; + if (MDOC_Ss == nn->tok || MDOC_Sh == nn->tok) + comp = 1; + if (nn->prev) + break; + } + if ( ! comp) + print_otag(h, TAG_P, 0, NULL); + return(1); + } + + SCALE_HS_INIT(&su, 0); + if (n->norm->Bd.offs) + a2offs(n->norm->Bd.offs, &su); + + bufinit(h); + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[0], h); + + if (DISP_unfilled != n->norm->Bd.type && + DISP_literal != n->norm->Bd.type) { + PAIR_CLASS_INIT(&tag[1], "display"); + print_otag(h, TAG_DIV, 2, tag); + return(1); + } + + PAIR_CLASS_INIT(&tag[1], "lit display"); + print_otag(h, TAG_PRE, 2, tag); + + /* This can be recursive: save & set our literal state. */ + + sv = h->flags & HTML_LITERAL; + h->flags |= HTML_LITERAL; + + for (nn = n->child; nn; nn = nn->next) { + print_mdoc_node(m, nn, h); + /* + * If the printed node flushes its own line, then we + * needn't do it here as well. This is hacky, but the + * notion of selective eoln whitespace is pretty dumb + * anyway, so don't sweat it. 
+ */ + switch (nn->tok) { + case (MDOC_Sm): + /* FALLTHROUGH */ + case (MDOC_br): + /* FALLTHROUGH */ + case (MDOC_sp): + /* FALLTHROUGH */ + case (MDOC_Bl): + /* FALLTHROUGH */ + case (MDOC_D1): + /* FALLTHROUGH */ + case (MDOC_Dl): + /* FALLTHROUGH */ + case (MDOC_Lp): + /* FALLTHROUGH */ + case (MDOC_Pp): + continue; + default: + break; + } + if (nn->next && nn->next->line == nn->line) + continue; + else if (nn->next) + print_text(h, "\n"); + + h->flags |= HTML_NOSPACE; + } + + if (0 == sv) + h->flags &= ~HTML_LITERAL; + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_pa_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "file"); + print_otag(h, TAG_I, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ad_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "addr"); + print_otag(h, TAG_I, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_an_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + /* TODO: -split and -nosplit (see termp_an_pre()). 
*/ + + PAIR_CLASS_INIT(&tag, "author"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_cd_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "config"); + print_otag(h, TAG_B, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_dv_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "define"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ev_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "env"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_er_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "errno"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fa_pre(MDOC_ARGS) +{ + const struct mdoc_node *nn; + struct htmlpair tag; + struct tag *t; + + PAIR_CLASS_INIT(&tag, "farg"); + if (n->parent->tok != MDOC_Fo) { + print_otag(h, TAG_I, 1, &tag); + return(1); + } + + for (nn = n->child; nn; nn = nn->next) { + t = print_otag(h, TAG_I, 1, &tag); + print_text(h, nn->string); + print_tagq(h, t); + if (nn->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + + if (n->child && n->next && n->next->tok == MDOC_Fa) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_fd_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + char buf[BUFSIZ]; + size_t sz; + int i; + struct tag *t; + + synopsis_pre(h, n); + + if (NULL == (n = n->child)) + return(0); + + assert(MDOC_TEXT == n->type); + + if (strcmp(n->string, "#include")) { + PAIR_CLASS_INIT(&tag[0], "macro"); + print_otag(h, TAG_B, 1, tag); + return(1); + } + + PAIR_CLASS_INIT(&tag[0], "includes"); + print_otag(h, TAG_B, 1, tag); + print_text(h, n->string); + + if (NULL != (n = n->next)) { + assert(MDOC_TEXT == n->type); + strlcpy(buf, '<' == *n->string || '"' == *n->string ? 
+ n->string + 1 : n->string, BUFSIZ); + + sz = strlen(buf); + if (sz && ('>' == buf[sz - 1] || '"' == buf[sz - 1])) + buf[sz - 1] = '\0'; + + PAIR_CLASS_INIT(&tag[0], "link-includes"); + + i = 1; + if (h->base_includes) { + buffmt_includes(h, buf); + PAIR_HREF_INIT(&tag[i], h->buf); + i++; + } + + t = print_otag(h, TAG_A, i, tag); + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + for ( ; n; n = n->next) { + assert(MDOC_TEXT == n->type); + print_text(h, n->string); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_vt_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BLOCK == n->type) { + synopsis_pre(h, n); + return(1); + } else if (MDOC_ELEM == n->type) { + synopsis_pre(h, n); + } else if (MDOC_HEAD == n->type) + return(0); + + PAIR_CLASS_INIT(&tag, "type"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ft_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + synopsis_pre(h, n); + PAIR_CLASS_INIT(&tag, "ftype"); + print_otag(h, TAG_I, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_fn_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag[2]; + char nbuf[BUFSIZ]; + const char *sp, *ep; + int sz, i, pretty; + + pretty = MDOC_SYNPRETTY & n->flags; + synopsis_pre(h, n); + + /* Split apart into type and name. */ + assert(n->child->string); + sp = n->child->string; + + ep = strchr(sp, ' '); + if (NULL != ep) { + PAIR_CLASS_INIT(&tag[0], "ftype"); + t = print_otag(h, TAG_I, 1, tag); + + while (ep) { + sz = MIN((int)(ep - sp), BUFSIZ - 1); + (void)memcpy(nbuf, sp, (size_t)sz); + nbuf[sz] = '\0'; + print_text(h, nbuf); + sp = ++ep; + ep = strchr(sp, ' '); + } + print_tagq(h, t); + } + + PAIR_CLASS_INIT(&tag[0], "fname"); + + /* + * FIXME: only refer to IDs that we know exist. 
+ */ + +#if 0 + if (MDOC_SYNPRETTY & n->flags) { + nbuf[0] = '\0'; + html_idcat(nbuf, sp, BUFSIZ); + PAIR_ID_INIT(&tag[1], nbuf); + } else { + strlcpy(nbuf, "#", BUFSIZ); + html_idcat(nbuf, sp, BUFSIZ); + PAIR_HREF_INIT(&tag[1], nbuf); + } +#endif + + t = print_otag(h, TAG_B, 1, tag); + + if (sp) { + strlcpy(nbuf, sp, BUFSIZ); + print_text(h, nbuf); + } + + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + + PAIR_CLASS_INIT(&tag[0], "farg"); + bufinit(h); + bufcat_style(h, "white-space", "nowrap"); + PAIR_STYLE_INIT(&tag[1], h); + + for (n = n->child->next; n; n = n->next) { + i = 1; + if (MDOC_SYNPRETTY & n->flags) + i = 2; + t = print_otag(h, TAG_I, i, tag); + print_text(h, n->string); + print_tagq(h, t); + if (n->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + } + + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + + if (pretty) { + h->flags |= HTML_NOSPACE; + print_text(h, ";"); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_sm_pre(MDOC_ARGS) +{ + + assert(n->child && MDOC_TEXT == n->child->type); + if (0 == strcmp("on", n->child->string)) { + /* + * FIXME: no p->col to check. Thus, if we have + * .Bd -literal + * .Sm off + * 1 2 + * .Sm on + * 3 + * .Ed + * the "3" is preceded by a space. + */ + h->flags &= ~HTML_NOSPACE; + h->flags &= ~HTML_NONOSPACE; + } else + h->flags |= HTML_NONOSPACE; + + return(0); +} + +/* ARGSUSED */ +static int +mdoc_pp_pre(MDOC_ARGS) +{ + + print_otag(h, TAG_P, 0, NULL); + return(0); + +} + +/* ARGSUSED */ +static int +mdoc_sp_pre(MDOC_ARGS) +{ + struct roffsu su; + struct htmlpair tag; + + SCALE_VS_INIT(&su, 1); + + if (MDOC_sp == n->tok) { + if (NULL != (n = n->child)) + if ( ! 
a2roffsu(n->string, &su, SCALE_VS)) + SCALE_VS_INIT(&su, atoi(n->string)); + } else + su.scale = 0; + + bufinit(h); + bufcat_su(h, "height", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_DIV, 1, &tag); + + /* So the div isn't empty: */ + print_text(h, "\\~"); + + return(0); + +} + +/* ARGSUSED */ +static int +mdoc_lk_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + + if (NULL == (n = n->child)) + return(0); + + assert(MDOC_TEXT == n->type); + + PAIR_CLASS_INIT(&tag[0], "link-ext"); + PAIR_HREF_INIT(&tag[1], n->string); + + print_otag(h, TAG_A, 2, tag); + + if (NULL == n->next) + print_text(h, n->string); + + for (n = n->next; n; n = n->next) + print_text(h, n->string); + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_mt_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct tag *t; + + PAIR_CLASS_INIT(&tag[0], "link-mail"); + + for (n = n->child; n; n = n->next) { + assert(MDOC_TEXT == n->type); + + bufinit(h); + bufcat(h, "mailto:"); + bufcat(h, n->string); + + PAIR_HREF_INIT(&tag[1], h->buf); + t = print_otag(h, TAG_A, 2, tag); + print_text(h, n->string); + print_tagq(h, t); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_fo_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + + if (MDOC_BODY == n->type) { + h->flags |= HTML_NOSPACE; + print_text(h, "("); + h->flags |= HTML_NOSPACE; + return(1); + } else if (MDOC_BLOCK == n->type) { + synopsis_pre(h, n); + return(1); + } + + /* XXX: we drop non-initial arguments as per groff. 
*/ + + assert(n->child); + assert(n->child->string); + + PAIR_CLASS_INIT(&tag, "fname"); + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, n->child->string); + print_tagq(h, t); + return(0); +} + + +/* ARGSUSED */ +static void +mdoc_fo_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + h->flags |= HTML_NOSPACE; + print_text(h, ")"); + h->flags |= HTML_NOSPACE; + print_text(h, ";"); +} + + +/* ARGSUSED */ +static int +mdoc_in_pre(MDOC_ARGS) +{ + struct tag *t; + struct htmlpair tag[2]; + int i; + + synopsis_pre(h, n); + + PAIR_CLASS_INIT(&tag[0], "includes"); + print_otag(h, TAG_B, 1, tag); + + /* + * The first argument of the `In' gets special treatment as + * being a linked value. Subsequent values are printed + * afterward. groff does similarly. This also handles the case + * of no children. + */ + + if (MDOC_SYNPRETTY & n->flags && MDOC_LINE & n->flags) + print_text(h, "#include"); + + print_text(h, "<"); + h->flags |= HTML_NOSPACE; + + if (NULL != (n = n->child)) { + assert(MDOC_TEXT == n->type); + + PAIR_CLASS_INIT(&tag[0], "link-includes"); + + i = 1; + if (h->base_includes) { + buffmt_includes(h, n->string); + PAIR_HREF_INIT(&tag[i], h->buf); + i++; + } + + t = print_otag(h, TAG_A, i, tag); + print_text(h, n->string); + print_tagq(h, t); + + n = n->next; + } + + h->flags |= HTML_NOSPACE; + print_text(h, ">"); + + for ( ; n; n = n->next) { + assert(MDOC_TEXT == n->type); + print_text(h, n->string); + } + + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ic_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "cmd"); + print_otag(h, TAG_B, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_rv_pre(MDOC_ARGS) +{ + struct htmlpair tag; + struct tag *t; + int nchild; + + if (n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "fname"); + + print_text(h, "The"); + + nchild = n->nchild; + for (n = n->child; n; n = n->next) { + assert(MDOC_TEXT == n->type); + + t = print_otag(h, TAG_B, 1, &tag); + 
print_text(h, n->string); + print_tagq(h, t); + + h->flags |= HTML_NOSPACE; + print_text(h, "()"); + + if (nchild > 2 && n->next) { + h->flags |= HTML_NOSPACE; + print_text(h, ","); + } + + if (n->next && NULL == n->next->next) + print_text(h, "and"); + } + + if (nchild > 1) + print_text(h, "functions return"); + else + print_text(h, "function returns"); + + print_text(h, "the value 0 if successful; otherwise the value " + "-1 is returned and the global variable"); + + PAIR_CLASS_INIT(&tag, "var"); + t = print_otag(h, TAG_B, 1, &tag); + print_text(h, "errno"); + print_tagq(h, t); + print_text(h, "is set to indicate the error."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_va_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "var"); + print_otag(h, TAG_B, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ap_pre(MDOC_ARGS) +{ + + h->flags |= HTML_NOSPACE; + print_text(h, "\\(aq"); + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bf_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + struct roffsu su; + + if (MDOC_HEAD == n->type) + return(0); + else if (MDOC_BODY != n->type) + return(1); + + if (FONT_Em == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "emph"); + else if (FONT_Sy == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "symb"); + else if (FONT_Li == n->norm->Bf.font) + PAIR_CLASS_INIT(&tag[0], "lit"); + else + PAIR_CLASS_INIT(&tag[0], "none"); + + /* + * We want this to be inline-formatted, but needs to be div to + * accept block children. + */ + bufinit(h); + bufcat_style(h, "display", "inline"); + SCALE_HS_INIT(&su, 1); + /* Needs a left-margin for spacing. 
*/ + bufcat_su(h, "margin-left", &su); + PAIR_STYLE_INIT(&tag[1], h); + print_otag(h, TAG_DIV, 2, tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_ms_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_igndelim_pre(MDOC_ARGS) +{ + + h->flags |= HTML_IGNDELIM; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_pf_post(MDOC_ARGS) +{ + + h->flags |= HTML_NOSPACE; +} + + +/* ARGSUSED */ +static int +mdoc_rs_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BLOCK != n->type) + return(1); + + if (n->prev && SEC_SEE_ALSO == n->sec) + print_otag(h, TAG_P, 0, NULL); + + PAIR_CLASS_INIT(&tag, "ref"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + + +/* ARGSUSED */ +static int +mdoc_li_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "lit"); + print_otag(h, TAG_CODE, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_sy_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + PAIR_CLASS_INIT(&tag, "symb"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc_bt_pre(MDOC_ARGS) +{ + + print_text(h, "is currently in beta test."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_ud_pre(MDOC_ARGS) +{ + + print_text(h, "currently under development."); + return(0); +} + + +/* ARGSUSED */ +static int +mdoc_lb_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags && n->prev) + print_otag(h, TAG_BR, 0, NULL); + + PAIR_CLASS_INIT(&tag, "lib"); + print_otag(h, TAG_SPAN, 1, &tag); + return(1); +} + + +/* ARGSUSED */ +static int +mdoc__x_pre(MDOC_ARGS) +{ + struct htmlpair tag[2]; + enum htmltag t; + + t = TAG_SPAN; + + switch (n->tok) { + case(MDOC__A): + PAIR_CLASS_INIT(&tag[0], "ref-auth"); + if (n->prev && MDOC__A == n->prev->tok) + if (NULL == n->next || MDOC__A != n->next->tok) + print_text(h, "and"); + break; + case(MDOC__B): + 
PAIR_CLASS_INIT(&tag[0], "ref-book"); + t = TAG_I; + break; + case(MDOC__C): + PAIR_CLASS_INIT(&tag[0], "ref-city"); + break; + case(MDOC__D): + PAIR_CLASS_INIT(&tag[0], "ref-date"); + break; + case(MDOC__I): + PAIR_CLASS_INIT(&tag[0], "ref-issue"); + t = TAG_I; + break; + case(MDOC__J): + PAIR_CLASS_INIT(&tag[0], "ref-jrnl"); + t = TAG_I; + break; + case(MDOC__N): + PAIR_CLASS_INIT(&tag[0], "ref-num"); + break; + case(MDOC__O): + PAIR_CLASS_INIT(&tag[0], "ref-opt"); + break; + case(MDOC__P): + PAIR_CLASS_INIT(&tag[0], "ref-page"); + break; + case(MDOC__Q): + PAIR_CLASS_INIT(&tag[0], "ref-corp"); + break; + case(MDOC__R): + PAIR_CLASS_INIT(&tag[0], "ref-rep"); + break; + case(MDOC__T): + PAIR_CLASS_INIT(&tag[0], "ref-title"); + break; + case(MDOC__U): + PAIR_CLASS_INIT(&tag[0], "link-ref"); + break; + case(MDOC__V): + PAIR_CLASS_INIT(&tag[0], "ref-vol"); + break; + default: + abort(); + /* NOTREACHED */ + } + + if (MDOC__U != n->tok) { + print_otag(h, t, 1, tag); + return(1); + } + + PAIR_HREF_INIT(&tag[1], n->child->string); + print_otag(h, TAG_A, 2, tag); + + return(1); +} + + +/* ARGSUSED */ +static void +mdoc__x_post(MDOC_ARGS) +{ + + if (MDOC__A == n->tok && n->next && MDOC__A == n->next->tok) + if (NULL == n->next->next || MDOC__A != n->next->next->tok) + if (NULL == n->prev || MDOC__A != n->prev->tok) + return; + + /* TODO: %U */ + + if (NULL == n->parent || MDOC_Rs != n->parent->tok) + return; + + h->flags |= HTML_NOSPACE; + print_text(h, n->next ? 
"," : "."); +} + + +/* ARGSUSED */ +static int +mdoc_bk_pre(MDOC_ARGS) +{ + + switch (n->type) { + case (MDOC_BLOCK): + break; + case (MDOC_HEAD): + return(0); + case (MDOC_BODY): + if (n->parent->args || 0 == n->prev->nchild) + h->flags |= HTML_PREKEEP; + break; + default: + abort(); + /* NOTREACHED */ + } + + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_bk_post(MDOC_ARGS) +{ + + if (MDOC_BODY == n->type) + h->flags &= ~(HTML_KEEP | HTML_PREKEEP); +} + + +/* ARGSUSED */ +static int +mdoc_quote_pre(MDOC_ARGS) +{ + struct htmlpair tag; + + if (MDOC_BODY != n->type) + return(1); + + switch (n->tok) { + case (MDOC_Ao): + /* FALLTHROUGH */ + case (MDOC_Aq): + print_text(h, "\\(la"); + break; + case (MDOC_Bro): + /* FALLTHROUGH */ + case (MDOC_Brq): + print_text(h, "\\(lC"); + break; + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Bq): + print_text(h, "\\(lB"); + break; + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Op): + print_text(h, "\\(lB"); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "opt"); + print_otag(h, TAG_SPAN, 1, &tag); + break; + case (MDOC_Eo): + break; + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Dq): + /* FALLTHROUGH */ + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Qq): + print_text(h, "\\(lq"); + break; + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Pq): + print_text(h, "("); + break; + case (MDOC_Ql): + print_text(h, "\\(oq"); + h->flags |= HTML_NOSPACE; + PAIR_CLASS_INIT(&tag, "lit"); + print_otag(h, TAG_CODE, 1, &tag); + break; + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Sq): + print_text(h, "\\(oq"); + break; + default: + abort(); + /* NOTREACHED */ + } + + h->flags |= HTML_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +mdoc_quote_post(MDOC_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + + h->flags |= HTML_NOSPACE; + + switch (n->tok) { + case (MDOC_Ao): + /* FALLTHROUGH */ + case (MDOC_Aq): + print_text(h, "\\(ra"); + break; + case (MDOC_Bro): + /* FALLTHROUGH */ + case (MDOC_Brq): + 
print_text(h, "\\(rC"); + break; + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Op): + /* FALLTHROUGH */ + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Bq): + print_text(h, "\\(rB"); + break; + case (MDOC_Eo): + break; + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Qq): + /* FALLTHROUGH */ + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Dq): + print_text(h, "\\(rq"); + break; + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Pq): + print_text(h, ")"); + break; + case (MDOC_Ql): + /* FALLTHROUGH */ + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Sq): + print_text(h, "\\(aq"); + break; + default: + abort(); + /* NOTREACHED */ + } +} + + diff --git a/usr/src/cmd/mandoc/mdoc_macro.c b/usr/src/cmd/mandoc/mdoc_macro.c new file mode 100644 index 0000000000..11d147399e --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_macro.c @@ -0,0 +1,1787 @@ +/* $Id: mdoc_macro.c,v 1.115 2012/01/05 00:43:51 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +enum rew { /* see rew_dohalt() */ + REWIND_NONE, + REWIND_THIS, + REWIND_MORE, + REWIND_FORCE, + REWIND_LATER, + REWIND_ERROR +}; + +static int blk_full(MACRO_PROT_ARGS); +static int blk_exp_close(MACRO_PROT_ARGS); +static int blk_part_exp(MACRO_PROT_ARGS); +static int blk_part_imp(MACRO_PROT_ARGS); +static int ctx_synopsis(MACRO_PROT_ARGS); +static int in_line_eoln(MACRO_PROT_ARGS); +static int in_line_argn(MACRO_PROT_ARGS); +static int in_line(MACRO_PROT_ARGS); +static int obsolete(MACRO_PROT_ARGS); +static int phrase_ta(MACRO_PROT_ARGS); + +static int dword(struct mdoc *, int, int, + const char *, enum mdelim); +static int append_delims(struct mdoc *, + int, int *, char *); +static enum mdoct lookup(enum mdoct, const char *); +static enum mdoct lookup_raw(const char *); +static int make_pending(struct mdoc_node *, enum mdoct, + struct mdoc *, int, int); +static int phrase(struct mdoc *, int, int, char *); +static enum mdoct rew_alt(enum mdoct); +static enum rew rew_dohalt(enum mdoct, enum mdoc_type, + const struct mdoc_node *); +static int rew_elem(struct mdoc *, enum mdoct); +static int rew_last(struct mdoc *, + const struct mdoc_node *); +static int rew_sub(enum mdoc_type, struct mdoc *, + enum mdoct, int, int); + +const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ap */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dd */ + { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ + { in_line_eoln, MDOC_PROLOGUE }, /* Os */ + { blk_full, MDOC_PARSED }, /* Sh */ + { blk_full, MDOC_PARSED }, /* Ss */ + { in_line_eoln, 0 }, /* Pp */ + { blk_part_imp, MDOC_PARSED }, /* D1 */ + { blk_part_imp, MDOC_PARSED }, /* Dl */ + { blk_full, MDOC_EXPLICIT }, /* Bd */ + { 
blk_exp_close, MDOC_EXPLICIT }, /* Ed */ + { blk_full, MDOC_EXPLICIT }, /* Bl */ + { blk_exp_close, MDOC_EXPLICIT }, /* El */ + { blk_full, MDOC_PARSED }, /* It */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* An */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Dv */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Er */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ev */ + { in_line_eoln, 0 }, /* Ex */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fa */ + { in_line_eoln, 0 }, /* Fd */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fl */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Fn */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ft */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ic */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* In */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Li */ + { blk_full, 0 }, /* Nd */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Nm */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Op */ + { obsolete, 0 }, /* Ot */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Pa */ + { in_line_eoln, 0 }, /* Rv */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* St */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Va */ + { ctx_synopsis, MDOC_CALLABLE | MDOC_PARSED }, /* Vt */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Xr */ + { in_line_eoln, 0 }, /* %A */ + { in_line_eoln, 0 }, /* %B */ + { in_line_eoln, 0 }, /* %D */ + { in_line_eoln, 0 }, /* %I */ + { in_line_eoln, 0 }, /* %J */ + { in_line_eoln, 0 }, /* %N */ + { in_line_eoln, 0 }, /* %O */ + { in_line_eoln, 0 }, /* %P */ + { in_line_eoln, 0 }, /* %R */ + { in_line_eoln, 0 }, /* %T */ + { in_line_eoln, 0 }, /* %V */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ac */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | 
MDOC_EXPLICIT }, /* Ao */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Aq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* At */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Bc */ + { blk_full, MDOC_EXPLICIT }, /* Bf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Bq */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bsx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Bx */ + { in_line_eoln, 0 }, /* Db */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Dc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Do */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Dq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Ec */ + { blk_exp_close, MDOC_EXPLICIT }, /* Ef */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Em */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Eo */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Fx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ms */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* No */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Ns */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Nx */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ox */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Pc */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_IGNDELIM }, /* Pf */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Po */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Pq */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Qc */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Ql */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Qo */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Qq */ + { blk_exp_close, MDOC_EXPLICIT }, /* Re */ + { blk_full, MDOC_EXPLICIT }, /* Rs */ + 
{ blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Sc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* So */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Sq */ + { in_line_eoln, 0 }, /* Sm */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sx */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Sy */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Tn */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Ux */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Xc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Xo */ + { blk_full, MDOC_EXPLICIT | MDOC_CALLABLE }, /* Fo */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Fc */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Oo */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Oc */ + { blk_full, MDOC_EXPLICIT }, /* Bk */ + { blk_exp_close, MDOC_EXPLICIT }, /* Ek */ + { in_line_eoln, 0 }, /* Bt */ + { in_line_eoln, 0 }, /* Hf */ + { obsolete, 0 }, /* Fr */ + { in_line_eoln, 0 }, /* Ud */ + { in_line, 0 }, /* Lb */ + { in_line_eoln, 0 }, /* Lp */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Lk */ + { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Mt */ + { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED }, /* Brq */ + { blk_part_exp, MDOC_CALLABLE | MDOC_PARSED | MDOC_EXPLICIT }, /* Bro */ + { blk_exp_close, MDOC_EXPLICIT | MDOC_CALLABLE | MDOC_PARSED }, /* Brc */ + { in_line_eoln, 0 }, /* %C */ + { obsolete, 0 }, /* Es */ + { obsolete, 0 }, /* En */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ + { in_line_eoln, 0 }, /* %Q */ + { in_line_eoln, 0 }, /* br */ + { in_line_eoln, 0 }, /* sp */ + { in_line_eoln, 0 }, /* %U */ + { phrase_ta, MDOC_CALLABLE | MDOC_PARSED }, /* Ta */ +}; + +const struct mdoc_macro * const mdoc_macros = __mdoc_macros; + + +/* + * This is called at the end of parsing. It must traverse up the tree, + * closing out open [implicit] scopes. 
Obviously, open explicit scopes + * are errors. + */ +int +mdoc_macroend(struct mdoc *m) +{ + struct mdoc_node *n; + + /* Scan for open explicit scopes. */ + + n = MDOC_VALID & m->last->flags ? m->last->parent : m->last; + + for ( ; n; n = n->parent) + if (MDOC_BLOCK == n->type && + MDOC_EXPLICIT & mdoc_macros[n->tok].flags) + mdoc_nmsg(m, n, MANDOCERR_SCOPEEXIT); + + /* Rewind to the first. */ + + return(rew_last(m, m->first)); +} + + +/* + * Look up a macro from within a subsequent context. + */ +static enum mdoct +lookup(enum mdoct from, const char *p) +{ + + if ( ! (MDOC_PARSED & mdoc_macros[from].flags)) + return(MDOC_MAX); + return(lookup_raw(p)); +} + + +/* + * Lookup a macro following the initial line macro. + */ +static enum mdoct +lookup_raw(const char *p) +{ + enum mdoct res; + + if (MDOC_MAX == (res = mdoc_hash_find(p))) + return(MDOC_MAX); + if (MDOC_CALLABLE & mdoc_macros[res].flags) + return(res); + return(MDOC_MAX); +} + + +static int +rew_last(struct mdoc *mdoc, const struct mdoc_node *to) +{ + struct mdoc_node *n, *np; + + assert(to); + mdoc->next = MDOC_NEXT_SIBLING; + + /* LINTED */ + while (mdoc->last != to) { + /* + * Save the parent here, because we may delete the + * m->last node in the post-validation phase and reset + * it to m->last->parent, causing a step in the closing + * out to be lost. + */ + np = mdoc->last->parent; + if ( ! mdoc_valid_post(mdoc)) + return(0); + n = mdoc->last; + mdoc->last = np; + assert(mdoc->last); + mdoc->last->last = n; + } + + return(mdoc_valid_post(mdoc)); +} + + +/* + * For a block closing macro, return the corresponding opening one. + * Otherwise, return the macro itself. 
+ */ +static enum mdoct +rew_alt(enum mdoct tok) +{ + switch (tok) { + case (MDOC_Ac): + return(MDOC_Ao); + case (MDOC_Bc): + return(MDOC_Bo); + case (MDOC_Brc): + return(MDOC_Bro); + case (MDOC_Dc): + return(MDOC_Do); + case (MDOC_Ec): + return(MDOC_Eo); + case (MDOC_Ed): + return(MDOC_Bd); + case (MDOC_Ef): + return(MDOC_Bf); + case (MDOC_Ek): + return(MDOC_Bk); + case (MDOC_El): + return(MDOC_Bl); + case (MDOC_Fc): + return(MDOC_Fo); + case (MDOC_Oc): + return(MDOC_Oo); + case (MDOC_Pc): + return(MDOC_Po); + case (MDOC_Qc): + return(MDOC_Qo); + case (MDOC_Re): + return(MDOC_Rs); + case (MDOC_Sc): + return(MDOC_So); + case (MDOC_Xc): + return(MDOC_Xo); + default: + return(tok); + } + /* NOTREACHED */ +} + + +/* + * Rewinding to tok, how do we have to handle *p? + * REWIND_NONE: *p would delimit tok, but no tok scope is open + * inside *p, so there is no need to rewind anything at all. + * REWIND_THIS: *p matches tok, so rewind *p and nothing else. + * REWIND_MORE: *p is implicit, rewind it and keep searching for tok. + * REWIND_FORCE: *p is explicit, but tok is full, force rewinding *p. + * REWIND_LATER: *p is explicit and still open, postpone rewinding. + * REWIND_ERROR: No tok block is open at all. + */ +static enum rew +rew_dohalt(enum mdoct tok, enum mdoc_type type, + const struct mdoc_node *p) +{ + + /* + * No matching token, no delimiting block, no broken block. + * This can happen when full implicit macros are called for + * the first time but try to rewind their previous + * instance anyway. + */ + if (MDOC_ROOT == p->type) + return(MDOC_BLOCK == type && + MDOC_EXPLICIT & mdoc_macros[tok].flags ? + REWIND_ERROR : REWIND_NONE); + + /* + * When starting to rewind, skip plain text + * and nodes that have already been rewound. + */ + if (MDOC_TEXT == p->type || MDOC_VALID & p->flags) + return(REWIND_MORE); + + /* + * The easiest case: Found a matching token. + * This applies to both blocks and elements. 
+ */ + tok = rew_alt(tok); + if (tok == p->tok) + return(p->end ? REWIND_NONE : + type == p->type ? REWIND_THIS : REWIND_MORE); + + /* + * While elements do require rewinding for themselves, + * they never affect rewinding of other nodes. + */ + if (MDOC_ELEM == p->type) + return(REWIND_MORE); + + /* + * Blocks delimited by our target token get REWIND_MORE. + * Blocks delimiting our target token get REWIND_NONE. + */ + switch (tok) { + case (MDOC_Bl): + if (MDOC_It == p->tok) + return(REWIND_MORE); + break; + case (MDOC_It): + if (MDOC_BODY == p->type && MDOC_Bl == p->tok) + return(REWIND_NONE); + break; + /* + * XXX Badly nested block handling still fails badly + * when one block is breaking two blocks of the same type. + * This is an incomplete and extremely ugly workaround, + * required to let the OpenBSD tree build. + */ + case (MDOC_Oo): + if (MDOC_Op == p->tok) + return(REWIND_MORE); + break; + case (MDOC_Nm): + return(REWIND_NONE); + case (MDOC_Nd): + /* FALLTHROUGH */ + case (MDOC_Ss): + if (MDOC_BODY == p->type && MDOC_Sh == p->tok) + return(REWIND_NONE); + /* FALLTHROUGH */ + case (MDOC_Sh): + if (MDOC_Nd == p->tok || MDOC_Ss == p->tok || + MDOC_Sh == p->tok) + return(REWIND_MORE); + break; + default: + break; + } + + /* + * Default block rewinding rules. + * In particular, always skip block end markers, + * and let all blocks rewind Nm children. + */ + if (ENDBODY_NOT != p->end || MDOC_Nm == p->tok || + (MDOC_BLOCK == p->type && + ! (MDOC_EXPLICIT & mdoc_macros[tok].flags))) + return(REWIND_MORE); + + /* + * By default, closing out full blocks + * forces closing of broken explicit blocks, + * while closing out partial blocks + * allows delayed rewinding by default. + */ + return (&blk_full == mdoc_macros[tok].fp ? 
+ REWIND_FORCE : REWIND_LATER); +} + + +static int +rew_elem(struct mdoc *mdoc, enum mdoct tok) +{ + struct mdoc_node *n; + + n = mdoc->last; + if (MDOC_ELEM != n->type) + n = n->parent; + assert(MDOC_ELEM == n->type); + assert(tok == n->tok); + + return(rew_last(mdoc, n)); +} + + +/* + * We are trying to close a block identified by tok, + * but the child block *broken is still open. + * Thus, postpone closing the tok block + * until the rew_sub call closing *broken. + */ +static int +make_pending(struct mdoc_node *broken, enum mdoct tok, + struct mdoc *m, int line, int ppos) +{ + struct mdoc_node *breaker; + + /* + * Iterate backwards, searching for the block matching tok, + * that is, the block breaking the *broken block. + */ + for (breaker = broken->parent; breaker; breaker = breaker->parent) { + + /* + * If the *broken block had already been broken before + * and we encounter its breaker, make the tok block + * pending on the inner breaker. + * Graphically, "[A breaker=[B broken=[C->B B] tok=A] C]" + * becomes "[A broken=[B [C->B B] tok=A] C]" + * and finally "[A [B->A [C->B B] A] C]". + */ + if (breaker == broken->pending) { + broken = breaker; + continue; + } + + if (REWIND_THIS != rew_dohalt(tok, MDOC_BLOCK, breaker)) + continue; + if (MDOC_BODY == broken->type) + broken = broken->parent; + + /* + * Found the breaker. + * If another, outer breaker is already pending on + * the *broken block, we must not clobber the link + * to the outer breaker, but make it pending on the + * new, now inner breaker. + * Graphically, "[A breaker=[B broken=[C->A A] tok=B] C]" + * becomes "[A breaker=[B->A broken=[C A] tok=B] C]" + * and finally "[A [B->A [C->B A] B] C]". + */ + if (broken->pending) { + struct mdoc_node *taker; + + /* + * If the breaker had also been broken before, + * it cannot take on the outer breaker itself, + * but must hand it on to its own breakers. 
+ * Graphically, this is the following situation: + * "[A [B breaker=[C->B B] broken=[D->A A] tok=C] D]" + * "[A taker=[B->A breaker=[C->B B] [D->C A] C] D]" + */ + taker = breaker; + while (taker->pending) + taker = taker->pending; + taker->pending = broken->pending; + } + broken->pending = breaker; + mandoc_vmsg(MANDOCERR_SCOPENEST, m->parse, line, ppos, + "%s breaks %s", mdoc_macronames[tok], + mdoc_macronames[broken->tok]); + return(1); + } + + /* + * Found no matching block for tok. + * Are you trying to close a block that is not open? + */ + return(0); +} + + +static int +rew_sub(enum mdoc_type t, struct mdoc *m, + enum mdoct tok, int line, int ppos) +{ + struct mdoc_node *n; + + n = m->last; + while (n) { + switch (rew_dohalt(tok, t, n)) { + case (REWIND_NONE): + return(1); + case (REWIND_THIS): + break; + case (REWIND_FORCE): + mandoc_vmsg(MANDOCERR_SCOPEBROKEN, m->parse, + line, ppos, "%s breaks %s", + mdoc_macronames[tok], + mdoc_macronames[n->tok]); + /* FALLTHROUGH */ + case (REWIND_MORE): + n = n->parent; + continue; + case (REWIND_LATER): + if (make_pending(n, tok, m, line, ppos) || + MDOC_BLOCK != t) + return(1); + /* FALLTHROUGH */ + case (REWIND_ERROR): + mdoc_pmsg(m, line, ppos, MANDOCERR_NOSCOPE); + return(1); + } + break; + } + + assert(n); + if ( ! rew_last(m, n)) + return(0); + + /* + * The current block extends an enclosing block. + * Now that the current block ends, close the enclosing block, too. + */ + while (NULL != (n = n->pending)) { + if ( ! rew_last(m, n)) + return(0); + if (MDOC_HEAD == n->type && + ! mdoc_body_alloc(m, n->line, n->pos, n->tok)) + return(0); + } + + return(1); +} + +/* + * Allocate a word and check whether it's punctuation or not. + * Punctuation consists of those tokens found in mdoc_isdelim(). + */ +static int +dword(struct mdoc *m, int line, + int col, const char *p, enum mdelim d) +{ + + if (DELIM_MAX == d) + d = mdoc_isdelim(p); + + if ( ! 
mdoc_word_alloc(m, line, col, p)) + return(0); + + if (DELIM_OPEN == d) + m->last->flags |= MDOC_DELIMO; + + /* + * Closing delimiters only suppress the preceding space + * when they follow something, not when they start a new + * block or element, and not when they follow `No'. + * + * XXX Explicitly special-casing MDOC_No here feels + * like a layering violation. Find a better way + * and solve this in the code related to `No'! + */ + + else if (DELIM_CLOSE == d && m->last->prev && + m->last->prev->tok != MDOC_No) + m->last->flags |= MDOC_DELIMC; + + return(1); +} + +static int +append_delims(struct mdoc *m, int line, int *pos, char *buf) +{ + int la; + enum margserr ac; + char *p; + + if ('\0' == buf[*pos]) + return(1); + + for (;;) { + la = *pos; + ac = mdoc_zargs(m, line, pos, buf, &p); + + if (ARGS_ERROR == ac) + return(0); + else if (ARGS_EOLN == ac) + break; + + dword(m, line, la, p, DELIM_MAX); + + /* + * If we encounter end-of-sentence symbols, then trigger + * the double-space. + * + * XXX: it's easy to allow this to propagate outward to + * the last symbol, such that `. )' will cause the + * correct double-spacing. However, (1) groff isn't + * smart enough to do this and (2) it would require + * knowing which symbols break this behaviour, for + * example, `. ;' shouldn't propagate the double-space. + */ + if (mandoc_eos(p, strlen(p), 0)) + m->last->flags |= MDOC_EOS; + } + + return(1); +} + + +/* + * Close out block partial/full explicit. + */ +static int +blk_exp_close(MACRO_PROT_ARGS) +{ + struct mdoc_node *body; /* Our own body. */ + struct mdoc_node *later; /* A sub-block starting later. */ + struct mdoc_node *n; /* For searching backwards. 
*/ + + int j, lastarg, maxargs, flushed, nl; + enum margserr ac; + enum mdoct atok, ntok; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + switch (tok) { + case (MDOC_Ec): + maxargs = 1; + break; + default: + maxargs = 0; + break; + } + + /* + * Search backwards for beginnings of blocks, + * both of our own and of pending sub-blocks. + */ + atok = rew_alt(tok); + body = later = NULL; + for (n = m->last; n; n = n->parent) { + if (MDOC_VALID & n->flags) + continue; + + /* Remember the start of our own body. */ + if (MDOC_BODY == n->type && atok == n->tok) { + if (ENDBODY_NOT == n->end) + body = n; + continue; + } + + if (MDOC_BLOCK != n->type || MDOC_Nm == n->tok) + continue; + if (atok == n->tok) { + assert(body); + + /* + * Found the start of our own block. + * When there is no pending sub block, + * just proceed to closing out. + */ + if (NULL == later) + break; + + /* + * When there is a pending sub block, + * postpone closing out the current block + * until the rew_sub() closing out the sub-block. + */ + make_pending(later, tok, m, line, ppos); + + /* + * Mark the place where the formatting - but not + * the scope - of the current block ends. + */ + if ( ! mdoc_endbody_alloc(m, line, ppos, + atok, body, ENDBODY_SPACE)) + return(0); + break; + } + + /* + * When finding an open sub block, remember the last + * open explicit block, or, in case there are only + * implicit ones, the first open implicit block. + */ + if (later && + MDOC_EXPLICIT & mdoc_macros[later->tok].flags) + continue; + if (MDOC_CALLABLE & mdoc_macros[n->tok].flags) + later = n; + } + + if ( ! (MDOC_CALLABLE & mdoc_macros[tok].flags)) { + /* FIXME: do this in validate */ + if (buf[*pos]) + mdoc_pmsg(m, line, ppos, MANDOCERR_ARGSLOST); + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + return(rew_sub(MDOC_BLOCK, m, tok, line, ppos)); + } + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + + if (NULL == later && maxargs > 0) + if ( ! 
mdoc_tail_alloc(m, line, ppos, rew_alt(tok))) + return(0); + + for (flushed = j = 0; ; j++) { + lastarg = *pos; + + if (j == maxargs && ! flushed) { + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + flushed = 1; + } + + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, lastarg, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! flushed) { + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + flushed = 1; + } + if ( ! mdoc_macro(m, ntok, line, lastarg, pos, buf)) + return(0); + break; + } + + if ( ! flushed && ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +in_line(MACRO_PROT_ARGS) +{ + int la, scope, cnt, nc, nl; + enum margverr av; + enum mdoct ntok; + enum margserr ac; + enum mdelim d; + struct mdoc_arg *arg; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + /* + * Whether we allow ignored elements (those without content, + * usually because of reserved words) to squeak by. + */ + + switch (tok) { + case (MDOC_An): + /* FALLTHROUGH */ + case (MDOC_Ar): + /* FALLTHROUGH */ + case (MDOC_Fl): + /* FALLTHROUGH */ + case (MDOC_Mt): + /* FALLTHROUGH */ + case (MDOC_Nm): + /* FALLTHROUGH */ + case (MDOC_Pa): + nc = 1; + break; + default: + nc = 0; + break; + } + + for (arg = NULL;; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + for (cnt = scope = 0;; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + if (ARGS_PUNCT == ac) + break; + + ntok = ARGS_QWORD == ac ? 
MDOC_MAX : lookup(tok, p); + + /* + * In this case, we've located a submacro and must + * execute it. Close out scope, if open. If no + * elements have been generated, either create one (nc) + * or raise a warning. + */ + + if (MDOC_MAX != ntok) { + if (scope && ! rew_elem(m, tok)) + return(0); + if (nc && 0 == cnt) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if ( ! rew_last(m, m->last)) + return(0); + } else if ( ! nc && 0 == cnt) { + mdoc_argv_free(arg); + mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY); + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); + } + + /* + * Non-quote-enclosed punctuation. Set up our scope, if + * a word; rewind the scope, if a delimiter; then append + * the word. + */ + + d = ARGS_QWORD == ac ? DELIM_NONE : mdoc_isdelim(p); + + if (DELIM_NONE != d) { + /* + * If we encounter closing punctuation, no word + * has been omitted, no scope is open, and we're + * allowed to have an empty element, then start + * a new scope. `Ar', `Fl', and `Li', only do + * this once per invocation. There may be more + * of these (all of them?). + */ + if (0 == cnt && (nc || MDOC_Li == tok) && + DELIM_CLOSE == d && ! scope) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if (MDOC_Ar == tok || MDOC_Li == tok || + MDOC_Fl == tok) + cnt++; + scope = 1; + } + /* + * Close out our scope, if one is open, before + * any punctuation. + */ + if (scope && ! rew_elem(m, tok)) + return(0); + scope = 0; + } else if ( ! scope) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + scope = 1; + } + + if (DELIM_NONE == d) + cnt++; + + if ( ! dword(m, line, la, p, d)) + return(0); + + /* + * `Fl' macros have their scope re-opened with each new + * word so that the `-' can be added to each one without + * having to parse out spaces. + */ + if (scope && MDOC_Fl == tok) { + if ( ! 
rew_elem(m, tok)) + return(0); + scope = 0; + } + } + + if (scope && ! rew_elem(m, tok)) + return(0); + + /* + * If no elements have been collected and we're allowed to have + * empties (nc), open a scope and close it out. Otherwise, + * raise a warning. + */ + + if (nc && 0 == cnt) { + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + if ( ! rew_last(m, m->last)) + return(0); + } else if ( ! nc && 0 == cnt) { + mdoc_argv_free(arg); + mdoc_pmsg(m, line, ppos, MANDOCERR_MACROEMPTY); + } + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +blk_full(MACRO_PROT_ARGS) +{ + int la, nl, nparsed; + struct mdoc_arg *arg; + struct mdoc_node *head; /* save of head macro */ + struct mdoc_node *body; /* save of body macro */ + struct mdoc_node *n; + enum mdoc_type mtt; + enum mdoct ntok; + enum margserr ac, lac; + enum margverr av; + char *p; + + nl = MDOC_NEWLINE & m->flags; + + /* Close out prior implicit scope. */ + + if ( ! (MDOC_EXPLICIT & mdoc_macros[tok].flags)) { + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + } + + /* + * This routine accommodates implicitly- and explicitly-scoped + * macro openings. Implicit ones first close out prior scope + * (seen above). Delay opening the head until necessary to + * allow leading punctuation to print. Special consideration + * for `It -column', which has phrase-part syntax instead of + * regular child nodes. + */ + + for (arg = NULL;; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + if ( ! mdoc_block_alloc(m, line, ppos, tok, arg)) + return(0); + + head = body = NULL; + + /* + * Exception: Heads of `It' macros in `-diag' lists are not + * parsed, even though `It' macros in general are parsed. 
+ */ + nparsed = MDOC_It == tok && + MDOC_Bl == m->last->parent->tok && + LIST_diag == m->last->parent->norm->Bl.type; + + /* + * The `Nd' macro has all arguments in its body: it's a hybrid + * of block partial-explicit and full-implicit. Stupid. + */ + + if (MDOC_Nd == tok) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + ac = ARGS_ERROR; + + for ( ; ; ) { + la = *pos; + /* Initialise last-phrase-type with ARGS_PEND. */ + lac = ARGS_ERROR == ac ? ARGS_PEND : ac; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_PUNCT == ac) + break; + + if (ARGS_ERROR == ac) + return(0); + + if (ARGS_EOLN == ac) { + if (ARGS_PPHRASE != lac && ARGS_PHRASE != lac) + break; + /* + * This is necessary: if the last token on a + * line is a `Ta' or tab, then we'll get + * ARGS_EOLN, so we must be smart enough to + * reopen our scope if the last parse was a + * phrase or partial phrase. + */ + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + break; + } + + /* + * Emit leading punctuation (i.e., punctuation before + * the MDOC_HEAD) for non-phrase types. + */ + + if (NULL == head && + ARGS_PEND != ac && + ARGS_PHRASE != ac && + ARGS_PPHRASE != ac && + ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! dword(m, line, la, p, DELIM_OPEN)) + return(0); + continue; + } + + /* Open a head if one hasn't been opened. */ + + if (NULL == head) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + if (ARGS_PHRASE == ac || + ARGS_PEND == ac || + ARGS_PPHRASE == ac) { + /* + * If we haven't opened a body yet, rewind the + * head; if we have, rewind that instead. + */ + + mtt = body ? MDOC_BODY : MDOC_HEAD; + if ( ! 
rew_sub(mtt, m, tok, line, ppos)) + return(0); + + /* Then allocate our body context. */ + + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + + /* + * Process phrases: set whether we're in a + * partial-phrase (this affects line handling) + * then call down into the phrase parser. + */ + + if (ARGS_PPHRASE == ac) + m->flags |= MDOC_PPHRASE; + if (ARGS_PEND == ac && ARGS_PPHRASE == lac) + m->flags |= MDOC_PPHRASE; + + if ( ! phrase(m, line, la, buf)) + return(0); + + m->flags &= ~MDOC_PPHRASE; + continue; + } + + ntok = nparsed || ARGS_QWORD == ac ? + MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + if (NULL == head) { + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + if (nl && ! append_delims(m, line, pos, buf)) + return(0); + + /* If we've already opened our body, exit now. */ + + if (NULL != body) + goto out; + + /* + * If there is an open (i.e., unvalidated) sub-block requiring + * explicit close-out, postpone switching the current block from + * head to body until the rew_sub() call closing out that + * sub-block. + */ + for (n = m->last; n && n != head; n = n->parent) { + if (MDOC_BLOCK == n->type && + MDOC_EXPLICIT & mdoc_macros[n->tok].flags && + ! (MDOC_VALID & n->flags)) { + n->pending = head; + return(1); + } + } + + /* Close out scopes to remain in a consistent state. */ + + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + +out: + if ( ! (MDOC_FREECOL & m->flags)) + return(1); + + if ( ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + if ( ! 
rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + + m->flags &= ~MDOC_FREECOL; + return(1); +} + + +static int +blk_part_imp(MACRO_PROT_ARGS) +{ + int la, nl; + enum mdoct ntok; + enum margserr ac; + char *p; + struct mdoc_node *blk; /* saved block context */ + struct mdoc_node *body; /* saved body context */ + struct mdoc_node *n; + + nl = MDOC_NEWLINE & m->flags; + + /* + * A macro that spans to the end of the line. This is generally + * (but not necessarily) called as the first macro. The block + * has a head as the immediate child, which is always empty, + * followed by zero or more opening punctuation nodes, then the + * body (which may be empty, depending on the macro), then zero + * or more closing punctuation nodes. + */ + + if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) + return(0); + + blk = m->last; + + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + + /* + * Open the body scope "on-demand", that is, after we've + * processed all of the leading delimiters (open parenthesis, + * etc.). + */ + + for (body = NULL; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + if (ARGS_PUNCT == ac) + break; + + if (NULL == body && ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! dword(m, line, la, p, DELIM_OPEN)) + return(0); + continue; + } + + if (NULL == body) { + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + /* Clean-ups to leave in a consistent state. */ + + if (NULL == body) { + if ( ! 
mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + } + + for (n = body->child; n && n->next; n = n->next) + /* Do nothing. */ ; + + /* + * End of sentence spacing: if the last node is a text node and + * has a trailing period, then mark it as being end-of-sentence. + */ + + if (n && MDOC_TEXT == n->type && n->string) + if (mandoc_eos(n->string, strlen(n->string), 1)) + n->flags |= MDOC_EOS; + + /* Up-propagate the end-of-sentence flag. */ + + if (n && (MDOC_EOS & n->flags)) { + body->flags |= MDOC_EOS; + body->parent->flags |= MDOC_EOS; + } + + /* + * If there is an open sub-block requiring explicit close-out, + * postpone closing out the current block + * until the rew_sub() call closing out the sub-block. + */ + for (n = m->last; n && n != body && n != blk->parent; n = n->parent) { + if (MDOC_BLOCK == n->type && + MDOC_EXPLICIT & mdoc_macros[n->tok].flags && + ! (MDOC_VALID & n->flags)) { + make_pending(n, tok, m, line, ppos); + if ( ! mdoc_endbody_alloc(m, line, ppos, + tok, body, ENDBODY_NOSPACE)) + return(0); + return(1); + } + } + + /* + * If we can't rewind to our body, then our scope has already + * been closed by another macro (like `Oc' closing `Op'). This + * is ugly behaviour nodding its head to OpenBSD's overwhelming + * crufty use of `Op' breakage. + */ + if (n != body) + mandoc_vmsg(MANDOCERR_SCOPENEST, m->parse, line, ppos, + "%s broken", mdoc_macronames[tok]); + + if (n && ! rew_sub(MDOC_BODY, m, tok, line, ppos)) + return(0); + + /* Standard appending of delimiters. */ + + if (nl && ! append_delims(m, line, pos, buf)) + return(0); + + /* Rewind scope, if applicable. */ + + if (n && ! 
rew_sub(MDOC_BLOCK, m, tok, line, ppos)) + return(0); + + return(1); +} + + +static int +blk_part_exp(MACRO_PROT_ARGS) +{ + int la, nl; + enum margserr ac; + struct mdoc_node *head; /* keep track of head */ + struct mdoc_node *body; /* keep track of body */ + char *p; + enum mdoct ntok; + + nl = MDOC_NEWLINE & m->flags; + + /* + * The opening of an explicit macro having zero or more leading + * punctuation nodes; a head with optional single element (the + * case of `Eo'); and a body that may be empty. + */ + + if ( ! mdoc_block_alloc(m, line, ppos, tok, NULL)) + return(0); + + for (head = body = NULL; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + /* Flush out leading punctuation. */ + + if (NULL == head && ARGS_QWORD != ac && + DELIM_OPEN == mdoc_isdelim(p)) { + assert(NULL == body); + if ( ! dword(m, line, la, p, DELIM_OPEN)) + return(0); + continue; + } + + if (NULL == head) { + assert(NULL == body); + if ( ! mdoc_head_alloc(m, line, ppos, tok)) + return(0); + head = m->last; + } + + /* + * `Eo' gobbles any data into the head, but most other + * macros just immediately close out and begin the body. + */ + + if (NULL == body) { + assert(head); + /* No check whether it's a macro! */ + if (MDOC_Eo == tok) + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + body = m->last; + + if (MDOC_Eo == tok) + continue; + } + + assert(NULL != head && NULL != body); + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + break; + } + + /* Clean-up to leave in a consistent state. */ + + if (NULL == head) + if ( ! 
mdoc_head_alloc(m, line, ppos, tok)) + return(0); + + if (NULL == body) { + if ( ! rew_sub(MDOC_HEAD, m, tok, line, ppos)) + return(0); + if ( ! mdoc_body_alloc(m, line, ppos, tok)) + return(0); + } + + /* Standard appending of delimiters. */ + + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +/* ARGSUSED */ +static int +in_line_argn(MACRO_PROT_ARGS) +{ + int la, flushed, j, maxargs, nl; + enum margserr ac; + enum margverr av; + struct mdoc_arg *arg; + char *p; + enum mdoct ntok; + + nl = MDOC_NEWLINE & m->flags; + + /* + * A line macro that has a fixed number of arguments (maxargs). + * Only open the scope once the first non-leading-punctuation is + * found (unless MDOC_IGNDELIM is noted, like in `Pf'), then + * keep it open until the maximum number of arguments are + * exhausted. + */ + + switch (tok) { + case (MDOC_Ap): + /* FALLTHROUGH */ + case (MDOC_No): + /* FALLTHROUGH */ + case (MDOC_Ns): + /* FALLTHROUGH */ + case (MDOC_Ux): + maxargs = 0; + break; + case (MDOC_Bx): + /* FALLTHROUGH */ + case (MDOC_Xr): + maxargs = 2; + break; + default: + maxargs = 1; + break; + } + + for (arg = NULL; ; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + for (flushed = j = 0; ; ) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_PUNCT == ac) + break; + if (ARGS_EOLN == ac) + break; + + if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && + ARGS_QWORD != ac && 0 == j && + DELIM_OPEN == mdoc_isdelim(p)) { + if ( ! dword(m, line, la, p, DELIM_OPEN)) + return(0); + continue; + } else if (0 == j) + if ( ! mdoc_elem_alloc(m, line, la, tok, arg)) + return(0); + + if (j == maxargs && ! flushed) { + if ( ! rew_elem(m, tok)) + return(0); + flushed = 1; + } + + ntok = ARGS_QWORD == ac ? 
MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX != ntok) { + if ( ! flushed && ! rew_elem(m, tok)) + return(0); + flushed = 1; + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + j++; + break; + } + + if ( ! (MDOC_IGNDELIM & mdoc_macros[tok].flags) && + ARGS_QWORD != ac && + ! flushed && + DELIM_NONE != mdoc_isdelim(p)) { + if ( ! rew_elem(m, tok)) + return(0); + flushed = 1; + } + + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + j++; + } + + if (0 == j && ! mdoc_elem_alloc(m, line, la, tok, arg)) + return(0); + + /* Close out in a consistent state. */ + + if ( ! flushed && ! rew_elem(m, tok)) + return(0); + if ( ! nl) + return(1); + return(append_delims(m, line, pos, buf)); +} + + +static int +in_line_eoln(MACRO_PROT_ARGS) +{ + int la; + enum margserr ac; + enum margverr av; + struct mdoc_arg *arg; + char *p; + enum mdoct ntok; + + assert( ! (MDOC_PARSED & mdoc_macros[tok].flags)); + + if (tok == MDOC_Pp) + rew_sub(MDOC_BLOCK, m, MDOC_Nm, line, ppos); + + /* Parse macro arguments. */ + + for (arg = NULL; ; ) { + la = *pos; + av = mdoc_argv(m, line, tok, &arg, pos, buf); + + if (ARGV_WORD == av) { + *pos = la; + break; + } + if (ARGV_EOLN == av) + break; + if (ARGV_ARG == av) + continue; + + mdoc_argv_free(arg); + return(0); + } + + /* Open element scope. */ + + if ( ! mdoc_elem_alloc(m, line, ppos, tok, arg)) + return(0); + + /* Parse argument terms. */ + + for (;;) { + la = *pos; + ac = mdoc_args(m, line, pos, buf, tok, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup(tok, p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! rew_elem(m, tok)) + return(0); + return(mdoc_macro(m, ntok, line, la, pos, buf)); + } + + /* Close out (no delimiters). 
*/ + + return(rew_elem(m, tok)); +} + + +/* ARGSUSED */ +static int +ctx_synopsis(MACRO_PROT_ARGS) +{ + int nl; + + nl = MDOC_NEWLINE & m->flags; + + /* If we're not in the SYNOPSIS, go straight to in-line. */ + if ( ! (MDOC_SYNOPSIS & m->flags)) + return(in_line(m, tok, line, ppos, pos, buf)); + + /* If we're a nested call, same place. */ + if ( ! nl) + return(in_line(m, tok, line, ppos, pos, buf)); + + /* + * XXX: this will open a block scope; however, if later we end + * up formatting the block scope, then child nodes will inherit + * the formatting. Be careful. + */ + if (MDOC_Nm == tok) + return(blk_full(m, tok, line, ppos, pos, buf)); + assert(MDOC_Vt == tok); + return(blk_part_imp(m, tok, line, ppos, pos, buf)); +} + + +/* ARGSUSED */ +static int +obsolete(MACRO_PROT_ARGS) +{ + + mdoc_pmsg(m, line, ppos, MANDOCERR_MACROOBS); + return(1); +} + + +/* + * Phrases occur within `Bl -column' entries, separated by `Ta' or tabs. + * They're unusual because they're basically free-form text until a + * macro is encountered. + */ +static int +phrase(struct mdoc *m, int line, int ppos, char *buf) +{ + int la, pos; + enum margserr ac; + enum mdoct ntok; + char *p; + + for (pos = ppos; ; ) { + la = pos; + + ac = mdoc_zargs(m, line, &pos, buf, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup_raw(p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, &pos, buf)) + return(0); + return(append_delims(m, line, &pos, buf)); + } + + return(1); +} + + +/* ARGSUSED */ +static int +phrase_ta(MACRO_PROT_ARGS) +{ + int la; + enum mdoct ntok; + enum margserr ac; + char *p; + + /* + * FIXME: this is overly restrictive: if the `Ta' is unexpected, + * it should simply error out with ARGSLOST. + */ + + if ( ! rew_sub(MDOC_BODY, m, MDOC_It, line, ppos)) + return(0); + if ( ! 
mdoc_body_alloc(m, line, ppos, MDOC_It)) + return(0); + + for (;;) { + la = *pos; + ac = mdoc_zargs(m, line, pos, buf, &p); + + if (ARGS_ERROR == ac) + return(0); + if (ARGS_EOLN == ac) + break; + + ntok = ARGS_QWORD == ac ? MDOC_MAX : lookup_raw(p); + + if (MDOC_MAX == ntok) { + if ( ! dword(m, line, la, p, DELIM_MAX)) + return(0); + continue; + } + + if ( ! mdoc_macro(m, ntok, line, la, pos, buf)) + return(0); + return(append_delims(m, line, pos, buf)); + } + + return(1); +} diff --git a/usr/src/cmd/mandoc/mdoc_man.c b/usr/src/cmd/mandoc/mdoc_man.c new file mode 100644 index 0000000000..9d7d2ca238 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_man.c @@ -0,0 +1,637 @@ +/* $Id: mdoc_man.c,v 1.9 2011/10/24 21:47:59 schwarze Exp $ */ +/* + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> + +#include "mandoc.h" +#include "man.h" +#include "mdoc.h" +#include "main.h" + +#define DECL_ARGS const struct mdoc_meta *m, \ + const struct mdoc_node *n, \ + struct mman *mm + +struct mman { + int need_space; /* next word needs prior ws */ + int need_nl; /* next word needs prior nl */ +}; + +struct manact { + int (*cond)(DECL_ARGS); /* DON'T run actions */ + int (*pre)(DECL_ARGS); /* pre-node action */ + void (*post)(DECL_ARGS); /* post-node action */ + const char *prefix; /* pre-node string constant */ + const char *suffix; /* post-node string constant */ +}; + +static int cond_body(DECL_ARGS); +static int cond_head(DECL_ARGS); +static void post_bd(DECL_ARGS); +static void post_dl(DECL_ARGS); +static void post_enc(DECL_ARGS); +static void post_nm(DECL_ARGS); +static void post_percent(DECL_ARGS); +static void post_pf(DECL_ARGS); +static void post_sect(DECL_ARGS); +static void post_sp(DECL_ARGS); +static int pre_ap(DECL_ARGS); +static int pre_bd(DECL_ARGS); +static int pre_br(DECL_ARGS); +static int pre_bx(DECL_ARGS); +static int pre_dl(DECL_ARGS); +static int pre_enc(DECL_ARGS); +static int pre_it(DECL_ARGS); +static int pre_nm(DECL_ARGS); +static int pre_ns(DECL_ARGS); +static int pre_pp(DECL_ARGS); +static int pre_sp(DECL_ARGS); +static int pre_sect(DECL_ARGS); +static int pre_ux(DECL_ARGS); +static int pre_xr(DECL_ARGS); +static void print_word(struct mman *, const char *); +static void print_node(DECL_ARGS); + +static const struct manact manacts[MDOC_MAX + 1] = { + { NULL, pre_ap, NULL, NULL, NULL }, /* Ap */ + { NULL, NULL, NULL, NULL, NULL }, /* Dd */ + { NULL, NULL, NULL, NULL, NULL }, /* Dt */ + { NULL, NULL, NULL, NULL, NULL }, /* Os */ + { NULL, pre_sect, post_sect, ".SH", NULL }, /* Sh */ + { NULL, pre_sect, post_sect, ".SS", NULL }, /* Ss */ + { NULL, pre_pp, NULL, NULL, NULL }, /* Pp */ + { cond_body, pre_dl, post_dl, NULL, NULL }, /* D1 */ + { 
cond_body, pre_dl, post_dl, NULL, NULL }, /* Dl */ + { cond_body, pre_bd, post_bd, NULL, NULL }, /* Bd */ + { NULL, NULL, NULL, NULL, NULL }, /* Ed */ + { NULL, NULL, NULL, NULL, NULL }, /* Bl */ + { NULL, NULL, NULL, NULL, NULL }, /* El */ + { NULL, pre_it, NULL, NULL, NULL }, /* _It */ + { NULL, pre_enc, post_enc, "\\fI", "\\fP" }, /* Ad */ + { NULL, NULL, NULL, NULL, NULL }, /* _An */ + { NULL, pre_enc, post_enc, "\\fI", "\\fP" }, /* Ar */ + { NULL, pre_enc, post_enc, "\\fB", "\\fP" }, /* Cd */ + { NULL, pre_enc, post_enc, "\\fB", "\\fP" }, /* Cm */ + { NULL, pre_enc, post_enc, "\\fR", "\\fP" }, /* Dv */ + { NULL, pre_enc, post_enc, "\\fR", "\\fP" }, /* Er */ + { NULL, pre_enc, post_enc, "\\fR", "\\fP" }, /* Ev */ + { NULL, pre_enc, post_enc, "The \\fB", + "\\fP\nutility exits 0 on success, and >0 if an error occurs." + }, /* Ex */ + { NULL, NULL, NULL, NULL, NULL }, /* _Fa */ + { NULL, NULL, NULL, NULL, NULL }, /* _Fd */ + { NULL, pre_enc, post_enc, "\\fB-", "\\fP" }, /* Fl */ + { NULL, NULL, NULL, NULL, NULL }, /* _Fn */ + { NULL, NULL, NULL, NULL, NULL }, /* _Ft */ + { NULL, pre_enc, post_enc, "\\fB", "\\fP" }, /* Ic */ + { NULL, NULL, NULL, NULL, NULL }, /* _In */ + { NULL, pre_enc, post_enc, "\\fR", "\\fP" }, /* Li */ + { cond_head, pre_enc, NULL, "\\- ", NULL }, /* Nd */ + { NULL, pre_nm, post_nm, NULL, NULL }, /* Nm */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Op */ + { NULL, NULL, NULL, NULL, NULL }, /* Ot */ + { NULL, pre_enc, post_enc, "\\fI", "\\fP" }, /* Pa */ + { NULL, pre_enc, post_enc, "The \\fB", + "\\fP\nfunction returns the value 0 if successful;\n" + "otherwise the value -1 is returned and the global\n" + "variable \\fIerrno\\fP is set to indicate the error." 
+ }, /* Rv */ + { NULL, NULL, NULL, NULL, NULL }, /* St */ + { NULL, NULL, NULL, NULL, NULL }, /* _Va */ + { NULL, NULL, NULL, NULL, NULL }, /* _Vt */ + { NULL, pre_xr, NULL, NULL, NULL }, /* Xr */ + { NULL, NULL, post_percent, NULL, NULL }, /* _%A */ + { NULL, NULL, NULL, NULL, NULL }, /* _%B */ + { NULL, NULL, post_percent, NULL, NULL }, /* _%D */ + { NULL, NULL, NULL, NULL, NULL }, /* _%I */ + { NULL, pre_enc, post_percent, "\\fI", "\\fP" }, /* %J */ + { NULL, NULL, NULL, NULL, NULL }, /* _%N */ + { NULL, NULL, NULL, NULL, NULL }, /* _%O */ + { NULL, NULL, NULL, NULL, NULL }, /* _%P */ + { NULL, NULL, NULL, NULL, NULL }, /* _%R */ + { NULL, pre_enc, post_percent, "\"", "\"" }, /* %T */ + { NULL, NULL, NULL, NULL, NULL }, /* _%V */ + { NULL, NULL, NULL, NULL, NULL }, /* Ac */ + { cond_body, pre_enc, post_enc, "<", ">" }, /* Ao */ + { cond_body, pre_enc, post_enc, "<", ">" }, /* Aq */ + { NULL, NULL, NULL, NULL, NULL }, /* At */ + { NULL, NULL, NULL, NULL, NULL }, /* Bc */ + { NULL, NULL, NULL, NULL, NULL }, /* _Bf */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bo */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Bq */ + { NULL, pre_ux, NULL, "BSD/OS", NULL }, /* Bsx */ + { NULL, pre_bx, NULL, NULL, NULL }, /* Bx */ + { NULL, NULL, NULL, NULL, NULL }, /* Db */ + { NULL, NULL, NULL, NULL, NULL }, /* Dc */ + { cond_body, pre_enc, post_enc, "``", "''" }, /* Do */ + { cond_body, pre_enc, post_enc, "``", "''" }, /* Dq */ + { NULL, NULL, NULL, NULL, NULL }, /* _Ec */ + { NULL, NULL, NULL, NULL, NULL }, /* _Ef */ + { NULL, pre_enc, post_enc, "\\fI", "\\fP" }, /* Em */ + { NULL, NULL, NULL, NULL, NULL }, /* _Eo */ + { NULL, pre_ux, NULL, "FreeBSD", NULL }, /* Fx */ + { NULL, pre_enc, post_enc, "\\fB", "\\fP" }, /* Ms */ + { NULL, NULL, NULL, NULL, NULL }, /* No */ + { NULL, pre_ns, NULL, NULL, NULL }, /* Ns */ + { NULL, pre_ux, NULL, "NetBSD", NULL }, /* Nx */ + { NULL, pre_ux, NULL, "OpenBSD", NULL }, /* Ox */ + { NULL, NULL, NULL, NULL, NULL }, /* Pc */ + { NULL, 
NULL, post_pf, NULL, NULL }, /* Pf */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Po */ + { cond_body, pre_enc, post_enc, "(", ")" }, /* Pq */ + { NULL, NULL, NULL, NULL, NULL }, /* Qc */ + { cond_body, pre_enc, post_enc, "`", "'" }, /* Ql */ + { cond_body, pre_enc, post_enc, "\"", "\"" }, /* Qo */ + { cond_body, pre_enc, post_enc, "\"", "\"" }, /* Qq */ + { NULL, NULL, NULL, NULL, NULL }, /* Re */ + { cond_body, pre_pp, NULL, NULL, NULL }, /* Rs */ + { NULL, NULL, NULL, NULL, NULL }, /* Sc */ + { cond_body, pre_enc, post_enc, "`", "'" }, /* So */ + { cond_body, pre_enc, post_enc, "`", "'" }, /* Sq */ + { NULL, NULL, NULL, NULL, NULL }, /* _Sm */ + { NULL, pre_enc, post_enc, "\\fI", "\\fP" }, /* Sx */ + { NULL, pre_enc, post_enc, "\\fB", "\\fP" }, /* Sy */ + { NULL, pre_enc, post_enc, "\\fR", "\\fP" }, /* Tn */ + { NULL, pre_ux, NULL, "UNIX", NULL }, /* Ux */ + { NULL, NULL, NULL, NULL, NULL }, /* _Xc */ + { NULL, NULL, NULL, NULL, NULL }, /* _Xo */ + { NULL, NULL, NULL, NULL, NULL }, /* _Fo */ + { NULL, NULL, NULL, NULL, NULL }, /* _Fc */ + { cond_body, pre_enc, post_enc, "[", "]" }, /* Oo */ + { NULL, NULL, NULL, NULL, NULL }, /* Oc */ + { NULL, NULL, NULL, NULL, NULL }, /* _Bk */ + { NULL, NULL, NULL, NULL, NULL }, /* _Ek */ + { NULL, pre_ux, NULL, "is currently in beta test.", NULL }, /* Bt */ + { NULL, NULL, NULL, NULL, NULL }, /* Hf */ + { NULL, NULL, NULL, NULL, NULL }, /* Fr */ + { NULL, pre_ux, NULL, "currently under development.", NULL }, /* Ud */ + { NULL, NULL, NULL, NULL, NULL }, /* _Lb */ + { NULL, pre_pp, NULL, NULL, NULL }, /* Lp */ + { NULL, NULL, NULL, NULL, NULL }, /* _Lk */ + { NULL, NULL, NULL, NULL, NULL }, /* _Mt */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Brq */ + { cond_body, pre_enc, post_enc, "{", "}" }, /* Bro */ + { NULL, NULL, NULL, NULL, NULL }, /* Brc */ + { NULL, NULL, NULL, NULL, NULL }, /* _%C */ + { NULL, NULL, NULL, NULL, NULL }, /* _Es */ + { NULL, NULL, NULL, NULL, NULL }, /* _En */ + { NULL, pre_ux, NULL, 
"DragonFly", NULL }, /* Dx */ + { NULL, NULL, NULL, NULL, NULL }, /* _%Q */ + { NULL, pre_br, NULL, NULL, NULL }, /* br */ + { NULL, pre_sp, post_sp, NULL, NULL }, /* sp */ + { NULL, NULL, NULL, NULL, NULL }, /* _%U */ + { NULL, NULL, NULL, NULL, NULL }, /* _Ta */ + { NULL, NULL, NULL, NULL, NULL }, /* ROOT */ +}; + +static void +print_word(struct mman *mm, const char *s) +{ + + if (mm->need_nl) { + /* + * If we need a newline, print it now and start afresh. + */ + putchar('\n'); + mm->need_space = 0; + mm->need_nl = 0; + } else if (mm->need_space && '\0' != s[0]) + /* + * If we need a space, only print it before + * (1) a nonzero length word; + * (2) a word that is non-punctuation; and + * (3) if punctuation, non-terminating punctuation. + */ + if (NULL == strchr(".,:;)]?!", s[0]) || '\0' != s[1]) + putchar(' '); + + /* + * Reassign needing space if we're not following opening + * punctuation. + */ + mm->need_space = + ('(' != s[0] && '[' != s[0]) || '\0' != s[1]; + + for ( ; *s; s++) { + switch (*s) { + case (ASCII_NBRSP): + printf("\\~"); + break; + case (ASCII_HYPH): + putchar('-'); + break; + default: + putchar((unsigned char)*s); + break; + } + } +} + +void +man_man(void *arg, const struct man *man) +{ + + /* + * Dump the keep buffer. + * We're guaranteed by now that this exists (is non-NULL). + * Flush stdout afterward, just in case. 
+ */ + fputs(mparse_getkeep(man_mparse(man)), stdout); + fflush(stdout); +} + +void +man_mdoc(void *arg, const struct mdoc *mdoc) +{ + const struct mdoc_meta *m; + const struct mdoc_node *n; + struct mman mm; + + m = mdoc_meta(mdoc); + n = mdoc_node(mdoc); + + printf(".TH \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"", + m->title, m->msec, m->date, m->os, m->vol); + + memset(&mm, 0, sizeof(struct mman)); + + mm.need_nl = 1; + print_node(m, n, &mm); + putchar('\n'); +} + +static void +print_node(DECL_ARGS) +{ + const struct mdoc_node *prev, *sub; + const struct manact *act; + int cond, do_sub; + + /* + * Break the line if we were parsed subsequent the current node. + * This makes the page structure be more consistent. + */ + prev = n->prev ? n->prev : n->parent; + if (prev && prev->line < n->line) + mm->need_nl = 1; + + act = NULL; + cond = 0; + do_sub = 1; + + if (MDOC_TEXT == n->type) { + /* + * Make sure that we don't happen to start with a + * control character at the start of a line. + */ + if (mm->need_nl && ('.' == *n->string || + '\'' == *n->string)) { + print_word(mm, "\\&"); + mm->need_space = 0; + } + print_word(mm, n->string); + } else { + /* + * Conditionally run the pre-node action handler for a + * node. + */ + act = manacts + n->tok; + cond = NULL == act->cond || (*act->cond)(m, n, mm); + if (cond && act->pre) + do_sub = (*act->pre)(m, n, mm); + } + + /* + * Conditionally run all child nodes. + * Note that this iterates over children instead of using + * recursion. This prevents unnecessary depth in the stack. + */ + if (do_sub) + for (sub = n->child; sub; sub = sub->next) + print_node(m, sub, mm); + + /* + * Lastly, conditionally run the post-node handler. + */ + if (cond && act->post) + (*act->post)(m, n, mm); +} + +static int +cond_head(DECL_ARGS) +{ + + return(MDOC_HEAD == n->type); +} + +static int +cond_body(DECL_ARGS) +{ + + return(MDOC_BODY == n->type); +} + +/* + * Output a font encoding before a node, e.g., \fR. 
+ * This obviously has no trailing space. + */ +static int +pre_enc(DECL_ARGS) +{ + const char *prefix; + + prefix = manacts[n->tok].prefix; + if (NULL == prefix) + return(1); + print_word(mm, prefix); + mm->need_space = 0; + return(1); +} + +/* + * Output a font encoding subsequent a node, e.g., \fP. + */ +static void +post_enc(DECL_ARGS) +{ + const char *suffix; + + suffix = manacts[n->tok].suffix; + if (NULL == suffix) + return; + mm->need_space = 0; + print_word(mm, suffix); +} + +/* + * Used in listings (percent = %A, e.g.). + * FIXME: this is incomplete. + * It doesn't print a nice ", and" for lists. + */ +static void +post_percent(DECL_ARGS) +{ + + post_enc(m, n, mm); + if (n->next) + print_word(mm, ","); + else { + print_word(mm, "."); + mm->need_nl = 1; + } +} + +/* + * Print before a section header. + */ +static int +pre_sect(DECL_ARGS) +{ + + if (MDOC_HEAD != n->type) + return(1); + mm->need_nl = 1; + print_word(mm, manacts[n->tok].prefix); + print_word(mm, "\""); + mm->need_space = 0; + return(1); +} + +/* + * Print subsequent a section header. 
+ */ +static void +post_sect(DECL_ARGS) +{ + + if (MDOC_HEAD != n->type) + return; + mm->need_space = 0; + print_word(mm, "\""); + mm->need_nl = 1; +} + +static int +pre_ap(DECL_ARGS) +{ + + mm->need_space = 0; + print_word(mm, "'"); + mm->need_space = 0; + return(0); +} + +static int +pre_bd(DECL_ARGS) +{ + + if (DISP_unfilled == n->norm->Bd.type || + DISP_literal == n->norm->Bd.type) { + mm->need_nl = 1; + print_word(mm, ".nf"); + } + mm->need_nl = 1; + return(1); +} + +static void +post_bd(DECL_ARGS) +{ + + if (DISP_unfilled == n->norm->Bd.type || + DISP_literal == n->norm->Bd.type) { + mm->need_nl = 1; + print_word(mm, ".fi"); + } + mm->need_nl = 1; +} + +static int +pre_br(DECL_ARGS) +{ + + mm->need_nl = 1; + print_word(mm, ".br"); + mm->need_nl = 1; + return(0); +} + +static int +pre_bx(DECL_ARGS) +{ + + n = n->child; + if (n) { + print_word(mm, n->string); + mm->need_space = 0; + n = n->next; + } + print_word(mm, "BSD"); + if (NULL == n) + return(0); + mm->need_space = 0; + print_word(mm, "-"); + mm->need_space = 0; + print_word(mm, n->string); + return(0); +} + +static int +pre_dl(DECL_ARGS) +{ + + mm->need_nl = 1; + print_word(mm, ".RS 6n"); + mm->need_nl = 1; + return(1); +} + +static void +post_dl(DECL_ARGS) +{ + + mm->need_nl = 1; + print_word(mm, ".RE"); + mm->need_nl = 1; +} + +static int +pre_it(DECL_ARGS) +{ + const struct mdoc_node *bln; + + if (MDOC_HEAD == n->type) { + mm->need_nl = 1; + print_word(mm, ".TP"); + bln = n->parent->parent->prev; + switch (bln->norm->Bl.type) { + case (LIST_bullet): + print_word(mm, "4n"); + mm->need_nl = 1; + print_word(mm, "\\fBo\\fP"); + break; + default: + if (bln->norm->Bl.width) + print_word(mm, bln->norm->Bl.width); + break; + } + mm->need_nl = 1; + } + return(1); +} + +static int +pre_nm(DECL_ARGS) +{ + + if (MDOC_ELEM != n->type && MDOC_HEAD != n->type) + return(1); + print_word(mm, "\\fB"); + mm->need_space = 0; + if (NULL == n->child) + print_word(mm, m->name); + return(1); +} + +static void 
+post_nm(DECL_ARGS) +{ + + if (MDOC_ELEM != n->type && MDOC_HEAD != n->type) + return; + mm->need_space = 0; + print_word(mm, "\\fP"); +} + +static int +pre_ns(DECL_ARGS) +{ + + mm->need_space = 0; + return(0); +} + +static void +post_pf(DECL_ARGS) +{ + + mm->need_space = 0; +} + +static int +pre_pp(DECL_ARGS) +{ + + mm->need_nl = 1; + if (MDOC_It == n->parent->tok) + print_word(mm, ".sp"); + else + print_word(mm, ".PP"); + mm->need_nl = 1; + return(1); +} + +static int +pre_sp(DECL_ARGS) +{ + + mm->need_nl = 1; + print_word(mm, ".sp"); + return(1); +} + +static void +post_sp(DECL_ARGS) +{ + + mm->need_nl = 1; +} + +static int +pre_xr(DECL_ARGS) +{ + + n = n->child; + if (NULL == n) + return(0); + print_node(m, n, mm); + n = n->next; + if (NULL == n) + return(0); + mm->need_space = 0; + print_word(mm, "("); + print_node(m, n, mm); + print_word(mm, ")"); + return(0); +} + +static int +pre_ux(DECL_ARGS) +{ + + print_word(mm, manacts[n->tok].prefix); + if (NULL == n->child) + return(0); + mm->need_space = 0; + print_word(mm, "\\~"); + mm->need_space = 0; + return(1); +} diff --git a/usr/src/cmd/mandoc/mdoc_term.c b/usr/src/cmd/mandoc/mdoc_term.c new file mode 100644 index 0000000000..5333566444 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_term.c @@ -0,0 +1,2257 @@ +/* $Id: mdoc_term.c,v 1.238 2011/11/13 13:15:14 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" +#include "mdoc.h" +#include "main.h" + +struct termpair { + struct termpair *ppair; + int count; +}; + +#define DECL_ARGS struct termp *p, \ + struct termpair *pair, \ + const struct mdoc_meta *m, \ + const struct mdoc_node *n + +struct termact { + int (*pre)(DECL_ARGS); + void (*post)(DECL_ARGS); +}; + +static size_t a2width(const struct termp *, const char *); +static size_t a2height(const struct termp *, const char *); +static size_t a2offs(const struct termp *, const char *); + +static void print_bvspace(struct termp *, + const struct mdoc_node *, + const struct mdoc_node *); +static void print_mdoc_node(DECL_ARGS); +static void print_mdoc_nodelist(DECL_ARGS); +static void print_mdoc_head(struct termp *, const void *); +static void print_mdoc_foot(struct termp *, const void *); +static void synopsis_pre(struct termp *, + const struct mdoc_node *); + +static void termp____post(DECL_ARGS); +static void termp__t_post(DECL_ARGS); +static void termp_an_post(DECL_ARGS); +static void termp_bd_post(DECL_ARGS); +static void termp_bk_post(DECL_ARGS); +static void termp_bl_post(DECL_ARGS); +static void termp_d1_post(DECL_ARGS); +static void termp_fo_post(DECL_ARGS); +static void termp_in_post(DECL_ARGS); +static void termp_it_post(DECL_ARGS); +static void termp_lb_post(DECL_ARGS); +static void termp_nm_post(DECL_ARGS); +static void termp_pf_post(DECL_ARGS); +static void 
termp_quote_post(DECL_ARGS); +static void termp_sh_post(DECL_ARGS); +static void termp_ss_post(DECL_ARGS); + +static int termp__a_pre(DECL_ARGS); +static int termp__t_pre(DECL_ARGS); +static int termp_an_pre(DECL_ARGS); +static int termp_ap_pre(DECL_ARGS); +static int termp_bd_pre(DECL_ARGS); +static int termp_bf_pre(DECL_ARGS); +static int termp_bk_pre(DECL_ARGS); +static int termp_bl_pre(DECL_ARGS); +static int termp_bold_pre(DECL_ARGS); +static int termp_bt_pre(DECL_ARGS); +static int termp_bx_pre(DECL_ARGS); +static int termp_cd_pre(DECL_ARGS); +static int termp_d1_pre(DECL_ARGS); +static int termp_ex_pre(DECL_ARGS); +static int termp_fa_pre(DECL_ARGS); +static int termp_fd_pre(DECL_ARGS); +static int termp_fl_pre(DECL_ARGS); +static int termp_fn_pre(DECL_ARGS); +static int termp_fo_pre(DECL_ARGS); +static int termp_ft_pre(DECL_ARGS); +static int termp_igndelim_pre(DECL_ARGS); +static int termp_in_pre(DECL_ARGS); +static int termp_it_pre(DECL_ARGS); +static int termp_li_pre(DECL_ARGS); +static int termp_lk_pre(DECL_ARGS); +static int termp_nd_pre(DECL_ARGS); +static int termp_nm_pre(DECL_ARGS); +static int termp_ns_pre(DECL_ARGS); +static int termp_quote_pre(DECL_ARGS); +static int termp_rs_pre(DECL_ARGS); +static int termp_rv_pre(DECL_ARGS); +static int termp_sh_pre(DECL_ARGS); +static int termp_sm_pre(DECL_ARGS); +static int termp_sp_pre(DECL_ARGS); +static int termp_ss_pre(DECL_ARGS); +static int termp_under_pre(DECL_ARGS); +static int termp_ud_pre(DECL_ARGS); +static int termp_vt_pre(DECL_ARGS); +static int termp_xr_pre(DECL_ARGS); +static int termp_xx_pre(DECL_ARGS); + +static const struct termact termacts[MDOC_MAX] = { + { termp_ap_pre, NULL }, /* Ap */ + { NULL, NULL }, /* Dd */ + { NULL, NULL }, /* Dt */ + { NULL, NULL }, /* Os */ + { termp_sh_pre, termp_sh_post }, /* Sh */ + { termp_ss_pre, termp_ss_post }, /* Ss */ + { termp_sp_pre, NULL }, /* Pp */ + { termp_d1_pre, termp_d1_post }, /* D1 */ + { termp_d1_pre, termp_d1_post }, /* Dl */ + { 
termp_bd_pre, termp_bd_post }, /* Bd */ + { NULL, NULL }, /* Ed */ + { termp_bl_pre, termp_bl_post }, /* Bl */ + { NULL, NULL }, /* El */ + { termp_it_pre, termp_it_post }, /* It */ + { termp_under_pre, NULL }, /* Ad */ + { termp_an_pre, termp_an_post }, /* An */ + { termp_under_pre, NULL }, /* Ar */ + { termp_cd_pre, NULL }, /* Cd */ + { termp_bold_pre, NULL }, /* Cm */ + { NULL, NULL }, /* Dv */ + { NULL, NULL }, /* Er */ + { NULL, NULL }, /* Ev */ + { termp_ex_pre, NULL }, /* Ex */ + { termp_fa_pre, NULL }, /* Fa */ + { termp_fd_pre, NULL }, /* Fd */ + { termp_fl_pre, NULL }, /* Fl */ + { termp_fn_pre, NULL }, /* Fn */ + { termp_ft_pre, NULL }, /* Ft */ + { termp_bold_pre, NULL }, /* Ic */ + { termp_in_pre, termp_in_post }, /* In */ + { termp_li_pre, NULL }, /* Li */ + { termp_nd_pre, NULL }, /* Nd */ + { termp_nm_pre, termp_nm_post }, /* Nm */ + { termp_quote_pre, termp_quote_post }, /* Op */ + { NULL, NULL }, /* Ot */ + { termp_under_pre, NULL }, /* Pa */ + { termp_rv_pre, NULL }, /* Rv */ + { NULL, NULL }, /* St */ + { termp_under_pre, NULL }, /* Va */ + { termp_vt_pre, NULL }, /* Vt */ + { termp_xr_pre, NULL }, /* Xr */ + { termp__a_pre, termp____post }, /* %A */ + { termp_under_pre, termp____post }, /* %B */ + { NULL, termp____post }, /* %D */ + { termp_under_pre, termp____post }, /* %I */ + { termp_under_pre, termp____post }, /* %J */ + { NULL, termp____post }, /* %N */ + { NULL, termp____post }, /* %O */ + { NULL, termp____post }, /* %P */ + { NULL, termp____post }, /* %R */ + { termp__t_pre, termp__t_post }, /* %T */ + { NULL, termp____post }, /* %V */ + { NULL, NULL }, /* Ac */ + { termp_quote_pre, termp_quote_post }, /* Ao */ + { termp_quote_pre, termp_quote_post }, /* Aq */ + { NULL, NULL }, /* At */ + { NULL, NULL }, /* Bc */ + { termp_bf_pre, NULL }, /* Bf */ + { termp_quote_pre, termp_quote_post }, /* Bo */ + { termp_quote_pre, termp_quote_post }, /* Bq */ + { termp_xx_pre, NULL }, /* Bsx */ + { termp_bx_pre, NULL }, /* Bx */ + { NULL, NULL }, /* 
Db */ + { NULL, NULL }, /* Dc */ + { termp_quote_pre, termp_quote_post }, /* Do */ + { termp_quote_pre, termp_quote_post }, /* Dq */ + { NULL, NULL }, /* Ec */ /* FIXME: no space */ + { NULL, NULL }, /* Ef */ + { termp_under_pre, NULL }, /* Em */ + { termp_quote_pre, termp_quote_post }, /* Eo */ + { termp_xx_pre, NULL }, /* Fx */ + { termp_bold_pre, NULL }, /* Ms */ + { termp_igndelim_pre, NULL }, /* No */ + { termp_ns_pre, NULL }, /* Ns */ + { termp_xx_pre, NULL }, /* Nx */ + { termp_xx_pre, NULL }, /* Ox */ + { NULL, NULL }, /* Pc */ + { termp_igndelim_pre, termp_pf_post }, /* Pf */ + { termp_quote_pre, termp_quote_post }, /* Po */ + { termp_quote_pre, termp_quote_post }, /* Pq */ + { NULL, NULL }, /* Qc */ + { termp_quote_pre, termp_quote_post }, /* Ql */ + { termp_quote_pre, termp_quote_post }, /* Qo */ + { termp_quote_pre, termp_quote_post }, /* Qq */ + { NULL, NULL }, /* Re */ + { termp_rs_pre, NULL }, /* Rs */ + { NULL, NULL }, /* Sc */ + { termp_quote_pre, termp_quote_post }, /* So */ + { termp_quote_pre, termp_quote_post }, /* Sq */ + { termp_sm_pre, NULL }, /* Sm */ + { termp_under_pre, NULL }, /* Sx */ + { termp_bold_pre, NULL }, /* Sy */ + { NULL, NULL }, /* Tn */ + { termp_xx_pre, NULL }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { termp_fo_pre, termp_fo_post }, /* Fo */ + { NULL, NULL }, /* Fc */ + { termp_quote_pre, termp_quote_post }, /* Oo */ + { NULL, NULL }, /* Oc */ + { termp_bk_pre, termp_bk_post }, /* Bk */ + { NULL, NULL }, /* Ek */ + { termp_bt_pre, NULL }, /* Bt */ + { NULL, NULL }, /* Hf */ + { NULL, NULL }, /* Fr */ + { termp_ud_pre, NULL }, /* Ud */ + { NULL, termp_lb_post }, /* Lb */ + { termp_sp_pre, NULL }, /* Lp */ + { termp_lk_pre, NULL }, /* Lk */ + { termp_under_pre, NULL }, /* Mt */ + { termp_quote_pre, termp_quote_post }, /* Brq */ + { termp_quote_pre, termp_quote_post }, /* Bro */ + { NULL, NULL }, /* Brc */ + { NULL, termp____post }, /* %C */ + { NULL, NULL }, /* Es */ /* TODO */ + { NULL, NULL }, /* En 
*/ /* TODO */ + { termp_xx_pre, NULL }, /* Dx */ + { NULL, termp____post }, /* %Q */ + { termp_sp_pre, NULL }, /* br */ + { termp_sp_pre, NULL }, /* sp */ + { termp_under_pre, termp____post }, /* %U */ + { NULL, NULL }, /* Ta */ +}; + + +void +terminal_mdoc(void *arg, const struct mdoc *mdoc) +{ + const struct mdoc_node *n; + const struct mdoc_meta *m; + struct termp *p; + + p = (struct termp *)arg; + + if (0 == p->defindent) + p->defindent = 5; + + p->overstep = 0; + p->maxrmargin = p->defrmargin; + p->tabwidth = term_len(p, 5); + + if (NULL == p->symtab) + p->symtab = mchars_alloc(); + + n = mdoc_node(mdoc); + m = mdoc_meta(mdoc); + + term_begin(p, print_mdoc_head, print_mdoc_foot, m); + + if (n->child) + print_mdoc_nodelist(p, NULL, m, n->child); + + term_end(p); +} + + +static void +print_mdoc_nodelist(DECL_ARGS) +{ + + print_mdoc_node(p, pair, m, n); + if (n->next) + print_mdoc_nodelist(p, pair, m, n->next); +} + + +/* ARGSUSED */ +static void +print_mdoc_node(DECL_ARGS) +{ + int chld; + const void *font; + struct termpair npair; + size_t offset, rmargin; + + chld = 1; + offset = p->offset; + rmargin = p->rmargin; + font = term_fontq(p); + + memset(&npair, 0, sizeof(struct termpair)); + npair.ppair = pair; + + /* + * Keeps only work until the end of a line. If a keep was + * invoked in a prior line, revert it to PREKEEP. + * + * Also let SYNPRETTY sections behave as if they were wrapped + * in a `Bk' block. + */ + + if (TERMP_KEEP & p->flags || MDOC_SYNPRETTY & n->flags) { + if (n->prev && n->prev->line != n->line) { + p->flags &= ~TERMP_KEEP; + p->flags |= TERMP_PREKEEP; + } else if (NULL == n->prev) { + if (n->parent && n->parent->line != n->line) { + p->flags &= ~TERMP_KEEP; + p->flags |= TERMP_PREKEEP; + } + } + } + + /* + * Since SYNPRETTY sections aren't "turned off" with `Ek', + * we have to intuit whether we should disable formatting. + */ + + if ( ! 
(MDOC_SYNPRETTY & n->flags) && + ((n->prev && MDOC_SYNPRETTY & n->prev->flags) || + (n->parent && MDOC_SYNPRETTY & n->parent->flags))) + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); + + /* + * After the keep flags have been set up, we may now + * produce output. Note that some pre-handlers do so. + */ + + switch (n->type) { + case (MDOC_TEXT): + if (' ' == *n->string && MDOC_LINE & n->flags) + term_newln(p); + if (MDOC_DELIMC & n->flags) + p->flags |= TERMP_NOSPACE; + term_word(p, n->string); + if (MDOC_DELIMO & n->flags) + p->flags |= TERMP_NOSPACE; + break; + case (MDOC_EQN): + term_eqn(p, n->eqn); + break; + case (MDOC_TBL): + term_tbl(p, n->span); + break; + default: + if (termacts[n->tok].pre && ENDBODY_NOT == n->end) + chld = (*termacts[n->tok].pre) + (p, &npair, m, n); + break; + } + + if (chld && n->child) + print_mdoc_nodelist(p, &npair, m, n->child); + + term_fontpopq(p, font); + + switch (n->type) { + case (MDOC_TEXT): + break; + case (MDOC_TBL): + break; + case (MDOC_EQN): + break; + default: + if ( ! termacts[n->tok].post || MDOC_ENDED & n->flags) + break; + (void)(*termacts[n->tok].post)(p, &npair, m, n); + + /* + * Explicit end tokens not only call the post + * handler, but also tell the respective block + * that it must not call the post handler again. + */ + if (ENDBODY_NOT != n->end) + n->pending->flags |= MDOC_ENDED; + + /* + * End of line terminating an implicit block + * while an explicit block is still open. + * Continue the explicit block without spacing. 
+ */ + if (ENDBODY_NOSPACE == n->end) + p->flags |= TERMP_NOSPACE; + break; + } + + if (MDOC_EOS & n->flags) + p->flags |= TERMP_SENTENCE; + + p->offset = offset; + p->rmargin = rmargin; +} + + +static void +print_mdoc_foot(struct termp *p, const void *arg) +{ + const struct mdoc_meta *m; + + m = (const struct mdoc_meta *)arg; + + term_fontrepl(p, TERMFONT_NONE); + + /* + * Output the footer in new-groff style, that is, three columns + * with the middle being the manual date and flanking columns + * being the operating system: + * + * SYSTEM DATE SYSTEM + */ + + term_vspace(p); + + p->offset = 0; + p->rmargin = (p->maxrmargin - + term_strlen(p, m->date) + term_len(p, 1)) / 2; + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + + term_word(p, m->os); + term_flushln(p); + + p->offset = p->rmargin; + p->rmargin = p->maxrmargin - term_strlen(p, m->os); + p->flags |= TERMP_NOSPACE; + + term_word(p, m->date); + term_flushln(p); + + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + p->flags &= ~TERMP_NOBREAK; + p->flags |= TERMP_NOSPACE; + + term_word(p, m->os); + term_flushln(p); + + p->offset = 0; + p->rmargin = p->maxrmargin; + p->flags = 0; +} + + +static void +print_mdoc_head(struct termp *p, const void *arg) +{ + char buf[BUFSIZ], title[BUFSIZ]; + size_t buflen, titlen; + const struct mdoc_meta *m; + + m = (const struct mdoc_meta *)arg; + + /* + * The header is strange. It has three components, which are + * really two with the first duplicated. It goes like this: + * + * IDENTIFIER TITLE IDENTIFIER + * + * The IDENTIFIER is NAME(SECTION), which is the command-name + * (if given, or "unknown" if not) followed by the manual page + * section. These are given in `Dt'. The TITLE is a free-form + * string depending on the manual volume. If not specified, it + * switches on the manual section. 
+ */ + + p->offset = 0; + p->rmargin = p->maxrmargin; + + assert(m->vol); + strlcpy(buf, m->vol, BUFSIZ); + buflen = term_strlen(p, buf); + + if (m->arch) { + strlcat(buf, " (", BUFSIZ); + strlcat(buf, m->arch, BUFSIZ); + strlcat(buf, ")", BUFSIZ); + } + + snprintf(title, BUFSIZ, "%s(%s)", m->title, m->msec); + titlen = term_strlen(p, title); + + p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; + p->offset = 0; + p->rmargin = 2 * (titlen+1) + buflen < p->maxrmargin ? + (p->maxrmargin - + term_strlen(p, buf) + term_len(p, 1)) / 2 : + p->maxrmargin - buflen; + + term_word(p, title); + term_flushln(p); + + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->offset + buflen + titlen < p->maxrmargin ? + p->maxrmargin - titlen : p->maxrmargin; + + term_word(p, buf); + term_flushln(p); + + p->flags &= ~TERMP_NOBREAK; + if (p->rmargin + titlen <= p->maxrmargin) { + p->flags |= TERMP_NOSPACE; + p->offset = p->rmargin; + p->rmargin = p->maxrmargin; + term_word(p, title); + term_flushln(p); + } + + p->flags &= ~TERMP_NOSPACE; + p->offset = 0; + p->rmargin = p->maxrmargin; +} + + +static size_t +a2height(const struct termp *p, const char *v) +{ + struct roffsu su; + + + assert(v); + if ( ! a2roffsu(v, &su, SCALE_VS)) + SCALE_VS_INIT(&su, atoi(v)); + + return(term_vspan(p, &su)); +} + + +static size_t +a2width(const struct termp *p, const char *v) +{ + struct roffsu su; + + assert(v); + if ( ! a2roffsu(v, &su, SCALE_MAX)) + SCALE_HS_INIT(&su, term_strlen(p, v)); + + return(term_hspan(p, &su)); +} + + +static size_t +a2offs(const struct termp *p, const char *v) +{ + struct roffsu su; + + if ('\0' == *v) + return(0); + else if (0 == strcmp(v, "left")) + return(0); + else if (0 == strcmp(v, "indent")) + return(term_len(p, p->defindent + 1)); + else if (0 == strcmp(v, "indent-two")) + return(term_len(p, (p->defindent + 1) * 2)); + else if ( ! 
a2roffsu(v, &su, SCALE_MAX)) + SCALE_HS_INIT(&su, term_strlen(p, v)); + + return(term_hspan(p, &su)); +} + + +/* + * Determine how much space to print out before block elements of `It' + * (and thus `Bl') and `Bd'. And then go ahead and print that space, + * too. + */ +static void +print_bvspace(struct termp *p, + const struct mdoc_node *bl, + const struct mdoc_node *n) +{ + const struct mdoc_node *nn; + + assert(n); + + term_newln(p); + + if (MDOC_Bd == bl->tok && bl->norm->Bd.comp) + return; + if (MDOC_Bl == bl->tok && bl->norm->Bl.comp) + return; + + /* Do not vspace directly after Ss/Sh. */ + + for (nn = n; nn; nn = nn->parent) { + if (MDOC_BLOCK != nn->type) + continue; + if (MDOC_Ss == nn->tok) + return; + if (MDOC_Sh == nn->tok) + return; + if (NULL == nn->prev) + continue; + break; + } + + /* A `-column' does not assert vspace within the list. */ + + if (MDOC_Bl == bl->tok && LIST_column == bl->norm->Bl.type) + if (n->prev && MDOC_It == n->prev->tok) + return; + + /* A `-diag' without body does not vspace. */ + + if (MDOC_Bl == bl->tok && LIST_diag == bl->norm->Bl.type) + if (n->prev && MDOC_It == n->prev->tok) { + assert(n->prev->body); + if (NULL == n->prev->body->child) + return; + } + + term_vspace(p); +} + + +/* ARGSUSED */ +static int +termp_it_pre(DECL_ARGS) +{ + const struct mdoc_node *bl, *nn; + char buf[7]; + int i; + size_t width, offset, ncols, dcol; + enum mdoc_list type; + + if (MDOC_BLOCK == n->type) { + print_bvspace(p, n->parent->parent, n); + return(1); + } + + bl = n->parent->parent->parent; + type = bl->norm->Bl.type; + + /* + * First calculate width and offset. This is pretty easy unless + * we're a -column list, in which case all prior columns must + * be accounted for. + */ + + width = offset = 0; + + if (bl->norm->Bl.offs) + offset = a2offs(p, bl->norm->Bl.offs); + + switch (type) { + case (LIST_column): + if (MDOC_HEAD == n->type) + break; + + /* + * Imitate groff's column handling: + * - For each earlier column, add its width. 
+ * - For less than 5 columns, add four more blanks per + * column. + * - For exactly 5 columns, add three more blank per + * column. + * - For more than 5 columns, add only one column. + */ + ncols = bl->norm->Bl.ncols; + + /* LINTED */ + dcol = ncols < 5 ? term_len(p, 4) : + ncols == 5 ? term_len(p, 3) : term_len(p, 1); + + /* + * Calculate the offset by applying all prior MDOC_BODY, + * so we stop at the MDOC_HEAD (NULL == nn->prev). + */ + + for (i = 0, nn = n->prev; + nn->prev && i < (int)ncols; + nn = nn->prev, i++) + offset += dcol + a2width + (p, bl->norm->Bl.cols[i]); + + /* + * When exceeding the declared number of columns, leave + * the remaining widths at 0. This will later be + * adjusted to the default width of 10, or, for the last + * column, stretched to the right margin. + */ + if (i >= (int)ncols) + break; + + /* + * Use the declared column widths, extended as explained + * in the preceding paragraph. + */ + width = a2width(p, bl->norm->Bl.cols[i]) + dcol; + break; + default: + if (NULL == bl->norm->Bl.width) + break; + + /* + * Note: buffer the width by 2, which is groff's magic + * number for buffering single arguments. See the above + * handling for column for how this changes. + */ + assert(bl->norm->Bl.width); + width = a2width(p, bl->norm->Bl.width) + term_len(p, 2); + break; + } + + /* + * List-type can override the width in the case of fixed-head + * values (bullet, dash/hyphen, enum). Tags need a non-zero + * offset. + */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (width < term_len(p, 4)) + width = term_len(p, 4); + break; + case (LIST_enum): + if (width < term_len(p, 5)) + width = term_len(p, 5); + break; + case (LIST_hang): + if (0 == width) + width = term_len(p, 8); + break; + case (LIST_column): + /* FALLTHROUGH */ + case (LIST_tag): + if (0 == width) + width = term_len(p, 10); + break; + default: + break; + } + + /* + * Whitespace control. 
Inset bodies need an initial space, + * while diagonal bodies need two. + */ + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case (LIST_diag): + if (MDOC_BODY == n->type) + term_word(p, "\\ \\ "); + break; + case (LIST_inset): + if (MDOC_BODY == n->type) + term_word(p, "\\ "); + break; + default: + break; + } + + p->flags |= TERMP_NOSPACE; + + switch (type) { + case (LIST_diag): + if (MDOC_HEAD == n->type) + term_fontpush(p, TERMFONT_BOLD); + break; + default: + break; + } + + /* + * Pad and break control. This is the tricky part. These flags + * are documented in term_flushln() in term.c. Note that we're + * going to unset all of these flags in termp_it_post() when we + * exit. + */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + break; + case (LIST_hang): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + else + break; + + /* + * This is ugly. If `-hang' is specified and the body + * is a `Bl' or `Bd', then we want basically to nullify + * the "overstep" effect in term_flushln() and treat + * this as a `-ohang' list instead. + */ + if (n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) + p->flags &= ~TERMP_NOBREAK; + else + p->flags |= TERMP_HANG; + break; + case (LIST_tag): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK | TERMP_TWOSPACE; + + if (MDOC_HEAD != n->type) + break; + if (NULL == n->next || NULL == n->next->child) + p->flags |= TERMP_DANGLE; + break; + case (LIST_column): + if (MDOC_HEAD == n->type) + break; + + if (NULL == n->next) + p->flags &= ~TERMP_NOBREAK; + else + p->flags |= TERMP_NOBREAK; + + break; + case (LIST_diag): + if (MDOC_HEAD == n->type) + p->flags |= TERMP_NOBREAK; + break; + default: + break; + } + + /* + * Margin control. Set-head-width lists have their right + * margins shortened. 
The body for these lists has the offset + * necessarily lengthened. Everybody gets the offset. + */ + + p->offset += offset; + + switch (type) { + case (LIST_hang): + /* + * Same stipulation as above, regarding `-hang'. We + * don't want to recalculate rmargin and offsets when + * using `Bd' or `Bl' within `-hang' overstep lists. + */ + if (MDOC_HEAD == n->type && n->next->child && + (MDOC_Bl == n->next->child->tok || + MDOC_Bd == n->next->child->tok)) + break; + /* FALLTHROUGH */ + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_tag): + assert(width); + if (MDOC_HEAD == n->type) + p->rmargin = p->offset + width; + else + p->offset += width; + break; + case (LIST_column): + assert(width); + p->rmargin = p->offset + width; + /* + * XXX - this behaviour is not documented: the + * right-most column is filled to the right margin. + */ + if (MDOC_HEAD == n->type) + break; + if (NULL == n->next && p->rmargin < p->maxrmargin) + p->rmargin = p->maxrmargin; + break; + default: + break; + } + + /* + * The dash, hyphen, bullet and enum lists all have a special + * HEAD character (temporarily bold, in some cases). + */ + + if (MDOC_HEAD == n->type) + switch (type) { + case (LIST_bullet): + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\[bu]"); + term_fontpop(p); + break; + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\(hy"); + term_fontpop(p); + break; + case (LIST_enum): + (pair->ppair->ppair->count)++; + snprintf(buf, sizeof(buf), "%d.", + pair->ppair->ppair->count); + term_word(p, buf); + break; + default: + break; + } + + /* + * If we're not going to process our children, indicate so here. 
+ */ + + switch (type) { + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_hyphen): + /* FALLTHROUGH */ + case (LIST_enum): + if (MDOC_HEAD == n->type) + return(0); + break; + case (LIST_column): + if (MDOC_HEAD == n->type) + return(0); + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +termp_it_post(DECL_ARGS) +{ + enum mdoc_list type; + + if (MDOC_BLOCK == n->type) + return; + + type = n->parent->parent->parent->norm->Bl.type; + + switch (type) { + case (LIST_item): + /* FALLTHROUGH */ + case (LIST_diag): + /* FALLTHROUGH */ + case (LIST_inset): + if (MDOC_BODY == n->type) + term_newln(p); + break; + case (LIST_column): + if (MDOC_BODY == n->type) + term_flushln(p); + break; + default: + term_newln(p); + break; + } + + /* + * Now that our output is flushed, we can reset our tags. Since + * only `It' sets these flags, we're free to assume that nobody + * has munged them in the meanwhile. + */ + + p->flags &= ~TERMP_DANGLE; + p->flags &= ~TERMP_NOBREAK; + p->flags &= ~TERMP_TWOSPACE; + p->flags &= ~TERMP_HANG; +} + + +/* ARGSUSED */ +static int +termp_nm_pre(DECL_ARGS) +{ + + if (MDOC_BLOCK == n->type) + return(1); + + if (MDOC_BODY == n->type) { + if (NULL == n->child) + return(0); + p->flags |= TERMP_NOSPACE; + p->offset += term_len(p, 1) + + (NULL == n->prev->child ? term_strlen(p, m->name) : + MDOC_TEXT == n->prev->child->type ? 
+ term_strlen(p, n->prev->child->string) : + term_len(p, 5)); + return(1); + } + + if (NULL == n->child && NULL == m->name) + return(0); + + if (MDOC_HEAD == n->type) + synopsis_pre(p, n->parent); + + if (MDOC_HEAD == n->type && n->next->child) { + p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; + p->rmargin = p->offset + term_len(p, 1); + if (NULL == n->child) { + p->rmargin += term_strlen(p, m->name); + } else if (MDOC_TEXT == n->child->type) { + p->rmargin += term_strlen(p, n->child->string); + if (n->child->next) + p->flags |= TERMP_HANG; + } else { + p->rmargin += term_len(p, 5); + p->flags |= TERMP_HANG; + } + } + + term_fontpush(p, TERMFONT_BOLD); + if (NULL == n->child) + term_word(p, m->name); + return(1); +} + + +/* ARGSUSED */ +static void +termp_nm_post(DECL_ARGS) +{ + + if (MDOC_HEAD == n->type && n->next->child) { + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_HANG); + } else if (MDOC_BODY == n->type && n->child) + term_flushln(p); +} + + +/* ARGSUSED */ +static int +termp_fl_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "\\-"); + + if (n->child) + p->flags |= TERMP_NOSPACE; + else if (n->next && n->next->line == n->line) + p->flags |= TERMP_NOSPACE; + + return(1); +} + + +/* ARGSUSED */ +static int +termp__a_pre(DECL_ARGS) +{ + + if (n->prev && MDOC__A == n->prev->tok) + if (NULL == n->next || MDOC__A != n->next->tok) + term_word(p, "and"); + + return(1); +} + + +/* ARGSUSED */ +static int +termp_an_pre(DECL_ARGS) +{ + + if (NULL == n->child) + return(1); + + /* + * If not in the AUTHORS section, `An -split' will cause + * newlines to occur before the author name. If in the AUTHORS + * section, by default, the first `An' invocation is nosplit, + * then all subsequent ones, regardless of whether interspersed + * with other macros/text, are split. -split, in this case, + * will override the condition of the implied first -nosplit. + */ + + if (n->sec == SEC_AUTHORS) { + if ( ! 
(TERMP_ANPREC & p->flags)) { + if (TERMP_SPLIT & p->flags) + term_newln(p); + return(1); + } + if (TERMP_NOSPLIT & p->flags) + return(1); + term_newln(p); + return(1); + } + + if (TERMP_SPLIT & p->flags) + term_newln(p); + + return(1); +} + + +/* ARGSUSED */ +static void +termp_an_post(DECL_ARGS) +{ + + if (n->child) { + if (SEC_AUTHORS == n->sec) + p->flags |= TERMP_ANPREC; + return; + } + + if (AUTH_split == n->norm->An.auth) { + p->flags &= ~TERMP_NOSPLIT; + p->flags |= TERMP_SPLIT; + } else if (AUTH_nosplit == n->norm->An.auth) { + p->flags &= ~TERMP_SPLIT; + p->flags |= TERMP_NOSPLIT; + } + +} + + +/* ARGSUSED */ +static int +termp_ns_pre(DECL_ARGS) +{ + + if ( ! (MDOC_LINE & n->flags)) + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static int +termp_rs_pre(DECL_ARGS) +{ + + if (SEC_SEE_ALSO != n->sec) + return(1); + if (MDOC_BLOCK == n->type && n->prev) + term_vspace(p); + return(1); +} + + +/* ARGSUSED */ +static int +termp_rv_pre(DECL_ARGS) +{ + int nchild; + + term_newln(p); + term_word(p, "The"); + + nchild = n->nchild; + for (n = n->child; n; n = n->next) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->string); + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, "()"); + + if (nchild > 2 && n->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + + if (n->next && NULL == n->next->next) + term_word(p, "and"); + } + + if (nchild > 1) + term_word(p, "functions return"); + else + term_word(p, "function returns"); + + term_word(p, "the value 0 if successful; otherwise the value " + "-1 is returned and the global variable"); + + term_fontpush(p, TERMFONT_UNDER); + term_word(p, "errno"); + term_fontpop(p); + + term_word(p, "is set to indicate the error."); + p->flags |= TERMP_SENTENCE; + + return(0); +} + + +/* ARGSUSED */ +static int +termp_ex_pre(DECL_ARGS) +{ + int nchild; + + term_newln(p); + term_word(p, "The"); + + nchild = n->nchild; + for (n = n->child; n; n = n->next) { + term_fontpush(p, TERMFONT_BOLD); 
+ term_word(p, n->string); + term_fontpop(p); + + if (nchild > 2 && n->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + + if (n->next && NULL == n->next->next) + term_word(p, "and"); + } + + if (nchild > 1) + term_word(p, "utilities exit"); + else + term_word(p, "utility exits"); + + term_word(p, "0 on success, and >0 if an error occurs."); + + p->flags |= TERMP_SENTENCE; + return(0); +} + + +/* ARGSUSED */ +static int +termp_nd_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + +#if defined(__OpenBSD__) || defined(__linux__) + term_word(p, "\\(en"); +#else + term_word(p, "\\(em"); +#endif + return(1); +} + + +/* ARGSUSED */ +static int +termp_bl_pre(DECL_ARGS) +{ + + return(MDOC_HEAD != n->type); +} + + +/* ARGSUSED */ +static void +termp_bl_post(DECL_ARGS) +{ + + if (MDOC_BLOCK == n->type) + term_newln(p); +} + +/* ARGSUSED */ +static int +termp_xr_pre(DECL_ARGS) +{ + + if (NULL == (n = n->child)) + return(0); + + assert(MDOC_TEXT == n->type); + term_word(p, n->string); + + if (NULL == (n = n->next)) + return(0); + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + assert(MDOC_TEXT == n->type); + term_word(p, n->string); + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + return(0); +} + +/* + * This decides how to assert whitespace before any of the SYNOPSIS set + * of macros (which, as in the case of Ft/Fo and Ft/Fn, may contain + * macro combos). + */ +static void +synopsis_pre(struct termp *p, const struct mdoc_node *n) +{ + /* + * Obviously, if we're not in a SYNOPSIS or no prior macros + * exist, do nothing. + */ + if (NULL == n->prev || ! (MDOC_SYNPRETTY & n->flags)) + return; + + /* + * If we're the second in a pair of like elements, emit our + * newline and return. UNLESS we're `Ft', `Fo', `Fn', in which + * case we soldier on.
+ */ + if (n->prev->tok == n->tok && + MDOC_Ft != n->tok && + MDOC_Fo != n->tok && + MDOC_Fn != n->tok) { + term_newln(p); + return; + } + + /* + * If we're one of the SYNOPSIS set and non-like pair-wise after + * another (or Fn/Fo, which we've let slip through) then assert + * vertical space, else only newline and move on. + */ + switch (n->prev->tok) { + case (MDOC_Fd): + /* FALLTHROUGH */ + case (MDOC_Fn): + /* FALLTHROUGH */ + case (MDOC_Fo): + /* FALLTHROUGH */ + case (MDOC_In): + /* FALLTHROUGH */ + case (MDOC_Vt): + term_vspace(p); + break; + case (MDOC_Ft): + if (MDOC_Fn != n->tok && MDOC_Fo != n->tok) { + term_vspace(p); + break; + } + /* FALLTHROUGH */ + default: + term_newln(p); + break; + } +} + + +static int +termp_vt_pre(DECL_ARGS) +{ + + if (MDOC_ELEM == n->type) { + synopsis_pre(p, n); + return(termp_under_pre(p, pair, m, n)); + } else if (MDOC_BLOCK == n->type) { + synopsis_pre(p, n); + return(1); + } else if (MDOC_HEAD == n->type) + return(0); + + return(termp_under_pre(p, pair, m, n)); +} + + +/* ARGSUSED */ +static int +termp_bold_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +termp_fd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + return(termp_bold_pre(p, pair, m, n)); +} + + +/* ARGSUSED */ +static int +termp_sh_pre(DECL_ARGS) +{ + + /* No vspace between consecutive `Sh' calls. 
*/ + + switch (n->type) { + case (MDOC_BLOCK): + if (n->prev && MDOC_Sh == n->prev->tok) + if (NULL == n->prev->body->child) + break; + term_vspace(p); + break; + case (MDOC_HEAD): + term_fontpush(p, TERMFONT_BOLD); + break; + case (MDOC_BODY): + p->offset = term_len(p, p->defindent); + break; + default: + break; + } + return(1); +} + + +/* ARGSUSED */ +static void +termp_sh_post(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_HEAD): + term_newln(p); + break; + case (MDOC_BODY): + term_newln(p); + p->offset = 0; + break; + default: + break; + } +} + + +/* ARGSUSED */ +static int +termp_bt_pre(DECL_ARGS) +{ + + term_word(p, "is currently in beta test."); + p->flags |= TERMP_SENTENCE; + return(0); +} + + +/* ARGSUSED */ +static void +termp_lb_post(DECL_ARGS) +{ + + if (SEC_LIBRARY == n->sec && MDOC_LINE & n->flags) + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_ud_pre(DECL_ARGS) +{ + + term_word(p, "currently under development."); + p->flags |= TERMP_SENTENCE; + return(0); +} + + +/* ARGSUSED */ +static int +termp_d1_pre(DECL_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return(1); + term_newln(p); + p->offset += term_len(p, p->defindent + 1); + return(1); +} + + +/* ARGSUSED */ +static void +termp_d1_post(DECL_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return; + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_ft_pre(DECL_ARGS) +{ + + /* NB: MDOC_LINE does not affect this!
*/ + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_UNDER); + return(1); +} + + +/* ARGSUSED */ +static int +termp_fn_pre(DECL_ARGS) +{ + int pretty; + + pretty = MDOC_SYNPRETTY & n->flags; + + synopsis_pre(p, n); + + if (NULL == (n = n->child)) + return(0); + + assert(MDOC_TEXT == n->type); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->string); + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + + for (n = n->next; n; n = n->next) { + assert(MDOC_TEXT == n->type); + term_fontpush(p, TERMFONT_UNDER); + term_word(p, n->string); + term_fontpop(p); + + if (n->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (pretty) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + } + + return(0); +} + + +/* ARGSUSED */ +static int +termp_fa_pre(DECL_ARGS) +{ + const struct mdoc_node *nn; + + if (n->parent->tok != MDOC_Fo) { + term_fontpush(p, TERMFONT_UNDER); + return(1); + } + + for (nn = n->child; nn; nn = nn->next) { + term_fontpush(p, TERMFONT_UNDER); + term_word(p, nn->string); + term_fontpop(p); + + if (nn->next) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + } + + if (n->child && n->next && n->next->tok == MDOC_Fa) { + p->flags |= TERMP_NOSPACE; + term_word(p, ","); + } + + return(0); +} + + +/* ARGSUSED */ +static int +termp_bd_pre(DECL_ARGS) +{ + size_t tabwidth, rm, rmax; + const struct mdoc_node *nn; + + if (MDOC_BLOCK == n->type) { + print_bvspace(p, n, n); + return(1); + } else if (MDOC_HEAD == n->type) + return(0); + + if (n->norm->Bd.offs) + p->offset += a2offs(p, n->norm->Bd.offs); + + /* + * If -ragged or -filled are specified, the block does nothing + * but change the indentation. If -unfilled or -literal are + * specified, text is printed exactly as entered in the display: + * for macro lines, a newline is appended to the line. Blank + * lines are allowed. 
+ */ + + if (DISP_literal != n->norm->Bd.type && + DISP_unfilled != n->norm->Bd.type) + return(1); + + tabwidth = p->tabwidth; + if (DISP_literal == n->norm->Bd.type) + p->tabwidth = term_len(p, 8); + + rm = p->rmargin; + rmax = p->maxrmargin; + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + for (nn = n->child; nn; nn = nn->next) { + print_mdoc_node(p, pair, m, nn); + /* + * If the printed node flushes its own line, then we + * needn't do it here as well. This is hacky, but the + * notion of selective eoln whitespace is pretty dumb + * anyway, so don't sweat it. + */ + switch (nn->tok) { + case (MDOC_Sm): + /* FALLTHROUGH */ + case (MDOC_br): + /* FALLTHROUGH */ + case (MDOC_sp): + /* FALLTHROUGH */ + case (MDOC_Bl): + /* FALLTHROUGH */ + case (MDOC_D1): + /* FALLTHROUGH */ + case (MDOC_Dl): + /* FALLTHROUGH */ + case (MDOC_Lp): + /* FALLTHROUGH */ + case (MDOC_Pp): + continue; + default: + break; + } + if (nn->next && nn->next->line == nn->line) + continue; + term_flushln(p); + p->flags |= TERMP_NOSPACE; + } + + p->tabwidth = tabwidth; + p->rmargin = rm; + p->maxrmargin = rmax; + return(0); +} + + +/* ARGSUSED */ +static void +termp_bd_post(DECL_ARGS) +{ + size_t rm, rmax; + + if (MDOC_BODY != n->type) + return; + + rm = p->rmargin; + rmax = p->maxrmargin; + + if (DISP_literal == n->norm->Bd.type || + DISP_unfilled == n->norm->Bd.type) + p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + + p->flags |= TERMP_NOSPACE; + term_newln(p); + + p->rmargin = rm; + p->maxrmargin = rmax; +} + + +/* ARGSUSED */ +static int +termp_bx_pre(DECL_ARGS) +{ + + if (NULL != (n = n->child)) { + term_word(p, n->string); + p->flags |= TERMP_NOSPACE; + term_word(p, "BSD"); + } else { + term_word(p, "BSD"); + return(0); + } + + if (NULL != (n = n->next)) { + p->flags |= TERMP_NOSPACE; + term_word(p, "-"); + p->flags |= TERMP_NOSPACE; + term_word(p, n->string); + } + + return(0); +} + + +/* ARGSUSED */ +static int +termp_xx_pre(DECL_ARGS) +{ + const char *pp; + int flags; + + pp = NULL; + 
switch (n->tok) { + case (MDOC_Bsx): + pp = "BSD/OS"; + break; + case (MDOC_Dx): + pp = "DragonFly"; + break; + case (MDOC_Fx): + pp = "FreeBSD"; + break; + case (MDOC_Nx): + pp = "NetBSD"; + break; + case (MDOC_Ox): + pp = "OpenBSD"; + break; + case (MDOC_Ux): + pp = "UNIX"; + break; + default: + break; + } + + term_word(p, pp); + if (n->child) { + flags = p->flags; + p->flags |= TERMP_KEEP; + term_word(p, n->child->string); + p->flags = flags; + } + return(0); +} + + +/* ARGSUSED */ +static int +termp_igndelim_pre(DECL_ARGS) +{ + + p->flags |= TERMP_IGNDELIM; + return(1); +} + + +/* ARGSUSED */ +static void +termp_pf_post(DECL_ARGS) +{ + + p->flags |= TERMP_NOSPACE; +} + + +/* ARGSUSED */ +static int +termp_ss_pre(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_BLOCK): + term_newln(p); + if (n->prev) + term_vspace(p); + break; + case (MDOC_HEAD): + term_fontpush(p, TERMFONT_BOLD); + p->offset = term_len(p, (p->defindent+1)/2); + break; + default: + break; + } + + return(1); +} + + +/* ARGSUSED */ +static void +termp_ss_post(DECL_ARGS) +{ + + if (MDOC_HEAD == n->type) + term_newln(p); +} + + +/* ARGSUSED */ +static int +termp_cd_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + term_fontpush(p, TERMFONT_BOLD); + return(1); +} + + +/* ARGSUSED */ +static int +termp_in_pre(DECL_ARGS) +{ + + synopsis_pre(p, n); + + if (MDOC_SYNPRETTY & n->flags && MDOC_LINE & n->flags) { + term_fontpush(p, TERMFONT_BOLD); + term_word(p, "#include"); + term_word(p, "<"); + } else { + term_word(p, "<"); + term_fontpush(p, TERMFONT_UNDER); + } + + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_in_post(DECL_ARGS) +{ + + if (MDOC_SYNPRETTY & n->flags) + term_fontpush(p, TERMFONT_BOLD); + + p->flags |= TERMP_NOSPACE; + term_word(p, ">"); + + if (MDOC_SYNPRETTY & n->flags) + term_fontpop(p); +} + + +/* ARGSUSED */ +static int +termp_sp_pre(DECL_ARGS) +{ + size_t i, len; + + switch (n->tok) { + case (MDOC_sp): + len = n->child ? 
a2height(p, n->child->string) : 1; + break; + case (MDOC_br): + len = 0; + break; + default: + len = 1; + break; + } + + if (0 == len) + term_newln(p); + for (i = 0; i < len; i++) + term_vspace(p); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_quote_pre(DECL_ARGS) +{ + + if (MDOC_BODY != n->type && MDOC_ELEM != n->type) + return(1); + + switch (n->tok) { + case (MDOC_Ao): + /* FALLTHROUGH */ + case (MDOC_Aq): + term_word(p, "<"); + break; + case (MDOC_Bro): + /* FALLTHROUGH */ + case (MDOC_Brq): + term_word(p, "{"); + break; + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Op): + /* FALLTHROUGH */ + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Bq): + term_word(p, "["); + break; + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Dq): + term_word(p, "``"); + break; + case (MDOC_Eo): + break; + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Pq): + term_word(p, "("); + break; + case (MDOC__T): + /* FALLTHROUGH */ + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Qq): + term_word(p, "\""); + break; + case (MDOC_Ql): + /* FALLTHROUGH */ + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Sq): + term_word(p, "`"); + break; + default: + abort(); + /* NOTREACHED */ + } + + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp_quote_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type && MDOC_ELEM != n->type) + return; + + p->flags |= TERMP_NOSPACE; + + switch (n->tok) { + case (MDOC_Ao): + /* FALLTHROUGH */ + case (MDOC_Aq): + term_word(p, ">"); + break; + case (MDOC_Bro): + /* FALLTHROUGH */ + case (MDOC_Brq): + term_word(p, "}"); + break; + case (MDOC_Oo): + /* FALLTHROUGH */ + case (MDOC_Op): + /* FALLTHROUGH */ + case (MDOC_Bo): + /* FALLTHROUGH */ + case (MDOC_Bq): + term_word(p, "]"); + break; + case (MDOC_Do): + /* FALLTHROUGH */ + case (MDOC_Dq): + term_word(p, "''"); + break; + case (MDOC_Eo): + break; + case (MDOC_Po): + /* FALLTHROUGH */ + case (MDOC_Pq): + term_word(p, ")"); + break; + case (MDOC__T): + /* FALLTHROUGH 
*/ + case (MDOC_Qo): + /* FALLTHROUGH */ + case (MDOC_Qq): + term_word(p, "\""); + break; + case (MDOC_Ql): + /* FALLTHROUGH */ + case (MDOC_So): + /* FALLTHROUGH */ + case (MDOC_Sq): + term_word(p, "'"); + break; + default: + abort(); + /* NOTREACHED */ + } +} + + +/* ARGSUSED */ +static int +termp_fo_pre(DECL_ARGS) +{ + + if (MDOC_BLOCK == n->type) { + synopsis_pre(p, n); + return(1); + } else if (MDOC_BODY == n->type) { + p->flags |= TERMP_NOSPACE; + term_word(p, "("); + p->flags |= TERMP_NOSPACE; + return(1); + } + + if (NULL == n->child) + return(0); + + /* XXX: we drop non-initial arguments as per groff. */ + + assert(n->child->string); + term_fontpush(p, TERMFONT_BOLD); + term_word(p, n->child->string); + return(0); +} + + +/* ARGSUSED */ +static void +termp_fo_post(DECL_ARGS) +{ + + if (MDOC_BODY != n->type) + return; + + p->flags |= TERMP_NOSPACE; + term_word(p, ")"); + + if (MDOC_SYNPRETTY & n->flags) { + p->flags |= TERMP_NOSPACE; + term_word(p, ";"); + } +} + + +/* ARGSUSED */ +static int +termp_bf_pre(DECL_ARGS) +{ + + if (MDOC_HEAD == n->type) + return(0); + else if (MDOC_BLOCK != n->type) + return(1); + + if (FONT_Em == n->norm->Bf.font) + term_fontpush(p, TERMFONT_UNDER); + else if (FONT_Sy == n->norm->Bf.font) + term_fontpush(p, TERMFONT_BOLD); + else + term_fontpush(p, TERMFONT_NONE); + + return(1); +} + + +/* ARGSUSED */ +static int +termp_sm_pre(DECL_ARGS) +{ + + assert(n->child && MDOC_TEXT == n->child->type); + if (0 == strcmp("on", n->child->string)) { + if (p->col) + p->flags &= ~TERMP_NOSPACE; + p->flags &= ~TERMP_NONOSPACE; + } else + p->flags |= TERMP_NONOSPACE; + + return(0); +} + + +/* ARGSUSED */ +static int +termp_ap_pre(DECL_ARGS) +{ + + p->flags |= TERMP_NOSPACE; + term_word(p, "'"); + p->flags |= TERMP_NOSPACE; + return(1); +} + + +/* ARGSUSED */ +static void +termp____post(DECL_ARGS) +{ + + /* + * Handle lists of authors. In general, print each followed by + * a comma. Don't print the comma if there are only two + * authors. 
+ */ + if (MDOC__A == n->tok && n->next && MDOC__A == n->next->tok) + if (NULL == n->next->next || MDOC__A != n->next->next->tok) + if (NULL == n->prev || MDOC__A != n->prev->tok) + return; + + /* TODO: %U. */ + + if (NULL == n->parent || MDOC_Rs != n->parent->tok) + return; + + p->flags |= TERMP_NOSPACE; + if (NULL == n->next) { + term_word(p, "."); + p->flags |= TERMP_SENTENCE; + } else + term_word(p, ","); +} + + +/* ARGSUSED */ +static int +termp_li_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_NONE); + return(1); +} + + +/* ARGSUSED */ +static int +termp_lk_pre(DECL_ARGS) +{ + const struct mdoc_node *nn, *sv; + + term_fontpush(p, TERMFONT_UNDER); + + nn = sv = n->child; + + if (NULL == nn || NULL == nn->next) + return(1); + + for (nn = nn->next; nn; nn = nn->next) + term_word(p, nn->string); + + term_fontpop(p); + + p->flags |= TERMP_NOSPACE; + term_word(p, ":"); + + term_fontpush(p, TERMFONT_BOLD); + term_word(p, sv->string); + term_fontpop(p); + + return(0); +} + + +/* ARGSUSED */ +static int +termp_bk_pre(DECL_ARGS) +{ + + switch (n->type) { + case (MDOC_BLOCK): + break; + case (MDOC_HEAD): + return(0); + case (MDOC_BODY): + if (n->parent->args || 0 == n->prev->nchild) + p->flags |= TERMP_PREKEEP; + break; + default: + abort(); + /* NOTREACHED */ + } + + return(1); +} + + +/* ARGSUSED */ +static void +termp_bk_post(DECL_ARGS) +{ + + if (MDOC_BODY == n->type) + p->flags &= ~(TERMP_KEEP | TERMP_PREKEEP); +} + +/* ARGSUSED */ +static void +termp__t_post(DECL_ARGS) +{ + + /* + * If we're in an `Rs' and there's a journal present, then quote + * us instead of underlining us (for disambiguation). + */ + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) + termp_quote_post(p, pair, m, n); + + termp____post(p, pair, m, n); +} + +/* ARGSUSED */ +static int +termp__t_pre(DECL_ARGS) +{ + + /* + * If we're in an `Rs' and there's a journal present, then quote + * us instead of underlining us (for disambiguation). 
+ */ + if (n->parent && MDOC_Rs == n->parent->tok && + n->parent->norm->Rs.quote_T) + return(termp_quote_pre(p, pair, m, n)); + + term_fontpush(p, TERMFONT_UNDER); + return(1); +} + +/* ARGSUSED */ +static int +termp_under_pre(DECL_ARGS) +{ + + term_fontpush(p, TERMFONT_UNDER); + return(1); +} diff --git a/usr/src/cmd/mandoc/mdoc_validate.c b/usr/src/cmd/mandoc/mdoc_validate.c new file mode 100644 index 0000000000..060ccdadec --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_validate.c @@ -0,0 +1,2403 @@ +/* $Id: mdoc_validate.c,v 1.182 2012/03/23 05:50:25 kristaps Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef OSNAME +#include <sys/utsname.h> +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" +#include "libmandoc.h" + +/* FIXME: .Bl -diag can't have non-text children in HEAD. 
*/ + +#define PRE_ARGS struct mdoc *mdoc, struct mdoc_node *n +#define POST_ARGS struct mdoc *mdoc + +#define NUMSIZ 32 +#define DATESIZE 32 + +enum check_ineq { + CHECK_LT, + CHECK_GT, + CHECK_EQ +}; + +enum check_lvl { + CHECK_WARN, + CHECK_ERROR, +}; + +typedef int (*v_pre)(PRE_ARGS); +typedef int (*v_post)(POST_ARGS); + +struct valids { + v_pre *pre; + v_post *post; +}; + +static int check_count(struct mdoc *, enum mdoc_type, + enum check_lvl, enum check_ineq, int); +static int check_parent(PRE_ARGS, enum mdoct, enum mdoc_type); +static void check_text(struct mdoc *, int, int, char *); +static void check_argv(struct mdoc *, + struct mdoc_node *, struct mdoc_argv *); +static void check_args(struct mdoc *, struct mdoc_node *); +static int concat(char *, const struct mdoc_node *, size_t); +static enum mdoc_sec a2sec(const char *); +static size_t macro2len(enum mdoct); + +static int ebool(POST_ARGS); +static int berr_ge1(POST_ARGS); +static int bwarn_ge1(POST_ARGS); +static int ewarn_eq0(POST_ARGS); +static int ewarn_eq1(POST_ARGS); +static int ewarn_ge1(POST_ARGS); +static int ewarn_le1(POST_ARGS); +static int hwarn_eq0(POST_ARGS); +static int hwarn_eq1(POST_ARGS); +static int hwarn_ge1(POST_ARGS); +static int hwarn_le1(POST_ARGS); + +static int post_an(POST_ARGS); +static int post_at(POST_ARGS); +static int post_bf(POST_ARGS); +static int post_bl(POST_ARGS); +static int post_bl_block(POST_ARGS); +static int post_bl_block_width(POST_ARGS); +static int post_bl_block_tag(POST_ARGS); +static int post_bl_head(POST_ARGS); +static int post_bx(POST_ARGS); +static int post_dd(POST_ARGS); +static int post_dt(POST_ARGS); +static int post_defaults(POST_ARGS); +static int post_literal(POST_ARGS); +static int post_eoln(POST_ARGS); +static int post_it(POST_ARGS); +static int post_lb(POST_ARGS); +static int post_nm(POST_ARGS); +static int post_ns(POST_ARGS); +static int post_os(POST_ARGS); +static int post_ignpar(POST_ARGS); +static int post_prol(POST_ARGS); +static int 
post_root(POST_ARGS); +static int post_rs(POST_ARGS); +static int post_sh(POST_ARGS); +static int post_sh_body(POST_ARGS); +static int post_sh_head(POST_ARGS); +static int post_st(POST_ARGS); +static int post_std(POST_ARGS); +static int post_vt(POST_ARGS); +static int pre_an(PRE_ARGS); +static int pre_bd(PRE_ARGS); +static int pre_bl(PRE_ARGS); +static int pre_dd(PRE_ARGS); +static int pre_display(PRE_ARGS); +static int pre_dt(PRE_ARGS); +static int pre_it(PRE_ARGS); +static int pre_literal(PRE_ARGS); +static int pre_os(PRE_ARGS); +static int pre_par(PRE_ARGS); +static int pre_sh(PRE_ARGS); +static int pre_ss(PRE_ARGS); +static int pre_std(PRE_ARGS); + +static v_post posts_an[] = { post_an, NULL }; +static v_post posts_at[] = { post_at, post_defaults, NULL }; +static v_post posts_bd[] = { post_literal, hwarn_eq0, bwarn_ge1, NULL }; +static v_post posts_bf[] = { hwarn_le1, post_bf, NULL }; +static v_post posts_bk[] = { hwarn_eq0, bwarn_ge1, NULL }; +static v_post posts_bl[] = { bwarn_ge1, post_bl, NULL }; +static v_post posts_bx[] = { post_bx, NULL }; +static v_post posts_bool[] = { ebool, NULL }; +static v_post posts_eoln[] = { post_eoln, NULL }; +static v_post posts_defaults[] = { post_defaults, NULL }; +static v_post posts_dd[] = { post_dd, post_prol, NULL }; +static v_post posts_dl[] = { post_literal, bwarn_ge1, NULL }; +static v_post posts_dt[] = { post_dt, post_prol, NULL }; +static v_post posts_fo[] = { hwarn_eq1, bwarn_ge1, NULL }; +static v_post posts_it[] = { post_it, NULL }; +static v_post posts_lb[] = { post_lb, NULL }; +static v_post posts_nd[] = { berr_ge1, NULL }; +static v_post posts_nm[] = { post_nm, NULL }; +static v_post posts_notext[] = { ewarn_eq0, NULL }; +static v_post posts_ns[] = { post_ns, NULL }; +static v_post posts_os[] = { post_os, post_prol, NULL }; +static v_post posts_rs[] = { post_rs, NULL }; +static v_post posts_sh[] = { post_ignpar, hwarn_ge1, post_sh, NULL }; +static v_post posts_sp[] = { ewarn_le1, NULL }; +static v_post 
posts_ss[] = { post_ignpar, hwarn_ge1, NULL }; +static v_post posts_st[] = { post_st, NULL }; +static v_post posts_std[] = { post_std, NULL }; +static v_post posts_text[] = { ewarn_ge1, NULL }; +static v_post posts_text1[] = { ewarn_eq1, NULL }; +static v_post posts_vt[] = { post_vt, NULL }; +static v_post posts_wline[] = { bwarn_ge1, NULL }; +static v_pre pres_an[] = { pre_an, NULL }; +static v_pre pres_bd[] = { pre_display, pre_bd, pre_literal, pre_par, NULL }; +static v_pre pres_bl[] = { pre_bl, pre_par, NULL }; +static v_pre pres_d1[] = { pre_display, NULL }; +static v_pre pres_dl[] = { pre_literal, pre_display, NULL }; +static v_pre pres_dd[] = { pre_dd, NULL }; +static v_pre pres_dt[] = { pre_dt, NULL }; +static v_pre pres_er[] = { NULL, NULL }; +static v_pre pres_fd[] = { NULL, NULL }; +static v_pre pres_it[] = { pre_it, pre_par, NULL }; +static v_pre pres_os[] = { pre_os, NULL }; +static v_pre pres_pp[] = { pre_par, NULL }; +static v_pre pres_sh[] = { pre_sh, NULL }; +static v_pre pres_ss[] = { pre_ss, NULL }; +static v_pre pres_std[] = { pre_std, NULL }; + +static const struct valids mdoc_valids[MDOC_MAX] = { + { NULL, NULL }, /* Ap */ + { pres_dd, posts_dd }, /* Dd */ + { pres_dt, posts_dt }, /* Dt */ + { pres_os, posts_os }, /* Os */ + { pres_sh, posts_sh }, /* Sh */ + { pres_ss, posts_ss }, /* Ss */ + { pres_pp, posts_notext }, /* Pp */ + { pres_d1, posts_wline }, /* D1 */ + { pres_dl, posts_dl }, /* Dl */ + { pres_bd, posts_bd }, /* Bd */ + { NULL, NULL }, /* Ed */ + { pres_bl, posts_bl }, /* Bl */ + { NULL, NULL }, /* El */ + { pres_it, posts_it }, /* It */ + { NULL, NULL }, /* Ad */ + { pres_an, posts_an }, /* An */ + { NULL, posts_defaults }, /* Ar */ + { NULL, NULL }, /* Cd */ + { NULL, NULL }, /* Cm */ + { NULL, NULL }, /* Dv */ + { pres_er, NULL }, /* Er */ + { NULL, NULL }, /* Ev */ + { pres_std, posts_std }, /* Ex */ + { NULL, NULL }, /* Fa */ + { pres_fd, posts_text }, /* Fd */ + { NULL, NULL }, /* Fl */ + { NULL, NULL }, /* Fn */ + { NULL, 
NULL }, /* Ft */ + { NULL, NULL }, /* Ic */ + { NULL, posts_text1 }, /* In */ + { NULL, posts_defaults }, /* Li */ + { NULL, posts_nd }, /* Nd */ + { NULL, posts_nm }, /* Nm */ + { NULL, NULL }, /* Op */ + { NULL, NULL }, /* Ot */ + { NULL, posts_defaults }, /* Pa */ + { pres_std, posts_std }, /* Rv */ + { NULL, posts_st }, /* St */ + { NULL, NULL }, /* Va */ + { NULL, posts_vt }, /* Vt */ + { NULL, posts_text }, /* Xr */ + { NULL, posts_text }, /* %A */ + { NULL, posts_text }, /* %B */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_text }, /* %D */ + { NULL, posts_text }, /* %I */ + { NULL, posts_text }, /* %J */ + { NULL, posts_text }, /* %N */ + { NULL, posts_text }, /* %O */ + { NULL, posts_text }, /* %P */ + { NULL, posts_text }, /* %R */ + { NULL, posts_text }, /* %T */ /* FIXME: can be used outside Rs/Re. */ + { NULL, posts_text }, /* %V */ + { NULL, NULL }, /* Ac */ + { NULL, NULL }, /* Ao */ + { NULL, NULL }, /* Aq */ + { NULL, posts_at }, /* At */ + { NULL, NULL }, /* Bc */ + { NULL, posts_bf }, /* Bf */ + { NULL, NULL }, /* Bo */ + { NULL, NULL }, /* Bq */ + { NULL, NULL }, /* Bsx */ + { NULL, posts_bx }, /* Bx */ + { NULL, posts_bool }, /* Db */ + { NULL, NULL }, /* Dc */ + { NULL, NULL }, /* Do */ + { NULL, NULL }, /* Dq */ + { NULL, NULL }, /* Ec */ + { NULL, NULL }, /* Ef */ + { NULL, NULL }, /* Em */ + { NULL, NULL }, /* Eo */ + { NULL, NULL }, /* Fx */ + { NULL, NULL }, /* Ms */ + { NULL, posts_notext }, /* No */ + { NULL, posts_ns }, /* Ns */ + { NULL, NULL }, /* Nx */ + { NULL, NULL }, /* Ox */ + { NULL, NULL }, /* Pc */ + { NULL, posts_text1 }, /* Pf */ + { NULL, NULL }, /* Po */ + { NULL, NULL }, /* Pq */ + { NULL, NULL }, /* Qc */ + { NULL, NULL }, /* Ql */ + { NULL, NULL }, /* Qo */ + { NULL, NULL }, /* Qq */ + { NULL, NULL }, /* Re */ + { NULL, posts_rs }, /* Rs */ + { NULL, NULL }, /* Sc */ + { NULL, NULL }, /* So */ + { NULL, NULL }, /* Sq */ + { NULL, posts_bool }, /* Sm */ + { NULL, NULL }, /* Sx */ + { NULL, NULL }, /* Sy */ + 
{ NULL, NULL }, /* Tn */ + { NULL, NULL }, /* Ux */ + { NULL, NULL }, /* Xc */ + { NULL, NULL }, /* Xo */ + { NULL, posts_fo }, /* Fo */ + { NULL, NULL }, /* Fc */ + { NULL, NULL }, /* Oo */ + { NULL, NULL }, /* Oc */ + { NULL, posts_bk }, /* Bk */ + { NULL, NULL }, /* Ek */ + { NULL, posts_eoln }, /* Bt */ + { NULL, NULL }, /* Hf */ + { NULL, NULL }, /* Fr */ + { NULL, posts_eoln }, /* Ud */ + { NULL, posts_lb }, /* Lb */ + { NULL, posts_notext }, /* Lp */ + { NULL, NULL }, /* Lk */ + { NULL, posts_defaults }, /* Mt */ + { NULL, NULL }, /* Brq */ + { NULL, NULL }, /* Bro */ + { NULL, NULL }, /* Brc */ + { NULL, posts_text }, /* %C */ + { NULL, NULL }, /* Es */ + { NULL, NULL }, /* En */ + { NULL, NULL }, /* Dx */ + { NULL, posts_text }, /* %Q */ + { NULL, posts_notext }, /* br */ + { pres_pp, posts_sp }, /* sp */ + { NULL, posts_text1 }, /* %U */ + { NULL, NULL }, /* Ta */ +}; + +#define RSORD_MAX 14 /* Number of `Rs' blocks. */ + +static const enum mdoct rsord[RSORD_MAX] = { + MDOC__A, + MDOC__T, + MDOC__B, + MDOC__I, + MDOC__J, + MDOC__R, + MDOC__N, + MDOC__V, + MDOC__P, + MDOC__Q, + MDOC__D, + MDOC__O, + MDOC__C, + MDOC__U +}; + +static const char * const secnames[SEC__MAX] = { + NULL, + "NAME", + "LIBRARY", + "SYNOPSIS", + "DESCRIPTION", + "IMPLEMENTATION NOTES", + "RETURN VALUES", + "ENVIRONMENT", + "FILES", + "EXIT STATUS", + "EXAMPLES", + "DIAGNOSTICS", + "COMPATIBILITY", + "ERRORS", + "SEE ALSO", + "STANDARDS", + "HISTORY", + "AUTHORS", + "CAVEATS", + "BUGS", + "SECURITY CONSIDERATIONS", + NULL +}; + +int +mdoc_valid_pre(struct mdoc *mdoc, struct mdoc_node *n) +{ + v_pre *p; + int line, pos; + char *tp; + + switch (n->type) { + case (MDOC_TEXT): + tp = n->string; + line = n->line; + pos = n->pos; + check_text(mdoc, line, pos, tp); + /* FALLTHROUGH */ + case (MDOC_TBL): + /* FALLTHROUGH */ + case (MDOC_EQN): + /* FALLTHROUGH */ + case (MDOC_ROOT): + return(1); + default: + break; + } + + check_args(mdoc, n); + + if (NULL == mdoc_valids[n->tok].pre) + 
return(1); + for (p = mdoc_valids[n->tok].pre; *p; p++) + if ( ! (*p)(mdoc, n)) + return(0); + return(1); +} + + +int +mdoc_valid_post(struct mdoc *mdoc) +{ + v_post *p; + + if (MDOC_VALID & mdoc->last->flags) + return(1); + mdoc->last->flags |= MDOC_VALID; + + switch (mdoc->last->type) { + case (MDOC_TEXT): + /* FALLTHROUGH */ + case (MDOC_EQN): + /* FALLTHROUGH */ + case (MDOC_TBL): + return(1); + case (MDOC_ROOT): + return(post_root(mdoc)); + default: + break; + } + + if (NULL == mdoc_valids[mdoc->last->tok].post) + return(1); + for (p = mdoc_valids[mdoc->last->tok].post; *p; p++) + if ( ! (*p)(mdoc)) + return(0); + + return(1); +} + +static int +check_count(struct mdoc *m, enum mdoc_type type, + enum check_lvl lvl, enum check_ineq ineq, int val) +{ + const char *p; + enum mandocerr t; + + if (m->last->type != type) + return(1); + + switch (ineq) { + case (CHECK_LT): + p = "less than "; + if (m->last->nchild < val) + return(1); + break; + case (CHECK_GT): + p = "more than "; + if (m->last->nchild > val) + return(1); + break; + case (CHECK_EQ): + p = ""; + if (val == m->last->nchild) + return(1); + break; + default: + abort(); + /* NOTREACHED */ + } + + t = lvl == CHECK_WARN ? 
MANDOCERR_ARGCWARN : MANDOCERR_ARGCOUNT; + mandoc_vmsg(t, m->parse, m->last->line, m->last->pos, + "want %s%d children (have %d)", + p, val, m->last->nchild); + return(1); +} + +static int +berr_ge1(POST_ARGS) +{ + + return(check_count(mdoc, MDOC_BODY, CHECK_ERROR, CHECK_GT, 0)); +} + +static int +bwarn_ge1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_BODY, CHECK_WARN, CHECK_GT, 0)); +} + +static int +ewarn_eq0(POST_ARGS) +{ + return(check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_EQ, 0)); +} + +static int +ewarn_eq1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_EQ, 1)); +} + +static int +ewarn_ge1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_GT, 0)); +} + +static int +ewarn_le1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_LT, 2)); +} + +static int +hwarn_eq0(POST_ARGS) +{ + return(check_count(mdoc, MDOC_HEAD, CHECK_WARN, CHECK_EQ, 0)); +} + +static int +hwarn_eq1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_HEAD, CHECK_WARN, CHECK_EQ, 1)); +} + +static int +hwarn_ge1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_HEAD, CHECK_WARN, CHECK_GT, 0)); +} + +static int +hwarn_le1(POST_ARGS) +{ + return(check_count(mdoc, MDOC_HEAD, CHECK_WARN, CHECK_LT, 2)); +} + +static void +check_args(struct mdoc *m, struct mdoc_node *n) +{ + int i; + + if (NULL == n->args) + return; + + assert(n->args->argc); + for (i = 0; i < (int)n->args->argc; i++) + check_argv(m, n, &n->args->argv[i]); +} + +static void +check_argv(struct mdoc *m, struct mdoc_node *n, struct mdoc_argv *v) +{ + int i; + + for (i = 0; i < (int)v->sz; i++) + check_text(m, v->line, v->pos, v->value[i]); + + /* FIXME: move to post_std(). */ + + if (MDOC_Std == v->arg) + if ( ! 
(v->sz || m->meta.name)) + mdoc_nmsg(m, n, MANDOCERR_NONAME); +} + +static void +check_text(struct mdoc *m, int ln, int pos, char *p) +{ + char *cp; + + if (MDOC_LITERAL & m->flags) + return; + + for (cp = p; NULL != (p = strchr(p, '\t')); p++) + mdoc_pmsg(m, ln, pos + (int)(p - cp), MANDOCERR_BADTAB); +} + +static int +check_parent(PRE_ARGS, enum mdoct tok, enum mdoc_type t) +{ + + assert(n->parent); + if ((MDOC_ROOT == t || tok == n->parent->tok) && + (t == n->parent->type)) + return(1); + + mandoc_vmsg(MANDOCERR_SYNTCHILD, mdoc->parse, n->line, + n->pos, "want parent %s", MDOC_ROOT == t ? + "<root>" : mdoc_macronames[tok]); + return(0); +} + + +static int +pre_display(PRE_ARGS) +{ + struct mdoc_node *node; + + if (MDOC_BLOCK != n->type) + return(1); + + for (node = mdoc->last->parent; node; node = node->parent) + if (MDOC_BLOCK == node->type) + if (MDOC_Bd == node->tok) + break; + + if (node) + mdoc_nmsg(mdoc, n, MANDOCERR_NESTEDDISP); + + return(1); +} + + +static int +pre_bl(PRE_ARGS) +{ + int i, comp, dup; + const char *offs, *width; + enum mdoc_list lt; + struct mdoc_node *np; + + if (MDOC_BLOCK != n->type) { + if (ENDBODY_NOT != n->end) { + assert(n->pending); + np = n->pending->parent; + } else + np = n->parent; + + assert(np); + assert(MDOC_BLOCK == np->type); + assert(MDOC_Bl == np->tok); + return(1); + } + + /* + * First figure out which kind of list to use: bind ourselves to + * the first mentioned list type and warn about any remaining + * ones. If we find no list type, we default to LIST_item. + */ + + /* LINTED */ + for (i = 0; n->args && i < (int)n->args->argc; i++) { + lt = LIST__NONE; + dup = comp = 0; + width = offs = NULL; + switch (n->args->argv[i].arg) { + /* Set list types. 
*/ + case (MDOC_Bullet): + lt = LIST_bullet; + break; + case (MDOC_Dash): + lt = LIST_dash; + break; + case (MDOC_Enum): + lt = LIST_enum; + break; + case (MDOC_Hyphen): + lt = LIST_hyphen; + break; + case (MDOC_Item): + lt = LIST_item; + break; + case (MDOC_Tag): + lt = LIST_tag; + break; + case (MDOC_Diag): + lt = LIST_diag; + break; + case (MDOC_Hang): + lt = LIST_hang; + break; + case (MDOC_Ohang): + lt = LIST_ohang; + break; + case (MDOC_Inset): + lt = LIST_inset; + break; + case (MDOC_Column): + lt = LIST_column; + break; + /* Set list arguments. */ + case (MDOC_Compact): + dup = n->norm->Bl.comp; + comp = 1; + break; + case (MDOC_Width): + /* NB: this can be empty! */ + if (n->args->argv[i].sz) { + width = n->args->argv[i].value[0]; + dup = (NULL != n->norm->Bl.width); + break; + } + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); + break; + case (MDOC_Offset): + /* NB: this can be empty! */ + if (n->args->argv[i].sz) { + offs = n->args->argv[i].value[0]; + dup = (NULL != n->norm->Bl.offs); + break; + } + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); + break; + default: + continue; + } + + /* Check: duplicate auxiliary arguments. */ + + if (dup) + mdoc_nmsg(mdoc, n, MANDOCERR_ARGVREP); + + if (comp && ! dup) + n->norm->Bl.comp = comp; + if (offs && ! dup) + n->norm->Bl.offs = offs; + if (width && ! dup) + n->norm->Bl.width = width; + + /* Check: multiple list types. */ + + if (LIST__NONE != lt && n->norm->Bl.type != LIST__NONE) + mdoc_nmsg(mdoc, n, MANDOCERR_LISTREP); + + /* Assign list type. */ + + if (LIST__NONE != lt && n->norm->Bl.type == LIST__NONE) { + n->norm->Bl.type = lt; + /* Set column information, too. */ + if (LIST_column == lt) { + n->norm->Bl.ncols = + n->args->argv[i].sz; + n->norm->Bl.cols = (void *) + n->args->argv[i].value; + } + } + + /* The list type should come first. 
*/ + + if (n->norm->Bl.type == LIST__NONE) + if (n->norm->Bl.width || + n->norm->Bl.offs || + n->norm->Bl.comp) + mdoc_nmsg(mdoc, n, MANDOCERR_LISTFIRST); + + continue; + } + + /* Allow lists to default to LIST_item. */ + + if (LIST__NONE == n->norm->Bl.type) { + mdoc_nmsg(mdoc, n, MANDOCERR_LISTTYPE); + n->norm->Bl.type = LIST_item; + } + + /* + * Validate the width field. Some list types don't need width + * types and should be warned about them. Others should have it + * and must also be warned. + */ + + switch (n->norm->Bl.type) { + case (LIST_tag): + if (n->norm->Bl.width) + break; + mdoc_nmsg(mdoc, n, MANDOCERR_NOWIDTHARG); + break; + case (LIST_column): + /* FALLTHROUGH */ + case (LIST_diag): + /* FALLTHROUGH */ + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_inset): + /* FALLTHROUGH */ + case (LIST_item): + if (n->norm->Bl.width) + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); + break; + default: + break; + } + + return(1); +} + + +static int +pre_bd(PRE_ARGS) +{ + int i, dup, comp; + enum mdoc_disp dt; + const char *offs; + struct mdoc_node *np; + + if (MDOC_BLOCK != n->type) { + if (ENDBODY_NOT != n->end) { + assert(n->pending); + np = n->pending->parent; + } else + np = n->parent; + + assert(np); + assert(MDOC_BLOCK == np->type); + assert(MDOC_Bd == np->tok); + return(1); + } + + /* LINTED */ + for (i = 0; n->args && i < (int)n->args->argc; i++) { + dt = DISP__NONE; + dup = comp = 0; + offs = NULL; + + switch (n->args->argv[i].arg) { + case (MDOC_Centred): + dt = DISP_centred; + break; + case (MDOC_Ragged): + dt = DISP_ragged; + break; + case (MDOC_Unfilled): + dt = DISP_unfilled; + break; + case (MDOC_Filled): + dt = DISP_filled; + break; + case (MDOC_Literal): + dt = DISP_literal; + break; + case (MDOC_File): + mdoc_nmsg(mdoc, n, MANDOCERR_BADDISP); + return(0); + case (MDOC_Offset): + /* NB: this can be empty! 
*/ + if (n->args->argv[i].sz) { + offs = n->args->argv[i].value[0]; + dup = (NULL != n->norm->Bd.offs); + break; + } + mdoc_nmsg(mdoc, n, MANDOCERR_IGNARGV); + break; + case (MDOC_Compact): + comp = 1; + dup = n->norm->Bd.comp; + break; + default: + abort(); + /* NOTREACHED */ + } + + /* Check whether we have duplicates. */ + + if (dup) + mdoc_nmsg(mdoc, n, MANDOCERR_ARGVREP); + + /* Make our auxiliary assignments. */ + + if (offs && ! dup) + n->norm->Bd.offs = offs; + if (comp && ! dup) + n->norm->Bd.comp = comp; + + /* Check whether a type has already been assigned. */ + + if (DISP__NONE != dt && n->norm->Bd.type != DISP__NONE) + mdoc_nmsg(mdoc, n, MANDOCERR_DISPREP); + + /* Make our type assignment. */ + + if (DISP__NONE != dt && n->norm->Bd.type == DISP__NONE) + n->norm->Bd.type = dt; + } + + if (DISP__NONE == n->norm->Bd.type) { + mdoc_nmsg(mdoc, n, MANDOCERR_DISPTYPE); + n->norm->Bd.type = DISP_ragged; + } + + return(1); +} + + +static int +pre_ss(PRE_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return(1); + return(check_parent(mdoc, n, MDOC_Sh, MDOC_BODY)); +} + + +static int +pre_sh(PRE_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return(1); + + roff_regunset(mdoc->roff, REG_nS); + return(check_parent(mdoc, n, MDOC_MAX, MDOC_ROOT)); +} + + +static int +pre_it(PRE_ARGS) +{ + + if (MDOC_BLOCK != n->type) + return(1); + + return(check_parent(mdoc, n, MDOC_Bl, MDOC_BODY)); +} + + +static int +pre_an(PRE_ARGS) +{ + int i; + + if (NULL == n->args) + return(1); + + for (i = 1; i < (int)n->args->argc; i++) + mdoc_pmsg(mdoc, n->args->argv[i].line, + n->args->argv[i].pos, MANDOCERR_IGNARGV); + + if (MDOC_Split == n->args->argv[0].arg) + n->norm->An.auth = AUTH_split; + else if (MDOC_Nosplit == n->args->argv[0].arg) + n->norm->An.auth = AUTH_nosplit; + else + abort(); + + return(1); +} + +static int +pre_std(PRE_ARGS) +{ + + if (n->args && 1 == n->args->argc) + if (MDOC_Std == n->args->argv[0].arg) + return(1); + + mdoc_nmsg(mdoc, n, MANDOCERR_NOARGV); + return(1); +} + 
+static int +pre_dt(PRE_ARGS) +{ + + if (NULL == mdoc->meta.date || mdoc->meta.os) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO); + + if (mdoc->meta.title) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP); + + return(1); +} + +static int +pre_os(PRE_ARGS) +{ + + if (NULL == mdoc->meta.title || NULL == mdoc->meta.date) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO); + + if (mdoc->meta.os) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP); + + return(1); +} + +static int +pre_dd(PRE_ARGS) +{ + + if (mdoc->meta.title || mdoc->meta.os) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGOOO); + + if (mdoc->meta.date) + mdoc_nmsg(mdoc, n, MANDOCERR_PROLOGREP); + + return(1); +} + + +static int +post_bf(POST_ARGS) +{ + struct mdoc_node *np; + enum mdocargt arg; + + /* + * Unlike other data pointers, these are "housed" by the HEAD + * element, which contains the goods. + */ + + if (MDOC_HEAD != mdoc->last->type) { + if (ENDBODY_NOT != mdoc->last->end) { + assert(mdoc->last->pending); + np = mdoc->last->pending->parent->head; + } else if (MDOC_BLOCK != mdoc->last->type) { + np = mdoc->last->parent->head; + } else + np = mdoc->last->head; + + assert(np); + assert(MDOC_HEAD == np->type); + assert(MDOC_Bf == np->tok); + return(1); + } + + np = mdoc->last; + assert(MDOC_BLOCK == np->parent->type); + assert(MDOC_Bf == np->parent->tok); + + /* + * Cannot have both argument and parameter. + * If neither is specified, let it through with a warning. + */ + + if (np->parent->args && np->child) { + mdoc_nmsg(mdoc, np, MANDOCERR_SYNTARGVCOUNT); + return(0); + } else if (NULL == np->parent->args && NULL == np->child) { + mdoc_nmsg(mdoc, np, MANDOCERR_FONTTYPE); + return(1); + } + + /* Extract argument into data. 
*/ + + if (np->parent->args) { + arg = np->parent->args->argv[0].arg; + if (MDOC_Emphasis == arg) + np->norm->Bf.font = FONT_Em; + else if (MDOC_Literal == arg) + np->norm->Bf.font = FONT_Li; + else if (MDOC_Symbolic == arg) + np->norm->Bf.font = FONT_Sy; + else + abort(); + return(1); + } + + /* Extract parameter into data. */ + + if (0 == strcmp(np->child->string, "Em")) + np->norm->Bf.font = FONT_Em; + else if (0 == strcmp(np->child->string, "Li")) + np->norm->Bf.font = FONT_Li; + else if (0 == strcmp(np->child->string, "Sy")) + np->norm->Bf.font = FONT_Sy; + else + mdoc_nmsg(mdoc, np, MANDOCERR_FONTTYPE); + + return(1); +} + +static int +post_lb(POST_ARGS) +{ + const char *p; + char *buf; + size_t sz; + + check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_EQ, 1); + + assert(mdoc->last->child); + assert(MDOC_TEXT == mdoc->last->child->type); + + p = mdoc_a2lib(mdoc->last->child->string); + + /* If lookup ok, replace with table value. */ + + if (p) { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); + return(1); + } + + /* If not, use "library ``xxxx''. */ + + sz = strlen(mdoc->last->child->string) + + 2 + strlen("\\(lqlibrary\\(rq"); + buf = mandoc_malloc(sz); + snprintf(buf, sz, "library \\(lq%s\\(rq", + mdoc->last->child->string); + free(mdoc->last->child->string); + mdoc->last->child->string = buf; + return(1); +} + +static int +post_eoln(POST_ARGS) +{ + + if (mdoc->last->child) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_ARGSLOST); + return(1); +} + + +static int +post_vt(POST_ARGS) +{ + const struct mdoc_node *n; + + /* + * The Vt macro comes in both ELEM and BLOCK form, both of which + * have different syntaxes (yet more context-sensitive + * behaviour). ELEM types must have a child, which is already + * guaranteed by the in_line parsing routine; BLOCK types, + * specifically the BODY, should only have TEXT children. 
+ */ + + if (MDOC_BODY != mdoc->last->type) + return(1); + + for (n = mdoc->last->child; n; n = n->next) + if (MDOC_TEXT != n->type) + mdoc_nmsg(mdoc, n, MANDOCERR_CHILD); + + return(1); +} + + +static int +post_nm(POST_ARGS) +{ + char buf[BUFSIZ]; + int c; + + /* If no child specified, make sure we have the meta name. */ + + if (NULL == mdoc->last->child && NULL == mdoc->meta.name) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NONAME); + return(1); + } else if (mdoc->meta.name) + return(1); + + /* If no meta name, set it from the child. */ + + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); + return(0); + } + + assert(c); + mdoc->meta.name = mandoc_strdup(buf); + return(1); +} + +static int +post_literal(POST_ARGS) +{ + + /* + * The `Dl' (note "el" not "one") and `Bd' macros unset the + * MDOC_LITERAL flag as they leave. Note that `Bd' only sets + * this in literal mode, but it doesn't hurt to just switch it + * off in general since displays can't be nested. + */ + + if (MDOC_BODY == mdoc->last->type) + mdoc->flags &= ~MDOC_LITERAL; + + return(1); +} + +static int +post_defaults(POST_ARGS) +{ + struct mdoc_node *nn; + + /* + * The `Ar' defaults to "file ..." if no value is provided as an + * argument; the `Mt' and `Pa' macros use "~"; the `Li' just + * gets an empty string. + */ + + if (mdoc->last->child) + return(1); + + nn = mdoc->last; + mdoc->next = MDOC_NEXT_CHILD; + + switch (nn->tok) { + case (MDOC_Ar): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "file")) + return(0); + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "...")) + return(0); + break; + case (MDOC_At): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "AT&T")) + return(0); + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "UNIX")) + return(0); + break; + case (MDOC_Li): + if ( ! mdoc_word_alloc(mdoc, nn->line, nn->pos, "")) + return(0); + break; + case (MDOC_Pa): + /* FALLTHROUGH */ + case (MDOC_Mt): + if ( ! 
mdoc_word_alloc(mdoc, nn->line, nn->pos, "~")) + return(0); + break; + default: + abort(); + /* NOTREACHED */ + } + + mdoc->last = nn; + return(1); +} + +static int +post_at(POST_ARGS) +{ + const char *p, *q; + char *buf; + size_t sz; + + /* + * If we have a child, look it up in the standard keys. If a + * key exist, use that instead of the child; if it doesn't, + * prefix "AT&T UNIX " to the existing data. + */ + + if (NULL == mdoc->last->child) + return(1); + + assert(MDOC_TEXT == mdoc->last->child->type); + p = mdoc_a2att(mdoc->last->child->string); + + if (p) { + free(mdoc->last->child->string); + mdoc->last->child->string = mandoc_strdup(p); + } else { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADATT); + p = "AT&T UNIX "; + q = mdoc->last->child->string; + sz = strlen(p) + strlen(q) + 1; + buf = mandoc_malloc(sz); + strlcpy(buf, p, sz); + strlcat(buf, q, sz); + free(mdoc->last->child->string); + mdoc->last->child->string = buf; + } + + return(1); +} + +static int +post_an(POST_ARGS) +{ + struct mdoc_node *np; + + np = mdoc->last; + if (AUTH__NONE == np->norm->An.auth) { + if (0 == np->child) + check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_GT, 0); + } else if (np->child) + check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_EQ, 0); + + return(1); +} + + +static int +post_it(POST_ARGS) +{ + int i, cols; + enum mdoc_list lt; + struct mdoc_node *n, *c; + enum mandocerr er; + + if (MDOC_BLOCK != mdoc->last->type) + return(1); + + n = mdoc->last->parent->parent; + lt = n->norm->Bl.type; + + if (LIST__NONE == lt) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_LISTTYPE); + return(1); + } + + switch (lt) { + case (LIST_tag): + if (mdoc->last->head->child) + break; + /* FIXME: give this a dummy value. 
*/ + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOARGS); + break; + case (LIST_hang): + /* FALLTHROUGH */ + case (LIST_ohang): + /* FALLTHROUGH */ + case (LIST_inset): + /* FALLTHROUGH */ + case (LIST_diag): + if (NULL == mdoc->last->head->child) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOARGS); + break; + case (LIST_bullet): + /* FALLTHROUGH */ + case (LIST_dash): + /* FALLTHROUGH */ + case (LIST_enum): + /* FALLTHROUGH */ + case (LIST_hyphen): + if (NULL == mdoc->last->body->child) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOBODY); + /* FALLTHROUGH */ + case (LIST_item): + if (mdoc->last->head->child) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_ARGSLOST); + break; + case (LIST_column): + cols = (int)n->norm->Bl.ncols; + + assert(NULL == mdoc->last->head->child); + + if (NULL == mdoc->last->body->child) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NOBODY); + + for (i = 0, c = mdoc->last->child; c; c = c->next) + if (MDOC_BODY == c->type) + i++; + + if (i < cols) + er = MANDOCERR_ARGCOUNT; + else if (i == cols || i == cols + 1) + break; + else + er = MANDOCERR_SYNTARGCOUNT; + + mandoc_vmsg(er, mdoc->parse, mdoc->last->line, + mdoc->last->pos, + "columns == %d (have %d)", cols, i); + return(MANDOCERR_ARGCOUNT == er); + default: + break; + } + + return(1); +} + +static int +post_bl_block(POST_ARGS) +{ + struct mdoc_node *n; + + /* + * These are fairly complicated, so we've broken them into two + * functions. post_bl_block_tag() is called when a -tag is + * specified, but no -width (it must be guessed). The second + * when a -width is specified (macro indicators must be + * rewritten into real lengths). + */ + + n = mdoc->last; + + if (LIST_tag == n->norm->Bl.type && + NULL == n->norm->Bl.width) { + if ( ! post_bl_block_tag(mdoc)) + return(0); + } else if (NULL != n->norm->Bl.width) { + if ( ! 
post_bl_block_width(mdoc)) + return(0); + } else + return(1); + + assert(n->norm->Bl.width); + return(1); +} + +static int +post_bl_block_width(POST_ARGS) +{ + size_t width; + int i; + enum mdoct tok; + struct mdoc_node *n; + char buf[NUMSIZ]; + + n = mdoc->last; + + /* + * Calculate the real width of a list from the -width string, + * which may contain a macro (with a known default width), a + * literal string, or a scaling width. + * + * If the value to -width is a macro, then we re-write it to be + * the macro's width as set in share/tmac/mdoc/doc-common. + */ + + if (0 == strcmp(n->norm->Bl.width, "Ds")) + width = 6; + else if (MDOC_MAX == (tok = mdoc_hash_find(n->norm->Bl.width))) + return(1); + else if (0 == (width = macro2len(tok))) { + mdoc_nmsg(mdoc, n, MANDOCERR_BADWIDTH); + return(1); + } + + /* The value already exists: free and reallocate it. */ + + assert(n->args); + + for (i = 0; i < (int)n->args->argc; i++) + if (MDOC_Width == n->args->argv[i].arg) + break; + + assert(i < (int)n->args->argc); + + snprintf(buf, NUMSIZ, "%un", (unsigned int)width); + free(n->args->argv[i].value[0]); + n->args->argv[i].value[0] = mandoc_strdup(buf); + + /* Set our width! */ + n->norm->Bl.width = n->args->argv[i].value[0]; + return(1); +} + +static int +post_bl_block_tag(POST_ARGS) +{ + struct mdoc_node *n, *nn; + size_t sz, ssz; + int i; + char buf[NUMSIZ]; + + /* + * Calculate the -width for a `Bl -tag' list if it hasn't been + * provided. Uses the first head macro. NOTE AGAIN: this is + * ONLY if the -width argument has NOT been provided. See + * post_bl_block_width() for converting the -width string. 
+ */ + + sz = 10; + n = mdoc->last; + + for (nn = n->body->child; nn; nn = nn->next) { + if (MDOC_It != nn->tok) + continue; + + assert(MDOC_BLOCK == nn->type); + nn = nn->head->child; + + if (nn == NULL) + break; + + if (MDOC_TEXT == nn->type) { + sz = strlen(nn->string) + 1; + break; + } + + if (0 != (ssz = macro2len(nn->tok))) + sz = ssz; + + break; + } + + /* Defaults to ten ens. */ + + snprintf(buf, NUMSIZ, "%un", (unsigned int)sz); + + /* + * We have to dynamically add this to the macro's argument list. + * We're guaranteed that a MDOC_Width doesn't already exist. + */ + + assert(n->args); + i = (int)(n->args->argc)++; + + n->args->argv = mandoc_realloc(n->args->argv, + n->args->argc * sizeof(struct mdoc_argv)); + + n->args->argv[i].arg = MDOC_Width; + n->args->argv[i].line = n->line; + n->args->argv[i].pos = n->pos; + n->args->argv[i].sz = 1; + n->args->argv[i].value = mandoc_malloc(sizeof(char *)); + n->args->argv[i].value[0] = mandoc_strdup(buf); + + /* Set our width! */ + n->norm->Bl.width = n->args->argv[i].value[0]; + return(1); +} + + +static int +post_bl_head(POST_ARGS) +{ + struct mdoc_node *np, *nn, *nnp; + int i, j; + + if (LIST_column != mdoc->last->norm->Bl.type) + /* FIXME: this should be ERROR class... */ + return(hwarn_eq0(mdoc)); + + /* + * Convert old-style lists, where the column width specifiers + * trail as macro parameters, to the new-style ("normal-form") + * lists where they're argument values following -column. + */ + + /* First, disallow both types and allow normal-form. */ + + /* + * TODO: technically, we can accept both and just merge the two + * lists, but I'll leave that for another day. 
+ */ + + if (mdoc->last->norm->Bl.ncols && mdoc->last->nchild) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_COLUMNS); + return(0); + } else if (NULL == mdoc->last->child) + return(1); + + np = mdoc->last->parent; + assert(np->args); + + for (j = 0; j < (int)np->args->argc; j++) + if (MDOC_Column == np->args->argv[j].arg) + break; + + assert(j < (int)np->args->argc); + assert(0 == np->args->argv[j].sz); + + /* + * Accommodate for new-style groff column syntax. Shuffle the + * child nodes, all of which must be TEXT, as arguments for the + * column field. Then, delete the head children. + */ + + np->args->argv[j].sz = (size_t)mdoc->last->nchild; + np->args->argv[j].value = mandoc_malloc + ((size_t)mdoc->last->nchild * sizeof(char *)); + + mdoc->last->norm->Bl.ncols = np->args->argv[j].sz; + mdoc->last->norm->Bl.cols = (void *)np->args->argv[j].value; + + for (i = 0, nn = mdoc->last->child; nn; i++) { + np->args->argv[j].value[i] = nn->string; + nn->string = NULL; + nnp = nn; + nn = nn->next; + mdoc_node_delete(NULL, nnp); + } + + mdoc->last->nchild = 0; + mdoc->last->child = NULL; + + return(1); +} + +static int +post_bl(POST_ARGS) +{ + struct mdoc_node *n; + + if (MDOC_HEAD == mdoc->last->type) + return(post_bl_head(mdoc)); + if (MDOC_BLOCK == mdoc->last->type) + return(post_bl_block(mdoc)); + if (MDOC_BODY != mdoc->last->type) + return(1); + + for (n = mdoc->last->child; n; n = n->next) { + switch (n->tok) { + case (MDOC_Lp): + /* FALLTHROUGH */ + case (MDOC_Pp): + mdoc_nmsg(mdoc, n, MANDOCERR_CHILD); + /* FALLTHROUGH */ + case (MDOC_It): + /* FALLTHROUGH */ + case (MDOC_Sm): + continue; + default: + break; + } + + mdoc_nmsg(mdoc, n, MANDOCERR_SYNTCHILD); + return(0); + } + + return(1); +} + +static int +ebool(struct mdoc *mdoc) +{ + + if (NULL == mdoc->last->child) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_MACROEMPTY); + mdoc_node_delete(mdoc, mdoc->last); + return(1); + } + check_count(mdoc, MDOC_ELEM, CHECK_WARN, CHECK_EQ, 1); + + assert(MDOC_TEXT == 
mdoc->last->child->type); + + if (0 == strcmp(mdoc->last->child->string, "on")) + return(1); + if (0 == strcmp(mdoc->last->child->string, "off")) + return(1); + + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADBOOL); + return(1); +} + +static int +post_root(POST_ARGS) +{ + int erc; + struct mdoc_node *n; + + erc = 0; + + /* Check that we have a finished prologue. */ + + if ( ! (MDOC_PBODY & mdoc->flags)) { + erc++; + mdoc_nmsg(mdoc, mdoc->first, MANDOCERR_NODOCPROLOG); + } + + n = mdoc->first; + assert(n); + + /* Check that we begin with a proper `Sh'. */ + + if (NULL == n->child) { + erc++; + mdoc_nmsg(mdoc, n, MANDOCERR_NODOCBODY); + } else if (MDOC_BLOCK != n->child->type || + MDOC_Sh != n->child->tok) { + erc++; + /* Can this be lifted? See rxdebug.1 for example. */ + mdoc_nmsg(mdoc, n, MANDOCERR_NODOCBODY); + } + + return(erc ? 0 : 1); +} + +static int +post_st(POST_ARGS) +{ + struct mdoc_node *ch; + const char *p; + + if (NULL == (ch = mdoc->last->child)) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_MACROEMPTY); + mdoc_node_delete(mdoc, mdoc->last); + return(1); + } + + assert(MDOC_TEXT == ch->type); + + if (NULL == (p = mdoc_a2st(ch->string))) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADSTANDARD); + mdoc_node_delete(mdoc, mdoc->last); + } else { + free(ch->string); + ch->string = mandoc_strdup(p); + } + + return(1); +} + +static int +post_rs(POST_ARGS) +{ + struct mdoc_node *nn, *next, *prev; + int i, j; + + switch (mdoc->last->type) { + case (MDOC_HEAD): + check_count(mdoc, MDOC_HEAD, CHECK_WARN, CHECK_EQ, 0); + return(1); + case (MDOC_BODY): + if (mdoc->last->child) + break; + check_count(mdoc, MDOC_BODY, CHECK_WARN, CHECK_GT, 0); + return(1); + default: + return(1); + } + + /* + * Make sure only certain types of nodes are allowed within the + * the `Rs' body. Delete offending nodes and raise a warning. + * Do this before re-ordering for the sake of clarity. 
+ */ + + next = NULL; + for (nn = mdoc->last->child; nn; nn = next) { + for (i = 0; i < RSORD_MAX; i++) + if (nn->tok == rsord[i]) + break; + + if (i < RSORD_MAX) { + if (MDOC__J == rsord[i] || MDOC__B == rsord[i]) + mdoc->last->norm->Rs.quote_T++; + next = nn->next; + continue; + } + + next = nn->next; + mdoc_nmsg(mdoc, nn, MANDOCERR_CHILD); + mdoc_node_delete(mdoc, nn); + } + + /* + * Nothing to sort if only invalid nodes were found + * inside the `Rs' body. + */ + + if (NULL == mdoc->last->child) + return(1); + + /* + * The full `Rs' block needs special handling to order the + * sub-elements according to `rsord'. Pick through each element + * and correctly order it. This is a insertion sort. + */ + + next = NULL; + for (nn = mdoc->last->child->next; nn; nn = next) { + /* Determine order of `nn'. */ + for (i = 0; i < RSORD_MAX; i++) + if (rsord[i] == nn->tok) + break; + + /* + * Remove `nn' from the chain. This somewhat + * repeats mdoc_node_unlink(), but since we're + * just re-ordering, there's no need for the + * full unlink process. + */ + + if (NULL != (next = nn->next)) + next->prev = nn->prev; + + if (NULL != (prev = nn->prev)) + prev->next = nn->next; + + nn->prev = nn->next = NULL; + + /* + * Scan back until we reach a node that's + * ordered before `nn'. + */ + + for ( ; prev ; prev = prev->prev) { + /* Determine order of `prev'. */ + for (j = 0; j < RSORD_MAX; j++) + if (rsord[j] == prev->tok) + break; + + if (j <= i) + break; + } + + /* + * Set `nn' back into its correct place in front + * of the `prev' node. 
+ */ + + nn->prev = prev; + + if (prev) { + if (prev->next) + prev->next->prev = nn; + nn->next = prev->next; + prev->next = nn; + } else { + mdoc->last->child->prev = nn; + nn->next = mdoc->last->child; + mdoc->last->child = nn; + } + } + + return(1); +} + +static int +post_ns(POST_ARGS) +{ + + if (MDOC_LINE & mdoc->last->flags) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_IGNNS); + return(1); +} + +static int +post_sh(POST_ARGS) +{ + + if (MDOC_HEAD == mdoc->last->type) + return(post_sh_head(mdoc)); + if (MDOC_BODY == mdoc->last->type) + return(post_sh_body(mdoc)); + + return(1); +} + +static int +post_sh_body(POST_ARGS) +{ + struct mdoc_node *n; + + if (SEC_NAME != mdoc->lastsec) + return(1); + + /* + * Warn if the NAME section doesn't contain the `Nm' and `Nd' + * macros (can have multiple `Nm' and one `Nd'). Note that the + * children of the BODY declaration can also be "text". + */ + + if (NULL == (n = mdoc->last->child)) { + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC); + return(1); + } + + for ( ; n && n->next; n = n->next) { + if (MDOC_ELEM == n->type && MDOC_Nm == n->tok) + continue; + if (MDOC_TEXT == n->type) + continue; + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC); + } + + assert(n); + if (MDOC_BLOCK == n->type && MDOC_Nd == n->tok) + return(1); + + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_BADNAMESEC); + return(1); +} + +static int +post_sh_head(POST_ARGS) +{ + char buf[BUFSIZ]; + struct mdoc_node *n; + enum mdoc_sec sec; + int c; + + /* + * Process a new section. Sections are either "named" or + * "custom". Custom sections are user-defined, while named ones + * follow a conventional order and may only appear in certain + * manual sections. + */ + + sec = SEC_CUSTOM; + buf[0] = '\0'; + if (-1 == (c = concat(buf, mdoc->last->child, BUFSIZ))) { + mdoc_nmsg(mdoc, mdoc->last->child, MANDOCERR_MEM); + return(0); + } else if (1 == c) + sec = a2sec(buf); + + /* The NAME should be first. 
*/ + + if (SEC_NAME != sec && SEC_NONE == mdoc->lastnamed) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_NAMESECFIRST); + + /* The SYNOPSIS gets special attention in other areas. */ + + if (SEC_SYNOPSIS == sec) + mdoc->flags |= MDOC_SYNOPSIS; + else + mdoc->flags &= ~MDOC_SYNOPSIS; + + /* Mark our last section. */ + + mdoc->lastsec = sec; + + /* + * Set the section attribute for the current HEAD, for its + * parent BLOCK, and for the HEAD children; the latter can + * only be TEXT nodes, so no recursion is needed. + * For other blocks and elements, including .Sh BODY, this is + * done when allocating the node data structures, but for .Sh + * BLOCK and HEAD, the section is still unknown at that time. + */ + + mdoc->last->parent->sec = sec; + mdoc->last->sec = sec; + for (n = mdoc->last->child; n; n = n->next) + n->sec = sec; + + /* We don't care about custom sections after this. */ + + if (SEC_CUSTOM == sec) + return(1); + + /* + * Check whether our non-custom section is being repeated or is + * out of order. + */ + + if (sec == mdoc->lastnamed) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECREP); + + if (sec < mdoc->lastnamed) + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECOOO); + + /* Mark the last named section. */ + + mdoc->lastnamed = sec; + + /* Check particular section/manual conventions. 
*/ + + assert(mdoc->meta.msec); + + switch (sec) { + case (SEC_RETURN_VALUES): + /* FALLTHROUGH */ + case (SEC_ERRORS): + /* FALLTHROUGH */ + case (SEC_LIBRARY): + if (*mdoc->meta.msec == '2') + break; + if (*mdoc->meta.msec == '3') + break; + if (*mdoc->meta.msec == '9') + break; + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_SECMSEC); + break; + default: + break; + } + + return(1); +} + +static int +post_ignpar(POST_ARGS) +{ + struct mdoc_node *np; + + if (MDOC_BODY != mdoc->last->type) + return(1); + + if (NULL != (np = mdoc->last->child)) + if (MDOC_Pp == np->tok || MDOC_Lp == np->tok) { + mdoc_nmsg(mdoc, np, MANDOCERR_IGNPAR); + mdoc_node_delete(mdoc, np); + } + + if (NULL != (np = mdoc->last->last)) + if (MDOC_Pp == np->tok || MDOC_Lp == np->tok) { + mdoc_nmsg(mdoc, np, MANDOCERR_IGNPAR); + mdoc_node_delete(mdoc, np); + } + + return(1); +} + +static int +pre_par(PRE_ARGS) +{ + + if (NULL == mdoc->last) + return(1); + if (MDOC_ELEM != n->type && MDOC_BLOCK != n->type) + return(1); + + /* + * Don't allow prior `Lp' or `Pp' prior to a paragraph-type + * block: `Lp', `Pp', or non-compact `Bd' or `Bl'. + */ + + if (MDOC_Pp != mdoc->last->tok && MDOC_Lp != mdoc->last->tok) + return(1); + if (MDOC_Bl == n->tok && n->norm->Bl.comp) + return(1); + if (MDOC_Bd == n->tok && n->norm->Bd.comp) + return(1); + if (MDOC_It == n->tok && n->parent->norm->Bl.comp) + return(1); + + mdoc_nmsg(mdoc, mdoc->last, MANDOCERR_IGNPAR); + mdoc_node_delete(mdoc, mdoc->last); + return(1); +} + +static int +pre_literal(PRE_ARGS) +{ + + if (MDOC_BODY != n->type) + return(1); + + /* + * The `Dl' (note "el" not "one") and `Bd -literal' and `Bd + * -unfilled' macros set MDOC_LITERAL on entrance to the body. 
+ */ + + switch (n->tok) { + case (MDOC_Dl): + mdoc->flags |= MDOC_LITERAL; + break; + case (MDOC_Bd): + if (DISP_literal == n->norm->Bd.type) + mdoc->flags |= MDOC_LITERAL; + if (DISP_unfilled == n->norm->Bd.type) + mdoc->flags |= MDOC_LITERAL; + break; + default: + abort(); + /* NOTREACHED */ + } + + return(1); +} + +static int +post_dd(POST_ARGS) +{ + char buf[DATESIZE]; + struct mdoc_node *n; + int c; + + if (mdoc->meta.date) + free(mdoc->meta.date); + + n = mdoc->last; + if (NULL == n->child || '\0' == n->child->string[0]) { + mdoc->meta.date = mandoc_normdate + (mdoc->parse, NULL, n->line, n->pos); + return(1); + } + + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, DATESIZE))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); + return(0); + } + + assert(c); + mdoc->meta.date = mandoc_normdate + (mdoc->parse, buf, n->line, n->pos); + + return(1); +} + +static int +post_dt(POST_ARGS) +{ + struct mdoc_node *nn, *n; + const char *cp; + char *p; + + n = mdoc->last; + + if (mdoc->meta.title) + free(mdoc->meta.title); + if (mdoc->meta.vol) + free(mdoc->meta.vol); + if (mdoc->meta.arch) + free(mdoc->meta.arch); + + mdoc->meta.title = mdoc->meta.vol = mdoc->meta.arch = NULL; + + /* First make all characters uppercase. */ + + if (NULL != (nn = n->child)) + for (p = nn->string; *p; p++) { + if (toupper((unsigned char)*p) == *p) + continue; + + /* + * FIXME: don't be lazy: have this make all + * characters be uppercase and just warn once. + */ + mdoc_nmsg(mdoc, nn, MANDOCERR_UPPERCASE); + break; + } + + /* Handles: `.Dt' + * --> title = unknown, volume = local, msec = 0, arch = NULL + */ + + if (NULL == (nn = n->child)) { + /* XXX: make these macro values. */ + /* FIXME: warn about missing values. 
*/ + mdoc->meta.title = mandoc_strdup("UNKNOWN"); + mdoc->meta.vol = mandoc_strdup("LOCAL"); + mdoc->meta.msec = mandoc_strdup("1"); + return(1); + } + + /* Handles: `.Dt TITLE' + * --> title = TITLE, volume = local, msec = 0, arch = NULL + */ + + mdoc->meta.title = mandoc_strdup + ('\0' == nn->string[0] ? "UNKNOWN" : nn->string); + + if (NULL == (nn = nn->next)) { + /* FIXME: warn about missing msec. */ + /* XXX: make this a macro value. */ + mdoc->meta.vol = mandoc_strdup("LOCAL"); + mdoc->meta.msec = mandoc_strdup("1"); + return(1); + } + + /* Handles: `.Dt TITLE SEC' + * --> title = TITLE, volume = SEC is msec ? + * format(msec) : SEC, + * msec = SEC is msec ? atoi(msec) : 0, + * arch = NULL + */ + + cp = mandoc_a2msec(nn->string); + if (cp) { + mdoc->meta.vol = mandoc_strdup(cp); + mdoc->meta.msec = mandoc_strdup(nn->string); + } else { + mdoc_nmsg(mdoc, n, MANDOCERR_BADMSEC); + mdoc->meta.vol = mandoc_strdup(nn->string); + mdoc->meta.msec = mandoc_strdup(nn->string); + } + + if (NULL == (nn = nn->next)) + return(1); + + /* Handles: `.Dt TITLE SEC VOL' + * --> title = TITLE, volume = VOL is vol ? + * format(VOL) : + * VOL is arch ? format(arch) : + * VOL + */ + + cp = mdoc_a2vol(nn->string); + if (cp) { + free(mdoc->meta.vol); + mdoc->meta.vol = mandoc_strdup(cp); + } else { + /* FIXME: warn about bad arch. */ + cp = mdoc_a2arch(nn->string); + if (NULL == cp) { + free(mdoc->meta.vol); + mdoc->meta.vol = mandoc_strdup(nn->string); + } else + mdoc->meta.arch = mandoc_strdup(cp); + } + + /* Ignore any subsequent parameters... */ + /* FIXME: warn about subsequent parameters. */ + + return(1); +} + +static int +post_prol(POST_ARGS) +{ + /* + * Remove prologue macros from the document after they're + * processed. The final document uses mdoc_meta for these + * values and discards the originals. 
+ */ + + mdoc_node_delete(mdoc, mdoc->last); + if (mdoc->meta.title && mdoc->meta.date && mdoc->meta.os) + mdoc->flags |= MDOC_PBODY; + + return(1); +} + +static int +post_bx(POST_ARGS) +{ + struct mdoc_node *n; + + /* + * Make `Bx's second argument always start with an uppercase + * letter. Groff checks if it's an "accepted" term, but we just + * uppercase blindly. + */ + + n = mdoc->last->child; + if (n && NULL != (n = n->next)) + *n->string = (char)toupper + ((unsigned char)*n->string); + + return(1); +} + +static int +post_os(POST_ARGS) +{ + struct mdoc_node *n; + char buf[BUFSIZ]; + int c; +#ifndef OSNAME + struct utsname utsname; +#endif + + n = mdoc->last; + + /* + * Set the operating system by way of the `Os' macro. Note that + * if an argument isn't provided and -DOSNAME="\"foo\"" is + * provided during compilation, this value will be used instead + * of filling in "sysname release" from uname(). + */ + + if (mdoc->meta.os) + free(mdoc->meta.os); + + buf[0] = '\0'; + if (-1 == (c = concat(buf, n->child, BUFSIZ))) { + mdoc_nmsg(mdoc, n->child, MANDOCERR_MEM); + return(0); + } + + assert(c); + + /* XXX: yes, these can all be dynamically-adjusted buffers, but + * it's really not worth the extra hackery. 
+ */ + + if ('\0' == buf[0]) { +#ifdef OSNAME + if (strlcat(buf, OSNAME, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } +#else /*!OSNAME */ + if (-1 == uname(&utsname)) { + mdoc_nmsg(mdoc, n, MANDOCERR_UNAME); + mdoc->meta.os = mandoc_strdup("UNKNOWN"); + return(post_prol(mdoc)); + } + + if (strlcat(buf, utsname.sysname, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } + if (strlcat(buf, " ", BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } + if (strlcat(buf, utsname.release, BUFSIZ) >= BUFSIZ) { + mdoc_nmsg(mdoc, n, MANDOCERR_MEM); + return(0); + } +#endif /*!OSNAME*/ + } + + mdoc->meta.os = mandoc_strdup(buf); + return(1); +} + +static int +post_std(POST_ARGS) +{ + struct mdoc_node *nn, *n; + + n = mdoc->last; + + /* + * Macros accepting `-std' as an argument have the name of the + * current document (`Nm') filled in as the argument if it's not + * provided. + */ + + if (n->child) + return(1); + + if (NULL == mdoc->meta.name) + return(1); + + nn = n; + mdoc->next = MDOC_NEXT_CHILD; + + if ( ! mdoc_word_alloc(mdoc, n->line, n->pos, mdoc->meta.name)) + return(0); + + mdoc->last = nn; + return(1); +} + +/* + * Concatenate a node, stopping at the first non-text. + * Concatenation is separated by a single whitespace. + * Returns -1 on fatal (string overrun) error, 0 if child nodes were + * encountered, 1 otherwise. 
+ */ +static int +concat(char *p, const struct mdoc_node *n, size_t sz) +{ + + for ( ; NULL != n; n = n->next) { + if (MDOC_TEXT != n->type) + return(0); + if ('\0' != p[0] && strlcat(p, " ", sz) >= sz) + return(-1); + if (strlcat(p, n->string, sz) >= sz) + return(-1); + concat(p, n->child, sz); + } + + return(1); +} + +static enum mdoc_sec +a2sec(const char *p) +{ + int i; + + for (i = 0; i < (int)SEC__MAX; i++) + if (secnames[i] && 0 == strcmp(p, secnames[i])) + return((enum mdoc_sec)i); + + return(SEC_CUSTOM); +} + +static size_t +macro2len(enum mdoct macro) +{ + + switch (macro) { + case(MDOC_Ad): + return(12); + case(MDOC_Ao): + return(12); + case(MDOC_An): + return(12); + case(MDOC_Aq): + return(12); + case(MDOC_Ar): + return(12); + case(MDOC_Bo): + return(12); + case(MDOC_Bq): + return(12); + case(MDOC_Cd): + return(12); + case(MDOC_Cm): + return(10); + case(MDOC_Do): + return(10); + case(MDOC_Dq): + return(12); + case(MDOC_Dv): + return(12); + case(MDOC_Eo): + return(12); + case(MDOC_Em): + return(10); + case(MDOC_Er): + return(17); + case(MDOC_Ev): + return(15); + case(MDOC_Fa): + return(12); + case(MDOC_Fl): + return(10); + case(MDOC_Fo): + return(16); + case(MDOC_Fn): + return(16); + case(MDOC_Ic): + return(10); + case(MDOC_Li): + return(16); + case(MDOC_Ms): + return(6); + case(MDOC_Nm): + return(10); + case(MDOC_No): + return(12); + case(MDOC_Oo): + return(10); + case(MDOC_Op): + return(14); + case(MDOC_Pa): + return(32); + case(MDOC_Pf): + return(12); + case(MDOC_Po): + return(12); + case(MDOC_Pq): + return(12); + case(MDOC_Ql): + return(16); + case(MDOC_Qo): + return(12); + case(MDOC_So): + return(12); + case(MDOC_Sq): + return(12); + case(MDOC_Sy): + return(6); + case(MDOC_Sx): + return(16); + case(MDOC_Tn): + return(10); + case(MDOC_Va): + return(12); + case(MDOC_Vt): + return(12); + case(MDOC_Xr): + return(10); + default: + break; + }; + return(0); +} diff --git a/usr/src/cmd/mandoc/msec.c b/usr/src/cmd/mandoc/msec.c new file mode 100644 index 
0000000000..dd7d11c650 --- /dev/null +++ b/usr/src/cmd/mandoc/msec.c @@ -0,0 +1,37 @@ +/* $Id: msec.c,v 1.10 2011/12/02 01:37:14 schwarze Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "libmandoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mandoc_a2msec(const char *p) +{ + +#include "msec.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/msec.in b/usr/src/cmd/mandoc/msec.in new file mode 100644 index 0000000000..e4cf09df3b --- /dev/null +++ b/usr/src/cmd/mandoc/msec.in @@ -0,0 +1,244 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 
+ */ + +LINE("1", "User Commands") +LINE("1B", "illumos/BSD Compatibility Package Commands") +LINE("1b", "illumos/BSD Compatibility Package Commands") +LINE("1C", "Communication Commands") +LINE("1c", "Communication Commands") +LINE("1F", "FMLI Commands") +LINE("1f", "FMLI Commands") +LINE("1G", "Graphics and CAD Commands") +LINE("1g", "Graphics and CAD Commands") +LINE("1HAS", "User Commands") +LINE("1has", "User Commands") +LINE("1M", "Maintenance Commands") +LINE("1m", "Maintenance Commands") +LINE("1S", "illumos Specific Commands") +LINE("1s", "illumos Specific Commands") +LINE("2", "System Calls") +LINE("3", "Introduction to Library Functions") +LINE("3AIO", "Asynchronous I/O Library Functions") +LINE("3aio", "Asynchronous I/O Library Functions") +LINE("3BSM", "Security and Auditing Library Functions") +LINE("3bsm", "Security and Auditing Library Functions") +LINE("3C", "Standard C Library Functions") +LINE("3c", "Standard C Library Functions") +LINE("3C_DB", "Threads Debugging Library Functions") +LINE("3C_db", "Threads Debugging Library Functions") +LINE("3CFGADM", "Configuration Administration Library Functions") +LINE("3cfgadm", "Configuration Administration Library Functions") +LINE("3COMPPUTIL", "Communication Protocol Parser Utilities Library Functions") +LINE("3compputil", "Communication Protocol Parser Utilities Library Functions") +LINE("3CONTRACT", "Contract Management Library Functions") +LINE("3contract", "Contract Management Library Functions") +LINE("3CPC", "CPU Performance Counters Library Functions") +LINE("3cpc", "CPU Performance Counters Library Functions") +LINE("3CURSES", "Curses Library Functions") +LINE("3curses", "Curses Library Functions") +LINE("3DAT", "Direct Access Transport Library Functions") +LINE("3dat", "Direct Access Transport Library Functions") +LINE("3DEVID", "Device ID Library Functions") +LINE("3devid", "Device ID Library Functions") +LINE("3DEVINFO", "Device Information Library Functions") +LINE("3devinfo", "Device 
Information Library Functions") +LINE("3DL", "Dynamic Linking Library Functions") +LINE("3dl", "Dynamic Linking Library Functions") +LINE("3DLPI", "Data Link Provider Interface Library Functions") +LINE("3dlpi", "Data Link Provider Interface Library Functions") +LINE("3DMI", "DMI Library Functions") +LINE("3dmi", "DMI Library Functions") +LINE("3DNS_SD", "DNS Service Discovery Library Functions") +LINE("3dns_sd", "DNS Service Discovery Library Functions") +LINE("3DOOR", "Door Library Functions") +LINE("3door", "Door Library Functions") +LINE("3ELF", "ELF Library Functions") +LINE("3elf", "ELF Library Functions") +LINE("3EXACCT", "Extended Accounting File Access Library Functions") +LINE("3exacct", "Extended Accounting File Access Library Functions") +LINE("3EXT", "Extended Library Functions") +LINE("3ext", "Extended Library Functions") +LINE("3FCOE", "FCoE Port Management Library Functions") +LINE("3fcoe", "FCoE Port Management Library Functions") +LINE("3FSTYP", "File System Type Identification Library Functions") +LINE("3fstyp", "File System Type Identification Library Functions") +LINE("3GEN", "String Pattern-Matching Library Functions") +LINE("3gen", "String Pattern-Matching Library Functions") +LINE("3GSS", "Generic Security Services API Library Functions") +LINE("3gss", "Generic Security Services API Library Functions") +LINE("3HEAD", "Headers") +LINE("3head", "Headers") +LINE("3ISCSIT", "iSCSI Management Library Functions") +LINE("3iscsit", "iSCSI Management Library Functions") +LINE("3KRB", "Kerberos Library Functions") +LINE("3krb", "Kerberos Library Functions") +LINE("3KSTAT", "Kernel Statistics Library Functions") +LINE("3kstat", "Kernel Statistics Library Functions") +LINE("3KVM", "Kernel VM Library Functions") +LINE("3kvm", "Kernel VM Library Functions") +LINE("3LDAP", "LDAP Library Functions") +LINE("3ldap", "LDAP Library Functions") +LINE("3LGRP", "Locality Group Library Functions") +LINE("3lgrp", "Locality Group Library Functions") +LINE("3LIB", 
"Interface Libraries") +LINE("3lib", "Interface Libraries") +LINE("3LIBUCB", "illumos/BSD Compatibility Interface Libraries") +LINE("3libucb", "illumos/BSD Compatibility Interface Libraries") +LINE("3M", "Mathematical Library Functions") +LINE("3m", "Mathematical Library Functions") +LINE("3MAIL", "User Mailbox Library Functions") +LINE("3mail", "User Mailbox Library Functions") +LINE("3MALLOC", "Memory Allocation Library Functions") +LINE("3malloc", "Memory Allocation Library Functions") +LINE("3MP", "Multiple Precision Library Functions") +LINE("3mp", "Multiple Precision Library Functions") +LINE("3MPAPI", "Common Multipath Management Library Functions") +LINE("3mpapi", "Common Multipath Management Library Functions") +LINE("3NSL", "Networking Services Library Functions") +LINE("3nsl", "Networking Services Library Functions") +LINE("3NVPAIR", "Name-value Pair Library Functions") +LINE("3nvpair", "Name-value Pair Library Functions") +LINE("3PAM", "PAM Library Functions") +LINE("3pam", "PAM Library Functions") +LINE("3PAPI", "PAPI Library Functions") +LINE("3papi", "PAPI Library Functions") +LINE("3PERL", "Perl Library Functions") +LINE("3perl", "Perl Library Functions") +LINE("3PICL", "PICL Library Functions") +LINE("3picl", "PICL Library Functions") +LINE("3PICLTREE", "PICL Plug-In Library Functions") +LINE("3picltree", "PICL Plug-In Library Functions") +LINE("3PLOT", "Graphics Interface Library Functions") +LINE("3plot", "Graphics Interface Library Functions") +LINE("3POOL", "Pool Configuration Manipulation Library Functions") +LINE("3pool", "Pool Configuration Manipulation Library Functions") +LINE("3PROC", "Process Control Library Functions") +LINE("3proc", "Process Control Library Functions") +LINE("3PROJECT", "Project Database Access Library Functions") +LINE("3project", "Project Database Access Library Functions") +LINE("3RAC", "Remote Asynchronous Calls Library Functions") +LINE("3rac", "Remote Asynchronous Calls Library Functions") +LINE("3RESOLV", 
"Resolver Library Functions") +LINE("3resolv", "Resolver Library Functions") +LINE("3RPC", "RPC Library Functions") +LINE("3rpc", "RPC Library Functions") +LINE("3RSM", "Remote Shared Memory Library Functions") +LINE("3rsm", "Remote Shared Memory Library Functions") +LINE("3RT", "Realtime Library Functions") +LINE("3rt", "Realtime Library Functions") +LINE("3SASL", "Simple Authentication Security Layer Library Functions") +LINE("3sasl", "Simple Authentication Security Layer Library Functions") +LINE("3SCF", "Service Configuration Facility Library Functions") +LINE("3scf", "Service Configuration Facility Library Functions") +LINE("3SCHED", "LWP Scheduling Library Functions") +LINE("3sched", "LWP Scheduling Library Functions") +LINE("3SEC", "File Access Control Library Functions") +LINE("3sec", "File Access Control Library Functions") +LINE("3SECDB", "Security Attributes Database Library Functions") +LINE("3secdb", "Security Attributes Database Library Functions") +LINE("3SIP", "Session Initiation Protocol Library Functions") +LINE("3sip", "Session Initiation Protocol Library Functions") +LINE("3SLP", "Service Location Protocol Library Functions") +LINE("3slp", "Service Location Protocol Library Functions") +LINE("3SNMP", "SNMP Library Functions") +LINE("3snmp", "SNMP Library Functions") +LINE("3SOCKET", "Sockets Library Functions") +LINE("3socket", "Sockets Library Functions") +LINE("3STMF", "SCSI Target Mode Framework Library Functions") +LINE("3stmf", "SCSI Target Mode Framework Library Functions") +LINE("3SYSEVENT", "System Event Library Functions") +LINE("3sysevent", "System Event Library Functions") +LINE("3TECLA", "Interactive Command-line Input Library Functions") +LINE("3tecla", "Interactive Command-line Input Library Functions") +LINE("3THR", "Threads Library Functions") +LINE("3thr", "Threads Library Functions") +LINE("3TNF", "TNF Library Functions") +LINE("3tnf", "TNF Library Functions") +LINE("3TSOL", "Trusted Extensions Library Functions") 
+LINE("3tsol", "Trusted Extensions Library Functions") +LINE("3UCB", "illumos/BSD Compatibility Library Functions") +LINE("3ucb", "illumos/BSD Compatibility Library Functions") +LINE("3UUID", "Universally Unique Identifier Library Functions") +LINE("3uuid", "Universally Unique Identifier Library Functions") +LINE("3VOLMGT", "Volume Management Library Functions") +LINE("3volmgt", "Volume Management Library Functions") +LINE("3XCURSES", "X/Open Curses Library Functions") +LINE("3xcurses", "X/Open Curses Library Functions") +LINE("3XFN", "XFN Interface Library Functions") +LINE("3xfn", "XFN Interface Library Functions") +LINE("3XNET", "X/Open Networking Services Library Functions") +LINE("3xnet", "X/Open Networking Services Library Functions") +LINE("3B", "illumos/BSD Compatibility Library Functions") +LINE("3b", "illumos/BSD Compatibility Library Functions") +LINE("3E", "C Library Functions") +LINE("3e", "C Library Functions") +LINE("3F", "Fortran Library Routines") +LINE("3f", "Fortran Library Routines") +LINE("3G", "C Library Functions") +LINE("3g", "C Library Functions") +LINE("3K", "Kernel VM Library Functions") +LINE("3k", "Kernel VM Library Functions") +LINE("3L", "Lightweight Processes Library") +LINE("3l", "Lightweight Processes Library") +LINE("3N", "Network Functions") +LINE("3n", "Network Functions") +LINE("3R", "Realtime Library") +LINE("3r", "Realtime Library") +LINE("3S", "Standard I/O Functions") +LINE("3s", "Standard I/O Functions") +LINE("3T", "Threads Library") +LINE("3t", "Threads Library") +LINE("3W", "C Library Functions") +LINE("3w", "C Library Functions") +LINE("3X", "Miscellaneous Library Functions") +LINE("3x", "Miscellaneous Library Functions") +LINE("3XC", "X/Open Curses Library Functions") +LINE("3xc", "X/Open Curses Library Functions") +LINE("3XN", "X/Open Networking Services Library Functions") +LINE("3xn", "X/Open Networking Services Library Functions") +LINE("4", "File Formats") +LINE("4B", "illumos/BSD Compatibility Package File 
Formats") +LINE("4b", "illumos/BSD Compatibility Package File Formats") +LINE("5", "Standards, Environments, and Macros") +LINE("6", "Games and Demos") +LINE("7", "Device and Network Interfaces") +LINE("7B", "illumos/BSD Compatibility Special Files") +LINE("7b", "illumos/BSD Compatibility Special Files") +LINE("7D", "Devices") +LINE("7d", "Devices") +LINE("7FS", "File Systems") +LINE("7fs", "File Systems") +LINE("7I", "Ioctl Requests") +LINE("7i", "Ioctl Requests") +LINE("7IPP", "IP Quality of Service Modules") +LINE("7ipp", "IP Quality of Service Modules") +LINE("7M", "STREAMS Modules") +LINE("7m", "STREAMS Modules") +LINE("7P", "Protocols") +LINE("7p", "Protocols") +LINE("8", "Maintenance Procedures") +LINE("8C", "Maintenance Procedures") +LINE("8c", "Maintenance Procedures") +LINE("8S", "Maintenance Procedures") +LINE("8s", "Maintenance Procedures") +LINE("9", "Device Driver Interfaces") +LINE("9E", "Driver Entry Points") +LINE("9e", "Driver Entry Points") +LINE("9F", "Kernel Functions for Drivers") +LINE("9f", "Kernel Functions for Drivers") +LINE("9P", "Kernel Properties for Drivers") +LINE("9p", "Kernel Properties for Drivers") +LINE("9S", "Data Structures for Drivers") +LINE("9s", "Data Structures for Drivers") diff --git a/usr/src/cmd/mandoc/out.c b/usr/src/cmd/mandoc/out.c new file mode 100644 index 0000000000..8dbd68ac11 --- /dev/null +++ b/usr/src/cmd/mandoc/out.c @@ -0,0 +1,303 @@ +/* $Id: out.c,v 1.43 2011/09/20 23:05:49 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "out.h" + +static void tblcalc_data(struct rofftbl *, struct roffcol *, + const struct tbl *, const struct tbl_dat *); +static void tblcalc_literal(struct rofftbl *, struct roffcol *, + const struct tbl_dat *); +static void tblcalc_number(struct rofftbl *, struct roffcol *, + const struct tbl *, const struct tbl_dat *); + +/* + * Convert a `scaling unit' to a consistent form, or fail. Scaling + * units are documented in groff.7, mdoc.7, man.7. + */ +int +a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) +{ + char buf[BUFSIZ], hasd; + int i; + enum roffscale unit; + + if ('\0' == *src) + return(0); + + i = hasd = 0; + + switch (*src) { + case ('+'): + src++; + break; + case ('-'): + buf[i++] = *src++; + break; + default: + break; + } + + if ('\0' == *src) + return(0); + + while (i < BUFSIZ) { + if ( ! isdigit((unsigned char)*src)) { + if ('.' 
 != *src) + break; + else if (hasd) + break; + else + hasd = 1; + } + buf[i++] = *src++; + } + + /* + * Fail if the digits filled the whole buffer, or if more than + * one character follows the number. + */ + if (BUFSIZ == i || (*src && *(src + 1))) + return(0); + + buf[i] = '\0'; + + switch (*src) { + case ('c'): + unit = SCALE_CM; + break; + case ('i'): + unit = SCALE_IN; + break; + case ('P'): + unit = SCALE_PC; + break; + case ('p'): + unit = SCALE_PT; + break; + case ('f'): + unit = SCALE_FS; + break; + case ('v'): + unit = SCALE_VS; + break; + case ('m'): + unit = SCALE_EM; + break; + case ('\0'): + /* + * No unit suffix: def only decides between failure + * (SCALE_MAX) and success; the unit stored is always + * SCALE_BU. NOTE(review): confirm that callers passing + * another default really expect SCALE_BU here. + */ + if (SCALE_MAX == def) + return(0); + unit = SCALE_BU; + break; + case ('u'): + unit = SCALE_BU; + break; + case ('M'): + unit = SCALE_MM; + break; + case ('n'): + unit = SCALE_EN; + break; + default: + return(0); + } + + /* FIXME: do this in the caller. */ + /* Negative values are clamped to zero. */ + if ((dst->scale = atof(buf)) < 0) + dst->scale = 0; + dst->unit = unit; + return(1); +} + +/* + * Calculate the abstract widths and decimal positions of columns in a + * table. This routine allocates the columns structures then runs over + * all rows and cells in the table. The function pointers in "tbl" are + * used for the actual width calculations. + */ +void +tblcalc(struct rofftbl *tbl, const struct tbl_span *sp) +{ + const struct tbl_dat *dp; + const struct tbl_head *hp; + struct roffcol *col; + int spans; + + /* + * Allocate the master column specifiers. These will hold the + * widths and decimal positions for all cells in the column. It + * must be freed and nullified by the caller. + */ + + assert(NULL == tbl->cols); + tbl->cols = mandoc_calloc + ((size_t)sp->tbl->cols, sizeof(struct roffcol)); + + /* Remember the header list now; sp is advanced by the loop below. */ + hp = sp->head; + + for ( ; sp; sp = sp->next) { + if (TBL_SPAN_DATA != sp->pos) + continue; + spans = 1; + /* + * Account for the data cells in the layout, matching it + * to data cells in the data section. + */ + for (dp = sp->first; dp; dp = dp->next) { + /* Do not used spanned cells in the calculation. 
*/ + if (0 < --spans) + continue; + spans = dp->spans; + if (1 < spans) + continue; + assert(dp->layout); + col = &tbl->cols[dp->layout->head->ident]; + tblcalc_data(tbl, col, sp->tbl, dp); + } + } + + /* + * Calculate width of the spanners. These get one space for a + * vertical line, two for a double-vertical line. + */ + + for ( ; hp; hp = hp->next) { + col = &tbl->cols[hp->ident]; + switch (hp->pos) { + case (TBL_HEAD_VERT): + col->width = (*tbl->len)(1, tbl->arg); + break; + case (TBL_HEAD_DVERT): + col->width = (*tbl->len)(2, tbl->arg); + break; + default: + break; + } + } +} + +static void +tblcalc_data(struct rofftbl *tbl, struct roffcol *col, + const struct tbl *tp, const struct tbl_dat *dp) +{ + size_t sz; + + /* Branch down into data sub-types. */ + + switch (dp->layout->pos) { + case (TBL_CELL_HORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_DHORIZ): + sz = (*tbl->len)(1, tbl->arg); + if (col->width < sz) + col->width = sz; + break; + case (TBL_CELL_LONG): + /* FALLTHROUGH */ + case (TBL_CELL_CENTRE): + /* FALLTHROUGH */ + case (TBL_CELL_LEFT): + /* FALLTHROUGH */ + case (TBL_CELL_RIGHT): + tblcalc_literal(tbl, col, dp); + break; + case (TBL_CELL_NUMBER): + tblcalc_number(tbl, col, tp, dp); + break; + case (TBL_CELL_DOWN): + break; + default: + abort(); + /* NOTREACHED */ + } +} + +static void +tblcalc_literal(struct rofftbl *tbl, struct roffcol *col, + const struct tbl_dat *dp) +{ + size_t sz; + const char *str; + + str = dp->string ? dp->string : ""; + sz = (*tbl->slen)(str, tbl->arg); + + if (col->width < sz) + col->width = sz; +} + +static void +tblcalc_number(struct rofftbl *tbl, struct roffcol *col, + const struct tbl *tp, const struct tbl_dat *dp) +{ + int i; + size_t sz, psz, ssz, d; + const char *str; + char *cp; + char buf[2]; + + /* + * First calculate number width and decimal place (last + 1 for + * non-decimal numbers). 
If the stored decimal is subsequent to + * ours, make our size longer by that difference + * (right-"shifting"); similarly, if ours is subsequent the + * stored, then extend the stored size by the difference. + * Finally, re-assign the stored values. + */ + + str = dp->string ? dp->string : ""; + sz = (*tbl->slen)(str, tbl->arg); + + /* FIXME: TBL_DATA_HORIZ et al.? */ + + buf[0] = tp->decimal; + buf[1] = '\0'; + + psz = (*tbl->slen)(buf, tbl->arg); + + if (NULL != (cp = strrchr(str, tp->decimal))) { + buf[1] = '\0'; + for (ssz = 0, i = 0; cp != &str[i]; i++) { + buf[0] = str[i]; + ssz += (*tbl->slen)(buf, tbl->arg); + } + d = ssz + psz; + } else + d = sz + psz; + + /* Adjust the settings for this column. */ + + if (col->decimal > d) { + sz += col->decimal - d; + d = col->decimal; + } else + col->width += d - col->decimal; + + if (sz > col->width) + col->width = sz; + if (d > col->decimal) + col->decimal = d; +} diff --git a/usr/src/cmd/mandoc/out.h b/usr/src/cmd/mandoc/out.h new file mode 100644 index 0000000000..1c18c6c314 --- /dev/null +++ b/usr/src/cmd/mandoc/out.h @@ -0,0 +1,71 @@ +/* $Id: out.h,v 1.21 2011/07/17 15:24:25 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifndef OUT_H +#define OUT_H + +enum roffscale { + SCALE_CM, /* centimeters (c) */ + SCALE_IN, /* inches (i) */ + SCALE_PC, /* pica (P) */ + SCALE_PT, /* points (p) */ + SCALE_EM, /* ems (m) */ + SCALE_MM, /* mini-ems (M) */ + SCALE_EN, /* ens (n) */ + SCALE_BU, /* default horizontal (u) */ + SCALE_VS, /* default vertical (v) */ + SCALE_FS, /* syn. for u (f) */ + SCALE_MAX +}; + +struct roffcol { + size_t width; /* width of cell */ + size_t decimal; /* decimal position in cell */ +}; + +struct roffsu { + enum roffscale unit; + double scale; +}; + +typedef size_t (*tbl_strlen)(const char *, void *); +typedef size_t (*tbl_len)(size_t, void *); + +struct rofftbl { + tbl_strlen slen; /* calculate string length */ + tbl_len len; /* produce width of empty space */ + struct roffcol *cols; /* master column specifiers */ + void *arg; /* passed to slen and len */ +}; + +__BEGIN_DECLS + +#define SCALE_VS_INIT(p, v) \ + do { (p)->unit = SCALE_VS; \ + (p)->scale = (v); } \ + while (/* CONSTCOND */ 0) + +#define SCALE_HS_INIT(p, v) \ + do { (p)->unit = SCALE_BU; \ + (p)->scale = (v); } \ + while (/* CONSTCOND */ 0) + +int a2roffsu(const char *, struct roffsu *, enum roffscale); +void tblcalc(struct rofftbl *tbl, const struct tbl_span *); + +__END_DECLS + +#endif /*!OUT_H*/ diff --git a/usr/src/cmd/mandoc/preconv.c b/usr/src/cmd/mandoc/preconv.c new file mode 100644 index 0000000000..a0b2d6415e --- /dev/null +++ b/usr/src/cmd/mandoc/preconv.c @@ -0,0 +1,528 @@ +/* $Id: preconv.c,v 1.5 2011/07/24 18:15:14 kristaps Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MMAP +#include <sys/stat.h> +#include <sys/mman.h> +#endif + +#include <assert.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* + * The read_whole_file() and resize_buf() functions are copied from + * read.c, including all dependency code (MAP_FILE, etc.). + */ + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +enum enc { + ENC_UTF_8, /* UTF-8 */ + ENC_US_ASCII, /* US-ASCII */ + ENC_LATIN_1, /* Latin-1 */ + ENC__MAX +}; + +struct buf { + char *buf; /* binary input buffer */ + size_t sz; /* size of binary buffer */ + size_t offs; /* starting buffer offset */ +}; + +struct encode { + const char *name; + int (*conv)(const struct buf *); +}; + +static int cue_enc(const struct buf *, size_t *, enum enc *); +static int conv_latin_1(const struct buf *); +static int conv_us_ascii(const struct buf *); +static int conv_utf_8(const struct buf *); +static int read_whole_file(const char *, int, + struct buf *, int *); +static void resize_buf(struct buf *, size_t); +static void usage(void); + +static const struct encode encs[ENC__MAX] = { + { "utf-8", conv_utf_8 }, /* ENC_UTF_8 */ + { "us-ascii", conv_us_ascii }, /* ENC_US_ASCII */ + { "latin-1", conv_latin_1 }, /* ENC_LATIN_1 */ +}; + +static const char *progname; + +static void +usage(void) +{ + + fprintf(stderr, "usage: %s " + "[-D enc] " + "[-e ENC] " + "[file]\n", progname); +} + 
+static int +conv_latin_1(const struct buf *b) +{ + size_t i; + unsigned char cu; + const char *cp; + + cp = b->buf + (int)b->offs; + + /* + * Latin-1 falls into the first 256 code-points of Unicode, so + * there's no need for any sort of translation. Just make the + * 8-bit characters use the Unicode escape. + * Note that binary values 128 < v < 160 are passed through + * unmodified to mandoc. + */ + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + cu < 160U ? putchar(cu) : printf("\\[u%.4X]", cu); + } + + return(1); +} + +static int +conv_us_ascii(const struct buf *b) +{ + + /* + * US-ASCII has no conversion since it falls into the first 128 + * bytes of Unicode. Start at the buffer offset, as the other + * converters do, so that skipped leading bytes are not emitted. + */ + + if (b->offs < b->sz) + fwrite(b->buf + b->offs, 1, b->sz - b->offs, stdout); + return(1); +} + +static int +conv_utf_8(const struct buf *b) +{ + int state; + unsigned int accum; + size_t i; + unsigned char cu; + const char *cp; + + cp = b->buf + (int)b->offs; + state = 0; + accum = 0U; + + for (i = b->offs; i < b->sz; i++) { + cu = (unsigned char)*cp++; + if (state) { + if ( ! (cu & 128) || (cu & 64)) { + /* Bad sequence header. */ + return(0); + } + + /* Accept only legitimate bit patterns. */ + + if (cu > 191 || cu < 128) { + /* Bad in-sequence bits. */ + return(0); + } + + accum |= (cu & 63) << --state * 6; + + /* + * Accum is assembled with shifts on an integer + * value, so it already holds the code point on + * every architecture; no byte swapping is + * needed (or correct) on big-endian machines. + */ + + if (0 == state) { + accum < 128U ? putchar(accum) : + printf("\\[u%.4X]", accum); + accum = 0U; + } + } else if (cu & (1 << 7)) { + /* + * Entering a UTF-8 state: if we encounter a + * UTF-8 bitmask, calculate the expected UTF-8 + * state from it. 
+ */ + for (state = 0; state < 7; state++) + if ( ! (cu & (1 << (7 - state)))) + break; + + /* Accept only legitimate bit patterns. */ + + switch (state) { + case (4): + if (cu <= 244 && cu >= 240) { + accum = (cu & 7) << 18; + break; + } + /* Bad 4-sequence start bits. */ + return(0); + case (3): + if (cu <= 239 && cu >= 224) { + accum = (cu & 15) << 12; + break; + } + /* Bad 3-sequence start bits. */ + return(0); + case (2): + if (cu <= 223 && cu >= 194) { + accum = (cu & 31) << 6; + break; + } + /* Bad 2-sequence start bits. */ + return(0); + default: + /* Bad sequence bit mask. */ + return(0); + } + state--; + } else + putchar(cu); + } + + if (0 != state) { + /* Bad trailing bits. */ + return(0); + } + + return(1); +} + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial / 2 ? + 2 * buf->sz : initial; + + buf->buf = realloc(buf->buf, buf->sz); + if (NULL == buf->buf) { + perror(NULL); + exit(EXIT_FAILURE); + } +} + +static int +read_whole_file(const char *f, int fd, + struct buf *fb, int *with_mmap) +{ + size_t off; + ssize_t ssz; + +#ifdef HAVE_MMAP + struct stat st; + if (-1 == fstat(fd, &st)) { + perror(f); + return(0); + } + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (S_ISREG(st.st_mode) && st.st_size >= (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + return(0); + } + + if (S_ISREG(st.st_mode)) { + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, + MAP_FILE|MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return(1); + } +#endif + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. 
+ */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz && fb->sz == (1U << 31)) { + fprintf(stderr, "%s: input too large\n", f); + break; + } + + if (off == fb->sz) + resize_buf(fb, 65536); + + ssz = read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return(1); + } + if (ssz == -1) { + perror(f); + break; + } + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + return(0); +} + +static int +cue_enc(const struct buf *b, size_t *offs, enum enc *enc) +{ + const char *ln, *eoln, *eoph; + size_t sz, phsz, nsz; + int i; + + ln = b->buf + (int)*offs; + sz = b->sz - *offs; + + /* Look for the end-of-line. */ + + if (NULL == (eoln = memchr(ln, '\n', sz))) + return(-1); + + /* Set next-line marker. */ + + *offs = (size_t)((eoln + 1) - b->buf); + + /* Check if we have the correct header/trailer. */ + + if ((sz = (size_t)(eoln - ln)) < 10 || + memcmp(ln, ".\\\" -*-", 7) || + memcmp(eoln - 3, "-*-", 3)) + return(0); + + /* Move after the header and adjust for the trailer. */ + + ln += 7; + sz -= 10; + + while (sz > 0) { + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Find the end-of-phrase marker (or eoln). */ + + if (NULL == (eoph = memchr(ln, ';', sz))) + eoph = eoln - 3; + else + eoph++; + + /* Only account for the "coding" phrase. */ + + if ((phsz = (size_t)(eoph - ln)) < 7 || + strncasecmp(ln, "coding:", 7)) { + sz -= phsz; + ln += phsz; + continue; + } + + sz -= 7; + ln += 7; + + while (sz > 0 && ' ' == *ln) { + ln++; + sz--; + } + if (0 == sz) + break; + + /* Check us against known encodings. */ + + for (i = 0; i < (int)ENC__MAX; i++) { + nsz = strlen(encs[i].name); + if (phsz < nsz) + continue; + if (strncasecmp(ln, encs[i].name, nsz)) + continue; + + *enc = (enum enc)i; + return(1); + } + + /* Unknown encoding. 
*/ + + *enc = ENC__MAX; + return(1); + } + + return(0); +} + +int +main(int argc, char *argv[]) +{ + int i, ch, map, fd, rc; + struct buf b; + const char *fn; + enum enc enc, def; + unsigned char bom[3] = { 0xEF, 0xBB, 0xBF }; + size_t offs; + extern int optind; + extern char *optarg; + + progname = strrchr(argv[0], '/'); + if (progname == NULL) + progname = argv[0]; + else + ++progname; + + fn = "<stdin>"; + fd = STDIN_FILENO; + rc = EXIT_FAILURE; + enc = def = ENC__MAX; + map = 0; + + memset(&b, 0, sizeof(struct buf)); + + while (-1 != (ch = getopt(argc, argv, "D:e:rdvh"))) + switch (ch) { + case ('D'): + /* FALLTHROUGH */ + case ('e'): + for (i = 0; i < (int)ENC__MAX; i++) { + if (strcasecmp(optarg, encs[i].name)) + continue; + break; + } + if (i < (int)ENC__MAX) { + if ('D' == ch) + def = (enum enc)i; + else + enc = (enum enc)i; + break; + } + + fprintf(stderr, "%s: Bad encoding\n", optarg); + return(EXIT_FAILURE); + case ('r'): + /* FALLTHROUGH */ + case ('d'): + /* FALLTHROUGH */ + case ('v'): + /* Compatibility with GNU preconv. */ + break; + case ('h'): + /* Compatibility with GNU preconv. */ + /* FALLTHROUGH */ + default: + usage(); + return(EXIT_FAILURE); + } + + argc -= optind; + argv += optind; + + /* + * Open and read the first argument on the command-line. + * If we don't have one, we default to stdin. + */ + + if (argc > 0) { + fn = *argv; + fd = open(fn, O_RDONLY, 0); + if (-1 == fd) { + perror(fn); + return(EXIT_FAILURE); + } + } + + if ( ! read_whole_file(fn, fd, &b, &map)) + goto out; + + /* Try to read the UTF-8 BOM. */ + + if (ENC__MAX == enc) + if (b.sz > 3 && 0 == memcmp(b.buf, bom, 3)) { + b.offs = 3; + enc = ENC_UTF_8; + } + + /* Try reading from the "-*-" cue. */ + + if (ENC__MAX == enc) { + offs = b.offs; + ch = cue_enc(&b, &offs, &enc); + if (0 == ch) + ch = cue_enc(&b, &offs, &enc); + } + + /* + * No encoding has been detected. + * Thus, we either fall into our default encoder, if specified, + * or use Latin-1 if all else fails. 
+ */ + + if (ENC__MAX == enc) + enc = ENC__MAX == def ? ENC_LATIN_1 : def; + + if ( ! (*encs[(int)enc].conv)(&b)) { + fprintf(stderr, "%s: Bad encoding\n", fn); + goto out; + } + + rc = EXIT_SUCCESS; +out: +#ifdef HAVE_MMAP + if (map) + munmap(b.buf, b.sz); + else +#endif + free(b.buf); + + if (fd > STDIN_FILENO) + close(fd); + + return(rc); +} diff --git a/usr/src/cmd/mandoc/predefs.in b/usr/src/cmd/mandoc/predefs.in new file mode 100644 index 0000000000..70074bb617 --- /dev/null +++ b/usr/src/cmd/mandoc/predefs.in @@ -0,0 +1,65 @@ +/* $Id: predefs.in,v 1.3 2011/07/31 11:36:49 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * The predefined-string translation tables. Each corresponds to a + * predefined strings from (e.g.) tmac/mdoc/doc-nroff. The left-hand + * side corresponds to the input sequence (\*x, \*(xx and so on). The + * right-hand side is what's produced by libroff. + * + * XXX - C-escape strings! + * XXX - update PREDEF_MAX in roff.c if adding more! 
+ */ + +PREDEF("Am", "&") +PREDEF("Ba", "|") +PREDEF("Ge", "\\(>=") +PREDEF("Gt", ">") +PREDEF("If", "infinity") +PREDEF("Le", "\\(<=") +PREDEF("Lq", "\\(lq") +PREDEF("Lt", "<") +PREDEF("Na", "NaN") +PREDEF("Ne", "\\(!=") +PREDEF("Pi", "pi") +PREDEF("Pm", "\\(+-") +PREDEF("Rq", "\\(rq") +PREDEF("left-bracket", "[") +PREDEF("left-parenthesis", "(") +PREDEF("lp", "(") +PREDEF("left-singlequote", "\\(oq") +PREDEF("q", "\\(dq") +PREDEF("quote-left", "\\(oq") +PREDEF("quote-right", "\\(cq") +PREDEF("R", "\\(rg") +PREDEF("right-bracket", "]") +PREDEF("right-parenthesis", ")") +PREDEF("rp", ")") +PREDEF("right-singlequote", "\\(cq") +PREDEF("Tm", "(Tm)") +PREDEF("Px", "POSIX") +PREDEF("Ai", "ANSI") +PREDEF("\'", "\\\'") +PREDEF("aa", "\\(aa") +PREDEF("ga", "\\(ga") +PREDEF("`", "\\`") +PREDEF("lq", "\\(lq") +PREDEF("rq", "\\(rq") +PREDEF("ua", "\\(ua") +PREDEF("va", "\\(va") +PREDEF("<=", "\\(<=") +PREDEF(">=", "\\(>=") diff --git a/usr/src/cmd/mandoc/read.c b/usr/src/cmd/mandoc/read.c new file mode 100644 index 0000000000..5b14e357d2 --- /dev/null +++ b/usr/src/cmd/mandoc/read.c @@ -0,0 +1,846 @@ +/* $Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MMAP +# include <sys/stat.h> +# include <sys/mman.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "mdoc.h" +#include "man.h" +#include "main.h" + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +#define REPARSE_LIMIT 1000 + +struct buf { + char *buf; /* binary input buffer */ + size_t sz; /* size of binary buffer */ +}; + +struct mparse { + enum mandoclevel file_status; /* status of current parse */ + enum mandoclevel wlevel; /* ignore messages below this */ + int line; /* line number in the file */ + enum mparset inttype; /* which parser to use */ + struct man *pman; /* persistent man parser */ + struct mdoc *pmdoc; /* persistent mdoc parser */ + struct man *man; /* man parser */ + struct mdoc *mdoc; /* mdoc parser */ + struct roff *roff; /* roff parser (!NULL) */ + int reparse_count; /* finite interp. 
stack */ + mandocmsg mmsg; /* warning/error message handler */ + void *arg; /* argument to mmsg */ + const char *file; + struct buf *secondary; +}; + +static void resize_buf(struct buf *, size_t); +static void mparse_buf_r(struct mparse *, struct buf, int); +static void mparse_readfd_r(struct mparse *, int, const char *, int); +static void pset(const char *, int, struct mparse *); +static int read_whole_file(const char *, int, struct buf *, int *); +static void mparse_end(struct mparse *); + +static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { + MANDOCERR_OK, + MANDOCERR_WARNING, + MANDOCERR_WARNING, + MANDOCERR_ERROR, + MANDOCERR_FATAL, + MANDOCERR_MAX, + MANDOCERR_MAX +}; + +static const char * const mandocerrs[MANDOCERR_MAX] = { + "ok", + + "generic warning", + + /* related to the prologue */ + "no title in document", + "document title should be all caps", + "unknown manual section", + "date missing, using today's date", + "cannot parse date, using it verbatim", + "prologue macros out of order", + "duplicate prologue macro", + "macro not allowed in prologue", + "macro not allowed in body", + + /* related to document structure */ + ".so is fragile, better use ln(1)", + "NAME section must come first", + "bad NAME section contents", + "manual name not yet set", + "sections out of conventional order", + "duplicate section name", + "section not in conventional manual section", + + /* related to macros and nesting */ + "skipping obsolete macro", + "skipping paragraph macro", + "skipping no-space macro", + "blocks badly nested", + "child violates parent syntax", + "nested displays are not portable", + "already in literal mode", + "line scope broken", + + /* related to missing macro arguments */ + "skipping empty macro", + "argument count wrong", + "missing display type", + "list type must come first", + "tag lists require a width argument", + "missing font type", + "skipping end of block that is not open", + + /* related to bad macro arguments */ + "skipping 
argument", + "duplicate argument", + "duplicate display type", + "duplicate list type", + "unknown AT&T UNIX version", + "bad Boolean value", + "unknown font", + "unknown standard specifier", + "bad width argument", + + /* related to plain text */ + "blank line in non-literal context", + "tab in non-literal context", + "end of line whitespace", + "bad comment style", + "bad escape sequence", + "unterminated quoted string", + + /* related to equations */ + "unexpected literal in equation", + + "generic error", + + /* related to equations */ + "unexpected equation scope closure", + "equation scope open on exit", + "overlapping equation scopes", + "unexpected end of equation", + "equation syntax error", + + /* related to tables */ + "bad table syntax", + "bad table option", + "bad table layout", + "no table layout cells specified", + "no table data cells specified", + "ignore data in cell", + "data block still open", + "ignoring extra data cells", + + "input stack limit exceeded, infinite loop?", + "skipping bad character", + "escaped character not allowed in a name", + "skipping text before the first section header", + "skipping unknown macro", + "NOT IMPLEMENTED, please use groff: skipping request", + "argument count wrong", + "skipping end of block that is not open", + "missing end of block", + "scope open on exit", + "uname(3) system call failed", + "macro requires line argument(s)", + "macro requires body argument(s)", + "macro requires argument(s)", + "missing list type", + "line argument(s) will be lost", + "body argument(s) will be lost", + + "generic fatal error", + + "not a manual", + "column syntax is inconsistent", + "NOT IMPLEMENTED: .Bd -file", + "argument count wrong, violates syntax", + "child violates parent syntax", + "argument count wrong, violates syntax", + "NOT IMPLEMENTED: .so with absolute path or \"..\"", + "no document body", + "no document prologue", + "static buffer exhausted", +}; + +static const char * const mandoclevels[MANDOCLEVEL_MAX] 
= { + "SUCCESS", + "RESERVED", + "WARNING", + "ERROR", + "FATAL", + "BADARG", + "SYSERR" +}; + +static void +resize_buf(struct buf *buf, size_t initial) +{ + + buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; + buf->buf = mandoc_realloc(buf->buf, buf->sz); +} + +static void +pset(const char *buf, int pos, struct mparse *curp) +{ + int i; + + /* + * Try to intuit which kind of manual parser should be used. If + * passed in by command-line (-man, -mdoc), then use that + * explicitly. If passed as -mandoc, then try to guess from the + * line: either skip dot-lines, use -mdoc when finding `.Dt', or + * default to -man, which is more lenient. + * + * Separate out pmdoc/pman from mdoc/man: the first persists + * through all parsers, while the latter is used per-parse. + */ + + if ('.' == buf[0] || '\'' == buf[0]) { + for (i = 1; buf[i]; i++) + if (' ' != buf[i] && '\t' != buf[i]) + break; + if ('\0' == buf[i]) + return; + } + + switch (curp->inttype) { + case (MPARSE_MDOC): + if (NULL == curp->pmdoc) + curp->pmdoc = mdoc_alloc(curp->roff, curp); + assert(curp->pmdoc); + curp->mdoc = curp->pmdoc; + return; + case (MPARSE_MAN): + if (NULL == curp->pman) + curp->pman = man_alloc(curp->roff, curp); + assert(curp->pman); + curp->man = curp->pman; + return; + default: + break; + } + + if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { + if (NULL == curp->pmdoc) + curp->pmdoc = mdoc_alloc(curp->roff, curp); + assert(curp->pmdoc); + curp->mdoc = curp->pmdoc; + return; + } + + if (NULL == curp->pman) + curp->pman = man_alloc(curp->roff, curp); + assert(curp->pman); + curp->man = curp->pman; +} + +/* + * Main parse routine for an opened file. This is called for each + * opened file and simply loops around the full input file, possibly + * nesting (i.e., with `so'). 
+ */ +static void +mparse_buf_r(struct mparse *curp, struct buf blk, int start) +{ + const struct tbl_span *span; + struct buf ln; + enum rofferr rr; + int i, of, rc; + int pos; /* byte number in the ln buffer */ + int lnn; /* line number in the real file */ + unsigned char c; + + memset(&ln, 0, sizeof(struct buf)); + + lnn = curp->line; + pos = 0; + + for (i = 0; i < (int)blk.sz; ) { + if (0 == pos && '\0' == blk.buf[i]) + break; + + if (start) { + curp->line = lnn; + curp->reparse_count = 0; + } + + while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { + + /* + * When finding an unescaped newline character, + * leave the character loop to process the line. + * Skip a preceding carriage return, if any. + */ + + if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && + '\n' == blk.buf[i + 1]) + ++i; + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + + /* + * Warn about bogus characters. If you're using + * non-ASCII encoding, you're screwing your + * readers. Since I'd rather this not happen, + * I'll be helpful and replace these characters + * with "?", so we don't display gibberish. + * Note to manual writers: use special characters. + */ + + c = (unsigned char) blk.buf[i]; + + if ( ! (isascii(c) && + (isgraph(c) || isblank(c)))) { + mandoc_msg(MANDOCERR_BADCHAR, curp, + curp->line, pos, NULL); + i++; + if (pos >= (int)ln.sz) + resize_buf(&ln, 256); + ln.buf[pos++] = '?'; + continue; + } + + /* Trailing backslash = a plain char. */ + + if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { + if (pos >= (int)ln.sz) + resize_buf(&ln, 256); + ln.buf[pos++] = blk.buf[i++]; + continue; + } + + /* + * Found escape and at least one other character. + * When it's a newline character, skip it. + * When there is a carriage return in between, + * skip that one as well. 
+ */ + + if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && + '\n' == blk.buf[i + 2]) + ++i; + if ('\n' == blk.buf[i + 1]) { + i += 2; + ++lnn; + continue; + } + + if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { + i += 2; + /* Comment, skip to end of line */ + for (; i < (int)blk.sz; ++i) { + if ('\n' == blk.buf[i]) { + ++i; + ++lnn; + break; + } + } + + /* Backout trailing whitespaces */ + for (; pos > 0; --pos) { + if (ln.buf[pos - 1] != ' ') + break; + if (pos > 2 && ln.buf[pos - 2] == '\\') + break; + } + break; + } + + /* Some other escape sequence, copy & cont. */ + + if (pos + 1 >= (int)ln.sz) + resize_buf(&ln, 256); + + ln.buf[pos++] = blk.buf[i++]; + ln.buf[pos++] = blk.buf[i++]; + } + + if (pos >= (int)ln.sz) + resize_buf(&ln, 256); + + ln.buf[pos] = '\0'; + + /* + * A significant amount of complexity is contained by + * the roff preprocessor. It's line-oriented but can be + * expressed on one line, so we need at times to + * readjust our starting point and re-run it. The roff + * preprocessor can also readjust the buffers with new + * data, so we pass them in wholesale. + */ + + of = 0; + + /* + * Maintain a lookaside buffer of all parsed lines. We + * only do this if mparse_keep() has been invoked (the + * buffer may be accessed with mparse_getkeep()). 
+ */ + + if (curp->secondary) { + curp->secondary->buf = + mandoc_realloc + (curp->secondary->buf, + curp->secondary->sz + pos + 2); + memcpy(curp->secondary->buf + + curp->secondary->sz, + ln.buf, pos); + curp->secondary->sz += pos; + curp->secondary->buf + [curp->secondary->sz] = '\n'; + curp->secondary->sz++; + curp->secondary->buf + [curp->secondary->sz] = '\0'; + } +rerun: + rr = roff_parseln + (curp->roff, curp->line, + &ln.buf, &ln.sz, of, &of); + + switch (rr) { + case (ROFF_REPARSE): + if (REPARSE_LIMIT >= ++curp->reparse_count) + mparse_buf_r(curp, ln, 0); + else + mandoc_msg(MANDOCERR_ROFFLOOP, curp, + curp->line, pos, NULL); + pos = 0; + continue; + case (ROFF_APPEND): + pos = (int)strlen(ln.buf); + continue; + case (ROFF_RERUN): + goto rerun; + case (ROFF_IGN): + pos = 0; + continue; + case (ROFF_ERR): + assert(MANDOCLEVEL_FATAL <= curp->file_status); + break; + case (ROFF_SO): + /* + * We remove `so' clauses from our lookaside + * buffer because we're going to descend into + * the file recursively. + */ + if (curp->secondary) + curp->secondary->sz -= pos + 1; + mparse_readfd_r(curp, -1, ln.buf + of, 1); + if (MANDOCLEVEL_FATAL <= curp->file_status) + break; + pos = 0; + continue; + default: + break; + } + + /* + * If we encounter errors in the recursive parse, make + * sure we don't continue parsing. + */ + + if (MANDOCLEVEL_FATAL <= curp->file_status) + break; + + /* + * If input parsers have not been allocated, do so now. + * We keep these instanced between parsers, but set them + * locally per parse routine since we can use different + * parsers with each one. + */ + + if ( ! (curp->man || curp->mdoc)) + pset(ln.buf + of, pos - of, curp); + + /* + * Lastly, push down into the parsers themselves. One + * of these will have already been set in the pset() + * routine. + * If libroff returns ROFF_TBL, then add it to the + * currently open parse. 
Since we only get here if + * there does exist data (see tbl_data.c), we're + * guaranteed that something's been allocated. + * Do the same for ROFF_EQN. + */ + + rc = -1; + + if (ROFF_TBL == rr) + while (NULL != (span = roff_span(curp->roff))) { + rc = curp->man ? + man_addspan(curp->man, span) : + mdoc_addspan(curp->mdoc, span); + if (0 == rc) + break; + } + else if (ROFF_EQN == rr) + rc = curp->mdoc ? + mdoc_addeqn(curp->mdoc, + roff_eqn(curp->roff)) : + man_addeqn(curp->man, + roff_eqn(curp->roff)); + else if (curp->man || curp->mdoc) + rc = curp->man ? + man_parseln(curp->man, + curp->line, ln.buf, of) : + mdoc_parseln(curp->mdoc, + curp->line, ln.buf, of); + + if (0 == rc) { + assert(MANDOCLEVEL_FATAL <= curp->file_status); + break; + } + + /* Temporary buffers typically are not full. */ + + if (0 == start && '\0' == blk.buf[i]) + break; + + /* Start the next input line. */ + + pos = 0; + } + + free(ln.buf); +} + +static int +read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) +{ + size_t off; + ssize_t ssz; + +#ifdef HAVE_MMAP + struct stat st; + if (-1 == fstat(fd, &st)) { + perror(file); + return(0); + } + + /* + * If we're a regular file, try just reading in the whole entry + * via mmap(). This is faster than reading it into blocks, and + * since each file is only a few bytes to begin with, I'm not + * concerned that this is going to tank any machines. + */ + + if (S_ISREG(st.st_mode)) { + if (st.st_size >= (1U << 31)) { + fprintf(stderr, "%s: input too large\n", file); + return(0); + } + *with_mmap = 1; + fb->sz = (size_t)st.st_size; + fb->buf = mmap(NULL, fb->sz, PROT_READ, + MAP_FILE|MAP_SHARED, fd, 0); + if (fb->buf != MAP_FAILED) + return(1); + } +#endif + + /* + * If this isn't a regular file (like, say, stdin), then we must + * go the old way and just read things in bit by bit. 
+ */ + + *with_mmap = 0; + off = 0; + fb->sz = 0; + fb->buf = NULL; + for (;;) { + if (off == fb->sz) { + if (fb->sz == (1U << 31)) { + fprintf(stderr, "%s: input too large\n", file); + break; + } + resize_buf(fb, 65536); + } + ssz = read(fd, fb->buf + (int)off, fb->sz - off); + if (ssz == 0) { + fb->sz = off; + return(1); + } + if (ssz == -1) { + perror(file); + break; + } + off += (size_t)ssz; + } + + free(fb->buf); + fb->buf = NULL; + return(0); +} + +static void +mparse_end(struct mparse *curp) +{ + + if (MANDOCLEVEL_FATAL <= curp->file_status) + return; + + if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { + assert(MANDOCLEVEL_FATAL <= curp->file_status); + return; + } + + if (curp->man && ! man_endparse(curp->man)) { + assert(MANDOCLEVEL_FATAL <= curp->file_status); + return; + } + + if ( ! (curp->man || curp->mdoc)) { + mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL); + curp->file_status = MANDOCLEVEL_FATAL; + return; + } + + roff_endparse(curp->roff); +} + +static void +mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file, + int re) +{ + const char *svfile; + + /* Line number is per-file. */ + svfile = curp->file; + curp->file = file; + curp->line = 1; + + mparse_buf_r(curp, blk, 1); + + if (0 == re && MANDOCLEVEL_FATAL > curp->file_status) + mparse_end(curp); + + curp->file = svfile; +} + +enum mandoclevel +mparse_readmem(struct mparse *curp, const void *buf, size_t len, + const char *file) +{ + struct buf blk; + + blk.buf = UNCONST(buf); + blk.sz = len; + + mparse_parse_buffer(curp, blk, file, 0); + return(curp->file_status); +} + +static void +mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re) +{ + struct buf blk; + int with_mmap; + + if (-1 == fd) + if (-1 == (fd = open(file, O_RDONLY, 0))) { + perror(file); + curp->file_status = MANDOCLEVEL_SYSERR; + return; + } + /* + * Run for each opened file; may be called more than once for + * each full parse sequence if the opened file is nested (i.e., + * from `so'). 
Simply sucks in the whole file and moves into + * the parse phase for the file. + */ + + if ( ! read_whole_file(file, fd, &blk, &with_mmap)) { + curp->file_status = MANDOCLEVEL_SYSERR; + return; + } + + mparse_parse_buffer(curp, blk, file, re); + +#ifdef HAVE_MMAP + if (with_mmap) + munmap(blk.buf, blk.sz); + else +#endif + free(blk.buf); + + if (STDIN_FILENO != fd && -1 == close(fd)) + perror(file); +} + +enum mandoclevel +mparse_readfd(struct mparse *curp, int fd, const char *file) +{ + + mparse_readfd_r(curp, fd, file, 0); + return(curp->file_status); +} + +struct mparse * +mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg) +{ + struct mparse *curp; + + assert(wlevel <= MANDOCLEVEL_FATAL); + + curp = mandoc_calloc(1, sizeof(struct mparse)); + + curp->wlevel = wlevel; + curp->mmsg = mmsg; + curp->arg = arg; + curp->inttype = inttype; + + curp->roff = roff_alloc(curp); + return(curp); +} + +void +mparse_reset(struct mparse *curp) +{ + + roff_reset(curp->roff); + + if (curp->mdoc) + mdoc_reset(curp->mdoc); + if (curp->man) + man_reset(curp->man); + if (curp->secondary) + curp->secondary->sz = 0; + + curp->file_status = MANDOCLEVEL_OK; + curp->mdoc = NULL; + curp->man = NULL; +} + +void +mparse_free(struct mparse *curp) +{ + + if (curp->pmdoc) + mdoc_free(curp->pmdoc); + if (curp->pman) + man_free(curp->pman); + if (curp->roff) + roff_free(curp->roff); + if (curp->secondary) + free(curp->secondary->buf); + + free(curp->secondary); + free(curp); +} + +void +mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man) +{ + + if (mdoc) + *mdoc = curp->mdoc; + if (man) + *man = curp->man; +} + +void +mandoc_vmsg(enum mandocerr t, struct mparse *m, + int ln, int pos, const char *fmt, ...) 
+{ + char buf[256]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf) - 1, fmt, ap); + va_end(ap); + + mandoc_msg(t, m, ln, pos, buf); +} + +void +mandoc_msg(enum mandocerr er, struct mparse *m, + int ln, int col, const char *msg) +{ + enum mandoclevel level; + + level = MANDOCLEVEL_FATAL; + while (er < mandoclimits[level]) + level--; + + if (level < m->wlevel) + return; + + if (m->mmsg) + (*m->mmsg)(er, level, m->file, ln, col, msg); + + if (m->file_status < level) + m->file_status = level; +} + +const char * +mparse_strerror(enum mandocerr er) +{ + + return(mandocerrs[er]); +} + +const char * +mparse_strlevel(enum mandoclevel lvl) +{ + return(mandoclevels[lvl]); +} + +void +mparse_keep(struct mparse *p) +{ + + assert(NULL == p->secondary); + p->secondary = mandoc_calloc(1, sizeof(struct buf)); +} + +const char * +mparse_getkeep(const struct mparse *p) +{ + + assert(p->secondary); + return(p->secondary->sz ? p->secondary->buf : NULL); +} diff --git a/usr/src/cmd/mandoc/roff.c b/usr/src/cmd/mandoc/roff.c new file mode 100644 index 0000000000..b479cc298c --- /dev/null +++ b/usr/src/cmd/mandoc/roff.c @@ -0,0 +1,1768 @@ +/* $Id: roff.c,v 1.172 2011/10/24 21:41:45 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "libroff.h" +#include "libmandoc.h" + +/* Maximum number of nested if-else conditionals. */ +#define RSTACK_MAX 128 + +/* Maximum number of string expansions per line, to break infinite loops. */ +#define EXPAND_LIMIT 1000 + +enum rofft { + ROFF_ad, + ROFF_am, + ROFF_ami, + ROFF_am1, + ROFF_de, + ROFF_dei, + ROFF_de1, + ROFF_ds, + ROFF_el, + ROFF_hy, + ROFF_ie, + ROFF_if, + ROFF_ig, + ROFF_it, + ROFF_ne, + ROFF_nh, + ROFF_nr, + ROFF_ns, + ROFF_ps, + ROFF_rm, + ROFF_so, + ROFF_ta, + ROFF_tr, + ROFF_TS, + ROFF_TE, + ROFF_T_, + ROFF_EQ, + ROFF_EN, + ROFF_cblock, + ROFF_ccond, + ROFF_USERDEF, + ROFF_MAX +}; + +enum roffrule { + ROFFRULE_ALLOW, + ROFFRULE_DENY +}; + +/* + * A single register entity. If "set" is zero, the value of the + * register should be the default one, which is per-register. + * Registers are assumed to be unsigned ints for now. + */ +struct reg { + int set; /* whether set or not */ + unsigned int u; /* unsigned integer */ +}; + +/* + * An incredibly-simple string buffer. + */ +struct roffstr { + char *p; /* nil-terminated buffer */ + size_t sz; /* saved strlen(p) */ +}; + +/* + * A key-value roffstr pair as part of a singly-linked list. 
+ */ +struct roffkv { + struct roffstr key; + struct roffstr val; + struct roffkv *next; /* next in list */ +}; + +struct roff { + struct mparse *parse; /* parse point */ + struct roffnode *last; /* leaf of stack */ + enum roffrule rstack[RSTACK_MAX]; /* stack of !`ie' rules */ + int rstackpos; /* position in rstack */ + struct reg regs[REG__MAX]; + struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ + struct roffstr *xtab; /* single-byte trans table (`tr') */ + const char *current_string; /* value of last called user macro */ + struct tbl_node *first_tbl; /* first table parsed */ + struct tbl_node *last_tbl; /* last table parsed */ + struct tbl_node *tbl; /* current table being parsed */ + struct eqn_node *last_eqn; /* last equation parsed */ + struct eqn_node *first_eqn; /* first equation parsed */ + struct eqn_node *eqn; /* current equation being parsed */ +}; + +struct roffnode { + enum rofft tok; /* type of node */ + struct roffnode *parent; /* up one in stack */ + int line; /* parse line */ + int col; /* parse col */ + char *name; /* node name, e.g. 
macro name */ + char *end; /* end-rules: custom token */ + int endspan; /* end-rules: next-line or infty */ + enum roffrule rule; /* current evaluation rule */ +}; + +#define ROFF_ARGS struct roff *r, /* parse ctx */ \ + enum rofft tok, /* tok of macro */ \ + char **bufp, /* input buffer */ \ + size_t *szp, /* size of input buffer */ \ + int ln, /* parse line */ \ + int ppos, /* original pos in buffer */ \ + int pos, /* current pos in buffer */ \ + int *offs /* reset offset of buffer data */ + +typedef enum rofferr (*roffproc)(ROFF_ARGS); + +struct roffmac { + const char *name; /* macro name */ + roffproc proc; /* process new macro */ + roffproc text; /* process as child text of macro */ + roffproc sub; /* process as child of macro */ + int flags; +#define ROFFMAC_STRUCT (1 << 0) /* always interpret */ + struct roffmac *next; +}; + +struct predef { + const char *name; /* predefined input name */ + const char *str; /* replacement symbol */ +}; + +#define PREDEF(__name, __str) \ + { (__name), (__str) }, + +static enum rofft roffhash_find(const char *, size_t); +static void roffhash_init(void); +static void roffnode_cleanscope(struct roff *); +static void roffnode_pop(struct roff *); +static void roffnode_push(struct roff *, enum rofft, + const char *, int, int); +static enum rofferr roff_block(ROFF_ARGS); +static enum rofferr roff_block_text(ROFF_ARGS); +static enum rofferr roff_block_sub(ROFF_ARGS); +static enum rofferr roff_cblock(ROFF_ARGS); +static enum rofferr roff_ccond(ROFF_ARGS); +static enum rofferr roff_cond(ROFF_ARGS); +static enum rofferr roff_cond_text(ROFF_ARGS); +static enum rofferr roff_cond_sub(ROFF_ARGS); +static enum rofferr roff_ds(ROFF_ARGS); +static enum roffrule roff_evalcond(const char *, int *); +static void roff_free1(struct roff *); +static void roff_freestr(struct roffkv *); +static char *roff_getname(struct roff *, char **, int, int); +static const char *roff_getstrn(const struct roff *, + const char *, size_t); +static enum rofferr 
roff_line_ignore(ROFF_ARGS); +static enum rofferr roff_nr(ROFF_ARGS); +static void roff_openeqn(struct roff *, const char *, + int, int, const char *); +static enum rofft roff_parse(struct roff *, const char *, int *); +static enum rofferr roff_parsetext(char *); +static enum rofferr roff_res(struct roff *, + char **, size_t *, int, int); +static enum rofferr roff_rm(ROFF_ARGS); +static void roff_setstr(struct roff *, + const char *, const char *, int); +static void roff_setstrn(struct roffkv **, const char *, + size_t, const char *, size_t, int); +static enum rofferr roff_so(ROFF_ARGS); +static enum rofferr roff_tr(ROFF_ARGS); +static enum rofferr roff_TE(ROFF_ARGS); +static enum rofferr roff_TS(ROFF_ARGS); +static enum rofferr roff_EQ(ROFF_ARGS); +static enum rofferr roff_EN(ROFF_ARGS); +static enum rofferr roff_T_(ROFF_ARGS); +static enum rofferr roff_userdef(ROFF_ARGS); + +/* See roffhash_find() */ + +#define ASCII_HI 126 +#define ASCII_LO 33 +#define HASHWIDTH (ASCII_HI - ASCII_LO + 1) + +static struct roffmac *hash[HASHWIDTH]; + +static struct roffmac roffs[ROFF_MAX] = { + { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, + { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "ds", roff_ds, NULL, NULL, 0, NULL }, + { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, + { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, + { "it", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ne", 
roff_line_ignore, NULL, NULL, 0, NULL }, + { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, + { "nr", roff_nr, NULL, NULL, 0, NULL }, + { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, + { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, + { "rm", roff_rm, NULL, NULL, 0, NULL }, + { "so", roff_so, NULL, NULL, 0, NULL }, + { "ta", roff_line_ignore, NULL, NULL, 0, NULL }, + { "tr", roff_tr, NULL, NULL, 0, NULL }, + { "TS", roff_TS, NULL, NULL, 0, NULL }, + { "TE", roff_TE, NULL, NULL, 0, NULL }, + { "T&", roff_T_, NULL, NULL, 0, NULL }, + { "EQ", roff_EQ, NULL, NULL, 0, NULL }, + { "EN", roff_EN, NULL, NULL, 0, NULL }, + { ".", roff_cblock, NULL, NULL, 0, NULL }, + { "\\}", roff_ccond, NULL, NULL, 0, NULL }, + { NULL, roff_userdef, NULL, NULL, 0, NULL }, +}; + +/* Array of injected predefined strings. */ +#define PREDEFS_MAX 38 +static const struct predef predefs[PREDEFS_MAX] = { +#include "predefs.in" +}; + +/* See roffhash_find() */ +#define ROFF_HASH(p) (p[0] - ASCII_LO) + +static void +roffhash_init(void) +{ + struct roffmac *n; + int buc, i; + + for (i = 0; i < (int)ROFF_USERDEF; i++) { + assert(roffs[i].name[0] >= ASCII_LO); + assert(roffs[i].name[0] <= ASCII_HI); + + buc = ROFF_HASH(roffs[i].name); + + if (NULL != (n = hash[buc])) { + for ( ; n->next; n = n->next) + /* Do nothing. */ ; + n->next = &roffs[i]; + } else + hash[buc] = &roffs[i]; + } +} + +/* + * Look up a roff token by its name. Returns ROFF_MAX if no macro by + * the nil-terminated string name could be found. + */ +static enum rofft +roffhash_find(const char *p, size_t s) +{ + int buc; + struct roffmac *n; + + /* + * libroff has an extremely simple hashtable, for the time + * being, which simply keys on the first character, which must + * be printable, then walks a chain. It works well enough until + * optimised. 
+ */ + + if (p[0] < ASCII_LO || p[0] > ASCII_HI) + return(ROFF_MAX); + + buc = ROFF_HASH(p); + + if (NULL == (n = hash[buc])) + return(ROFF_MAX); + for ( ; n; n = n->next) + if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) + return((enum rofft)(n - roffs)); + + return(ROFF_MAX); +} + + +/* + * Pop the current node off of the stack of roff instructions currently + * pending. + */ +static void +roffnode_pop(struct roff *r) +{ + struct roffnode *p; + + assert(r->last); + p = r->last; + + r->last = r->last->parent; + free(p->name); + free(p->end); + free(p); +} + + +/* + * Push a roff node onto the instruction stack. This must later be + * removed with roffnode_pop(). + */ +static void +roffnode_push(struct roff *r, enum rofft tok, const char *name, + int line, int col) +{ + struct roffnode *p; + + p = mandoc_calloc(1, sizeof(struct roffnode)); + p->tok = tok; + if (name) + p->name = mandoc_strdup(name); + p->parent = r->last; + p->line = line; + p->col = col; + p->rule = p->parent ? 
p->parent->rule : ROFFRULE_DENY; + + r->last = p; +} + + +static void +roff_free1(struct roff *r) +{ + struct tbl_node *t; + struct eqn_node *e; + int i; + + while (NULL != (t = r->first_tbl)) { + r->first_tbl = t->next; + tbl_free(t); + } + + r->first_tbl = r->last_tbl = r->tbl = NULL; + + while (NULL != (e = r->first_eqn)) { + r->first_eqn = e->next; + eqn_free(e); + } + + r->first_eqn = r->last_eqn = r->eqn = NULL; + + while (r->last) + roffnode_pop(r); + + roff_freestr(r->strtab); + roff_freestr(r->xmbtab); + + r->strtab = r->xmbtab = NULL; + + if (r->xtab) + for (i = 0; i < 128; i++) + free(r->xtab[i].p); + + free(r->xtab); + r->xtab = NULL; +} + +void +roff_reset(struct roff *r) +{ + int i; + + roff_free1(r); + + memset(&r->regs, 0, sizeof(struct reg) * REG__MAX); + + for (i = 0; i < PREDEFS_MAX; i++) + roff_setstr(r, predefs[i].name, predefs[i].str, 0); +} + + +void +roff_free(struct roff *r) +{ + + roff_free1(r); + free(r); +} + + +struct roff * +roff_alloc(struct mparse *parse) +{ + struct roff *r; + int i; + + r = mandoc_calloc(1, sizeof(struct roff)); + r->parse = parse; + r->rstackpos = -1; + + roffhash_init(); + + for (i = 0; i < PREDEFS_MAX; i++) + roff_setstr(r, predefs[i].name, predefs[i].str, 0); + + return(r); +} + +/* + * Pre-filter each and every line for reserved words (one beginning with + * `\*', e.g., `\*(ab'). These must be handled before the actual line + * is processed. + * This also checks the syntax of regular escapes. + */ +static enum rofferr +roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos) +{ + enum mandoc_esc esc; + const char *stesc; /* start of an escape sequence ('\\') */ + const char *stnam; /* start of the name, after "[(*" */ + const char *cp; /* end of the name, e.g. 
before ']' */ + const char *res; /* the string to be substituted */ + int i, maxl, expand_count; + size_t nsz; + char *n; + + expand_count = 0; + +again: + cp = *bufp + pos; + while (NULL != (cp = strchr(cp, '\\'))) { + stesc = cp++; + + /* + * The second character must be an asterisk. + * If it isn't, skip it anyway: It is escaped, + * so it can't start another escape sequence. + */ + + if ('\0' == *cp) + return(ROFF_CONT); + + if ('*' != *cp) { + res = cp; + esc = mandoc_escape(&cp, NULL, NULL); + if (ESCAPE_ERROR != esc) + continue; + cp = res; + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(stesc - *bufp), NULL); + return(ROFF_CONT); + } + + cp++; + + /* + * The third character decides the length + * of the name of the string. + * Save a pointer to the name. + */ + + switch (*cp) { + case ('\0'): + return(ROFF_CONT); + case ('('): + cp++; + maxl = 2; + break; + case ('['): + cp++; + maxl = 0; + break; + default: + maxl = 1; + break; + } + stnam = cp; + + /* Advance to the end of the name. */ + + for (i = 0; 0 == maxl || i < maxl; i++, cp++) { + if ('\0' == *cp) { + mandoc_msg + (MANDOCERR_BADESCAPE, + r->parse, ln, + (int)(stesc - *bufp), NULL); + return(ROFF_CONT); + } + if (0 == maxl && ']' == *cp) + break; + } + + /* + * Retrieve the replacement string; if it is + * undefined, resume searching for escapes. + */ + + res = roff_getstrn(r, stnam, (size_t)i); + + if (NULL == res) { + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(stesc - *bufp), NULL); + res = ""; + } + + /* Replace the escape sequence by the string. */ + + pos = stesc - *bufp; + + nsz = *szp + strlen(res) + 1; + n = mandoc_malloc(nsz); + + strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1)); + strlcat(n, res, nsz); + strlcat(n, cp + (maxl ? 0 : 1), nsz); + + free(*bufp); + + *bufp = n; + *szp = nsz; + + if (EXPAND_LIMIT >= ++expand_count) + goto again; + + /* Just leave the string unexpanded. 
*/ + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL); + return(ROFF_IGN); + } + return(ROFF_CONT); +} + +/* + * Process text streams: convert all breakable hyphens into ASCII_HYPH. + */ +static enum rofferr +roff_parsetext(char *p) +{ + size_t sz; + const char *start; + enum mandoc_esc esc; + + start = p; + + while ('\0' != *p) { + sz = strcspn(p, "-\\"); + p += sz; + + if ('\0' == *p) + break; + + if ('\\' == *p) { + /* Skip over escapes. */ + p++; + esc = mandoc_escape + ((const char **)&p, NULL, NULL); + if (ESCAPE_ERROR == esc) + break; + continue; + } else if (p == start) { + p++; + continue; + } + + if (isalpha((unsigned char)p[-1]) && + isalpha((unsigned char)p[1])) + *p = ASCII_HYPH; + p++; + } + + return(ROFF_CONT); +} + +enum rofferr +roff_parseln(struct roff *r, int ln, char **bufp, + size_t *szp, int pos, int *offs) +{ + enum rofft t; + enum rofferr e; + int ppos, ctl; + + /* + * Run the reserved-word filter only if we have some reserved + * words to fill in. + */ + + e = roff_res(r, bufp, szp, ln, pos); + if (ROFF_IGN == e) + return(e); + assert(ROFF_CONT == e); + + ppos = pos; + ctl = mandoc_getcontrol(*bufp, &pos); + + /* + * First, if a scope is open and we're not a macro, pass the + * text through the macro's filter. If a scope isn't open and + * we're not a macro, just let it through. + * Finally, if there's an equation scope open, divert it into it + * no matter our state. + */ + + if (r->last && ! ctl) { + t = r->last->tok; + assert(roffs[t].text); + e = (*roffs[t].text) + (r, t, bufp, szp, ln, pos, pos, offs); + assert(ROFF_IGN == e || ROFF_CONT == e); + if (ROFF_CONT != e) + return(e); + if (r->eqn) + return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); + if (r->tbl) + return(tbl_read(r->tbl, ln, *bufp, pos)); + return(roff_parsetext(*bufp + pos)); + } else if ( ! 
ctl) { + if (r->eqn) + return(eqn_read(&r->eqn, ln, *bufp, pos, offs)); + if (r->tbl) + return(tbl_read(r->tbl, ln, *bufp, pos)); + return(roff_parsetext(*bufp + pos)); + } else if (r->eqn) + return(eqn_read(&r->eqn, ln, *bufp, ppos, offs)); + + /* + * If a scope is open, go to the child handler for that macro, + * as it may want to preprocess before doing anything with it. + * Don't do so if an equation is open. + */ + + if (r->last) { + t = r->last->tok; + assert(roffs[t].sub); + return((*roffs[t].sub) + (r, t, bufp, szp, + ln, ppos, pos, offs)); + } + + /* + * Lastly, as we've no scope open, try to look up and execute + * the new macro. If no macro is found, simply return and let + * the compilers handle it. + */ + + if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) + return(ROFF_CONT); + + assert(roffs[t].proc); + return((*roffs[t].proc) + (r, t, bufp, szp, + ln, ppos, pos, offs)); +} + + +void +roff_endparse(struct roff *r) +{ + + if (r->last) + mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, + r->last->line, r->last->col, NULL); + + if (r->eqn) { + mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, + r->eqn->eqn.ln, r->eqn->eqn.pos, NULL); + eqn_end(&r->eqn); + } + + if (r->tbl) { + mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse, + r->tbl->line, r->tbl->pos, NULL); + tbl_end(&r->tbl); + } +} + +/* + * Parse a roff node's type from the input buffer. This must be in the + * form of ".foo xxx" in the usual way. + */ +static enum rofft +roff_parse(struct roff *r, const char *buf, int *pos) +{ + const char *mac; + size_t maclen; + enum rofft t; + + if ('\0' == buf[*pos] || '"' == buf[*pos] || + '\t' == buf[*pos] || ' ' == buf[*pos]) + return(ROFF_MAX); + + /* + * We stop the macro parse at an escape, tab, space, or nil. + * However, `\}' is also a valid macro, so make sure we don't + * clobber it by seeing the `\' as the end of token. + */ + + mac = buf + *pos; + maclen = strcspn(mac + 1, " \\\t\0") + 1; + + t = (r->current_string = roff_getstrn(r, mac, maclen)) + ? 
ROFF_USERDEF : roffhash_find(mac, maclen); + + *pos += (int)maclen; + + while (buf[*pos] && ' ' == buf[*pos]) + (*pos)++; + + return(t); +} + +/* ARGSUSED */ +static enum rofferr +roff_cblock(ROFF_ARGS) +{ + + /* + * A block-close `..' should only be invoked as a child of an + * ignore macro, otherwise raise a warning and just ignore it. + */ + + if (NULL == r->last) { + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + switch (r->last->tok) { + case (ROFF_am): + /* FALLTHROUGH */ + case (ROFF_ami): + /* FALLTHROUGH */ + case (ROFF_am1): + /* FALLTHROUGH */ + case (ROFF_de): + /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ + /* FALLTHROUGH */ + case (ROFF_dei): + /* FALLTHROUGH */ + case (ROFF_ig): + break; + default: + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + if ((*bufp)[pos]) + mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); + + roffnode_pop(r); + roffnode_cleanscope(r); + return(ROFF_IGN); + +} + + +static void +roffnode_cleanscope(struct roff *r) +{ + + while (r->last) { + if (--r->last->endspan < 0) + break; + roffnode_pop(r); + } +} + + +/* ARGSUSED */ +static enum rofferr +roff_ccond(ROFF_ARGS) +{ + + if (NULL == r->last) { + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + switch (r->last->tok) { + case (ROFF_el): + /* FALLTHROUGH */ + case (ROFF_ie): + /* FALLTHROUGH */ + case (ROFF_if): + break; + default: + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + if (r->last->endspan > -1) { + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + if ((*bufp)[pos]) + mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); + + roffnode_pop(r); + roffnode_cleanscope(r); + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block(ROFF_ARGS) +{ + int sv; + size_t sz; + char *name; + + name = NULL; + + if (ROFF_ig != tok) { + if ('\0' == (*bufp)[pos]) { + 
mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + /* + * Re-write `de1', since we don't really care about + * groff's strange compatibility mode, into `de'. + */ + + if (ROFF_de1 == tok) + tok = ROFF_de; + if (ROFF_de == tok) + name = *bufp + pos; + else + mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, + roffs[tok].name); + + while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) + pos++; + + while (isspace((unsigned char)(*bufp)[pos])) + (*bufp)[pos++] = '\0'; + } + + roffnode_push(r, tok, name, ln, ppos); + + /* + * At the beginning of a `de' macro, clear the existing string + * with the same name, if there is one. New content will be + * added from roff_block_text() in multiline mode. + */ + + if (ROFF_de == tok) + roff_setstr(r, name, "", 0); + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + /* If present, process the custom end-of-line marker. */ + + sv = pos; + while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos])) + pos++; + + /* + * Note: groff does NOT like escape characters in the input. + * Instead of detecting this, we're just going to let it fly and + * to hell with it. + */ + + assert(pos > sv); + sz = (size_t)(pos - sv); + + if (1 == sz && '.' == (*bufp)[sv]) + return(ROFF_IGN); + + r->last->end = mandoc_malloc(sz + 1); + + memcpy(r->last->end, *bufp + sv, sz); + r->last->end[(int)sz] = '\0'; + + if ((*bufp)[pos]) + mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL); + + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block_sub(ROFF_ARGS) +{ + enum rofft t; + int i, j; + + /* + * First check whether a custom macro exists at this level. If + * it does, then check against it. This is some of groff's + * stranger behaviours. If we encountered a custom end-scope + * tag and that tag also happens to be a "real" macro, then we + * need to try interpreting it again as a real macro. If it's + * not, then return ignore. Else continue. 
+ */ + + if (r->last->end) { + for (i = pos, j = 0; r->last->end[j]; j++, i++) + if ((*bufp)[i] != r->last->end[j]) + break; + + if ('\0' == r->last->end[j] && + ('\0' == (*bufp)[i] || + ' ' == (*bufp)[i] || + '\t' == (*bufp)[i])) { + roffnode_pop(r); + roffnode_cleanscope(r); + + while (' ' == (*bufp)[i] || '\t' == (*bufp)[i]) + i++; + + pos = i; + if (ROFF_MAX != roff_parse(r, *bufp, &pos)) + return(ROFF_RERUN); + return(ROFF_IGN); + } + } + + /* + * If we have no custom end-query or lookup failed, then try + * pulling it out of the hashtable. + */ + + t = roff_parse(r, *bufp, &pos); + + /* + * Macros other than block-end are only significant + * in `de' blocks; elsewhere, simply throw them away. + */ + if (ROFF_cblock != t) { + if (ROFF_de == tok) + roff_setstr(r, r->last->name, *bufp + ppos, 1); + return(ROFF_IGN); + } + + assert(roffs[t].proc); + return((*roffs[t].proc)(r, t, bufp, szp, + ln, ppos, pos, offs)); +} + + +/* ARGSUSED */ +static enum rofferr +roff_block_text(ROFF_ARGS) +{ + + if (ROFF_de == tok) + roff_setstr(r, r->last->name, *bufp + pos, 1); + + return(ROFF_IGN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_cond_sub(ROFF_ARGS) +{ + enum rofft t; + enum roffrule rr; + char *ep; + + rr = r->last->rule; + roffnode_cleanscope(r); + + /* + * If the macro is unknown, first check if it contains a closing + * delimiter `\}'. If it does, close out our scope and return + * the currently-scoped rule (ignore or continue). Else, drop + * into the currently-scoped rule. + */ + + if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) { + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + + /* + * Make the \} go away. + * This is a little haphazard, as it's not quite + * clear how nroff does this. + * If we're at the end of line, then just chop + * off the \} and resize the buffer. + * If we aren't, then conver it to spaces. 
+ */ + + if ('\0' == *(ep + 1)) { + *--ep = '\0'; + *szp -= 2; + } else + *(ep - 1) = *ep = ' '; + + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); + break; + } + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); + } + + /* + * A denied conditional must evaluate its children if and only + * if they're either structurally required (such as loops and + * conditionals) or a closing macro. + */ + + if (ROFFRULE_DENY == rr) + if ( ! (ROFFMAC_STRUCT & roffs[t].flags)) + if (ROFF_ccond != t) + return(ROFF_IGN); + + assert(roffs[t].proc); + return((*roffs[t].proc)(r, t, bufp, szp, + ln, ppos, pos, offs)); +} + +/* ARGSUSED */ +static enum rofferr +roff_cond_text(ROFF_ARGS) +{ + char *ep; + enum roffrule rr; + + rr = r->last->rule; + roffnode_cleanscope(r); + + ep = &(*bufp)[pos]; + for ( ; NULL != (ep = strchr(ep, '\\')); ep++) { + ep++; + if ('}' != *ep) + continue; + *ep = '&'; + roff_ccond(r, ROFF_ccond, bufp, szp, + ln, pos, pos + 2, offs); + } + return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT); +} + +static enum roffrule +roff_evalcond(const char *v, int *pos) +{ + + switch (v[*pos]) { + case ('n'): + (*pos)++; + return(ROFFRULE_ALLOW); + case ('e'): + /* FALLTHROUGH */ + case ('o'): + /* FALLTHROUGH */ + case ('t'): + (*pos)++; + return(ROFFRULE_DENY); + default: + break; + } + + while (v[*pos] && ' ' != v[*pos]) + (*pos)++; + return(ROFFRULE_DENY); +} + +/* ARGSUSED */ +static enum rofferr +roff_line_ignore(ROFF_ARGS) +{ + + if (ROFF_it == tok) + mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it"); + + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_cond(ROFF_ARGS) +{ + int sv; + enum roffrule rule; + + /* + * An `.el' has no conditional body: it will consume the value + * of the current rstack entry set in prior `ie' calls or + * defaults to DENY. + * + * If we're not an `el', however, then evaluate the conditional. + */ + + rule = ROFF_el == tok ? + (r->rstackpos < 0 ? 
+ ROFFRULE_DENY : r->rstack[r->rstackpos--]) : + roff_evalcond(*bufp, &pos); + + sv = pos; + while (' ' == (*bufp)[pos]) + pos++; + + /* + * Roff is weird. If we have just white-space after the + * conditional, it's considered the BODY and we exit without + * really doing anything. Warn about this. It's probably + * wrong. + */ + + if ('\0' == (*bufp)[pos] && sv != pos) { + mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + roffnode_push(r, tok, NULL, ln, ppos); + + r->last->rule = rule; + + /* + * An if-else will put the NEGATION of the current evaluated + * conditional into the stack of rules. + */ + + if (ROFF_ie == tok) { + if (r->rstackpos == RSTACK_MAX - 1) { + mandoc_msg(MANDOCERR_MEM, + r->parse, ln, ppos, NULL); + return(ROFF_ERR); + } + r->rstack[++r->rstackpos] = + ROFFRULE_DENY == r->last->rule ? + ROFFRULE_ALLOW : ROFFRULE_DENY; + } + + /* If the parent has false as its rule, then so do we. */ + + if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule) + r->last->rule = ROFFRULE_DENY; + + /* + * Determine scope. If we're invoked with "\{" trailing the + * conditional, then we're in a multiline scope. Else our scope + * expires on the next line. + */ + + r->last->endspan = 1; + + if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) { + r->last->endspan = -1; + pos += 2; + } + + /* + * If there are no arguments on the line, the next-line scope is + * assumed. + */ + + if ('\0' == (*bufp)[pos]) + return(ROFF_IGN); + + /* Otherwise re-run the roff parser after recalculating. */ + + *offs = pos; + return(ROFF_RERUN); +} + + +/* ARGSUSED */ +static enum rofferr +roff_ds(ROFF_ARGS) +{ + char *name, *string; + + /* + * A symbol is named by the first word following the macro + * invocation up to a space. Its value is anything after the + * name's trailing whitespace and optional double-quote. Thus, + * + * [.ds foo "bar " ] + * + * will have `bar " ' as its value. 
+ */ + + string = *bufp + pos; + name = roff_getname(r, &string, ln, pos); + if ('\0' == *name) + return(ROFF_IGN); + + /* Read past initial double-quote. */ + if ('"' == *string) + string++; + + /* The rest is the value. */ + roff_setstr(r, name, string, 0); + return(ROFF_IGN); +} + +int +roff_regisset(const struct roff *r, enum regs reg) +{ + + return(r->regs[(int)reg].set); +} + +unsigned int +roff_regget(const struct roff *r, enum regs reg) +{ + + return(r->regs[(int)reg].u); +} + +void +roff_regunset(struct roff *r, enum regs reg) +{ + + r->regs[(int)reg].set = 0; +} + +/* ARGSUSED */ +static enum rofferr +roff_nr(ROFF_ARGS) +{ + const char *key; + char *val; + int iv; + + val = *bufp + pos; + key = roff_getname(r, &val, ln, pos); + + if (0 == strcmp(key, "nS")) { + r->regs[(int)REG_nS].set = 1; + if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0) + r->regs[(int)REG_nS].u = (unsigned)iv; + else + r->regs[(int)REG_nS].u = 0u; + } + + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_rm(ROFF_ARGS) +{ + const char *name; + char *cp; + + cp = *bufp + pos; + while ('\0' != *cp) { + name = roff_getname(r, &cp, ln, (int)(cp - *bufp)); + if ('\0' != *name) + roff_setstr(r, name, NULL, 0); + } + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_TE(ROFF_ARGS) +{ + + if (NULL == r->tbl) + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + else + tbl_end(&r->tbl); + + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_T_(ROFF_ARGS) +{ + + if (NULL == r->tbl) + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + else + tbl_restart(ppos, ln, r->tbl); + + return(ROFF_IGN); +} + +#if 0 +static int +roff_closeeqn(struct roff *r) +{ + + return(r->eqn && ROFF_EQN == eqn_end(&r->eqn) ? 
1 : 0); +} +#endif + +static void +roff_openeqn(struct roff *r, const char *name, int line, + int offs, const char *buf) +{ + struct eqn_node *e; + int poff; + + assert(NULL == r->eqn); + e = eqn_alloc(name, offs, line, r->parse); + + if (r->last_eqn) + r->last_eqn->next = e; + else + r->first_eqn = r->last_eqn = e; + + r->eqn = r->last_eqn = e; + + if (buf) { + poff = 0; + eqn_read(&r->eqn, line, buf, offs, &poff); + } +} + +/* ARGSUSED */ +static enum rofferr +roff_EQ(ROFF_ARGS) +{ + + roff_openeqn(r, *bufp + pos, ln, ppos, NULL); + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_EN(ROFF_ARGS) +{ + + mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL); + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_TS(ROFF_ARGS) +{ + struct tbl_node *t; + + if (r->tbl) { + mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL); + tbl_end(&r->tbl); + } + + t = tbl_alloc(ppos, ln, r->parse); + + if (r->last_tbl) + r->last_tbl->next = t; + else + r->first_tbl = r->last_tbl = t; + + r->tbl = r->last_tbl = t; + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_tr(ROFF_ARGS) +{ + const char *p, *first, *second; + size_t fsz, ssz; + enum mandoc_esc esc; + + p = *bufp + pos; + + if ('\0' == *p) { + mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL); + return(ROFF_IGN); + } + + while ('\0' != *p) { + fsz = ssz = 1; + + first = p++; + if ('\\' == *first) { + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(p - *bufp), NULL); + return(ROFF_IGN); + } + fsz = (size_t)(p - first); + } + + second = p++; + if ('\\' == *second) { + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + mandoc_msg + (MANDOCERR_BADESCAPE, r->parse, + ln, (int)(p - *bufp), NULL); + return(ROFF_IGN); + } + ssz = (size_t)(p - second); + } else if ('\0' == *second) { + mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, + ln, (int)(p - *bufp), NULL); + second = " "; + p--; 
+ } + + if (fsz > 1) { + roff_setstrn(&r->xmbtab, first, + fsz, second, ssz, 0); + continue; + } + + if (NULL == r->xtab) + r->xtab = mandoc_calloc + (128, sizeof(struct roffstr)); + + free(r->xtab[(int)*first].p); + r->xtab[(int)*first].p = mandoc_strndup(second, ssz); + r->xtab[(int)*first].sz = ssz; + } + + return(ROFF_IGN); +} + +/* ARGSUSED */ +static enum rofferr +roff_so(ROFF_ARGS) +{ + char *name; + + mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL); + + /* + * Handle `so'. Be EXTREMELY careful, as we shouldn't be + * opening anything that's not in our cwd or anything beneath + * it. Thus, explicitly disallow traversing up the file-system + * or using absolute paths. + */ + + name = *bufp + pos; + if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) { + mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL); + return(ROFF_ERR); + } + + *offs = pos; + return(ROFF_SO); +} + +/* ARGSUSED */ +static enum rofferr +roff_userdef(ROFF_ARGS) +{ + const char *arg[9]; + char *cp, *n1, *n2; + int i; + + /* + * Collect pointers to macro argument strings + * and null-terminate them. + */ + cp = *bufp + pos; + for (i = 0; i < 9; i++) + arg[i] = '\0' == *cp ? "" : + mandoc_getarg(r->parse, &cp, ln, &pos); + + /* + * Expand macro arguments. + */ + *szp = 0; + n1 = cp = mandoc_strdup(r->current_string); + while (NULL != (cp = strstr(cp, "\\$"))) { + i = cp[2] - '1'; + if (0 > i || 8 < i) { + /* Not an argument invocation. */ + cp += 2; + continue; + } + + *szp = strlen(n1) - 3 + strlen(arg[i]) + 1; + n2 = mandoc_malloc(*szp); + + strlcpy(n2, n1, (size_t)(cp - n1 + 1)); + strlcat(n2, arg[i], *szp); + strlcat(n2, cp + 3, *szp); + + cp = n2 + (cp - n1); + free(n1); + n1 = n2; + } + + /* + * Replace the macro invocation + * by the expanded macro. + */ + free(*bufp); + *bufp = n1; + if (0 == *szp) + *szp = strlen(*bufp) + 1; + + return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ? 
+ ROFF_REPARSE : ROFF_APPEND); +} + +static char * +roff_getname(struct roff *r, char **cpp, int ln, int pos) +{ + char *name, *cp; + + name = *cpp; + if ('\0' == *name) + return(name); + + /* Read until end of name. */ + for (cp = name; '\0' != *cp && ' ' != *cp; cp++) { + if ('\\' != *cp) + continue; + cp++; + if ('\\' == *cp) + continue; + mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL); + *cp = '\0'; + name = cp; + } + + /* Nil-terminate name. */ + if ('\0' != *cp) + *(cp++) = '\0'; + + /* Read past spaces. */ + while (' ' == *cp) + cp++; + + *cpp = cp; + return(name); +} + +/* + * Store *string into the user-defined string called *name. + * In multiline mode, append to an existing entry and append '\n'; + * else replace the existing entry, if there is one. + * To clear an existing entry, call with (*r, *name, NULL, 0). + */ +static void +roff_setstr(struct roff *r, const char *name, const char *string, + int multiline) +{ + + roff_setstrn(&r->strtab, name, strlen(name), string, + string ? strlen(string) : 0, multiline); +} + +static void +roff_setstrn(struct roffkv **r, const char *name, size_t namesz, + const char *string, size_t stringsz, int multiline) +{ + struct roffkv *n; + char *c; + int i; + size_t oldch, newch; + + /* Search for an existing string with the same name. */ + n = *r; + + while (n && strcmp(name, n->key.p)) + n = n->next; + + if (NULL == n) { + /* Create a new string table entry. */ + n = mandoc_malloc(sizeof(struct roffkv)); + n->key.p = mandoc_strndup(name, namesz); + n->key.sz = namesz; + n->val.p = NULL; + n->val.sz = 0; + n->next = *r; + *r = n; + } else if (0 == multiline) { + /* In multiline mode, append; else replace. */ + free(n->val.p); + n->val.p = NULL; + n->val.sz = 0; + } + + if (NULL == string) + return; + + /* + * One additional byte for the '\n' in multiline mode, + * and one for the terminating '\0'. + */ + newch = stringsz + (multiline ? 
2u : 1u); + + if (NULL == n->val.p) { + n->val.p = mandoc_malloc(newch); + *n->val.p = '\0'; + oldch = 0; + } else { + oldch = n->val.sz; + n->val.p = mandoc_realloc(n->val.p, oldch + newch); + } + + /* Skip existing content in the destination buffer. */ + c = n->val.p + (int)oldch; + + /* Append new content to the destination buffer. */ + i = 0; + while (i < (int)stringsz) { + /* + * Rudimentary roff copy mode: + * Handle escaped backslashes. + */ + if ('\\' == string[i] && '\\' == string[i + 1]) + i++; + *c++ = string[i++]; + } + + /* Append terminating bytes. */ + if (multiline) + *c++ = '\n'; + + *c = '\0'; + n->val.sz = (int)(c - n->val.p); +} + +static const char * +roff_getstrn(const struct roff *r, const char *name, size_t len) +{ + const struct roffkv *n; + + for (n = r->strtab; n; n = n->next) + if (0 == strncmp(name, n->key.p, len) && + '\0' == n->key.p[(int)len]) + return(n->val.p); + + return(NULL); +} + +static void +roff_freestr(struct roffkv *r) +{ + struct roffkv *n, *nn; + + for (n = r; n; n = nn) { + free(n->key.p); + free(n->val.p); + nn = n->next; + free(n); + } +} + +const struct tbl_span * +roff_span(const struct roff *r) +{ + + return(r->tbl ? tbl_span(r->tbl) : NULL); +} + +const struct eqn * +roff_eqn(const struct roff *r) +{ + + return(r->last_eqn ? &r->last_eqn->eqn : NULL); +} + +/* + * Duplicate an input string, making the appropriate character + * conversations (as stipulated by `tr') along the way. + * Returns a heap-allocated string with all the replacements made. + */ +char * +roff_strdup(const struct roff *r, const char *p) +{ + const struct roffkv *cp; + char *res; + const char *pp; + size_t ssz, sz; + enum mandoc_esc esc; + + if (NULL == r->xmbtab && NULL == r->xtab) + return(mandoc_strdup(p)); + else if ('\0' == *p) + return(mandoc_strdup("")); + + /* + * Step through each character looking for term matches + * (remember that a `tr' can be invoked with an escape, which is + * a glyph but the escape is multi-character). 
+ * We only do this if the character hash has been initialised + * and the string is >0 length. + */ + + res = NULL; + ssz = 0; + + while ('\0' != *p) { + if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { + sz = r->xtab[(int)*p].sz; + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, r->xtab[(int)*p].p, sz); + ssz += sz; + p++; + continue; + } else if ('\\' != *p) { + res = mandoc_realloc(res, ssz + 2); + res[ssz++] = *p++; + continue; + } + + /* Search for term matches. */ + for (cp = r->xmbtab; cp; cp = cp->next) + if (0 == strncmp(p, cp->key.p, cp->key.sz)) + break; + + if (NULL != cp) { + /* + * A match has been found. + * Append the match to the array and move + * forward by its keysize. + */ + res = mandoc_realloc + (res, ssz + cp->val.sz + 1); + memcpy(res + ssz, cp->val.p, cp->val.sz); + ssz += cp->val.sz; + p += (int)cp->key.sz; + continue; + } + + /* + * Handle escapes carefully: we need to copy + * over just the escape itself, or else we might + * do replacements within the escape itself. + * Make sure to pass along the bogus string. + */ + pp = p++; + esc = mandoc_escape(&p, NULL, NULL); + if (ESCAPE_ERROR == esc) { + sz = strlen(pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + break; + } + /* + * We bail out on bad escapes. + * No need to warn: we already did so when + * roff_res() was called. 
+ */ + sz = (int)(p - pp); + res = mandoc_realloc(res, ssz + sz + 1); + memcpy(res + ssz, pp, sz); + ssz += sz; + } + + res[(int)ssz] = '\0'; + return(res); +} diff --git a/usr/src/cmd/mandoc/st.c b/usr/src/cmd/mandoc/st.c new file mode 100644 index 0000000000..70c21a269e --- /dev/null +++ b/usr/src/cmd/mandoc/st.c @@ -0,0 +1,39 @@ +/* $Id: st.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2st(const char *p) +{ + +#include "st.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/st.in b/usr/src/cmd/mandoc/st.in new file mode 100644 index 0000000000..3ba41dd359 --- /dev/null +++ b/usr/src/cmd/mandoc/st.in @@ -0,0 +1,78 @@ +/* $Id: st.in,v 1.19 2012/02/26 21:47:09 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines the .St macro arguments. If you add a new + * standard, make sure that the left-and side corresponds to the .St + * argument (like .St -p1003.1) and the right-hand side corresponds to + * the formatted output string. + * + * Be sure to escape strings. + * The non-breaking blanks prevent ending an output line right before + * a number. Groff prevent line breaks at the same places. + * + * REMEMBER TO ADD NEW STANDARDS TO MDOC.7! 
+ */ + +LINE("-p1003.1-88", "IEEE Std 1003.1-1988 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-90", "IEEE Std 1003.1-1990 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1", "IEEE Std 1003.1 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1b", "IEEE Std 1003.1b (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1c-95", "IEEE Std 1003.1c-1995 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1g-2000", "IEEE Std 1003.1g-2000 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.1i-95", "IEEE Std 1003.1i-1995 (\\(lqPOSIX.1\\(rq)") +LINE("-p1003.2-92", "IEEE Std 1003.2-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-p1003.2a-92", "IEEE Std 1003.2a-1992 (\\(lqPOSIX.2\\(rq)") +LINE("-p1387.2-95", "IEEE Std 1387.2-1995 (\\(lqPOSIX.7.2\\(rq)") +LINE("-p1003.2", "IEEE Std 1003.2 (\\(lqPOSIX.2\\(rq)") +LINE("-p1387.2", "IEEE Std 1387.2 (\\(lqPOSIX.7.2\\(rq)") +LINE("-isoC", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-90", "ISO/IEC 9899:1990 (\\(lqISO\\~C90\\(rq)") +LINE("-isoC-amd1", "ISO/IEC 9899/AMD1:1995 (\\(lqISO\\~C90, Amendment 1\\(rq)") +LINE("-isoC-tcor1", "ISO/IEC 9899/TCOR1:1994 (\\(lqISO\\~C90, Technical Corrigendum 1\\(rq)") +LINE("-isoC-tcor2", "ISO/IEC 9899/TCOR2:1995 (\\(lqISO\\~C90, Technical Corrigendum 2\\(rq)") +LINE("-isoC-99", "ISO/IEC 9899:1999 (\\(lqISO\\~C99\\(rq)") +LINE("-isoC-2011", "ISO/IEC 9899:2011 (\\(lqISO\\~C11\\(rq)") +LINE("-iso9945-1-90", "ISO/IEC 9945-1:1990 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)") +LINE("-iso9945-2-93", "ISO/IEC 9945-2:1993 (\\(lqPOSIX.2\\(rq)") +LINE("-ansiC", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") +LINE("-ansiC-89", "ANSI X3.159-1989 (\\(lqANSI\\~C89\\(rq)") +LINE("-ansiC-99", "ANSI/ISO/IEC 9899-1999 
(\\(lqANSI\\~C99\\(rq)") +LINE("-ieee754", "IEEE Std 754-1985") +LINE("-iso8802-3", "ISO 8802-3: 1989") +LINE("-iso8601", "ISO 8601") +LINE("-ieee1275-94", "IEEE Std 1275-1994 (\\(lqOpen Firmware\\(rq)") +LINE("-xpg3", "X/Open Portability Guide Issue\\~3 (\\(lqXPG3\\(rq)") +LINE("-xpg4", "X/Open Portability Guide Issue\\~4 (\\(lqXPG4\\(rq)") +LINE("-xpg4.2", "X/Open Portability Guide Issue\\~4, Version\\~2 (\\(lqXPG4.2\\(rq)") +LINE("-xpg4.3", "X/Open Portability Guide Issue\\~4, Version\\~3 (\\(lqXPG4.3\\(rq)") +LINE("-xbd5", "X/Open Base Definitions Issue\\~5 (\\(lqXBD5\\(rq)") +LINE("-xcu5", "X/Open Commands and Utilities Issue\\~5 (\\(lqXCU5\\(rq)") +LINE("-xsh5", "X/Open System Interfaces and Headers Issue\\~5 (\\(lqXSH5\\(rq)") +LINE("-xns5", "X/Open Networking Services Issue\\~5 (\\(lqXNS5\\(rq)") +LINE("-xns5.2", "X/Open Networking Services Issue\\~5.2 (\\(lqXNS5.2\\(rq)") +LINE("-xns5.2d2.0", "X/Open Networking Services Issue\\~5.2 Draft\\~2.0 (\\(lqXNS5.2D2.0\\(rq)") +LINE("-xcurses4.2", "X/Open Curses Issue\\~4, Version\\~2 (\\(lqXCURSES4.2\\(rq)") +LINE("-susv2", "Version\\~2 of the Single UNIX Specification") +LINE("-susv3", "Version\\~3 of the Single UNIX Specification") +LINE("-svid4", "System\\~V Interface Definition, Fourth Edition (\\(lqSVID4\\(rq)") diff --git a/usr/src/cmd/mandoc/tbl.c b/usr/src/cmd/mandoc/tbl.c new file mode 100644 index 0000000000..b3d651be07 --- /dev/null +++ b/usr/src/cmd/mandoc/tbl.c @@ -0,0 +1,175 @@ +/* $Id: tbl.c,v 1.26 2011/07/25 15:37:00 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +enum rofferr +tbl_read(struct tbl_node *tbl, int ln, const char *p, int offs) +{ + int len; + const char *cp; + + cp = &p[offs]; + len = (int)strlen(cp); + + /* + * If we're in the options section and we don't have a + * terminating semicolon, assume we've moved directly into the + * layout section. No need to report a warning: this is, + * apparently, standard behaviour. + */ + + if (TBL_PART_OPTS == tbl->part && len) + if (';' != cp[len - 1]) + tbl->part = TBL_PART_LAYOUT; + + /* Now process each logical section of the table. */ + + switch (tbl->part) { + case (TBL_PART_OPTS): + return(tbl_option(tbl, ln, p) ? ROFF_IGN : ROFF_ERR); + case (TBL_PART_LAYOUT): + return(tbl_layout(tbl, ln, p) ? ROFF_IGN : ROFF_ERR); + case (TBL_PART_CDATA): + return(tbl_cdata(tbl, ln, p) ? ROFF_TBL : ROFF_IGN); + default: + break; + } + + /* + * This only returns zero if the line is empty, so we ignore it + * and continue on. + */ + return(tbl_data(tbl, ln, p) ? 
ROFF_TBL : ROFF_IGN); +} + +struct tbl_node * +tbl_alloc(int pos, int line, struct mparse *parse) +{ + struct tbl_node *p; + + p = mandoc_calloc(1, sizeof(struct tbl_node)); + p->line = line; + p->pos = pos; + p->parse = parse; + p->part = TBL_PART_OPTS; + p->opts.tab = '\t'; + p->opts.linesize = 12; + p->opts.decimal = '.'; + return(p); +} + +void +tbl_free(struct tbl_node *p) +{ + struct tbl_row *rp; + struct tbl_cell *cp; + struct tbl_span *sp; + struct tbl_dat *dp; + struct tbl_head *hp; + + while (NULL != (rp = p->first_row)) { + p->first_row = rp->next; + while (rp->first) { + cp = rp->first; + rp->first = cp->next; + free(cp); + } + free(rp); + } + + while (NULL != (sp = p->first_span)) { + p->first_span = sp->next; + while (sp->first) { + dp = sp->first; + sp->first = dp->next; + if (dp->string) + free(dp->string); + free(dp); + } + free(sp); + } + + while (NULL != (hp = p->first_head)) { + p->first_head = hp->next; + free(hp); + } + + free(p); +} + +void +tbl_restart(int line, int pos, struct tbl_node *tbl) +{ + if (TBL_PART_CDATA == tbl->part) + mandoc_msg(MANDOCERR_TBLBLOCK, tbl->parse, + tbl->line, tbl->pos, NULL); + + tbl->part = TBL_PART_LAYOUT; + tbl->line = line; + tbl->pos = pos; + + if (NULL == tbl->first_span || NULL == tbl->first_span->first) + mandoc_msg(MANDOCERR_TBLNODATA, tbl->parse, + tbl->line, tbl->pos, NULL); +} + +const struct tbl_span * +tbl_span(struct tbl_node *tbl) +{ + struct tbl_span *span; + + assert(tbl); + span = tbl->current_span ? 
tbl->current_span->next + : tbl->first_span; + if (span) + tbl->current_span = span; + return(span); +} + +void +tbl_end(struct tbl_node **tblp) +{ + struct tbl_node *tbl; + + tbl = *tblp; + *tblp = NULL; + + if (NULL == tbl->first_span || NULL == tbl->first_span->first) + mandoc_msg(MANDOCERR_TBLNODATA, tbl->parse, + tbl->line, tbl->pos, NULL); + + if (tbl->last_span) + tbl->last_span->flags |= TBL_SPAN_LAST; + + if (TBL_PART_CDATA == tbl->part) + mandoc_msg(MANDOCERR_TBLBLOCK, tbl->parse, + tbl->line, tbl->pos, NULL); +} + diff --git a/usr/src/cmd/mandoc/tbl_data.c b/usr/src/cmd/mandoc/tbl_data.c new file mode 100644 index 0000000000..129695d8bb --- /dev/null +++ b/usr/src/cmd/mandoc/tbl_data.c @@ -0,0 +1,276 @@ +/* $Id: tbl_data.c,v 1.24 2011/03/20 16:02:05 kristaps Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +static int data(struct tbl_node *, struct tbl_span *, + int, const char *, int *); +static struct tbl_span *newspan(struct tbl_node *, int, + struct tbl_row *); + +static int +data(struct tbl_node *tbl, struct tbl_span *dp, + int ln, const char *p, int *pos) +{ + struct tbl_dat *dat; + struct tbl_cell *cp; + int sv, spans; + + cp = NULL; + if (dp->last && dp->last->layout) + cp = dp->last->layout->next; + else if (NULL == dp->last) + cp = dp->layout->first; + + /* + * Skip over spanners and vertical lines to data formats, since + * we want to match data with data layout cells in the header. + */ + + while (cp && (TBL_CELL_VERT == cp->pos || + TBL_CELL_DVERT == cp->pos || + TBL_CELL_SPAN == cp->pos)) + cp = cp->next; + + /* + * Stop processing when we reach the end of the available layout + * cells. This means that we have extra input. + */ + + if (NULL == cp) { + mandoc_msg(MANDOCERR_TBLEXTRADAT, + tbl->parse, ln, *pos, NULL); + /* Skip to the end... */ + while (p[*pos]) + (*pos)++; + return(1); + } + + dat = mandoc_calloc(1, sizeof(struct tbl_dat)); + dat->layout = cp; + dat->pos = TBL_DATA_NONE; + + assert(TBL_CELL_SPAN != cp->pos); + + for (spans = 0, cp = cp->next; cp; cp = cp->next) + if (TBL_CELL_SPAN == cp->pos) + spans++; + else + break; + + dat->spans = spans; + + if (dp->last) { + dp->last->next = dat; + dp->last = dat; + } else + dp->last = dp->first = dat; + + sv = *pos; + while (p[*pos] && p[*pos] != tbl->opts.tab) + (*pos)++; + + /* + * Check for a continued-data scope opening. This consists of a + * trailing `T{' at the end of the line. Subsequent lines, + * until a standalone `T}', are included in our cell. 
+ */ + + if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) { + tbl->part = TBL_PART_CDATA; + return(0); + } + + assert(*pos - sv >= 0); + + dat->string = mandoc_malloc((size_t)(*pos - sv + 1)); + memcpy(dat->string, &p[sv], (size_t)(*pos - sv)); + dat->string[*pos - sv] = '\0'; + + if (p[*pos]) + (*pos)++; + + if ( ! strcmp(dat->string, "_")) + dat->pos = TBL_DATA_HORIZ; + else if ( ! strcmp(dat->string, "=")) + dat->pos = TBL_DATA_DHORIZ; + else if ( ! strcmp(dat->string, "\\_")) + dat->pos = TBL_DATA_NHORIZ; + else if ( ! strcmp(dat->string, "\\=")) + dat->pos = TBL_DATA_NDHORIZ; + else + dat->pos = TBL_DATA_DATA; + + if (TBL_CELL_HORIZ == dat->layout->pos || + TBL_CELL_DHORIZ == dat->layout->pos || + TBL_CELL_DOWN == dat->layout->pos) + if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string) + mandoc_msg(MANDOCERR_TBLIGNDATA, + tbl->parse, ln, sv, NULL); + + return(1); +} + +/* ARGSUSED */ +int +tbl_cdata(struct tbl_node *tbl, int ln, const char *p) +{ + struct tbl_dat *dat; + size_t sz; + int pos; + + pos = 0; + + dat = tbl->last_span->last; + + if (p[pos] == 'T' && p[pos + 1] == '}') { + pos += 2; + if (p[pos] == tbl->opts.tab) { + tbl->part = TBL_PART_DATA; + pos++; + return(data(tbl, tbl->last_span, ln, p, &pos)); + } else if ('\0' == p[pos]) { + tbl->part = TBL_PART_DATA; + return(1); + } + + /* Fallthrough: T} is part of a word. 
*/ + } + + dat->pos = TBL_DATA_DATA; + + if (dat->string) { + sz = strlen(p) + strlen(dat->string) + 2; + dat->string = mandoc_realloc(dat->string, sz); + strlcat(dat->string, " ", sz); + strlcat(dat->string, p, sz); + } else + dat->string = mandoc_strdup(p); + + if (TBL_CELL_DOWN == dat->layout->pos) + mandoc_msg(MANDOCERR_TBLIGNDATA, + tbl->parse, ln, pos, NULL); + + return(0); +} + +static struct tbl_span * +newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) +{ + struct tbl_span *dp; + + dp = mandoc_calloc(1, sizeof(struct tbl_span)); + dp->line = line; + dp->tbl = &tbl->opts; + dp->layout = rp; + dp->head = tbl->first_head; + + if (tbl->last_span) { + tbl->last_span->next = dp; + tbl->last_span = dp; + } else { + tbl->last_span = tbl->first_span = dp; + tbl->current_span = NULL; + dp->flags |= TBL_SPAN_FIRST; + } + + return(dp); +} + +int +tbl_data(struct tbl_node *tbl, int ln, const char *p) +{ + struct tbl_span *dp; + struct tbl_row *rp; + int pos; + + pos = 0; + + if ('\0' == p[pos]) { + mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL); + return(0); + } + + /* + * Choose a layout row: take the one following the last parsed + * span's. If that doesn't exist, use the last parsed span's. + * If there's no last parsed span, use the first row. Lastly, + * if the last span was a horizontal line, use the same layout + * (it doesn't "consume" the layout). 
+ */ + + if (tbl->last_span) { + assert(tbl->last_span->layout); + if (tbl->last_span->pos == TBL_SPAN_DATA) { + for (rp = tbl->last_span->layout->next; + rp && rp->first; rp = rp->next) { + switch (rp->first->pos) { + case (TBL_CELL_HORIZ): + dp = newspan(tbl, ln, rp); + dp->pos = TBL_SPAN_HORIZ; + continue; + case (TBL_CELL_DHORIZ): + dp = newspan(tbl, ln, rp); + dp->pos = TBL_SPAN_DHORIZ; + continue; + default: + break; + } + break; + } + } else + rp = tbl->last_span->layout; + + if (NULL == rp) + rp = tbl->last_span->layout; + } else + rp = tbl->first_row; + + assert(rp); + + dp = newspan(tbl, ln, rp); + + if ( ! strcmp(p, "_")) { + dp->pos = TBL_SPAN_HORIZ; + return(1); + } else if ( ! strcmp(p, "=")) { + dp->pos = TBL_SPAN_DHORIZ; + return(1); + } + + dp->pos = TBL_SPAN_DATA; + + /* This returns 0 when TBL_PART_CDATA is entered. */ + + while ('\0' != p[pos]) + if ( ! data(tbl, dp, ln, p, &pos)) + return(0); + + return(1); +} diff --git a/usr/src/cmd/mandoc/tbl_html.c b/usr/src/cmd/mandoc/tbl_html.c new file mode 100644 index 0000000000..8e7dc05de0 --- /dev/null +++ b/usr/src/cmd/mandoc/tbl_html.c @@ -0,0 +1,151 @@ +/* $Id: tbl_html.c,v 1.9 2011/09/18 14:14:15 schwarze Exp $ */ +/* + * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "html.h" + +static void html_tblopen(struct html *, const struct tbl_span *); +static size_t html_tbl_len(size_t, void *); +static size_t html_tbl_strlen(const char *, void *); + +/* ARGSUSED */ +static size_t +html_tbl_len(size_t sz, void *arg) +{ + + return(sz); +} + +/* ARGSUSED */ +static size_t +html_tbl_strlen(const char *p, void *arg) +{ + + return(strlen(p)); +} + +static void +html_tblopen(struct html *h, const struct tbl_span *sp) +{ + const struct tbl_head *hp; + struct htmlpair tag; + struct roffsu su; + struct roffcol *col; + + if (TBL_SPAN_FIRST & sp->flags) { + h->tbl.len = html_tbl_len; + h->tbl.slen = html_tbl_strlen; + tblcalc(&h->tbl, sp); + } + + assert(NULL == h->tblt); + PAIR_CLASS_INIT(&tag, "tbl"); + h->tblt = print_otag(h, TAG_TABLE, 1, &tag); + + for (hp = sp->head; hp; hp = hp->next) { + bufinit(h); + col = &h->tbl.cols[hp->ident]; + SCALE_HS_INIT(&su, col->width); + bufcat_su(h, "width", &su); + PAIR_STYLE_INIT(&tag, h); + print_otag(h, TAG_COL, 1, &tag); + } + + print_otag(h, TAG_TBODY, 0, NULL); +} + +void +print_tblclose(struct html *h) +{ + + assert(h->tblt); + print_tagq(h, h->tblt); + h->tblt = NULL; +} + +void +print_tbl(struct html *h, const struct tbl_span *sp) +{ + const struct tbl_head *hp; + const struct tbl_dat *dp; + struct htmlpair tag; + struct tag *tt; + + /* Inhibit printing of spaces: we do padding ourselves. 
*/ + + if (NULL == h->tblt) + html_tblopen(h, sp); + + assert(h->tblt); + + h->flags |= HTML_NONOSPACE; + h->flags |= HTML_NOSPACE; + + tt = print_otag(h, TAG_TR, 0, NULL); + + switch (sp->pos) { + case (TBL_SPAN_HORIZ): + /* FALLTHROUGH */ + case (TBL_SPAN_DHORIZ): + PAIR_INIT(&tag, ATTR_COLSPAN, "0"); + print_otag(h, TAG_TD, 1, &tag); + break; + default: + dp = sp->first; + for (hp = sp->head; hp; hp = hp->next) { + print_stagq(h, tt); + print_otag(h, TAG_TD, 0, NULL); + + switch (hp->pos) { + case (TBL_HEAD_VERT): + /* FALLTHROUGH */ + case (TBL_HEAD_DVERT): + continue; + case (TBL_HEAD_DATA): + if (NULL == dp) + break; + if (TBL_CELL_DOWN != dp->layout->pos) + if (dp->string) + print_text(h, dp->string); + dp = dp->next; + break; + } + } + break; + } + + print_tagq(h, tt); + + h->flags &= ~HTML_NONOSPACE; + + if (TBL_SPAN_LAST & sp->flags) { + assert(h->tbl.cols); + free(h->tbl.cols); + h->tbl.cols = NULL; + print_tblclose(h); + } + +} diff --git a/usr/src/cmd/mandoc/tbl_layout.c b/usr/src/cmd/mandoc/tbl_layout.c new file mode 100644 index 0000000000..7601f146ca --- /dev/null +++ b/usr/src/cmd/mandoc/tbl_layout.c @@ -0,0 +1,472 @@ +/* $Id: tbl_layout.c,v 1.22 2011/09/18 14:14:15 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +struct tbl_phrase { + char name; + enum tbl_cellt key; +}; + +/* + * FIXME: we can make this parse a lot nicer by, when an error is + * encountered in a layout key, bailing to the next key (i.e. to the + * next whitespace then continuing). + */ + +#define KEYS_MAX 11 + +static const struct tbl_phrase keys[KEYS_MAX] = { + { 'c', TBL_CELL_CENTRE }, + { 'r', TBL_CELL_RIGHT }, + { 'l', TBL_CELL_LEFT }, + { 'n', TBL_CELL_NUMBER }, + { 's', TBL_CELL_SPAN }, + { 'a', TBL_CELL_LONG }, + { '^', TBL_CELL_DOWN }, + { '-', TBL_CELL_HORIZ }, + { '_', TBL_CELL_HORIZ }, + { '=', TBL_CELL_DHORIZ }, + { '|', TBL_CELL_VERT } +}; + +static int mods(struct tbl_node *, struct tbl_cell *, + int, const char *, int *); +static int cell(struct tbl_node *, struct tbl_row *, + int, const char *, int *); +static void row(struct tbl_node *, int, const char *, int *); +static struct tbl_cell *cell_alloc(struct tbl_node *, + struct tbl_row *, enum tbl_cellt); +static void head_adjust(const struct tbl_cell *, + struct tbl_head *); + +static int +mods(struct tbl_node *tbl, struct tbl_cell *cp, + int ln, const char *p, int *pos) +{ + char buf[5]; + int i; + + /* Not all types accept modifiers. 
*/ + + switch (cp->pos) { + case (TBL_CELL_DOWN): + /* FALLTHROUGH */ + case (TBL_CELL_HORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_DHORIZ): + /* FALLTHROUGH */ + case (TBL_CELL_VERT): + /* FALLTHROUGH */ + case (TBL_CELL_DVERT): + return(1); + default: + break; + } + +mod: + /* + * XXX: since, at least for now, modifiers are non-conflicting + * (are separable by value, regardless of position), we let + * modifiers come in any order. The existing tbl doesn't let + * this happen. + */ + switch (p[*pos]) { + case ('\0'): + /* FALLTHROUGH */ + case (' '): + /* FALLTHROUGH */ + case ('\t'): + /* FALLTHROUGH */ + case (','): + /* FALLTHROUGH */ + case ('.'): + return(1); + default: + break; + } + + /* Throw away parenthesised expression. */ + + if ('(' == p[*pos]) { + (*pos)++; + while (p[*pos] && ')' != p[*pos]) + (*pos)++; + if (')' == p[*pos]) { + (*pos)++; + goto mod; + } + mandoc_msg(MANDOCERR_TBLLAYOUT, + tbl->parse, ln, *pos, NULL); + return(0); + } + + /* Parse numerical spacing from modifier string. */ + + if (isdigit((unsigned char)p[*pos])) { + for (i = 0; i < 4; i++) { + if ( ! isdigit((unsigned char)p[*pos + i])) + break; + buf[i] = p[*pos + i]; + } + buf[i] = '\0'; + + /* No greater than 4 digits. */ + + if (4 == i) { + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos, NULL); + return(0); + } + + *pos += i; + cp->spacing = (size_t)atoi(buf); + + goto mod; + /* NOTREACHED */ + } + + /* TODO: GNU has many more extensions. 
*/ + + switch (tolower((unsigned char)p[(*pos)++])) { + case ('z'): + cp->flags |= TBL_CELL_WIGN; + goto mod; + case ('u'): + cp->flags |= TBL_CELL_UP; + goto mod; + case ('e'): + cp->flags |= TBL_CELL_EQUAL; + goto mod; + case ('t'): + cp->flags |= TBL_CELL_TALIGN; + goto mod; + case ('d'): + cp->flags |= TBL_CELL_BALIGN; + goto mod; + case ('w'): /* XXX for now, ignore minimal column width */ + goto mod; + case ('f'): + break; + case ('r'): + /* FALLTHROUGH */ + case ('b'): + /* FALLTHROUGH */ + case ('i'): + (*pos)--; + break; + default: + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos - 1, NULL); + return(0); + } + + switch (tolower((unsigned char)p[(*pos)++])) { + case ('3'): + /* FALLTHROUGH */ + case ('b'): + cp->flags |= TBL_CELL_BOLD; + goto mod; + case ('2'): + /* FALLTHROUGH */ + case ('i'): + cp->flags |= TBL_CELL_ITALIC; + goto mod; + case ('1'): + /* FALLTHROUGH */ + case ('r'): + goto mod; + default: + break; + } + + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos - 1, NULL); + return(0); +} + +static int +cell(struct tbl_node *tbl, struct tbl_row *rp, + int ln, const char *p, int *pos) +{ + int i; + enum tbl_cellt c; + + /* Parse the column position (`r', `R', `|', ...). */ + + for (i = 0; i < KEYS_MAX; i++) + if (tolower((unsigned char)p[*pos]) == keys[i].name) + break; + + if (KEYS_MAX == i) { + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos, NULL); + return(0); + } + + c = keys[i].key; + + /* + * If a span cell is found first, raise a warning and abort the + * parse. If a span cell is found and the last layout element + * isn't a "normal" layout, bail. + * + * FIXME: recover from this somehow? 
+ */ + + if (TBL_CELL_SPAN == c) { + if (NULL == rp->first) { + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos, NULL); + return(0); + } else if (rp->last) + switch (rp->last->pos) { + case (TBL_CELL_VERT): + case (TBL_CELL_DVERT): + case (TBL_CELL_HORIZ): + case (TBL_CELL_DHORIZ): + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, + ln, *pos, NULL); + return(0); + default: + break; + } + } + + /* + * If a vertical spanner is found, we may not be in the first + * row. + */ + + if (TBL_CELL_DOWN == c && rp == tbl->first_row) { + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL); + return(0); + } + + (*pos)++; + + /* Extra check for the double-vertical. */ + + if (TBL_CELL_VERT == c && '|' == p[*pos]) { + (*pos)++; + c = TBL_CELL_DVERT; + } + + /* Disallow adjacent spacers. */ + + if (rp->last && (TBL_CELL_VERT == c || TBL_CELL_DVERT == c) && + (TBL_CELL_VERT == rp->last->pos || + TBL_CELL_DVERT == rp->last->pos)) { + mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL); + return(0); + } + + /* Allocate cell then parse its modifiers. */ + + return(mods(tbl, cell_alloc(tbl, rp, c), ln, p, pos)); +} + + +static void +row(struct tbl_node *tbl, int ln, const char *p, int *pos) +{ + struct tbl_row *rp; + +row: /* + * EBNF describing this section: + * + * row ::= row_list [:space:]* [.]?[\n] + * row_list ::= [:space:]* row_elem row_tail + * row_tail ::= [:space:]*[,] row_list | + * epsilon + * row_elem ::= [\t\ ]*[:alpha:]+ + */ + + rp = mandoc_calloc(1, sizeof(struct tbl_row)); + if (tbl->last_row) { + tbl->last_row->next = rp; + tbl->last_row = rp; + } else + tbl->last_row = tbl->first_row = rp; + +cell: + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* Safely exit layout context. */ + + if ('.' == p[*pos]) { + tbl->part = TBL_PART_DATA; + if (NULL == tbl->first_row) + mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse, + ln, *pos, NULL); + (*pos)++; + return; + } + + /* End (and possibly restart) a row. 
*/ + + if (',' == p[*pos]) { + (*pos)++; + goto row; + } else if ('\0' == p[*pos]) + return; + + if ( ! cell(tbl, rp, ln, p, pos)) + return; + + goto cell; + /* NOTREACHED */ +} + +int +tbl_layout(struct tbl_node *tbl, int ln, const char *p) +{ + int pos; + + pos = 0; + row(tbl, ln, p, &pos); + + /* Always succeed. */ + return(1); +} + +static struct tbl_cell * +cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) +{ + struct tbl_cell *p, *pp; + struct tbl_head *h, *hp; + + p = mandoc_calloc(1, sizeof(struct tbl_cell)); + + if (NULL != (pp = rp->last)) { + rp->last->next = p; + rp->last = p; + } else + rp->last = rp->first = p; + + p->pos = pos; + + /* + * This is a little bit complicated. Here we determine the + * header the corresponds to a cell. We add headers dynamically + * when need be or re-use them, otherwise. As an example, given + * the following: + * + * 1 c || l + * 2 | c | l + * 3 l l + * 3 || c | l |. + * + * We first add the new headers (as there are none) in (1); then + * in (2) we insert the first spanner (as it doesn't match up + * with the header); then we re-use the prior data headers, + * skipping over the spanners; then we re-use everything and add + * a last spanner. Note that VERT headers are made into DVERT + * ones. + */ + + h = pp ? pp->head->next : tbl->first_head; + + if (h) { + /* Re-use data header. */ + if (TBL_HEAD_DATA == h->pos && + (TBL_CELL_VERT != p->pos && + TBL_CELL_DVERT != p->pos)) { + p->head = h; + return(p); + } + + /* Re-use spanner header. */ + if (TBL_HEAD_DATA != h->pos && + (TBL_CELL_VERT == p->pos || + TBL_CELL_DVERT == p->pos)) { + head_adjust(p, h); + p->head = h; + return(p); + } + + /* Right-shift headers with a new spanner. 
*/ + if (TBL_HEAD_DATA == h->pos && + (TBL_CELL_VERT == p->pos || + TBL_CELL_DVERT == p->pos)) { + hp = mandoc_calloc(1, sizeof(struct tbl_head)); + hp->ident = tbl->opts.cols++; + hp->prev = h->prev; + if (h->prev) + h->prev->next = hp; + if (h == tbl->first_head) + tbl->first_head = hp; + h->prev = hp; + hp->next = h; + head_adjust(p, hp); + p->head = hp; + return(p); + } + + if (NULL != (h = h->next)) { + head_adjust(p, h); + p->head = h; + return(p); + } + + /* Fall through to default case... */ + } + + hp = mandoc_calloc(1, sizeof(struct tbl_head)); + hp->ident = tbl->opts.cols++; + + if (tbl->last_head) { + hp->prev = tbl->last_head; + tbl->last_head->next = hp; + tbl->last_head = hp; + } else + tbl->last_head = tbl->first_head = hp; + + head_adjust(p, hp); + p->head = hp; + return(p); +} + +static void +head_adjust(const struct tbl_cell *cellp, struct tbl_head *head) +{ + if (TBL_CELL_VERT != cellp->pos && + TBL_CELL_DVERT != cellp->pos) { + head->pos = TBL_HEAD_DATA; + return; + } + + if (TBL_CELL_VERT == cellp->pos) + if (TBL_HEAD_DVERT != head->pos) + head->pos = TBL_HEAD_VERT; + + if (TBL_CELL_DVERT == cellp->pos) + head->pos = TBL_HEAD_DVERT; +} + diff --git a/usr/src/cmd/mandoc/tbl_opts.c b/usr/src/cmd/mandoc/tbl_opts.c new file mode 100644 index 0000000000..5bd67f80ee --- /dev/null +++ b/usr/src/cmd/mandoc/tbl_opts.c @@ -0,0 +1,270 @@ +/* $Id: tbl_opts.c,v 1.12 2011/09/18 14:14:15 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "libmandoc.h" +#include "libroff.h" + +enum tbl_ident { + KEY_CENTRE = 0, + KEY_DELIM, + KEY_EXPAND, + KEY_BOX, + KEY_DBOX, + KEY_ALLBOX, + KEY_TAB, + KEY_LINESIZE, + KEY_NOKEEP, + KEY_DPOINT, + KEY_NOSPACE, + KEY_FRAME, + KEY_DFRAME, + KEY_MAX +}; + +struct tbl_phrase { + const char *name; + int key; + enum tbl_ident ident; +}; + +/* Handle Commonwealth/American spellings. */ +#define KEY_MAXKEYS 14 + +/* Maximum length of key name string. */ +#define KEY_MAXNAME 13 + +/* Maximum length of key number size. 
*/ +#define KEY_MAXNUMSZ 10 + +static const struct tbl_phrase keys[KEY_MAXKEYS] = { + { "center", TBL_OPT_CENTRE, KEY_CENTRE}, + { "centre", TBL_OPT_CENTRE, KEY_CENTRE}, + { "delim", 0, KEY_DELIM}, + { "expand", TBL_OPT_EXPAND, KEY_EXPAND}, + { "box", TBL_OPT_BOX, KEY_BOX}, + { "doublebox", TBL_OPT_DBOX, KEY_DBOX}, + { "allbox", TBL_OPT_ALLBOX, KEY_ALLBOX}, + { "frame", TBL_OPT_BOX, KEY_FRAME}, + { "doubleframe", TBL_OPT_DBOX, KEY_DFRAME}, + { "tab", 0, KEY_TAB}, + { "linesize", 0, KEY_LINESIZE}, + { "nokeep", TBL_OPT_NOKEEP, KEY_NOKEEP}, + { "decimalpoint", 0, KEY_DPOINT}, + { "nospaces", TBL_OPT_NOSPACE, KEY_NOSPACE}, +}; + +static int arg(struct tbl_node *, int, + const char *, int *, enum tbl_ident); +static void opt(struct tbl_node *, int, + const char *, int *); + +static int +arg(struct tbl_node *tbl, int ln, const char *p, int *pos, enum tbl_ident key) +{ + int i; + char buf[KEY_MAXNUMSZ]; + + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* Arguments always begin with a parenthesis. */ + + if ('(' != p[*pos]) { + mandoc_msg(MANDOCERR_TBL, tbl->parse, + ln, *pos, NULL); + return(0); + } + + (*pos)++; + + /* + * The arguments can be ANY value, so we can't just stop at the + * next close parenthesis (the argument can be a closed + * parenthesis itself). + */ + + switch (key) { + case (KEY_DELIM): + if ('\0' == p[(*pos)++]) { + mandoc_msg(MANDOCERR_TBL, tbl->parse, + ln, *pos - 1, NULL); + return(0); + } + + if ('\0' == p[(*pos)++]) { + mandoc_msg(MANDOCERR_TBL, tbl->parse, + ln, *pos - 1, NULL); + return(0); + } + break; + case (KEY_TAB): + if ('\0' != (tbl->opts.tab = p[(*pos)++])) + break; + + mandoc_msg(MANDOCERR_TBL, tbl->parse, + ln, *pos - 1, NULL); + return(0); + case (KEY_LINESIZE): + for (i = 0; i < KEY_MAXNUMSZ && p[*pos]; i++, (*pos)++) { + buf[i] = p[*pos]; + if ( ! 
isdigit((unsigned char)buf[i])) + break; + } + + if (i < KEY_MAXNUMSZ) { + buf[i] = '\0'; + tbl->opts.linesize = atoi(buf); + break; + } + + mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); + return(0); + case (KEY_DPOINT): + if ('\0' != (tbl->opts.decimal = p[(*pos)++])) + break; + + mandoc_msg(MANDOCERR_TBL, tbl->parse, + ln, *pos - 1, NULL); + return(0); + default: + abort(); + /* NOTREACHED */ + } + + /* End with a close parenthesis. */ + + if (')' == p[(*pos)++]) + return(1); + + mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos - 1, NULL); + return(0); +} + +static void +opt(struct tbl_node *tbl, int ln, const char *p, int *pos) +{ + int i, sv; + char buf[KEY_MAXNAME]; + + /* + * Parse individual options from the stream as surrounded by + * this goto. Each pass through the routine parses out a single + * option and registers it. Option arguments are processed in + * the arg() function. + */ + +again: /* + * EBNF describing this section: + * + * options ::= option_list [:space:]* [;][\n] + * option_list ::= option option_tail + * option_tail ::= [:space:]+ option_list | + * ::= epsilon + * option ::= [:alpha:]+ args + * args ::= [:space:]* [(] [:alpha:]+ [)] + */ + + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* Safe exit point. */ + + if (';' == p[*pos]) + return; + + /* Copy up to first non-alpha character. */ + + for (sv = *pos, i = 0; i < KEY_MAXNAME; i++, (*pos)++) { + buf[i] = (char)tolower((unsigned char)p[*pos]); + if ( ! isalpha((unsigned char)buf[i])) + break; + } + + /* Exit if buffer is empty (or overrun). */ + + if (KEY_MAXNAME == i || 0 == i) { + mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, *pos, NULL); + return; + } + + buf[i] = '\0'; + + while (isspace((unsigned char)p[*pos])) + (*pos)++; + + /* + * Look through all of the available keys to find one that + * matches the input. FIXME: hashtable this. 
+ */ + + for (i = 0; i < KEY_MAXKEYS; i++) { + if (strcmp(buf, keys[i].name)) + continue; + + /* + * Note: this is more difficult to recover from, as we + * can be anywhere in the option sequence and it's + * harder to jump to the next. Meanwhile, just bail out + * of the sequence altogether. + */ + + if (keys[i].key) + tbl->opts.opts |= keys[i].key; + else if ( ! arg(tbl, ln, p, pos, keys[i].ident)) + return; + + break; + } + + /* + * Allow us to recover from bad options by continuing to another + * parse sequence. + */ + + if (KEY_MAXKEYS == i) + mandoc_msg(MANDOCERR_TBLOPT, tbl->parse, ln, sv, NULL); + + goto again; + /* NOTREACHED */ +} + +int +tbl_option(struct tbl_node *tbl, int ln, const char *p) +{ + int pos; + + /* + * Table options are always on just one line, so automatically + * switch into the next input mode here. + */ + tbl->part = TBL_PART_LAYOUT; + + pos = 0; + opt(tbl, ln, p, &pos); + + /* Always succeed. */ + return(1); +} diff --git a/usr/src/cmd/mandoc/tbl_term.c b/usr/src/cmd/mandoc/tbl_term.c new file mode 100644 index 0000000000..f1928f02cb --- /dev/null +++ b/usr/src/cmd/mandoc/tbl_term.c @@ -0,0 +1,444 @@ +/* $Id: tbl_term.c,v 1.21 2011/09/20 23:05:49 schwarze Exp $ */ +/* + * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" + +static size_t term_tbl_len(size_t, void *); +static size_t term_tbl_strlen(const char *, void *); +static void tbl_char(struct termp *, char, size_t); +static void tbl_data(struct termp *, const struct tbl *, + const struct tbl_dat *, + const struct roffcol *); +static size_t tbl_rulewidth(struct termp *, const struct tbl_head *); +static void tbl_hframe(struct termp *, const struct tbl_span *, int); +static void tbl_literal(struct termp *, const struct tbl_dat *, + const struct roffcol *); +static void tbl_number(struct termp *, const struct tbl *, + const struct tbl_dat *, + const struct roffcol *); +static void tbl_hrule(struct termp *, const struct tbl_span *); +static void tbl_vrule(struct termp *, const struct tbl_head *); + + +static size_t +term_tbl_strlen(const char *p, void *arg) +{ + + return(term_strlen((const struct termp *)arg, p)); +} + +static size_t +term_tbl_len(size_t sz, void *arg) +{ + + return(term_len((const struct termp *)arg, sz)); +} + +void +term_tbl(struct termp *tp, const struct tbl_span *sp) +{ + const struct tbl_head *hp; + const struct tbl_dat *dp; + struct roffcol *col; + int spans; + size_t rmargin, maxrmargin; + + rmargin = tp->rmargin; + maxrmargin = tp->maxrmargin; + + tp->rmargin = tp->maxrmargin = TERM_MAXMARGIN; + + /* Inhibit printing of spaces: we do padding ourselves. 
*/ + + tp->flags |= TERMP_NONOSPACE; + tp->flags |= TERMP_NOSPACE; + + /* + * The first time we're invoked for a given table block, + * calculate the table widths and decimal positions. + */ + + if (TBL_SPAN_FIRST & sp->flags) { + term_flushln(tp); + + tp->tbl.len = term_tbl_len; + tp->tbl.slen = term_tbl_strlen; + tp->tbl.arg = tp; + + tblcalc(&tp->tbl, sp); + } + + /* Horizontal frame at the start of boxed tables. */ + + if (TBL_SPAN_FIRST & sp->flags) { + if (TBL_OPT_DBOX & sp->tbl->opts) + tbl_hframe(tp, sp, 1); + if (TBL_OPT_DBOX & sp->tbl->opts || + TBL_OPT_BOX & sp->tbl->opts) + tbl_hframe(tp, sp, 0); + } + + /* Vertical frame at the start of each row. */ + + if (TBL_OPT_BOX & sp->tbl->opts || TBL_OPT_DBOX & sp->tbl->opts) + term_word(tp, TBL_SPAN_HORIZ == sp->pos || + TBL_SPAN_DHORIZ == sp->pos ? "+" : "|"); + + /* + * Now print the actual data itself depending on the span type. + * Spanner spans get a horizontal rule; data spanners have their + * data printed by matching data to header. + */ + + switch (sp->pos) { + case (TBL_SPAN_HORIZ): + /* FALLTHROUGH */ + case (TBL_SPAN_DHORIZ): + tbl_hrule(tp, sp); + break; + case (TBL_SPAN_DATA): + /* Iterate over template headers. */ + dp = sp->first; + spans = 0; + for (hp = sp->head; hp; hp = hp->next) { + /* + * If the current data header is invoked during + * a spanner ("spans" > 0), don't emit anything + * at all. + */ + switch (hp->pos) { + case (TBL_HEAD_VERT): + /* FALLTHROUGH */ + case (TBL_HEAD_DVERT): + if (spans <= 0) + tbl_vrule(tp, hp); + continue; + case (TBL_HEAD_DATA): + break; + } + + if (--spans >= 0) + continue; + + /* + * All cells get a leading blank, except the + * first one and those after double rulers. + */ + + if (hp->prev && TBL_HEAD_DVERT != hp->prev->pos) + tbl_char(tp, ASCII_NBRSP, 1); + + col = &tp->tbl.cols[hp->ident]; + tbl_data(tp, sp->tbl, dp, col); + + /* No trailing blanks. 
*/ + + if (NULL == hp->next) + break; + + /* + * Add another blank between cells, + * or two when there is no vertical ruler. + */ + + tbl_char(tp, ASCII_NBRSP, + TBL_HEAD_VERT == hp->next->pos || + TBL_HEAD_DVERT == hp->next->pos ? 1 : 2); + + /* + * Go to the next data cell and assign the + * number of subsequent spans, if applicable. + */ + + if (dp) { + spans = dp->spans; + dp = dp->next; + } + } + break; + } + + /* Vertical frame at the end of each row. */ + + if (TBL_OPT_BOX & sp->tbl->opts || TBL_OPT_DBOX & sp->tbl->opts) + term_word(tp, TBL_SPAN_HORIZ == sp->pos || + TBL_SPAN_DHORIZ == sp->pos ? "+" : " |"); + term_flushln(tp); + + /* + * If we're the last row, clean up after ourselves: clear the + * existing table configuration and set it to NULL. + */ + + if (TBL_SPAN_LAST & sp->flags) { + if (TBL_OPT_DBOX & sp->tbl->opts || + TBL_OPT_BOX & sp->tbl->opts) + tbl_hframe(tp, sp, 0); + if (TBL_OPT_DBOX & sp->tbl->opts) + tbl_hframe(tp, sp, 1); + assert(tp->tbl.cols); + free(tp->tbl.cols); + tp->tbl.cols = NULL; + } + + tp->flags &= ~TERMP_NONOSPACE; + tp->rmargin = rmargin; + tp->maxrmargin = maxrmargin; + +} + +/* + * Horizontal rules extend across the entire table. + * Calculate the width by iterating over columns. + */ +static size_t +tbl_rulewidth(struct termp *tp, const struct tbl_head *hp) +{ + size_t width; + + width = tp->tbl.cols[hp->ident].width; + if (TBL_HEAD_DATA == hp->pos) { + /* Account for leading blanks. */ + if (hp->prev && TBL_HEAD_DVERT != hp->prev->pos) + width++; + /* Account for trailing blanks. */ + width++; + if (hp->next && + TBL_HEAD_VERT != hp->next->pos && + TBL_HEAD_DVERT != hp->next->pos) + width++; + } + return(width); +} + +/* + * Rules inside the table can be single or double + * and have crossings with vertical rules marked with pluses. 
+ */ +static void +tbl_hrule(struct termp *tp, const struct tbl_span *sp) +{ + const struct tbl_head *hp; + char c; + + c = '-'; + if (TBL_SPAN_DHORIZ == sp->pos) + c = '='; + + for (hp = sp->head; hp; hp = hp->next) + tbl_char(tp, + TBL_HEAD_DATA == hp->pos ? c : '+', + tbl_rulewidth(tp, hp)); +} + +/* + * Rules above and below the table are always single + * and have an additional plus at the beginning and end. + * For double frames, this function is called twice, + * and the outer one does not have crossings. + */ +static void +tbl_hframe(struct termp *tp, const struct tbl_span *sp, int outer) +{ + const struct tbl_head *hp; + + term_word(tp, "+"); + for (hp = sp->head; hp; hp = hp->next) + tbl_char(tp, + outer || TBL_HEAD_DATA == hp->pos ? '-' : '+', + tbl_rulewidth(tp, hp)); + term_word(tp, "+"); + term_flushln(tp); +} + +static void +tbl_data(struct termp *tp, const struct tbl *tbl, + const struct tbl_dat *dp, + const struct roffcol *col) +{ + + if (NULL == dp) { + tbl_char(tp, ASCII_NBRSP, col->width); + return; + } + assert(dp->layout); + + switch (dp->pos) { + case (TBL_DATA_NONE): + tbl_char(tp, ASCII_NBRSP, col->width); + return; + case (TBL_DATA_HORIZ): + /* FALLTHROUGH */ + case (TBL_DATA_NHORIZ): + tbl_char(tp, '-', col->width); + return; + case (TBL_DATA_NDHORIZ): + /* FALLTHROUGH */ + case (TBL_DATA_DHORIZ): + tbl_char(tp, '=', col->width); + return; + default: + break; + } + + switch (dp->layout->pos) { + case (TBL_CELL_HORIZ): + tbl_char(tp, '-', col->width); + break; + case (TBL_CELL_DHORIZ): + tbl_char(tp, '=', col->width); + break; + case (TBL_CELL_LONG): + /* FALLTHROUGH */ + case (TBL_CELL_CENTRE): + /* FALLTHROUGH */ + case (TBL_CELL_LEFT): + /* FALLTHROUGH */ + case (TBL_CELL_RIGHT): + tbl_literal(tp, dp, col); + break; + case (TBL_CELL_NUMBER): + tbl_number(tp, tbl, dp, col); + break; + case (TBL_CELL_DOWN): + tbl_char(tp, ASCII_NBRSP, col->width); + break; + default: + abort(); + /* NOTREACHED */ + } +} + +static void +tbl_vrule(struct 
termp *tp, const struct tbl_head *hp) +{ + + switch (hp->pos) { + case (TBL_HEAD_VERT): + term_word(tp, "|"); + break; + case (TBL_HEAD_DVERT): + term_word(tp, "||"); + break; + default: + break; + } +} + +static void +tbl_char(struct termp *tp, char c, size_t len) +{ + size_t i, sz; + char cp[2]; + + cp[0] = c; + cp[1] = '\0'; + + sz = term_strlen(tp, cp); + + for (i = 0; i < len; i += sz) + term_word(tp, cp); +} + +static void +tbl_literal(struct termp *tp, const struct tbl_dat *dp, + const struct roffcol *col) +{ + size_t len, padl, padr; + + assert(dp->string); + len = term_strlen(tp, dp->string); + padr = col->width > len ? col->width - len : 0; + padl = 0; + + switch (dp->layout->pos) { + case (TBL_CELL_LONG): + padl = term_len(tp, 1); + padr = padr > padl ? padr - padl : 0; + break; + case (TBL_CELL_CENTRE): + if (2 > padr) + break; + padl = padr / 2; + padr -= padl; + break; + case (TBL_CELL_RIGHT): + padl = padr; + padr = 0; + break; + default: + break; + } + + tbl_char(tp, ASCII_NBRSP, padl); + term_word(tp, dp->string); + tbl_char(tp, ASCII_NBRSP, padr); +} + +static void +tbl_number(struct termp *tp, const struct tbl *tbl, + const struct tbl_dat *dp, + const struct roffcol *col) +{ + char *cp; + char buf[2]; + size_t sz, psz, ssz, d, padl; + int i; + + /* + * See calc_data_number(). Left-pad by taking the offset of our + * and the maximum decimal; right-pad by the remaining amount. 
+ */ + + assert(dp->string); + + sz = term_strlen(tp, dp->string); + + buf[0] = tbl->decimal; + buf[1] = '\0'; + + psz = term_strlen(tp, buf); + + if (NULL != (cp = strrchr(dp->string, tbl->decimal))) { + buf[1] = '\0'; + for (ssz = 0, i = 0; cp != &dp->string[i]; i++) { + buf[0] = dp->string[i]; + ssz += term_strlen(tp, buf); + } + d = ssz + psz; + } else + d = sz + psz; + + padl = col->decimal - d; + + tbl_char(tp, ASCII_NBRSP, padl); + term_word(tp, dp->string); + if (col->width > sz + padl) + tbl_char(tp, ASCII_NBRSP, col->width - sz - padl); +} + diff --git a/usr/src/cmd/mandoc/term.c b/usr/src/cmd/mandoc/term.c new file mode 100644 index 0000000000..4ca15ed6fa --- /dev/null +++ b/usr/src/cmd/mandoc/term.c @@ -0,0 +1,736 @@ +/* $Id: term.c,v 1.201 2011/09/21 09:57:13 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc.h" +#include "out.h" +#include "term.h" +#include "main.h" + +static void adjbuf(struct termp *p, int); +static void bufferc(struct termp *, char); +static void encode(struct termp *, const char *, size_t); +static void encode1(struct termp *, int); + +void +term_free(struct termp *p) +{ + + if (p->buf) + free(p->buf); + if (p->symtab) + mchars_free(p->symtab); + + free(p); +} + + +void +term_begin(struct termp *p, term_margin head, + term_margin foot, const void *arg) +{ + + p->headf = head; + p->footf = foot; + p->argf = arg; + (*p->begin)(p); +} + + +void +term_end(struct termp *p) +{ + + (*p->end)(p); +} + +/* + * Flush a line of text. A "line" is loosely defined as being something + * that should be followed by a newline, regardless of whether it's + * broken apart by newlines getting there. A line can also be a + * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does + * not have a trailing newline. + * + * The following flags may be specified: + * + * - TERMP_NOBREAK: this is the most important and is used when making + * columns. In short: don't print a newline and instead expect the + * next call to do the padding up to the start of the next column. + * + * - TERMP_TWOSPACE: make sure there is room for at least two space + * characters of padding. Otherwise, rather break the line. + * + * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and + * the line is overrun, and don't pad-right if it's underrun. + * + * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when + * overrunning, instead save the position and continue at that point + * when the next invocation. 
+ * + * In-line line breaking: + * + * If TERMP_NOBREAK is specified and the line overruns the right + * margin, it will break and pad-right to the right margin after + * writing. If maxrmargin is violated, it will break and continue + * writing from the right-margin, which will lead to the above scenario + * upon exit. Otherwise, the line will break at the right margin. + */ +void +term_flushln(struct termp *p) +{ + int i; /* current input position in p->buf */ + size_t vis; /* current visual position on output */ + size_t vbl; /* number of blanks to prepend to output */ + size_t vend; /* end of word visual position on output */ + size_t bp; /* visual right border position */ + size_t dv; /* temporary for visual pos calculations */ + int j; /* temporary loop index for p->buf */ + int jhy; /* last hyph before overflow w/r/t j */ + size_t maxvis; /* output position of visible boundary */ + size_t mmax; /* used in calculating bp */ + + /* + * First, establish the maximum columns of "visible" content. + * This is usually the difference between the right-margin and + * an indentation, but can be, for tagged lists or columns, a + * small set of values. + */ + assert (p->rmargin >= p->offset); + dv = p->rmargin - p->offset; + maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; + dv = p->maxrmargin - p->offset; + mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; + + bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; + + /* + * Calculate the required amount of padding. + */ + vbl = p->offset + p->overstep > p->viscol ? + p->offset + p->overstep - p->viscol : 0; + + vis = vend = 0; + i = 0; + + while (i < p->col) { + /* + * Handle literal tab characters: collapse all + * subsequent tabs into a single huge set of spaces. + */ + while (i < p->col && '\t' == p->buf[i]) { + vend = (vis / p->tabwidth + 1) * p->tabwidth; + vbl += vend - vis; + vis = vend; + i++; + } + + /* + * Count up visible word characters. 
Control sequences + * (starting with the CSI) aren't counted. A space + * generates a non-printing word, which is valid (the + * space is printed according to regular spacing rules). + */ + + for (j = i, jhy = 0; j < p->col; j++) { + if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) + break; + + /* Back over the the last printed character. */ + if (8 == p->buf[j]) { + assert(j); + vend -= (*p->width)(p, p->buf[j - 1]); + continue; + } + + /* Regular word. */ + /* Break at the hyphen point if we overrun. */ + if (vend > vis && vend < bp && + ASCII_HYPH == p->buf[j]) + jhy = j; + + vend += (*p->width)(p, p->buf[j]); + } + + /* + * Find out whether we would exceed the right margin. + * If so, break to the next line. + */ + if (vend > bp && 0 == jhy && vis > 0) { + vend -= vis; + (*p->endline)(p); + p->viscol = 0; + if (TERMP_NOBREAK & p->flags) { + vbl = p->rmargin; + vend += p->rmargin - p->offset; + } else + vbl = p->offset; + + /* Remove the p->overstep width. */ + + bp += (size_t)p->overstep; + p->overstep = 0; + } + + /* Write out the [remaining] word. */ + for ( ; i < p->col; i++) { + if (vend > bp && jhy > 0 && i > jhy) + break; + if ('\t' == p->buf[i]) + break; + if (' ' == p->buf[i]) { + j = i; + while (' ' == p->buf[i]) + i++; + dv = (size_t)(i - j) * (*p->width)(p, ' '); + vbl += dv; + vend += dv; + break; + } + if (ASCII_NBRSP == p->buf[i]) { + vbl += (*p->width)(p, ' '); + continue; + } + + /* + * Now we definitely know there will be + * printable characters to output, + * so write preceding white space now. 
+ */ + if (vbl) { + (*p->advance)(p, vbl); + p->viscol += vbl; + vbl = 0; + } + + if (ASCII_HYPH == p->buf[i]) { + (*p->letter)(p, '-'); + p->viscol += (*p->width)(p, '-'); + continue; + } + + (*p->letter)(p, p->buf[i]); + if (8 == p->buf[i]) + p->viscol -= (*p->width)(p, p->buf[i-1]); + else + p->viscol += (*p->width)(p, p->buf[i]); + } + vis = vend; + } + + /* + * If there was trailing white space, it was not printed; + * so reset the cursor position accordingly. + */ + if (vis) + vis -= vbl; + + p->col = 0; + p->overstep = 0; + + if ( ! (TERMP_NOBREAK & p->flags)) { + p->viscol = 0; + (*p->endline)(p); + return; + } + + if (TERMP_HANG & p->flags) { + /* We need one blank after the tag. */ + p->overstep = (int)(vis - maxvis + (*p->width)(p, ' ')); + + /* + * Behave exactly the same way as groff: + * If we have overstepped the margin, temporarily move + * it to the right and flag the rest of the line to be + * shorter. + * If we landed right at the margin, be happy. + * If we are one step before the margin, temporarily + * move it one step LEFT and flag the rest of the line + * to be longer. + */ + if (p->overstep < -1) + p->overstep = 0; + return; + + } else if (TERMP_DANGLE & p->flags) + return; + + /* If the column was overrun, break the line. */ + if (maxvis <= vis + + ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) { + (*p->endline)(p); + p->viscol = 0; + } +} + + +/* + * A newline only breaks an existing line; it won't assert vertical + * space. All data in the output buffer is flushed prior to the newline + * assertion. + */ +void +term_newln(struct termp *p) +{ + + p->flags |= TERMP_NOSPACE; + if (p->col || p->viscol) + term_flushln(p); +} + + +/* + * Asserts a vertical space (a full, empty line-break between lines). + * Note that if used twice, this will cause two blank spaces and so on. + * All data in the output buffer is flushed prior to the newline + * assertion. 
+ */ +void +term_vspace(struct termp *p) +{ + + term_newln(p); + p->viscol = 0; + (*p->endline)(p); +} + +void +term_fontlast(struct termp *p) +{ + enum termfont f; + + f = p->fontl; + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + + +void +term_fontrepl(struct termp *p, enum termfont f) +{ + + p->fontl = p->fontq[p->fonti]; + p->fontq[p->fonti] = f; +} + + +void +term_fontpush(struct termp *p, enum termfont f) +{ + + assert(p->fonti + 1 < 10); + p->fontl = p->fontq[p->fonti]; + p->fontq[++p->fonti] = f; +} + + +const void * +term_fontq(struct termp *p) +{ + + return(&p->fontq[p->fonti]); +} + + +enum termfont +term_fonttop(struct termp *p) +{ + + return(p->fontq[p->fonti]); +} + + +void +term_fontpopq(struct termp *p, const void *key) +{ + + while (p->fonti >= 0 && key != &p->fontq[p->fonti]) + p->fonti--; + assert(p->fonti >= 0); +} + + +void +term_fontpop(struct termp *p) +{ + + assert(p->fonti); + p->fonti--; +} + +/* + * Handle pwords, partial words, which may be either a single word or a + * phrase that cannot be broken down (such as a literal string). This + * handles word styling. + */ +void +term_word(struct termp *p, const char *word) +{ + const char *seq, *cp; + char c; + int sz, uc; + size_t ssz; + enum mandoc_esc esc; + + if ( ! (TERMP_NOSPACE & p->flags)) { + if ( ! (TERMP_KEEP & p->flags)) { + if (TERMP_PREKEEP & p->flags) + p->flags |= TERMP_KEEP; + bufferc(p, ' '); + if (TERMP_SENTENCE & p->flags) + bufferc(p, ' '); + } else + bufferc(p, ASCII_NBRSP); + } + + if ( ! 
(p->flags & TERMP_NONOSPACE)) + p->flags &= ~TERMP_NOSPACE; + else + p->flags |= TERMP_NOSPACE; + + p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); + + while ('\0' != *word) { + if ((ssz = strcspn(word, "\\")) > 0) + encode(p, word, ssz); + + word += (int)ssz; + if ('\\' != *word) + continue; + + word++; + esc = mandoc_escape(&word, &seq, &sz); + if (ESCAPE_ERROR == esc) + break; + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + uc = mchars_num2uc(seq + 1, sz - 1); + if ('\0' == uc) + break; + encode1(p, uc); + continue; + case (ESCAPE_SPECIAL): + uc = mchars_spec2cp(p->symtab, seq, sz); + if (uc <= 0) + break; + encode1(p, uc); + continue; + default: + break; + } + + switch (esc) { + case (ESCAPE_UNICODE): + encode1(p, '?'); + break; + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, sz); + if ('\0' != c) + encode(p, &c, 1); + break; + case (ESCAPE_SPECIAL): + cp = mchars_spec2str(p->symtab, seq, sz, &ssz); + if (NULL != cp) + encode(p, cp, ssz); + else if (1 == ssz) + encode(p, seq, sz); + break; + case (ESCAPE_FONTBOLD): + term_fontrepl(p, TERMFONT_BOLD); + break; + case (ESCAPE_FONTITALIC): + term_fontrepl(p, TERMFONT_UNDER); + break; + case (ESCAPE_FONT): + /* FALLTHROUGH */ + case (ESCAPE_FONTROMAN): + term_fontrepl(p, TERMFONT_NONE); + break; + case (ESCAPE_FONTPREV): + term_fontlast(p); + break; + case (ESCAPE_NOSPACE): + if ('\0' == *word) + p->flags |= TERMP_NOSPACE; + break; + default: + break; + } + } +} + +static void +adjbuf(struct termp *p, int sz) +{ + + if (0 == p->maxcols) + p->maxcols = 1024; + while (sz >= p->maxcols) + p->maxcols <<= 2; + + p->buf = mandoc_realloc + (p->buf, sizeof(int) * (size_t)p->maxcols); +} + +static void +bufferc(struct termp *p, char c) +{ + + if (p->col + 1 >= p->maxcols) + adjbuf(p, p->col + 1); + + p->buf[p->col++] = c; +} + +/* + * See encode(). + * Do this for a single (probably unicode) value. + * Does not check for non-decorated glyphs. 
+ */ +static void +encode1(struct termp *p, int c) +{ + enum termfont f; + + if (p->col + 4 >= p->maxcols) + adjbuf(p, p->col + 4); + + f = term_fonttop(p); + + if (TERMFONT_NONE == f) { + p->buf[p->col++] = c; + return; + } else if (TERMFONT_UNDER == f) { + p->buf[p->col++] = '_'; + } else + p->buf[p->col++] = c; + + p->buf[p->col++] = 8; + p->buf[p->col++] = c; +} + +static void +encode(struct termp *p, const char *word, size_t sz) +{ + enum termfont f; + int i, len; + + /* LINTED */ + len = sz; + + /* + * Encode and buffer a string of characters. If the current + * font mode is unset, buffer directly, else encode then buffer + * character by character. + */ + + if (TERMFONT_NONE == (f = term_fonttop(p))) { + if (p->col + len >= p->maxcols) + adjbuf(p, p->col + len); + for (i = 0; i < len; i++) + p->buf[p->col++] = word[i]; + return; + } + + /* Pre-buffer, assuming worst-case. */ + + if (p->col + 1 + (len * 3) >= p->maxcols) + adjbuf(p, p->col + 1 + (len * 3)); + + for (i = 0; i < len; i++) { + if (ASCII_HYPH != word[i] && + ! isgraph((unsigned char)word[i])) { + p->buf[p->col++] = word[i]; + continue; + } + + if (TERMFONT_UNDER == f) + p->buf[p->col++] = '_'; + else if (ASCII_HYPH == word[i]) + p->buf[p->col++] = '-'; + else + p->buf[p->col++] = word[i]; + + p->buf[p->col++] = 8; + p->buf[p->col++] = word[i]; + } +} + +size_t +term_len(const struct termp *p, size_t sz) +{ + + return((*p->width)(p, ' ') * sz); +} + + +size_t +term_strlen(const struct termp *p, const char *cp) +{ + size_t sz, rsz, i; + int ssz, c; + const char *seq, *rhs; + enum mandoc_esc esc; + static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; + + /* + * Account for escaped sequences within string length + * calculations. This follows the logic in term_word() as we + * must calculate the width of produced strings. 
+ */ + + sz = 0; + while ('\0' != *cp) { + rsz = strcspn(cp, rej); + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *cp++); + + c = 0; + switch (*cp) { + case ('\\'): + cp++; + esc = mandoc_escape(&cp, &seq, &ssz); + if (ESCAPE_ERROR == esc) + return(sz); + + if (TERMENC_ASCII != p->enc) + switch (esc) { + case (ESCAPE_UNICODE): + c = mchars_num2uc + (seq + 1, ssz - 1); + if ('\0' == c) + break; + sz += (*p->width)(p, c); + continue; + case (ESCAPE_SPECIAL): + c = mchars_spec2cp + (p->symtab, seq, ssz); + if (c <= 0) + break; + sz += (*p->width)(p, c); + continue; + default: + break; + } + + rhs = NULL; + + switch (esc) { + case (ESCAPE_UNICODE): + sz += (*p->width)(p, '?'); + break; + case (ESCAPE_NUMBERED): + c = mchars_num2char(seq, ssz); + if ('\0' != c) + sz += (*p->width)(p, c); + break; + case (ESCAPE_SPECIAL): + rhs = mchars_spec2str + (p->symtab, seq, ssz, &rsz); + + if (ssz != 1 || rhs) + break; + + rhs = seq; + rsz = ssz; + break; + default: + break; + } + + if (NULL == rhs) + break; + + for (i = 0; i < rsz; i++) + sz += (*p->width)(p, *rhs++); + break; + case (ASCII_NBRSP): + sz += (*p->width)(p, ' '); + cp++; + break; + case (ASCII_HYPH): + sz += (*p->width)(p, '-'); + cp++; + break; + default: + break; + } + } + + return(sz); +} + +/* ARGSUSED */ +size_t +term_vspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + switch (su->unit) { + case (SCALE_CM): + r = su->scale * 2; + break; + case (SCALE_IN): + r = su->scale * 6; + break; + case (SCALE_PC): + r = su->scale; + break; + case (SCALE_PT): + r = su->scale / 8; + break; + case (SCALE_MM): + r = su->scale / 1000; + break; + case (SCALE_VS): + r = su->scale; + break; + default: + r = su->scale - 1; + break; + } + + if (r < 0.0) + r = 0.0; + return(/* LINTED */(size_t) + r); +} + +size_t +term_hspan(const struct termp *p, const struct roffsu *su) +{ + double v; + + v = ((*p->hspan)(p, su)); + if (v < 0.0) + v = 0.0; + return((size_t) /* LINTED */ + v); +} diff --git 
a/usr/src/cmd/mandoc/term.h b/usr/src/cmd/mandoc/term.h new file mode 100644 index 0000000000..56d076e54a --- /dev/null +++ b/usr/src/cmd/mandoc/term.h @@ -0,0 +1,128 @@ +/* $Id: term.h,v 1.90 2011/12/04 23:10:52 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifndef TERM_H +#define TERM_H + +__BEGIN_DECLS + +struct termp; + +enum termenc { + TERMENC_ASCII, + TERMENC_LOCALE, + TERMENC_UTF8 +}; + +enum termtype { + TERMTYPE_CHAR, + TERMTYPE_PS, + TERMTYPE_PDF +}; + +enum termfont { + TERMFONT_NONE = 0, + TERMFONT_BOLD, + TERMFONT_UNDER, + TERMFONT__MAX +}; + +#define TERM_MAXMARGIN 100000 /* FIXME */ + +typedef void (*term_margin)(struct termp *, const void *); + +struct termp_tbl { + int width; /* width in fixed chars */ + int decimal; /* decimal point position */ +}; + +struct termp { + enum termtype type; + struct rofftbl tbl; /* table configuration */ + int mdocstyle; /* imitate mdoc(7) output */ + size_t defindent; /* Default indent for text. */ + size_t defrmargin; /* Right margin of the device. */ + size_t rmargin; /* Current right margin. */ + size_t maxrmargin; /* Max right margin. */ + int maxcols; /* Max size of buf. */ + size_t offset; /* Margin offset. 
*/ + size_t tabwidth; /* Distance of tab positions. */ + int col; /* Bytes in buf. */ + size_t viscol; /* Chars on current line. */ + int overstep; /* See term_flushln(). */ + int flags; +#define TERMP_SENTENCE (1 << 1) /* Space before a sentence. */ +#define TERMP_NOSPACE (1 << 2) /* No space before words. */ +#define TERMP_NOBREAK (1 << 4) /* See term_flushln(). */ +#define TERMP_IGNDELIM (1 << 6) /* Delims like regulars. */ +#define TERMP_NONOSPACE (1 << 7) /* No space (no autounset). */ +#define TERMP_DANGLE (1 << 8) /* See term_flushln(). */ +#define TERMP_HANG (1 << 9) /* See term_flushln(). */ +#define TERMP_TWOSPACE (1 << 10) /* See term_flushln(). */ +#define TERMP_NOSPLIT (1 << 11) /* See termp_an_pre/post(). */ +#define TERMP_SPLIT (1 << 12) /* See termp_an_pre/post(). */ +#define TERMP_ANPREC (1 << 13) /* See termp_an_pre(). */ +#define TERMP_KEEP (1 << 14) /* Keep words together. */ +#define TERMP_PREKEEP (1 << 15) /* ...starting with the next one. */ + int *buf; /* Output buffer. */ + enum termenc enc; /* Type of encoding. */ + struct mchars *symtab; /* Encoded-symbol table. */ + enum termfont fontl; /* Last font set. */ + enum termfont fontq[10]; /* Symmetric fonts. */ + int fonti; /* Index of font stack. 
*/ + term_margin headf; /* invoked to print head */ + term_margin footf; /* invoked to print foot */ + void (*letter)(struct termp *, int); + void (*begin)(struct termp *); + void (*end)(struct termp *); + void (*endline)(struct termp *); + void (*advance)(struct termp *, size_t); + size_t (*width)(const struct termp *, int); + double (*hspan)(const struct termp *, + const struct roffsu *); + const void *argf; /* arg for headf/footf */ + struct termp_ps *ps; +}; + +void term_eqn(struct termp *, const struct eqn *); +void term_tbl(struct termp *, const struct tbl_span *); +void term_free(struct termp *); +void term_newln(struct termp *); +void term_vspace(struct termp *); +void term_word(struct termp *, const char *); +void term_flushln(struct termp *); +void term_begin(struct termp *, term_margin, + term_margin, const void *); +void term_end(struct termp *); + +size_t term_hspan(const struct termp *, + const struct roffsu *); +size_t term_vspan(const struct termp *, + const struct roffsu *); +size_t term_strlen(const struct termp *, const char *); +size_t term_len(const struct termp *, size_t); + +enum termfont term_fonttop(struct termp *); +const void *term_fontq(struct termp *); +void term_fontpush(struct termp *, enum termfont); +void term_fontpop(struct termp *); +void term_fontpopq(struct termp *, const void *); +void term_fontrepl(struct termp *, enum termfont); +void term_fontlast(struct termp *); + +__END_DECLS + +#endif /*!TERM_H*/ diff --git a/usr/src/cmd/mandoc/term_ascii.c b/usr/src/cmd/mandoc/term_ascii.c new file mode 100644 index 0000000000..2f114786f6 --- /dev/null +++ b/usr/src/cmd/mandoc/term_ascii.c @@ -0,0 +1,289 @@ +/* $Id: term_ascii.c,v 1.20 2011/12/04 23:10:52 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission 
notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#ifdef USE_WCHAR +# include <locale.h> +#endif +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#ifdef USE_WCHAR +# include <wchar.h> +#endif + +#include "mandoc.h" +#include "out.h" +#include "term.h" +#include "main.h" + +/* + * Sadly, this doesn't seem to be defined on systems even when they + * support it. For the time being, remove it and let those compiling + * the software decide for themselves what to use. + */ +#if 0 +#if ! 
defined(__STDC_ISO_10646__) +# undef USE_WCHAR +#endif +#endif + +static struct termp *ascii_init(enum termenc, char *); +static double ascii_hspan(const struct termp *, + const struct roffsu *); +static size_t ascii_width(const struct termp *, int); +static void ascii_advance(struct termp *, size_t); +static void ascii_begin(struct termp *); +static void ascii_end(struct termp *); +static void ascii_endline(struct termp *); +static void ascii_letter(struct termp *, int); + +#ifdef USE_WCHAR +static void locale_advance(struct termp *, size_t); +static void locale_endline(struct termp *); +static void locale_letter(struct termp *, int); +static size_t locale_width(const struct termp *, int); +#endif + +/* + * Allocate a termp for character output and install the ascii_*() + * callbacks. When USE_WCHAR is defined, a non-ASCII encoding was + * requested and setlocale() yields a multibyte locale, the locale_*() + * callbacks are installed instead. The "indent", "width" and "mdoc" + * suboptions in outopts override the defaults; an "indent" or "width" + * suboption without a value is ignored. + */ +static struct termp * +ascii_init(enum termenc enc, char *outopts) +{ + const char *toks[4]; + char *v; + struct termp *p; + + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = enc; + + p->tabwidth = 5; + p->defrmargin = 78; + + p->begin = ascii_begin; + p->end = ascii_end; + p->hspan = ascii_hspan; + p->type = TERMTYPE_CHAR; + + p->enc = TERMENC_ASCII; + p->advance = ascii_advance; + p->endline = ascii_endline; + p->letter = ascii_letter; + p->width = ascii_width; + +#ifdef USE_WCHAR + if (TERMENC_ASCII != enc) { + v = TERMENC_LOCALE == enc ? + setlocale(LC_ALL, "") : + setlocale(LC_CTYPE, "UTF-8"); + if (NULL != v && MB_CUR_MAX > 1) { + p->enc = enc; + p->advance = locale_advance; + p->endline = locale_endline; + p->letter = locale_letter; + p->width = locale_width; + } + } +#endif + + toks[0] = "indent"; + toks[1] = "width"; + toks[2] = "mdoc"; + toks[3] = NULL; + + while (outopts && *outopts) + switch (getsubopt(&outopts, UNCONST(toks), &v)) { + case (0): + /* getsubopt() leaves v NULL when no "=value" was given. */ + if (NULL != v) + p->defindent = (size_t)atoi(v); + break; + case (1): + if (NULL != v) + p->defrmargin = (size_t)atoi(v); + break; + case (2): + /* + * Temporary, undocumented mode + * to imitate mdoc(7) output style. + */ + p->mdocstyle = 1; + p->defindent = 5; + break; + default: + break; + } + + /* Enforce a lower boundary. 
*/ + if (p->defrmargin < 58) + p->defrmargin = 58; + + return(p); +} + +void * +ascii_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_ASCII, outopts)); +} + +void * +utf8_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_UTF8, outopts)); +} + + +void * +locale_alloc(char *outopts) +{ + + return(ascii_init(TERMENC_LOCALE, outopts)); +} + +/* ARGSUSED */ +static size_t +ascii_width(const struct termp *p, int c) +{ + + return(1); +} + +void +ascii_free(void *arg) +{ + + term_free((struct termp *)arg); +} + +/* ARGSUSED */ +static void +ascii_letter(struct termp *p, int c) +{ + + putchar(c); +} + +static void +ascii_begin(struct termp *p) +{ + + (*p->headf)(p, p->argf); +} + +static void +ascii_end(struct termp *p) +{ + + (*p->footf)(p, p->argf); +} + +/* ARGSUSED */ +static void +ascii_endline(struct termp *p) +{ + + putchar('\n'); +} + +/* ARGSUSED */ +static void +ascii_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putchar(' '); +} + +/* ARGSUSED */ +static double +ascii_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + /* + * Approximate based on character width. These are generated + * entirely by eyeballing the screen, but appear to be correct. + */ + + switch (su->unit) { + case (SCALE_CM): + r = 4 * su->scale; + break; + case (SCALE_IN): + r = 10 * su->scale; + break; + case (SCALE_PC): + r = (10 * su->scale) / 6; + break; + case (SCALE_PT): + r = (10 * su->scale) / 72; + break; + case (SCALE_MM): + r = su->scale / 1000; + break; + case (SCALE_VS): + r = su->scale * 2 - 1; + break; + default: + r = su->scale; + break; + } + + return(r); +} + +#ifdef USE_WCHAR +/* ARGSUSED */ +static size_t +locale_width(const struct termp *p, int c) +{ + int rc; + + return((rc = wcwidth(c)) < 0 ? 
0 : rc); +} + +/* ARGSUSED */ +static void +locale_advance(struct termp *p, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) + putwchar(L' '); +} + +/* ARGSUSED */ +static void +locale_endline(struct termp *p) +{ + + putwchar(L'\n'); +} + +/* ARGSUSED */ +static void +locale_letter(struct termp *p, int c) +{ + + putwchar(c); +} +#endif diff --git a/usr/src/cmd/mandoc/term_ps.c b/usr/src/cmd/mandoc/term_ps.c new file mode 100644 index 0000000000..e8a906858a --- /dev/null +++ b/usr/src/cmd/mandoc/term_ps.c @@ -0,0 +1,1185 @@ +/* $Id: term_ps.c,v 1.54 2011/10/16 12:20:34 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/types.h> + +#include <assert.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "mandoc.h" +#include "out.h" +#include "main.h" +#include "term.h" + +/* These work the buffer used by the header and footer. */ +#define PS_BUFSLOP 128 + +/* Convert PostScript point "x" to an AFM unit. 
*/ +#define PNT2AFM(p, x) /* LINTED */ \ + (size_t)((double)(x) * (1000.0 / (double)(p)->ps->scale)) + +/* Convert an AFM unit "x" to PostScript points. */ +#define AFM2PNT(p, x) /* LINTED */ \ + ((double)(x) / (1000.0 / (double)(p)->ps->scale)) + +struct glyph { + unsigned short wx; /* WX in AFM */ +}; + +struct font { + const char *name; /* FontName in AFM */ +#define MAXCHAR 95 /* total characters we can handle */ + struct glyph gly[MAXCHAR]; /* glyph metrics */ +}; + +struct termp_ps { + int flags; +#define PS_INLINE (1 << 0) /* we're in a word */ +#define PS_MARGINS (1 << 1) /* we're in the margins */ +#define PS_NEWPAGE (1 << 2) /* new page, no words yet */ + size_t pscol; /* visible column (AFM units) */ + size_t psrow; /* visible row (AFM units) */ + char *psmarg; /* margin buf */ + size_t psmargsz; /* margin buf size */ + size_t psmargcur; /* cur index in margin buf */ + char last; /* character buffer */ + enum termfont lastf; /* last set font */ + size_t scale; /* font scaling factor */ + size_t pages; /* number of pages shown */ + size_t lineheight; /* line height (AFM units) */ + size_t top; /* body top (AFM units) */ + size_t bottom; /* body bottom (AFM units) */ + size_t height; /* page height (AFM units) */ + size_t width; /* page width (AFM units) */ + size_t left; /* body left (AFM units) */ + size_t header; /* header pos (AFM units) */ + size_t footer; /* footer pos (AFM units) */ + size_t pdfbytes; /* current output byte */ + size_t pdflastpg; /* byte of last page mark */ + size_t pdfbody; /* start of body object */ + size_t *pdfobjs; /* table of object offsets */ + size_t pdfobjsz; /* size of pdfobjs */ +}; + +static double ps_hspan(const struct termp *, + const struct roffsu *); +static size_t ps_width(const struct termp *, int); +static void ps_advance(struct termp *, size_t); +static void ps_begin(struct termp *); +static void ps_closepage(struct termp *); +static void ps_end(struct termp *); +static void ps_endline(struct termp *); +static 
void ps_fclose(struct termp *); +static void ps_growbuf(struct termp *, size_t); +static void ps_letter(struct termp *, int); +static void ps_pclose(struct termp *); +static void ps_pletter(struct termp *, int); +static void ps_printf(struct termp *, const char *, ...); +static void ps_putchar(struct termp *, char); +static void ps_setfont(struct termp *, enum termfont); +static struct termp *pspdf_alloc(char *); +static void pdf_obj(struct termp *, size_t); + +/* + * We define, for the time being, three fonts: bold, oblique/italic, and + * normal (roman). The following table hard-codes the font metrics for + * ASCII, i.e., 32--127. + */ + +static const struct font fonts[TERMFONT__MAX] = { + { "Times-Roman", { + { 250 }, + { 333 }, + { 408 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 564 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 278 }, + { 278 }, + { 564 }, + { 564 }, + { 564 }, + { 444 }, + { 921 }, + { 722 }, + { 667 }, + { 667 }, + { 722 }, + { 611 }, + { 556 }, + { 722 }, + { 722 }, + { 333 }, + { 389 }, + { 722 }, + { 611 }, + { 889 }, + { 722 }, + { 722 }, + { 556 }, + { 722 }, + { 667 }, + { 556 }, + { 611 }, + { 722 }, + { 722 }, + { 944 }, + { 722 }, + { 722 }, + { 611 }, + { 333 }, + { 278 }, + { 333 }, + { 469 }, + { 500 }, + { 333 }, + { 444 }, + { 500 }, + { 444 }, + { 500}, + { 444}, + { 333}, + { 500}, + { 500}, + { 278}, + { 278}, + { 500}, + { 278}, + { 778}, + { 500}, + { 500}, + { 500}, + { 500}, + { 333}, + { 389}, + { 278}, + { 500}, + { 500}, + { 722}, + { 500}, + { 500}, + { 444}, + { 480}, + { 200}, + { 480}, + { 541}, + } }, + { "Times-Bold", { + { 250 }, + { 333 }, + { 555 }, + { 500 }, + { 500 }, + { 1000 }, + { 833 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 570 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + 
{ 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 570 }, + { 570 }, + { 570 }, + { 500 }, + { 930 }, + { 722 }, + { 667 }, + { 722 }, + { 722 }, + { 667 }, + { 611 }, + { 778 }, + { 778 }, + { 389 }, + { 500 }, + { 778 }, + { 667 }, + { 944 }, + { 722 }, + { 778 }, + { 611 }, + { 778 }, + { 722 }, + { 556 }, + { 667 }, + { 722 }, + { 722 }, + { 1000 }, + { 722 }, + { 722 }, + { 667 }, + { 333 }, + { 278 }, + { 333 }, + { 581 }, + { 500 }, + { 333 }, + { 500 }, + { 556 }, + { 444 }, + { 556 }, + { 444 }, + { 333 }, + { 500 }, + { 556 }, + { 278 }, + { 333 }, + { 556 }, + { 278 }, + { 833 }, + { 556 }, + { 500 }, + { 556 }, + { 556 }, + { 444 }, + { 389 }, + { 333 }, + { 556 }, + { 500 }, + { 722 }, + { 500 }, + { 500 }, + { 444 }, + { 394 }, + { 220 }, + { 394 }, + { 520 }, + } }, + { "Times-Italic", { + { 250 }, + { 333 }, + { 420 }, + { 500 }, + { 500 }, + { 833 }, + { 778 }, + { 333 }, + { 333 }, + { 333 }, + { 500 }, + { 675 }, + { 250 }, + { 333 }, + { 250 }, + { 278 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 333 }, + { 333 }, + { 675 }, + { 675 }, + { 675 }, + { 500 }, + { 920 }, + { 611 }, + { 611 }, + { 667 }, + { 722 }, + { 611 }, + { 611 }, + { 722 }, + { 722 }, + { 333 }, + { 444 }, + { 667 }, + { 556 }, + { 833 }, + { 667 }, + { 722 }, + { 611 }, + { 722 }, + { 611 }, + { 500 }, + { 556 }, + { 722 }, + { 611 }, + { 833 }, + { 611 }, + { 556 }, + { 556 }, + { 389 }, + { 278 }, + { 389 }, + { 422 }, + { 500 }, + { 333 }, + { 500 }, + { 500 }, + { 444 }, + { 500 }, + { 444 }, + { 278 }, + { 500 }, + { 500 }, + { 278 }, + { 278 }, + { 444 }, + { 278 }, + { 722 }, + { 500 }, + { 500 }, + { 500 }, + { 500 }, + { 389 }, + { 389 }, + { 278 }, + { 500 }, + { 444 }, + { 667 }, + { 444 }, + { 444 }, + { 389 }, + { 400 }, + { 275 }, + { 400 }, + { 541 }, + } }, +}; + +void * +pdf_alloc(char *outopts) +{ + struct termp *p; + + if (NULL != (p = pspdf_alloc(outopts))) 
+ p->type = TERMTYPE_PDF; + + return(p); +} + +void * +ps_alloc(char *outopts) +{ + struct termp *p; + + if (NULL != (p = pspdf_alloc(outopts))) + p->type = TERMTYPE_PS; + + return(p); +} + +static struct termp * +pspdf_alloc(char *outopts) +{ + struct termp *p; + unsigned int pagex, pagey; + size_t marginx, marginy, lineheight; + const char *toks[2]; + const char *pp; + char *v; + + p = mandoc_calloc(1, sizeof(struct termp)); + p->enc = TERMENC_ASCII; + p->ps = mandoc_calloc(1, sizeof(struct termp_ps)); + + p->advance = ps_advance; + p->begin = ps_begin; + p->end = ps_end; + p->endline = ps_endline; + p->hspan = ps_hspan; + p->letter = ps_letter; + p->width = ps_width; + + toks[0] = "paper"; + toks[1] = NULL; + + pp = NULL; + + while (outopts && *outopts) + switch (getsubopt(&outopts, UNCONST(toks), &v)) { + case (0): + pp = v; + break; + default: + break; + } + + /* Default to US letter (millimetres). */ + + pagex = 216; + pagey = 279; + + /* + * The ISO-269 paper sizes can be calculated automatically, but + * it would require bringing in -lm for pow() and I'd rather not + * do that. So just do it the easy way for now. Since this + * only happens once, I'm not terribly concerned. + */ + + if (pp && strcasecmp(pp, "letter")) { + if (0 == strcasecmp(pp, "a3")) { + pagex = 297; + pagey = 420; + } else if (0 == strcasecmp(pp, "a4")) { + pagex = 210; + pagey = 297; + } else if (0 == strcasecmp(pp, "a5")) { + pagex = 148; + pagey = 210; + } else if (0 == strcasecmp(pp, "legal")) { + pagex = 216; + pagey = 356; + } else if (2 != sscanf(pp, "%ux%u", &pagex, &pagey)) + fprintf(stderr, "%s: Unknown paper\n", pp); + } + + /* + * This MUST be defined before any PNT2AFM or AFM2PNT + * calculations occur. + */ + + p->ps->scale = 11; + + /* Remember millimetres -> AFM units. */ + + pagex = PNT2AFM(p, ((double)pagex * 2.834)); + pagey = PNT2AFM(p, ((double)pagey * 2.834)); + + /* Margins are 1/9 the page x and y. 
*/ + + marginx = /* LINTED */ + (size_t)((double)pagex / 9.0); + marginy = /* LINTED */ + (size_t)((double)pagey / 9.0); + + /* Line-height is 1.4em. */ + + lineheight = PNT2AFM(p, ((double)p->ps->scale * 1.4)); + + p->ps->width = (size_t)pagex; + p->ps->height = (size_t)pagey; + p->ps->header = pagey - (marginy / 2) - (lineheight / 2); + p->ps->top = pagey - marginy; + p->ps->footer = (marginy / 2) - (lineheight / 2); + p->ps->bottom = marginy; + p->ps->left = marginx; + p->ps->lineheight = lineheight; + + p->defrmargin = pagex - (marginx * 2); + return(p); +} + + +void +pspdf_free(void *arg) +{ + struct termp *p; + + p = (struct termp *)arg; + + if (p->ps->psmarg) + free(p->ps->psmarg); + if (p->ps->pdfobjs) + free(p->ps->pdfobjs); + + free(p->ps); + term_free(p); +} + + +static void +ps_printf(struct termp *p, const char *fmt, ...) +{ + va_list ap; + int pos, len; + + va_start(ap, fmt); + + /* + * If we're running in regular mode, then pipe directly into + * vprintf(). If we're processing margins, then push the data + * into our growable margin buffer. + */ + + if ( ! (PS_MARGINS & p->ps->flags)) { + len = vprintf(fmt, ap); + va_end(ap); + p->ps->pdfbytes += /* LINTED */ + len < 0 ? 0 : (size_t)len; + return; + } + + /* + * XXX: I assume that the in-margin print won't exceed + * PS_BUFSLOP (128 bytes), which is reasonable but still an + * assumption that will cause pukeage if it's not the case. + */ + + ps_growbuf(p, PS_BUFSLOP); + + pos = (int)p->ps->psmargcur; + vsnprintf(&p->ps->psmarg[pos], PS_BUFSLOP, fmt, ap); + + va_end(ap); + + p->ps->psmargcur = strlen(p->ps->psmarg); +} + + +static void +ps_putchar(struct termp *p, char c) +{ + int pos; + + /* See ps_printf(). */ + + if ( ! 
(PS_MARGINS & p->ps->flags)) { + /* LINTED */ + putchar(c); + p->ps->pdfbytes++; + return; + } + + ps_growbuf(p, 2); + + pos = (int)p->ps->psmargcur++; + p->ps->psmarg[pos++] = c; + p->ps->psmarg[pos] = '\0'; +} + + +static void +pdf_obj(struct termp *p, size_t obj) +{ + + assert(obj > 0); + + if ((obj - 1) >= p->ps->pdfobjsz) { + p->ps->pdfobjsz = obj + 128; + p->ps->pdfobjs = realloc + (p->ps->pdfobjs, + p->ps->pdfobjsz * sizeof(size_t)); + if (NULL == p->ps->pdfobjs) { + perror(NULL); + exit((int)MANDOCLEVEL_SYSERR); + } + } + + p->ps->pdfobjs[(int)obj - 1] = p->ps->pdfbytes; + ps_printf(p, "%zu 0 obj\n", obj); +} + + +static void +ps_closepage(struct termp *p) +{ + int i; + size_t len, base; + + /* + * Close out a page that we've already flushed to output. In + * PostScript, we simply note that the page must be showed. In + * PDF, we must now create the Length, Resource, and Page node + * for the page contents. + */ + + assert(p->ps->psmarg && p->ps->psmarg[0]); + ps_printf(p, "%s", p->ps->psmarg); + + if (TERMTYPE_PS != p->type) { + ps_printf(p, "ET\n"); + + len = p->ps->pdfbytes - p->ps->pdflastpg; + base = p->ps->pages * 4 + p->ps->pdfbody; + + ps_printf(p, "endstream\nendobj\n"); + + /* Length of content. */ + pdf_obj(p, base + 1); + ps_printf(p, "%zu\nendobj\n", len); + + /* Resource for content. */ + pdf_obj(p, base + 2); + ps_printf(p, "<<\n/ProcSet [/PDF /Text]\n"); + ps_printf(p, "/Font <<\n"); + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, "/F%d %d 0 R\n", i, 3 + i); + ps_printf(p, ">>\n>>\n"); + + /* Page node. */ + pdf_obj(p, base + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Page\n"); + ps_printf(p, "/Parent 2 0 R\n"); + ps_printf(p, "/Resources %zu 0 R\n", base + 2); + ps_printf(p, "/Contents %zu 0 R\n", base); + ps_printf(p, ">>\nendobj\n"); + } else + ps_printf(p, "showpage\n"); + + p->ps->pages++; + p->ps->psrow = p->ps->top; + assert( ! 
(PS_NEWPAGE & p->ps->flags)); + p->ps->flags |= PS_NEWPAGE; +} + + +/* ARGSUSED */ +static void +ps_end(struct termp *p) +{ + size_t i, xref, base; + + /* + * At the end of the file, do one last showpage. This is the + * same behaviour as groff(1) and works for multiple pages as + * well as just one. + */ + + if ( ! (PS_NEWPAGE & p->ps->flags)) { + assert(0 == p->ps->flags); + assert('\0' == p->ps->last); + ps_closepage(p); + } + + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Trailer\n"); + ps_printf(p, "%%%%Pages: %zu\n", p->ps->pages); + ps_printf(p, "%%%%EOF\n"); + return; + } + + pdf_obj(p, 2); + ps_printf(p, "<<\n/Type /Pages\n"); + ps_printf(p, "/MediaBox [0 0 %zu %zu]\n", + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); + + ps_printf(p, "/Count %zu\n", p->ps->pages); + ps_printf(p, "/Kids ["); + + for (i = 0; i < p->ps->pages; i++) + ps_printf(p, " %zu 0 R", i * 4 + + p->ps->pdfbody + 3); + + base = (p->ps->pages - 1) * 4 + + p->ps->pdfbody + 4; + + ps_printf(p, "]\n>>\nendobj\n"); + pdf_obj(p, base); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Catalog\n"); + ps_printf(p, "/Pages 2 0 R\n"); + ps_printf(p, ">>\n"); + xref = p->ps->pdfbytes; + ps_printf(p, "xref\n"); + ps_printf(p, "0 %zu\n", base + 1); + ps_printf(p, "0000000000 65535 f \n"); + + for (i = 0; i < base; i++) + ps_printf(p, "%.10zu 00000 n \n", + p->ps->pdfobjs[(int)i]); + + ps_printf(p, "trailer\n"); + ps_printf(p, "<<\n"); + ps_printf(p, "/Size %zu\n", base + 1); + ps_printf(p, "/Root %zu 0 R\n", base); + ps_printf(p, "/Info 1 0 R\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "startxref\n"); + ps_printf(p, "%zu\n", xref); + ps_printf(p, "%%%%EOF\n"); +} + + +static void +ps_begin(struct termp *p) +{ + time_t t; + int i; + + /* + * Print margins into margin buffer. Nothing gets output to the + * screen yet, so we don't need to initialise the primary state. 
+ */ + + if (p->ps->psmarg) { + assert(p->ps->psmargsz); + p->ps->psmarg[0] = '\0'; + } + + /*p->ps->pdfbytes = 0;*/ + p->ps->psmargcur = 0; + p->ps->flags = PS_MARGINS; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->header; + + ps_setfont(p, TERMFONT_NONE); + + (*p->headf)(p, p->argf); + (*p->endline)(p); + + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->footer; + + (*p->footf)(p, p->argf); + (*p->endline)(p); + + p->ps->flags &= ~PS_MARGINS; + + assert(0 == p->ps->flags); + assert(p->ps->psmarg); + assert('\0' != p->ps->psmarg[0]); + + /* + * Print header and initialise page state. Following this, + * stuff gets printed to the screen, so make sure we're sane. + */ + + t = time(NULL); + + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%!PS-Adobe-3.0\n"); + ps_printf(p, "%%%%CreationDate: %s", ctime(&t)); + ps_printf(p, "%%%%DocumentData: Clean7Bit\n"); + ps_printf(p, "%%%%Orientation: Portrait\n"); + ps_printf(p, "%%%%Pages: (atend)\n"); + ps_printf(p, "%%%%PageOrder: Ascend\n"); + ps_printf(p, "%%%%DocumentMedia: " + "Default %zu %zu 0 () ()\n", + (size_t)AFM2PNT(p, p->ps->width), + (size_t)AFM2PNT(p, p->ps->height)); + ps_printf(p, "%%%%DocumentNeededResources: font"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) + ps_printf(p, " %s", fonts[i].name); + + ps_printf(p, "\n%%%%EndComments\n"); + } else { + ps_printf(p, "%%PDF-1.1\n"); + pdf_obj(p, 1); + ps_printf(p, "<<\n"); + ps_printf(p, ">>\n"); + ps_printf(p, "endobj\n"); + + for (i = 0; i < (int)TERMFONT__MAX; i++) { + pdf_obj(p, (size_t)i + 3); + ps_printf(p, "<<\n"); + ps_printf(p, "/Type /Font\n"); + ps_printf(p, "/Subtype /Type1\n"); + /* i is an int, so it is formatted with %d rather than %zu. */ + ps_printf(p, "/Name /F%d\n", i); + ps_printf(p, "/BaseFont /%s\n", fonts[i].name); + ps_printf(p, ">>\n"); + } + } + + p->ps->pdfbody = (size_t)TERMFONT__MAX + 3; + p->ps->pscol = p->ps->left; + p->ps->psrow = p->ps->top; + p->ps->flags |= PS_NEWPAGE; + ps_setfont(p, TERMFONT_NONE); +} + + +static void +ps_pletter(struct termp *p, int c) +{ + int f; + + /* + * 
If we haven't opened a page context, then output that we're + * in a new page and make sure the font is correctly set. + */ + + if (PS_NEWPAGE & p->ps->flags) { + if (TERMTYPE_PS == p->type) { + ps_printf(p, "%%%%Page: %zu %zu\n", + p->ps->pages + 1, + p->ps->pages + 1); + ps_printf(p, "/%s %zu selectfont\n", + fonts[(int)p->ps->lastf].name, + p->ps->scale); + } else { + pdf_obj(p, p->ps->pdfbody + + p->ps->pages * 4); + ps_printf(p, "<<\n"); + ps_printf(p, "/Length %zu 0 R\n", + p->ps->pdfbody + 1 + + p->ps->pages * 4); + ps_printf(p, ">>\nstream\n"); + } + p->ps->pdflastpg = p->ps->pdfbytes; + p->ps->flags &= ~PS_NEWPAGE; + } + + /* + * If we're not in a PostScript "word" context, then open one + * now at the current cursor. + */ + + if ( ! (PS_INLINE & p->ps->flags)) { + if (TERMTYPE_PS != p->type) { + ps_printf(p, "BT\n/F%d %zu Tf\n", + (int)p->ps->lastf, + p->ps->scale); + ps_printf(p, "%.3f %.3f Td\n(", + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + } else + ps_printf(p, "%.3f %.3f moveto\n(", + AFM2PNT(p, p->ps->pscol), + AFM2PNT(p, p->ps->psrow)); + p->ps->flags |= PS_INLINE; + } + + assert( ! (PS_NEWPAGE & p->ps->flags)); + + /* + * We need to escape these characters as per the PostScript + * specification. We would also escape non-graphable characters + * (like tabs), but none of them would get to this point and + * it's superfluous to abort() on them. + */ + + switch (c) { + case ('('): + /* FALLTHROUGH */ + case (')'): + /* FALLTHROUGH */ + case ('\\'): + ps_putchar(p, '\\'); + break; + default: + break; + } + + /* Write the character and adjust where we are on the page. 
*/ + + f = (int)p->ps->lastf; + + if (c <= 32 || (c - 32 >= MAXCHAR)) { + ps_putchar(p, ' '); + p->ps->pscol += (size_t)fonts[f].gly[0].wx; + return; + } + + ps_putchar(p, (char)c); + c -= 32; + p->ps->pscol += (size_t)fonts[f].gly[c].wx; +} + + +static void +ps_pclose(struct termp *p) +{ + + /* + * Spit out that we're exiting a word context (this is a + * "partial close" because we don't check the last-char buffer + * or anything). + */ + + if ( ! (PS_INLINE & p->ps->flags)) + return; + + if (TERMTYPE_PS != p->type) { + ps_printf(p, ") Tj\nET\n"); + } else + ps_printf(p, ") show\n"); + + p->ps->flags &= ~PS_INLINE; +} + + +static void +ps_fclose(struct termp *p) +{ + + /* + * Strong closure: if we have a last-char, spit it out after + * checking that we're in the right font mode. This will of + * course open a new scope, if applicable. + * + * Following this, close out any scope that's open. + */ + + if ('\0' != p->ps->last) { + if (p->ps->lastf != TERMFONT_NONE) { + ps_pclose(p); + ps_setfont(p, TERMFONT_NONE); + } + ps_pletter(p, p->ps->last); + p->ps->last = '\0'; + } + + if ( ! (PS_INLINE & p->ps->flags)) + return; + + ps_pclose(p); +} + + +static void +ps_letter(struct termp *p, int arg) +{ + char cc, c; + + /* LINTED */ + c = arg >= 128 || arg <= 0 ? '?' : arg; + + /* + * State machine dictates whether to buffer the last character + * or not. Basically, encoded words are detected by checking if + * we're an "8" and switching on the buffer. Then we put "8" in + * our buffer, and on the next character, flush both character + * and buffer. Thus, "regular" words are detected by having a + * regular character and a regular buffer character. 
+ */ + + if ('\0' == p->ps->last) { + assert(8 != c); + p->ps->last = c; + return; + } else if (8 == p->ps->last) { + assert(8 != c); + p->ps->last = '\0'; + } else if (8 == c) { + assert(8 != p->ps->last); + if ('_' == p->ps->last) { + if (p->ps->lastf != TERMFONT_UNDER) { + ps_pclose(p); + ps_setfont(p, TERMFONT_UNDER); + } + } else if (p->ps->lastf != TERMFONT_BOLD) { + ps_pclose(p); + ps_setfont(p, TERMFONT_BOLD); + } + p->ps->last = c; + return; + } else { + if (p->ps->lastf != TERMFONT_NONE) { + ps_pclose(p); + ps_setfont(p, TERMFONT_NONE); + } + cc = p->ps->last; + p->ps->last = c; + c = cc; + } + + ps_pletter(p, c); +} + + +static void +ps_advance(struct termp *p, size_t len) +{ + + /* + * Advance some spaces. This can probably be made smarter, + * i.e., to have multiple space-separated words in the same + * scope, but this is easier: just close out the current scope + * and readjust our column settings. + */ + + ps_fclose(p); + p->ps->pscol += len; +} + + +static void +ps_endline(struct termp *p) +{ + + /* Close out any scopes we have open: we're at eoln. */ + + ps_fclose(p); + + /* + * If we're in the margin, don't try to recalculate our current + * row. XXX: if the column tries to be fancy with multiple + * lines, we'll do nasty stuff. + */ + + if (PS_MARGINS & p->ps->flags) + return; + + /* Left-justify. */ + + p->ps->pscol = p->ps->left; + + /* If we haven't printed anything, return. */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + /* + * Put us down a line. If we're at the page bottom, spit out a + * showpage and restart our row. + */ + + if (p->ps->psrow >= p->ps->lineheight + + p->ps->bottom) { + p->ps->psrow -= p->ps->lineheight; + return; + } + + ps_closepage(p); +} + + +static void +ps_setfont(struct termp *p, enum termfont f) +{ + + assert(f < TERMFONT__MAX); + p->ps->lastf = f; + + /* + * If we're still at the top of the page, let the font-setting + * be delayed until we actually have stuff to print. 
+ */ + + if (PS_NEWPAGE & p->ps->flags) + return; + + if (TERMTYPE_PS == p->type) + ps_printf(p, "/%s %zu selectfont\n", + fonts[(int)f].name, + p->ps->scale); + else + ps_printf(p, "/F%d %zu Tf\n", + (int)f, + p->ps->scale); +} + + +/* ARGSUSED */ +static size_t +ps_width(const struct termp *p, int c) +{ + + if (c <= 32 || c - 32 >= MAXCHAR) + return((size_t)fonts[(int)TERMFONT_NONE].gly[0].wx); + + c -= 32; + return((size_t)fonts[(int)TERMFONT_NONE].gly[c].wx); +} + + +static double +ps_hspan(const struct termp *p, const struct roffsu *su) +{ + double r; + + /* + * All of these measurements are derived by converting from the + * native measurement to AFM units. Absolute units are first + * expressed in PostScript points (72 per inch, 12 per pica) and + * then passed through PNT2AFM(). + */ + + switch (su->unit) { + case (SCALE_CM): + r = PNT2AFM(p, su->scale * 28.34); + break; + case (SCALE_IN): + r = PNT2AFM(p, su->scale * 72); + break; + case (SCALE_PC): + r = PNT2AFM(p, su->scale * 12); + break; + case (SCALE_PT): + /* A point is already the unit PNT2AFM() expects. */ + r = PNT2AFM(p, su->scale); + break; + case (SCALE_EM): + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[109 - 32].wx; + break; + case (SCALE_MM): + r = PNT2AFM(p, su->scale * 2.834); + break; + case (SCALE_EN): + r = su->scale * + fonts[(int)TERMFONT_NONE].gly[110 - 32].wx; + break; + case (SCALE_VS): + r = su->scale * p->ps->lineheight; + break; + default: + r = su->scale; + break; + } + + return(r); +} + +static void +ps_growbuf(struct termp *p, size_t sz) +{ + if (p->ps->psmargcur + sz <= p->ps->psmargsz) + return; + + if (sz < PS_BUFSLOP) + sz = PS_BUFSLOP; + + p->ps->psmargsz += sz; + + p->ps->psmarg = mandoc_realloc + (p->ps->psmarg, p->ps->psmargsz); +} + diff --git a/usr/src/cmd/mandoc/tree.c b/usr/src/cmd/mandoc/tree.c new file mode 100644 index 0000000000..1430c737e0 --- /dev/null +++ b/usr/src/cmd/mandoc/tree.c @@ -0,0 +1,349 @@ +/* $Id: tree.c,v 1.47 2011/09/18 14:14:15 schwarze Exp $ */ +/* + * Copyright (c) 2008, 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose 
with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "mandoc.h" +#include "mdoc.h" +#include "man.h" +#include "main.h" + +static void print_box(const struct eqn_box *, int); +static void print_man(const struct man_node *, int); +static void print_mdoc(const struct mdoc_node *, int); +static void print_span(const struct tbl_span *, int); + + +/* ARGSUSED */ +void +tree_mdoc(void *arg, const struct mdoc *mdoc) +{ + + print_mdoc(mdoc_node(mdoc), 0); +} + + +/* ARGSUSED */ +void +tree_man(void *arg, const struct man *man) +{ + + print_man(man_node(man), 0); +} + + +static void +print_mdoc(const struct mdoc_node *n, int indent) +{ + const char *p, *t; + int i, j; + size_t argc, sz; + char **params; + struct mdoc_argv *argv; + + argv = NULL; + argc = sz = 0; + params = NULL; + t = p = NULL; + + switch (n->type) { + case (MDOC_ROOT): + t = "root"; + break; + case (MDOC_BLOCK): + t = "block"; + break; + case (MDOC_HEAD): + t = "block-head"; + break; + case (MDOC_BODY): + if (n->end) + t = "body-end"; + else + t = "block-body"; + break; + case (MDOC_TAIL): + t = "block-tail"; + break; + case (MDOC_ELEM): + t = "elem"; + break; + case (MDOC_TEXT): + t = "text"; + break; + case (MDOC_TBL): + /* FALLTHROUGH */ + case 
(MDOC_EQN): + break; + default: + abort(); + /* NOTREACHED */ + } + + switch (n->type) { + case (MDOC_TEXT): + p = n->string; + break; + case (MDOC_BODY): + p = mdoc_macronames[n->tok]; + break; + case (MDOC_HEAD): + p = mdoc_macronames[n->tok]; + break; + case (MDOC_TAIL): + p = mdoc_macronames[n->tok]; + break; + case (MDOC_ELEM): + p = mdoc_macronames[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case (MDOC_BLOCK): + p = mdoc_macronames[n->tok]; + if (n->args) { + argv = n->args->argv; + argc = n->args->argc; + } + break; + case (MDOC_TBL): + /* FALLTHROUGH */ + case (MDOC_EQN): + break; + case (MDOC_ROOT): + p = "root"; + break; + default: + abort(); + /* NOTREACHED */ + } + + if (n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else if (n->eqn) { + assert(NULL == p && NULL == t); + print_box(n->eqn->root, indent); + } else { + for (i = 0; i < indent; i++) + putchar('\t'); + + printf("%s (%s)", p, t); + + for (i = 0; i < (int)argc; i++) { + printf(" -%s", mdoc_argnames[argv[i].arg]); + if (argv[i].sz > 0) + printf(" ["); + for (j = 0; j < (int)argv[i].sz; j++) + printf(" [%s]", argv[i].value[j]); + if (argv[i].sz > 0) + printf(" ]"); + } + + for (i = 0; i < (int)sz; i++) + printf(" [%s]", params[i]); + + printf(" %d:%d\n", n->line, n->pos); + } + + if (n->child) + print_mdoc(n->child, indent + 1); + if (n->next) + print_mdoc(n->next, indent); +} + + +static void +print_man(const struct man_node *n, int indent) +{ + const char *p, *t; + int i; + + t = p = NULL; + + switch (n->type) { + case (MAN_ROOT): + t = "root"; + break; + case (MAN_ELEM): + t = "elem"; + break; + case (MAN_TEXT): + t = "text"; + break; + case (MAN_BLOCK): + t = "block"; + break; + case (MAN_HEAD): + t = "block-head"; + break; + case (MAN_BODY): + t = "block-body"; + break; + case (MAN_TAIL): + t = "block-tail"; + break; + case (MAN_TBL): + /* FALLTHROUGH */ + case (MAN_EQN): + break; + default: + abort(); + /* 
NOTREACHED */ + } + + switch (n->type) { + case (MAN_TEXT): + p = n->string; + break; + case (MAN_ELEM): + /* FALLTHROUGH */ + case (MAN_BLOCK): + /* FALLTHROUGH */ + case (MAN_HEAD): + /* FALLTHROUGH */ + case (MAN_TAIL): + /* FALLTHROUGH */ + case (MAN_BODY): + p = man_macronames[n->tok]; + break; + case (MAN_ROOT): + p = "root"; + break; + case (MAN_TBL): + /* FALLTHROUGH */ + case (MAN_EQN): + break; + default: + abort(); + /* NOTREACHED */ + } + + if (n->span) { + assert(NULL == p && NULL == t); + print_span(n->span, indent); + } else if (n->eqn) { + assert(NULL == p && NULL == t); + print_box(n->eqn->root, indent); + } else { + for (i = 0; i < indent; i++) + putchar('\t'); + printf("%s (%s) %d:%d\n", p, t, n->line, n->pos); + } + + if (n->child) + print_man(n->child, indent + 1); + if (n->next) + print_man(n->next, indent); +} + +static void +print_box(const struct eqn_box *ep, int indent) +{ + int i; + const char *t; + + if (NULL == ep) + return; + for (i = 0; i < indent; i++) + putchar('\t'); + + t = NULL; + switch (ep->type) { + case (EQN_ROOT): + t = "eqn-root"; + break; + case (EQN_LIST): + t = "eqn-list"; + break; + case (EQN_SUBEXPR): + t = "eqn-expr"; + break; + case (EQN_TEXT): + t = "eqn-text"; + break; + case (EQN_MATRIX): + t = "eqn-matrix"; + break; + } + + assert(t); + printf("%s(%d, %d, %d, %d, %d, \"%s\", \"%s\") %s\n", + t, EQN_DEFSIZE == ep->size ? 0 : ep->size, + ep->pos, ep->font, ep->mark, ep->pile, + ep->left ? ep->left : "", + ep->right ? ep->right : "", + ep->text ? 
ep->text : ""); + + print_box(ep->first, indent + 1); + print_box(ep->next, indent); +} + +static void +print_span(const struct tbl_span *sp, int indent) +{ + const struct tbl_dat *dp; + int i; + + for (i = 0; i < indent; i++) + putchar('\t'); + + switch (sp->pos) { + case (TBL_SPAN_HORIZ): + putchar('-'); + return; + case (TBL_SPAN_DHORIZ): + putchar('='); + return; + default: + break; + } + + for (dp = sp->first; dp; dp = dp->next) { + switch (dp->pos) { + case (TBL_DATA_HORIZ): + /* FALLTHROUGH */ + case (TBL_DATA_NHORIZ): + putchar('-'); + continue; + case (TBL_DATA_DHORIZ): + /* FALLTHROUGH */ + case (TBL_DATA_NDHORIZ): + putchar('='); + continue; + default: + break; + } + printf("[\"%s\"", dp->string ? dp->string : ""); + if (dp->spans) + printf("(%d)", dp->spans); + if (NULL == dp->layout) + putchar('*'); + putchar(']'); + putchar(' '); + } + + printf("(tbl) %d:1\n", sp->line); +} diff --git a/usr/src/cmd/mandoc/vol.c b/usr/src/cmd/mandoc/vol.c new file mode 100644 index 0000000000..3ea7441a42 --- /dev/null +++ b/usr/src/cmd/mandoc/vol.c @@ -0,0 +1,39 @@ +/* $Id: vol.c,v 1.9 2011/03/22 14:33:05 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "mdoc.h" +#include "mandoc.h" +#include "libmdoc.h" + +#define LINE(x, y) \ + if (0 == strcmp(p, x)) return(y); + +const char * +mdoc_a2vol(const char *p) +{ + +#include "vol.in" + + return(NULL); +} diff --git a/usr/src/cmd/mandoc/vol.in b/usr/src/cmd/mandoc/vol.in new file mode 100644 index 0000000000..7650b57a14 --- /dev/null +++ b/usr/src/cmd/mandoc/vol.in @@ -0,0 +1,35 @@ +/* $Id: vol.in,v 1.6 2010/06/19 20:46:28 kristaps Exp $ */ +/* + * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This file defines volume titles for .Dt. + * + * Be sure to escape strings. 
+ */ + +LINE("USD", "User\'s Supplementary Documents") +LINE("PS1", "Programmer\'s Supplementary Documents") +LINE("AMD", "Ancestral Manual Documents") +LINE("SMM", "System Manager\'s Manual") +LINE("URM", "User\'s Reference Manual") +LINE("PRM", "Programmer\'s Manual") +LINE("KM", "Kernel Manual") +LINE("IND", "Manual Master Index") +LINE("MMI", "Manual Master Index") +LINE("LOCAL", "Local Manual") +LINE("LOC", "Local Manual") +LINE("CON", "Contributed Software Manual") diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 79884b41f0..763e560bac 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -56,6 +56,19 @@ extern int64_t mdb_gethrtime(void); int aok; #endif +enum spa_flags { + SPA_FLAG_CONFIG = 1 << 0, + SPA_FLAG_VDEVS = 1 << 1, + SPA_FLAG_ERRORS = 1 << 2, + SPA_FLAG_METASLAB_GROUPS = 1 << 3, + SPA_FLAG_METASLABS = 1 << 4, + SPA_FLAG_HISTOGRAMS = 1 << 5 +}; + +#define SPA_FLAG_ALL_VDEV \ + (SPA_FLAG_VDEVS | SPA_FLAG_ERRORS | SPA_FLAG_METASLAB_GROUPS | \ + SPA_FLAG_METASLABS | SPA_FLAG_HISTOGRAMS) + static int getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp, const char *member, int len, void *buf) @@ -113,6 +126,53 @@ strisprint(const char *cp) return (B_TRUE); } +#define NICENUM_BUFLEN 6 + +static int +snprintfrac(char *buf, int len, + uint64_t numerator, uint64_t denom, int frac_digits) +{ + int mul = 1; + int whole, frac, i; + + for (i = frac_digits; i; i--) + mul *= 10; + whole = numerator / denom; + frac = mul * numerator / denom - mul * whole; + return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac)); +} + 
+static void +mdb_nicenum(uint64_t num, char *buf) +{ + uint64_t n = num; + int index = 0; + char *u; + + while (n >= 1024) { + n = (n + (1024 / 2)) / 1024; /* Round up or down */ + index++; + } + + u = &" \0K\0M\0G\0T\0P\0E\0"[index*2]; + + if (index == 0) { + (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu", + (u_longlong_t)n); + } else if (n < 10 && (num & (num - 1)) != 0) { + (void) snprintfrac(buf, NICENUM_BUFLEN, + num, 1ULL << 10 * index, 2); + strcat(buf, u); + } else if (n < 100 && (num & (num - 1)) != 0) { + (void) snprintfrac(buf, NICENUM_BUFLEN, + num, 1ULL << 10 * index, 1); + strcat(buf, u); + } else { + (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s", + (u_longlong_t)n, u); + } +} + static int verbose; static int @@ -1013,6 +1073,9 @@ typedef struct mdb_spa_print { * -c Print configuration information as well * -v Print vdev state * -e Print vdev error stats + * -m Print vdev metaslab info + * -M print vdev metaslab group info + * -h Print histogram info (must be combined with -m or -M) * * Print a summarized spa_t. When given no arguments, prints out a table of all * active pools on the system. 
@@ -1024,14 +1087,15 @@ spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED", "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" }; const char *state; - int config = FALSE; - int vdevs = FALSE; - int errors = FALSE; + int spa_flags = 0; if (mdb_getopts(argc, argv, - 'c', MDB_OPT_SETBITS, TRUE, &config, - 'v', MDB_OPT_SETBITS, TRUE, &vdevs, - 'e', MDB_OPT_SETBITS, TRUE, &errors, + 'c', MDB_OPT_SETBITS, SPA_FLAG_CONFIG, &spa_flags, + 'v', MDB_OPT_SETBITS, SPA_FLAG_VDEVS, &spa_flags, + 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, + 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, + 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, + 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, NULL) != argc) return (DCMD_USAGE); @@ -1064,7 +1128,7 @@ spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name); - if (config) { + if (spa_flags & SPA_FLAG_CONFIG) { mdb_printf("\n"); mdb_inc_indent(4); if (mdb_call_dcmd("spa_config", addr, flags, 0, @@ -1073,15 +1137,27 @@ spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_dec_indent(4); } - if (vdevs || errors) { + if (spa_flags & SPA_FLAG_ALL_VDEV) { mdb_arg_t v; + char opts[100] = "-"; + int args = + (spa_flags | SPA_FLAG_VDEVS) == SPA_FLAG_VDEVS ? 0 : 1; + + if (spa_flags & SPA_FLAG_ERRORS) + strcat(opts, "e"); + if (spa_flags & SPA_FLAG_METASLABS) + strcat(opts, "m"); + if (spa_flags & SPA_FLAG_METASLAB_GROUPS) + strcat(opts, "M"); + if (spa_flags & SPA_FLAG_HISTOGRAMS) + strcat(opts, "h"); v.a_type = MDB_TYPE_STRING; - v.a_un.a_str = "-e"; + v.a_un.a_str = opts; mdb_printf("\n"); mdb_inc_indent(4); - if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 
1 : 0, + if (mdb_call_dcmd("spa_vdevs", addr, flags, args, &v) != DCMD_OK) return (DCMD_ERR); mdb_dec_indent(4); @@ -1123,6 +1199,161 @@ spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 0, NULL)); } +const char histo_stars[] = "****************************************"; +const int histo_width = sizeof (histo_stars) - 1; + +static void +dump_histogram(const uint64_t *histo, int size, int offset) +{ + int i; + int minidx = size - 1; + int maxidx = 0; + uint64_t max = 0; + + for (i = 0; i < size; i++) { + if (histo[i] > max) + max = histo[i]; + if (histo[i] > 0 && i > maxidx) + maxidx = i; + if (histo[i] > 0 && i < minidx) + minidx = i; + } + + if (max < histo_width) + max = histo_width; + + for (i = minidx; i <= maxidx; i++) { + mdb_printf("%3u: %6llu %s\n", + i + offset, (u_longlong_t)histo[i], + &histo_stars[(max - histo[i]) * histo_width / max]); + } +} + +typedef struct mdb_range_tree { + uint64_t rt_space; +} mdb_range_tree_t; + +typedef struct mdb_metaslab_group { + uint64_t mg_fragmentation; + uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE]; +} mdb_metaslab_group_t; + +typedef struct mdb_metaslab { + uint64_t ms_id; + uint64_t ms_start; + uint64_t ms_size; + uint64_t ms_fragmentation; + uintptr_t ms_alloctree[TXG_SIZE]; + uintptr_t ms_freetree[TXG_SIZE]; + uintptr_t ms_tree; + uintptr_t ms_sm; +} mdb_metaslab_t; + +typedef struct mdb_space_map_phys_t { + uint64_t smp_alloc; + uint64_t smp_histogram[SPACE_MAP_HISTOGRAM_SIZE]; +} mdb_space_map_phys_t; + +typedef struct mdb_space_map { + uint64_t sm_size; + uint8_t sm_shift; + uint64_t sm_alloc; + uintptr_t sm_phys; +} mdb_space_map_t; + +typedef struct mdb_vdev { + uintptr_t vdev_ms; + uint64_t vdev_ms_count; + vdev_stat_t vdev_stat; +} mdb_vdev_t; + +static int +metaslab_stats(uintptr_t addr, int spa_flags) +{ + mdb_vdev_t vdev; + uintptr_t *vdev_ms; + + if (mdb_ctf_vread(&vdev, "vdev_t", "mdb_vdev_t", + (uintptr_t)addr, 0) == -1) { + mdb_warn("failed to read vdev at %p\n", 
addr); + return (DCMD_ERR); + } + + mdb_inc_indent(4); + mdb_printf("%<u>%-?s %6s %20s %10s %9s%</u>\n", "ADDR", "ID", + "OFFSET", "FREE", "FRAGMENTATION"); + + vdev_ms = mdb_alloc(vdev.vdev_ms_count * sizeof (void *), + UM_SLEEP | UM_GC); + if (mdb_vread(vdev_ms, vdev.vdev_ms_count * sizeof (void *), + (uintptr_t)vdev.vdev_ms) == -1) { + mdb_warn("failed to read vdev_ms at %p\n", vdev.vdev_ms); + return (DCMD_ERR); + } + + for (int m = 0; m < vdev.vdev_ms_count; m++) { + mdb_metaslab_t ms; + mdb_space_map_t sm = { 0 }; + char free[NICENUM_BUFLEN]; + + if (mdb_ctf_vread(&ms, "metaslab_t", "mdb_metaslab_t", + (uintptr_t)vdev_ms[m], 0) == -1) + return (DCMD_ERR); + + if (ms.ms_sm != NULL && + mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", + ms.ms_sm, 0) == -1) + return (DCMD_ERR); + + mdb_nicenum(ms.ms_size - sm.sm_alloc, free); + + mdb_printf("%0?p %6llu %20llx %10s ", vdev_ms[m], ms.ms_id, + ms.ms_start, free); + if (ms.ms_fragmentation == ZFS_FRAG_INVALID) + mdb_printf("%9s\n", "-"); + else + mdb_printf("%9llu%%\n", ms.ms_fragmentation); + + if ((spa_flags & SPA_FLAG_HISTOGRAMS) && ms.ms_sm != NULL) { + mdb_space_map_phys_t smp; + + if (sm.sm_phys == NULL) + continue; + + (void) mdb_ctf_vread(&smp, "space_map_phys_t", + "mdb_space_map_phys_t", sm.sm_phys, 0); + + dump_histogram(smp.smp_histogram, + SPACE_MAP_HISTOGRAM_SIZE, sm.sm_shift); + } + } + mdb_dec_indent(4); + return (DCMD_OK); +} + +static int +metaslab_group_stats(uintptr_t addr, int spa_flags) +{ + mdb_metaslab_group_t mg; + if (mdb_ctf_vread(&mg, "metaslab_group_t", "mdb_metaslab_group_t", + (uintptr_t)addr, 0) == -1) { + mdb_warn("failed to read vdev_mg at %p\n", addr); + return (DCMD_ERR); + } + + mdb_inc_indent(4); + mdb_printf("%<u>%-?s %15s%</u>\n", "ADDR", "FRAGMENTATION"); + if (mg.mg_fragmentation == ZFS_FRAG_INVALID) + mdb_printf("%0?p %15s\n", addr, "-"); + else + mdb_printf("%0?p %15llu%%\n", addr, mg.mg_fragmentation); + + if (spa_flags & SPA_FLAG_HISTOGRAMS) + 
dump_histogram(mg.mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + mdb_dec_indent(4); + return (DCMD_OK); +} + /* * ::vdev * @@ -1136,8 +1367,8 @@ spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) * With '-e', the statistics associated with the vdev are printed as well. */ static int -do_print_vdev(uintptr_t addr, int flags, int depth, int stats, - int recursive) +do_print_vdev(uintptr_t addr, int flags, int depth, boolean_t recursive, + int spa_flags) { vdev_t vdev; char desc[MAXNAMELEN]; @@ -1264,7 +1495,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats, mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc); - if (stats) { + if (spa_flags & SPA_FLAG_ERRORS) { vdev_stat_t *vs = &vdev.vdev_stat; int i; @@ -1290,10 +1521,17 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats, mdb_printf("ECKSUM %10#llx\n", vs->vs_checksum_errors); mdb_dec_indent(4); + mdb_printf("\n"); } - if (stats) - mdb_printf("\n"); + if (spa_flags & SPA_FLAG_METASLAB_GROUPS && + vdev.vdev_mg != NULL) { + metaslab_group_stats((uintptr_t)vdev.vdev_mg, + spa_flags); + } + if (spa_flags & SPA_FLAG_METASLABS && vdev.vdev_ms != NULL) { + metaslab_stats((uintptr_t)addr, spa_flags); + } } children = vdev.vdev_children; @@ -1309,9 +1547,10 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats, } for (c = 0; c < children; c++) { - if (do_print_vdev(child[c], flags, depth + 2, stats, - recursive)) + if (do_print_vdev(child[c], flags, depth + 2, recursive, + spa_flags)) { return (DCMD_ERR); + } } return (DCMD_OK); @@ -1320,15 +1559,17 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats, static int vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { - int recursive = FALSE; - int stats = FALSE; uint64_t depth = 0; + boolean_t recursive = B_FALSE; + int spa_flags = 0; if (mdb_getopts(argc, argv, + 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, + 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, 
&spa_flags, + 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, + 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, 'r', MDB_OPT_SETBITS, TRUE, &recursive, - 'e', MDB_OPT_SETBITS, TRUE, &stats, - 'd', MDB_OPT_UINT64, &depth, - NULL) != argc) + 'd', MDB_OPT_UINT64, &depth, NULL) != argc) return (DCMD_USAGE); if (!(flags & DCMD_ADDRSPEC)) { @@ -1336,7 +1577,7 @@ vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_ERR); } - return (do_print_vdev(addr, flags, (int)depth, stats, recursive)); + return (do_print_vdev(addr, flags, (int)depth, recursive, spa_flags)); } typedef struct metaslab_walk_data { @@ -1449,34 +1690,6 @@ typedef struct mdb_dsl_dir_phys { uint64_t dd_uncompressed_bytes; } mdb_dsl_dir_phys_t; -typedef struct mdb_vdev { - uintptr_t vdev_parent; - uintptr_t vdev_ms; - uint64_t vdev_ms_count; - vdev_stat_t vdev_stat; -} mdb_vdev_t; - -typedef struct mdb_space_map_phys_t { - uint64_t smp_alloc; -} mdb_space_map_phys_t; - -typedef struct mdb_space_map { - uint64_t sm_size; - uint64_t sm_alloc; - uintptr_t sm_phys; -} mdb_space_map_t; - -typedef struct mdb_range_tree { - uint64_t rt_space; -} mdb_range_tree_t; - -typedef struct mdb_metaslab { - uintptr_t ms_alloctree[TXG_SIZE]; - uintptr_t ms_freetree[TXG_SIZE]; - uintptr_t ms_tree; - uintptr_t ms_sm; -} mdb_metaslab_t; - typedef struct space_data { uint64_t ms_alloctree[TXG_SIZE]; uint64_t ms_freetree[TXG_SIZE]; @@ -1492,7 +1705,7 @@ space_cb(uintptr_t addr, const void *unknown, void *arg) space_data_t *sd = arg; mdb_metaslab_t ms; mdb_range_tree_t rt; - mdb_space_map_t sm; + mdb_space_map_t sm = { 0 }; mdb_space_map_phys_t smp = { 0 }; int i; @@ -1503,15 +1716,22 @@ space_cb(uintptr_t addr, const void *unknown, void *arg) for (i = 0; i < TXG_SIZE; i++) { if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", ms.ms_alloctree[i], 0) == -1) + return (WALK_ERR); + sd->ms_alloctree[i] += rt.rt_space; if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", 
ms.ms_freetree[i], 0) == -1) + return (WALK_ERR); + sd->ms_freetree[i] += rt.rt_space; } if (mdb_ctf_vread(&rt, "range_tree_t", - "mdb_range_tree_t", ms.ms_tree, 0) == -1 || + "mdb_range_tree_t", ms.ms_tree, 0) == -1) + return (WALK_ERR); + + if (ms.ms_sm != NULL && mdb_ctf_vread(&sm, "space_map_t", "mdb_space_map_t", ms.ms_sm, 0) == -1) return (WALK_ERR); @@ -1675,7 +1895,10 @@ spa_print_aux(mdb_spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v, /* * ::spa_vdevs * - * -e Include error stats + * -e Include error stats + * -m Include metaslab information + * -M Include metaslab group information + * -h Include histogram information (requires -m or -M) * * Print out a summarized list of vdevs for the given spa_t. * This is accomplished by invoking "::vdev -re" on the root vdev, as well as @@ -1686,11 +1909,15 @@ static int spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { mdb_arg_t v[3]; - int errors = FALSE; int ret; + char opts[100] = "-r"; + int spa_flags = 0; if (mdb_getopts(argc, argv, - 'e', MDB_OPT_SETBITS, TRUE, &errors, + 'e', MDB_OPT_SETBITS, SPA_FLAG_ERRORS, &spa_flags, + 'm', MDB_OPT_SETBITS, SPA_FLAG_METASLABS, &spa_flags, + 'M', MDB_OPT_SETBITS, SPA_FLAG_METASLAB_GROUPS, &spa_flags, + 'h', MDB_OPT_SETBITS, SPA_FLAG_HISTOGRAMS, &spa_flags, NULL) != argc) return (DCMD_USAGE); @@ -1709,8 +1936,17 @@ spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); } + if (spa_flags & SPA_FLAG_ERRORS) + strcat(opts, "e"); + if (spa_flags & SPA_FLAG_METASLABS) + strcat(opts, "m"); + if (spa_flags & SPA_FLAG_METASLAB_GROUPS) + strcat(opts, "M"); + if (spa_flags & SPA_FLAG_HISTOGRAMS) + strcat(opts, "h"); + v[0].a_type = MDB_TYPE_STRING; - v[0].a_un.a_str = errors ? 
"-re" : "-r"; + v[0].a_un.a_str = opts; ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev, flags, 1, v); @@ -2131,57 +2367,10 @@ zio_walk_root_step(mdb_walk_state_t *wsp) return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata)); } -#define NICENUM_BUFLEN 6 - -static int -snprintfrac(char *buf, int len, - uint64_t numerator, uint64_t denom, int frac_digits) -{ - int mul = 1; - int whole, frac, i; - - for (i = frac_digits; i; i--) - mul *= 10; - whole = numerator / denom; - frac = mul * numerator / denom - mul * whole; - return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac)); -} - -static void -mdb_nicenum(uint64_t num, char *buf) -{ - uint64_t n = num; - int index = 0; - char *u; - - while (n >= 1024) { - n = (n + (1024 / 2)) / 1024; /* Round up or down */ - index++; - } - - u = &" \0K\0M\0G\0T\0P\0E\0"[index*2]; - - if (index == 0) { - (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu", - (u_longlong_t)n); - } else if (n < 10 && (num & (num - 1)) != 0) { - (void) snprintfrac(buf, NICENUM_BUFLEN, - num, 1ULL << 10 * index, 2); - strcat(buf, u); - } else if (n < 100 && (num & (num - 1)) != 0) { - (void) snprintfrac(buf, NICENUM_BUFLEN, - num, 1ULL << 10 * index, 1); - strcat(buf, u); - } else { - (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s", - (u_longlong_t)n, u); - } -} - /* * ::zfs_blkstats * - * -v print verbose per-level information + * -v print verbose per-level information * */ static int @@ -3087,15 +3276,30 @@ static const mdb_dcmd_t dcmds[] = { { "abuf_find", "dva_word[0] dva_word[1]", "find arc_buf_hdr_t of a specified DVA", abuf_find }, - { "spa", "?[-cv]", "spa_t summary", spa_print }, + { "spa", "?[-cevmMh]\n" + "\t-c display spa config\n" + "\t-e display vdev statistics\n" + "\t-v display vdev information\n" + "\t-m display metaslab statistics\n" + "\t-M display metaslab group statistics\n" + "\t-h display histogram (requires -m or -M)\n", + "spa_t summary", spa_print }, { "spa_config", ":", "print spa_t configuration", 
spa_print_config }, { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space }, - { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs }, + { "spa_vdevs", ":[-emMh]\n" + "\t-e display vdev statistics\n" + "\t-m dispaly metaslab statistics\n" + "\t-M display metaslab group statistic\n" + "\t-h display histogram (requires -m or -M)\n", + "given a spa_t, print vdev summary", spa_vdevs }, { "vdev", ":[-re]\n" "\t-r display recursively\n" - "\t-e print statistics", + "\t-e display statistics\n" + "\t-m display metaslab statistics\n" + "\t-M display metaslab group statistics\n" + "\t-h display histogram (requires -m or -M)\n", "vdev_t summary", vdev_print }, - { "zio", ":[cpr]\n" + { "zio", ":[-cpr]\n" "\t-c display children\n" "\t-p display parents\n" "\t-r display recursively", diff --git a/usr/src/cmd/sgs/libld/common/sections.c b/usr/src/cmd/sgs/libld/common/sections.c index 01ee18aab3..95cc830f6c 100644 --- a/usr/src/cmd/sgs/libld/common/sections.c +++ b/usr/src/cmd/sgs/libld/common/sections.c @@ -160,6 +160,31 @@ ignore_sym(Ofl_desc *ofl, Ifl_desc *ifl, Sym_desc *sdp, int allow_ldynsym) } } +static Boolean +isdesc_discarded(Is_desc *isp) +{ + Ifl_desc *ifl = isp->is_file; + Os_desc *osp = isp->is_osdesc; + Word ptype = osp->os_sgdesc->sg_phdr.p_type; + + if (isp->is_flags & FLG_IS_DISCARD) + return (TRUE); + + /* + * If the file is discarded, it will take + * the section with it. + */ + if (ifl && + (((ifl->ifl_flags & FLG_IF_FILEREF) == 0) || + ((ptype == PT_LOAD) && + ((isp->is_flags & FLG_IS_SECTREF) == 0) && + (isp->is_shdr->sh_size > 0))) && + (ifl->ifl_flags & FLG_IF_IGNORE)) + return (TRUE); + + return (FALSE); +} + /* * There are situations where we may count output sections (ofl_shdrcnt) * that are subsequently eliminated from the output object. 
Whether or @@ -181,7 +206,6 @@ adjust_os_count(Ofl_desc *ofl) Sg_desc *sgp; Is_desc *isp; Os_desc *osp; - Ifl_desc *ifl; Aliste idx1; if ((ofl->ofl_flags & FLG_OF_ADJOSCNT) == 0) @@ -194,37 +218,20 @@ adjust_os_count(Ofl_desc *ofl) */ for (APLIST_TRAVERSE(ofl->ofl_segs, idx1, sgp)) { Aliste idx2; - Word ptype = sgp->sg_phdr.p_type; for (APLIST_TRAVERSE(sgp->sg_osdescs, idx2, osp)) { Aliste idx3; int keep = 0, os_isdescs_idx; OS_ISDESCS_TRAVERSE(os_isdescs_idx, osp, idx3, isp) { - ifl = isp->is_file; - - /* Input section is tagged for discard? */ - if (isp->is_flags & FLG_IS_DISCARD) - continue; - - /* - * If the file is discarded, it will take - * the section with it. - */ - if (ifl && - (((ifl->ifl_flags & FLG_IF_FILEREF) == 0) || - ((ptype == PT_LOAD) && - ((isp->is_flags & FLG_IS_SECTREF) == 0) && - (isp->is_shdr->sh_size > 0))) && - (ifl->ifl_flags & FLG_IF_IGNORE)) - continue; - /* * We have found a kept input section, * so the output section will be created. */ - keep = 1; - break; + if (!isdesc_discarded(isp)) { + keep = 1; + break; + } } /* * If no section of this name was kept, decrement @@ -2849,7 +2856,14 @@ ld_make_strmerge(Ofl_desc *ofl, Os_desc *osp, APlist **rel_alpp, */ mstrtab = NULL; for (APLIST_TRAVERSE(osp->os_mstrisdescs, idx, isp)) { - if (isp->is_flags & FLG_IS_DISCARD) + if (isdesc_discarded(isp)) + continue; + + /* + * Input sections of 0 size are dubiously valid since they do + * not even contain the NUL string. Ignore them. 
+ */ + if (isp->is_shdr->sh_size == 0) continue; /* diff --git a/usr/src/cmd/sgs/packages/common/SUNWonld-README b/usr/src/cmd/sgs/packages/common/SUNWonld-README index dedd0ea913..6688c34255 100644 --- a/usr/src/cmd/sgs/packages/common/SUNWonld-README +++ b/usr/src/cmd/sgs/packages/common/SUNWonld-README @@ -1653,3 +1653,4 @@ Bugid Risk Synopsis 4227 ld --library-path is translated to -l-path, not -L 4270 ld(1) argument error reporting is still pretty bad 4383 libelf can't write extended sections when ELF_F_LAYOUT +4959 completely discarded merged string sections will corrupt output objects diff --git a/usr/src/cmd/sgs/rtld/common/malloc.c b/usr/src/cmd/sgs/rtld/common/malloc.c index 1069ec76e7..4e287d4d2f 100644 --- a/usr/src/cmd/sgs/rtld/common/malloc.c +++ b/usr/src/cmd/sgs/rtld/common/malloc.c @@ -207,11 +207,23 @@ void * calloc(size_t num, size_t size) { void * mp; + size_t total; - num *= size; - if ((mp = malloc(num)) == NULL) + if (num == 0 || size == 0) { + total = 0; + } else { + total = num * size; + + /* check for overflow */ + if ((total / num) != size) { + errno = ENOMEM; + return (NULL); + } + } + + if ((mp = malloc(total)) == NULL) return (NULL); - (void) memset(mp, 0, num); + (void) memset(mp, 0, total); return (mp); } diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 5c66d03c77..2def6241b7 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -111,11 +111,11 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " - "[-U config] [-M inflight I/Os] [-x dumpdir] poolname [object...]\n" + "Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " + "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n" " %s [-divPA] [-e -p path...] 
[-U config] dataset " "[object...]\n" - " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" @@ -138,6 +138,7 @@ usage(void) (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -M metaslab groups\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); @@ -168,7 +169,7 @@ usage(void) (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t <txg> -- highest txg to use when " "searching for uberblocks\n"); - (void) fprintf(stderr, " -M <number of inflight I/Os> -- " + (void) fprintf(stderr, " -I <number of inflight I/Os> -- " "specify the maximum number of " "checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " @@ -548,7 +549,7 @@ get_metaslab_refcount(vdev_t *vd) { int refcount = 0; - if (vd->vdev_top == vd) { + if (vd->vdev_top == vd && !vd->vdev_removing) { for (int m = 0; m < vd->vdev_ms_count; m++) { space_map_t *sm = vd->vdev_ms[m]->ms_sm; @@ -686,9 +687,10 @@ dump_metaslab(metaslab_t *msp) * The space map histogram represents free space in chunks * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). 
*/ - (void) printf("\tOn-disk histogram:\n"); + (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", + (u_longlong_t)msp->ms_fragmentation); dump_histogram(sm->sm_phys->smp_histogram, - SPACE_MAP_HISTOGRAM_SIZE(sm), sm->sm_shift); + SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { @@ -713,6 +715,47 @@ print_vdev_metaslab_header(vdev_t *vd) } static void +dump_metaslab_groups(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_class_t *mc = spa_normal_class(spa); + uint64_t fragmentation; + + metaslab_class_histogram_verify(mc); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (mg->mg_class != mc) + continue; + + metaslab_group_histogram_verify(mg); + mg->mg_fragmentation = metaslab_group_fragmentation(mg); + + (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" + "fragmentation", + (u_longlong_t)tvd->vdev_id, + (u_longlong_t)tvd->vdev_ms_count); + if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { + (void) printf("%3s\n", "-"); + } else { + (void) printf("%3llu%%\n", + (u_longlong_t)mg->mg_fragmentation); + } + dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + } + + (void) printf("\tpool %s\tfragmentation", spa_name(spa)); + fragmentation = metaslab_class_fragmentation(mc); + if (fragmentation == ZFS_FRAG_INVALID) + (void) printf("\t%3s\n", "-"); + else + (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); + dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); +} + +static void dump_metaslabs(spa_t *spa) { vdev_t *vd, *rvd = spa->spa_root_vdev; @@ -2340,8 +2383,7 @@ zdb_leak(void *arg, uint64_t start, uint64_t size) } static metaslab_ops_t zdb_metaslab_ops = { - NULL, /* alloc */ - NULL /* fragmented */ + NULL /* alloc */ }; static void @@ -2836,6 +2878,8 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); + if (dump_opt['M']) + dump_metaslab_groups(spa); if 
(dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); @@ -3330,7 +3374,7 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); while ((c = getopt(argc, argv, - "bcdhilmM:suCDRSAFLXx:evp:t:U:P")) != -1) { + "bcdhilmMI:suCDRSAFLXx:evp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': @@ -3343,6 +3387,7 @@ main(int argc, char **argv) case 'u': case 'C': case 'D': + case 'M': case 'R': case 'S': dump_opt[c]++; @@ -3356,10 +3401,7 @@ main(int argc, char **argv) case 'P': dump_opt[c]++; break; - case 'v': - verbose++; - break; - case 'M': + case 'I': max_inflight = strtoull(optarg, NULL, 0); if (max_inflight == 0) { (void) fprintf(stderr, "maximum number " @@ -3383,9 +3425,6 @@ main(int argc, char **argv) } searchdirs[nsearch++] = optarg; break; - case 'x': - vn_dumpdir = optarg; - break; case 't': max_txg = strtoull(optarg, NULL, 0); if (max_txg < TXG_INITIAL) { @@ -3397,6 +3436,12 @@ main(int argc, char **argv) case 'U': spa_config_path = optarg; break; + case 'v': + verbose++; + break; + case 'x': + vn_dumpdir = optarg; + break; default: usage(); break; diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c index 384121a589..235019ef46 100644 --- a/usr/src/cmd/zonecfg/zonecfg.c +++ b/usr/src/cmd/zonecfg/zonecfg.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 Gary Mills * Copyright 2013, Joyent Inc. All rights reserved. */ @@ -935,6 +936,105 @@ long_help(int cmd_num) } /* + * Return the input filename appended to each component of the path + * or the filename itself if it is absolute. + * Parameters: path string, file name, output string. 
+ */ +/* Copied almost verbatim from libtnfctl/prb_findexec.c */ +static const char * +exec_cat(const char *s1, const char *s2, char *si) +{ + char *s; + /* Number of remaining characters in s */ + int cnt = PATH_MAX + 1; + + s = si; + while (*s1 && *s1 != ':') { /* Copy first component of path to si */ + if (cnt > 0) { + *s++ = *s1++; + cnt--; + } else { + s1++; + } + } + if (si != s && cnt > 0) { /* Add slash if s2 is not absolute */ + *s++ = '/'; + cnt--; + } + while (*s2 && cnt > 0) { /* Copy s2 to si */ + *s++ = *s2++; + cnt--; + } + *s = '\0'; /* Terminate the output string */ + return (*s1 ? ++s1 : NULL); /* Return next path component or NULL */ +} + +/* Determine that a name exists in PATH */ +/* Copied with changes from libtnfctl/prb_findexec.c */ +static int +path_find(const char *name) +{ + const char *pathstr; + char fname[PATH_MAX + 2]; + const char *cp; + struct stat stat_buf; + + if ((pathstr = getenv("PATH")) == NULL) { + if (geteuid() == 0 || getuid() == 0) + pathstr = "/usr/sbin:/usr/bin"; + else + pathstr = "/usr/bin:"; + } + cp = strchr(name, '/') ? 
(const char *) "" : pathstr; + + do { + cp = exec_cat(cp, name, fname); + if (stat(fname, &stat_buf) != -1) { + /* successful find of the file */ + return (0); + } + } while (cp != NULL); + + return (-1); +} + +static FILE * +pager_open(void) { + FILE *newfp; + char *pager, *space; + + pager = getenv("PAGER"); + if (pager == NULL || *pager == '\0') + pager = PAGER; + + space = strchr(pager, ' '); + if (space) + *space = '\0'; + if (path_find(pager) == 0) { + if (space) + *space = ' '; + if ((newfp = popen(pager, "w")) == NULL) + zerr(gettext("PAGER open failed (%s)."), + strerror(errno)); + return (newfp); + } else { + zerr(gettext("PAGER %s does not exist (%s)."), + pager, strerror(errno)); + } + return (NULL); +} + +static void +pager_close(FILE *fp) { + int status; + + status = pclose(fp); + if (status == -1) + zerr(gettext("PAGER close failed (%s)."), + strerror(errno)); +} + +/* * Called with verbose TRUE when help is explicitly requested, FALSE for * unexpected errors. */ @@ -945,28 +1045,13 @@ usage(boolean_t verbose, uint_t flags) FILE *fp = verbose ? 
stdout : stderr; FILE *newfp; boolean_t need_to_close = B_FALSE; - char *pager, *space; int i; - struct stat statbuf; /* don't page error output */ if (verbose && interactive_mode) { - if ((pager = getenv("PAGER")) == NULL) - pager = PAGER; - - space = strchr(pager, ' '); - if (space) - *space = '\0'; - if (stat(pager, &statbuf) == 0) { - if (space) - *space = ' '; - if ((newfp = popen(pager, "w")) != NULL) { - need_to_close = B_TRUE; - fp = newfp; - } - } else { - zerr(gettext("PAGER %s does not exist (%s)."), - pager, strerror(errno)); + if ((newfp = pager_open()) != NULL) { + need_to_close = B_TRUE; + fp = newfp; } } @@ -1323,7 +1408,7 @@ usage(boolean_t verbose, uint_t flags) pt_to_str(PT_USER), pt_to_str(PT_AUTHS)); } if (need_to_close) - (void) pclose(fp); + (void) pager_close(fp); } static void @@ -5815,7 +5900,6 @@ info_func(cmd_t *cmd) { FILE *fp = stdout; boolean_t need_to_close = B_FALSE; - char *pager, *space; int type; int res1, res2, res3; uint64_t swap_limit; @@ -5830,22 +5914,10 @@ info_func(cmd_t *cmd) /* don't page error output */ if (interactive_mode) { - if ((pager = getenv("PAGER")) == NULL) - pager = PAGER; - space = strchr(pager, ' '); - if (space) - *space = '\0'; - if (stat(pager, &statbuf) == 0) { - if (space) - *space = ' '; - if ((fp = popen(pager, "w")) != NULL) - need_to_close = B_TRUE; - else - fp = stdout; - } else { - zerr(gettext("PAGER %s does not exist (%s)."), - pager, strerror(errno)); - } + if ((fp = pager_open()) != NULL) + need_to_close = B_TRUE; + else + fp = stdout; setbuf(fp, NULL); } @@ -6045,7 +6117,7 @@ info_func(cmd_t *cmd) cleanup: if (need_to_close) - (void) pclose(fp); + (void) pager_close(fp); } /* diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 31372bbbf3..228cff6cd6 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -2755,10 +2755,15 @@ print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted) boolean_t fixed; size_t width = 
zprop_width(prop, &fixed, ZFS_TYPE_POOL); - zfs_nicenum(value, propval, sizeof (propval)); if (prop == ZPOOL_PROP_EXPANDSZ && value == 0) (void) strlcpy(propval, "-", sizeof (propval)); + else if (prop == ZPOOL_PROP_FRAGMENTATION && value == ZFS_FRAG_INVALID) + (void) strlcpy(propval, "-", sizeof (propval)); + else if (prop == ZPOOL_PROP_FRAGMENTATION) + (void) snprintf(propval, sizeof (propval), "%llu%%", value); + else + zfs_nicenum(value, propval, sizeof (propval)); if (scripted) (void) printf("\t%s", propval); @@ -2791,9 +2796,9 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, /* only toplevel vdevs have capacity stats */ if (vs->vs_space == 0) { if (scripted) - (void) printf("\t-\t-\t-"); + (void) printf("\t-\t-\t-\t-"); else - (void) printf(" - - -"); + (void) printf(" - - - -"); } else { print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted); @@ -2801,6 +2806,8 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, scripted); print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, scripted); + print_one_column(ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, scripted); } print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted); @@ -2886,8 +2893,8 @@ zpool_do_list(int argc, char **argv) int ret; list_cbdata_t cb = { 0 }; static char default_props[] = - "name,size,allocated,free,expandsize,capacity,dedupratio," - "health,altroot"; + "name,size,allocated,free,fragmentation,expandsize,capacity," + "dedupratio,health,altroot"; char *props = default_props; unsigned long interval = 0, count = 0; zpool_list_t *list; |
