diff options
Diffstat (limited to 'usr/src/cmd/mandoc')
69 files changed, 7588 insertions, 3626 deletions
diff --git a/usr/src/cmd/mandoc/Makefile.common b/usr/src/cmd/mandoc/Makefile.common index 9accbd1244..6c0187e3e0 100644 --- a/usr/src/cmd/mandoc/Makefile.common +++ b/usr/src/cmd/mandoc/Makefile.common @@ -32,7 +32,6 @@ OBJS= att.o \ main.o \ man.o \ manpath.o \ - man_hash.o \ man_html.o \ man_macro.o \ man_term.o \ @@ -40,14 +39,15 @@ OBJS= att.o \ mandoc.o \ mandoc_aux.o \ mandoc_ohash.o \ + mandoc_xr.o \ mandocdb.o \ mansearch.o \ mdoc.o \ mdoc_argv.o \ - mdoc_hash.o \ mdoc_html.o \ mdoc_macro.o \ mdoc_man.o \ + mdoc_markdown.o \ mdoc_state.o \ mdoc_term.o \ mdoc_validate.o \ @@ -55,6 +55,9 @@ OBJS= att.o \ out.o \ read.o \ roff.o \ + roff_html.o \ + roff_term.o \ + roff_validate.o \ preconv.o \ st.o \ tag.o \ @@ -67,6 +70,7 @@ OBJS= att.o \ term.o \ term_ascii.o \ term_ps.o \ + term_tab.o \ tree.o OBJS += compat_ohash.o diff --git a/usr/src/cmd/mandoc/THIRDPARTYLICENSE b/usr/src/cmd/mandoc/THIRDPARTYLICENSE index becdb9656a..34420d2920 100644 --- a/usr/src/cmd/mandoc/THIRDPARTYLICENSE +++ b/usr/src/cmd/mandoc/THIRDPARTYLICENSE @@ -1,17 +1,19 @@ -$Id: LICENSE,v 1.12 2016/07/07 23:46:36 schwarze Exp $ +$Id: LICENSE,v 1.17 2017/06/23 15:58:14 schwarze Exp $ With the exceptions noted below, all code and documentation -contained in the mdocml toolkit is protected by the Copyright +contained in the mandoc toolkit is protected by the Copyright of the following developers: Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> -Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org> +Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> Copyright (c) 2009, 2010, 2011, 2012 Joerg Sonnenberger <joerg@netbsd.org> Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> -Copyright (c) 2014 Baptiste Daroussin <bapt@FreeBSD.org> +Copyright (c) 2014 Baptiste Daroussin <bapt@freebsd.org> +Copyright (c) 2016 Ed Maste <emaste@freebsd.org> +Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org> Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org> Copyright (c) 1998, 2004, 2010 Todd C. Miller <Todd.Miller@courtesan.com> -Copyright (c) 2008 Otto Moerbeek <otto@drijf.net> +Copyright (c) 2008, 2017 Otto Moerbeek <otto@drijf.net> Copyright (c) 2004 Ted Unangst <tedu@openbsd.org> Copyright (c) 1994 Christos Zoulas <christos@netbsd.org> Copyright (c) 2003, 2007, 2008, 2014 Jason McIntyre <jmc@openbsd.org> @@ -20,7 +22,7 @@ See the individual source files for information about who contributed to which file during which years. -The mdocml distribution as a whole is distributed by its developers +The mandoc distribution as a whole is distributed by its developers under the following license: Permission to use, copy, modify, and distribute this software for any diff --git a/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip b/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip index b89cd5be30..15a31e7465 100644 --- a/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip +++ b/usr/src/cmd/mandoc/THIRDPARTYLICENSE.descrip @@ -1 +1 @@ -MDOCML - The mandoc UNIX manpage compiler toolset +The mandoc UNIX manpage compiler toolset diff --git a/usr/src/cmd/mandoc/att.c b/usr/src/cmd/mandoc/att.c index 872f982395..dd7f2a0d77 100644 --- a/usr/src/cmd/mandoc/att.c +++ b/usr/src/cmd/mandoc/att.c @@ -1,4 +1,4 @@ -/* $Id: att.c,v 1.15 2015/10/06 18:32:19 schwarze Exp $ */ +/* $Id: att.c,v 1.16 2017/06/24 14:38:32 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -19,6 +19,7 @@ #include <sys/types.h> #include <string.h> +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "libmdoc.h" diff --git a/usr/src/cmd/mandoc/chars.c b/usr/src/cmd/mandoc/chars.c index f1f5d5d78c..039e6dc090 100644 --- a/usr/src/cmd/mandoc/chars.c +++ b/usr/src/cmd/mandoc/chars.c @@ -1,7 +1,7 @@ -/* $Id: chars.c,v 1.69 2017/02/17 18:28:06 schwarze Exp $ */ +/* $Id: chars.c,v 1.71 2017/06/14 20:57:07 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -61,6 +61,7 @@ static struct ln lines[] = { { "ba", "|", 0x007c }, { "br", "|", 0x2502 }, { "ul", "_", 0x005f }, + { "ru", "_", 0x005f }, { "rn", "-", 0x203e }, { "bb", "|", 0x00a6 }, { "sl", "/", 0x002f }, @@ -81,6 +82,10 @@ static struct ln lines[] = { { "sh", "#", 0x0023 }, { "CR", "_|", 0x21b5 }, { "OK", "\\/", 0x2713 }, + { "CL", "<club>", 0x2663 }, + { "SP", "<spade>", 0x2660 }, + { "HE", "<heart>", 0x2665 }, + { "DI", "<diamond>", 0x2666 }, /* Legal symbols. */ { "co", "(C)", 0x00a9 }, @@ -161,6 +166,7 @@ static struct ln lines[] = { { "uA", "=\b^", 0x21d1 }, { "dA", "=\bv", 0x21d3 }, { "vA", "^=v", 0x21d5 }, + { "an", "-", 0x23af }, /* Logic. */ { "AN", "^", 0x2227 }, @@ -234,11 +240,20 @@ static struct ln lines[] = { { "Ah", "N", 0x2135 }, { "Im", "I", 0x2111 }, { "Re", "R", 0x211c }, + { "wp", "P", 0x2118 }, { "pd", "a", 0x2202 }, { "-h", "/h", 0x210f }, + { "hbar", "/h", 0x210f }, { "12", "1/2", 0x00bd }, { "14", "1/4", 0x00bc }, { "34", "3/4", 0x00be }, + { "18", "1/8", 0x215B }, + { "38", "3/8", 0x215C }, + { "58", "5/8", 0x215D }, + { "78", "7/8", 0x215E }, + { "S1", "1", 0x00B9 }, + { "S2", "2", 0x00B2 }, + { "S3", "3", 0x00B3 }, /* Ligatures. */ { "ff", "ff", 0xfb00 }, @@ -354,6 +369,8 @@ static struct ln lines[] = { { "fm", "\'", 0x2032 }, { "sd", "''", 0x2033 }, { "mc", ",\bu", 0x00b5 }, + { "Of", "_\ba", 0x00aa }, + { "Om", "_\bo", 0x00ba }, /* Greek characters. */ { "*A", "A", 0x0391 }, diff --git a/usr/src/cmd/mandoc/config.h b/usr/src/cmd/mandoc/config.h index 301e1f218d..b8880c4420 100644 --- a/usr/src/cmd/mandoc/config.h +++ b/usr/src/cmd/mandoc/config.h @@ -4,39 +4,24 @@ #include <sys/types.h> #define MAN_CONF_FILE "/etc/man.conf" +#define MANPATH_BASE "/usr/share/man" #define MANPATH_DEFAULT "/usr/share/man:/usr/gnu/share/man" #define UTF8_LOCALE "en_US.UTF-8" #define EFTYPE EINVAL -#define O_DIRECTORY 0 -#define HAVE_CMSG_XPG42 0 -#define HAVE_DIRENT_NAMLEN 0 #define HAVE_ENDIAN 1 #define HAVE_ERR 1 #define HAVE_FTS 1 -#define HAVE_FTS_COMPARE_CONST 0 -#define HAVE_GETLINE 1 -#define HAVE_GETSUBOPT 1 -#define HAVE_ISBLANK 1 -#define HAVE_MKDTEMP 1 #define HAVE_NTOHL 1 #define HAVE_OHASH 0 #define HAVE_PLEDGE 0 #define HAVE_PROGNAME 1 -#define HAVE_REALLOCARRAY 1 #define HAVE_REWB_BSD 1 #define HAVE_REWB_SYSV 1 #define HAVE_SANDBOX_INIT 0 -#define HAVE_STRCASESTR 1 -#define HAVE_STRINGLIST 0 -#define HAVE_STRLCAT 1 -#define HAVE_STRLCPY 1 #define HAVE_STRPTIME 1 -#define HAVE_STRSEP 1 -#define HAVE_STRTONUM 1 #define HAVE_SYS_ENDIAN 0 -#define HAVE_VASPRINTF 1 #define HAVE_WCHAR 1 #define BINM_APROPOS "apropos" diff --git a/usr/src/cmd/mandoc/eqn.c b/usr/src/cmd/mandoc/eqn.c index e9fbdec086..01601a7137 100644 --- a/usr/src/cmd/mandoc/eqn.c +++ b/usr/src/cmd/mandoc/eqn.c @@ -1,7 +1,7 @@ -/* $Id: eqn.c,v 1.61 2016/01/08 00:50:45 schwarze Exp $ */ +/* $Id: eqn.c,v 1.78 2017/07/15 16:26:17 schwarze Exp $ */ /* * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,14 +20,16 @@ #include <sys/types.h> #include <assert.h> +#include <ctype.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> -#include "mandoc.h" #include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" #include "libmandoc.h" #include "libroff.h" @@ -80,9 +82,12 @@ enum eqn_tok { EQN_TOK_TDEFINE, EQN_TOK_NDEFINE, EQN_TOK_UNDEF, - EQN_TOK_EOF, EQN_TOK_ABOVE, - EQN_TOK__MAX + EQN_TOK__MAX, + EQN_TOK_FUNC, + EQN_TOK_QUOTED, + EQN_TOK_SYM, + EQN_TOK_EOF }; static const char *eqn_toks[EQN_TOK__MAX] = { @@ -130,12 +135,18 @@ static const char *eqn_toks[EQN_TOK__MAX] = { "tdefine", /* EQN_TOK_TDEFINE */ "ndefine", /* EQN_TOK_NDEFINE */ "undef", /* EQN_TOK_UNDEF */ - NULL, /* EQN_TOK_EOF */ "above", /* EQN_TOK_ABOVE */ }; +static const char *const eqn_func[] = { + "acos", "acsc", "and", "arc", "asec", "asin", "atan", + "cos", "cosh", "coth", "csc", "det", "exp", "for", + "if", "lim", "ln", "log", "max", "min", + "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", +}; + enum eqn_symt { - EQNSYM_alpha, + EQNSYM_alpha = 0, EQNSYM_beta, EQNSYM_chi, EQNSYM_delta, @@ -266,261 +277,195 @@ static const struct eqnsym eqnsyms[EQNSYM__MAX] = { { "-", "mi" }, /* EQNSYM_minus */ }; +enum parse_mode { + MODE_QUOTED, + MODE_NOSUB, + MODE_SUB, + MODE_TOK +}; + static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); -static void eqn_box_free(struct eqn_box *); static struct eqn_box *eqn_box_makebinary(struct eqn_node *, - enum eqn_post, struct eqn_box *); + struct eqn_box *); static void eqn_def(struct eqn_node *); -static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); +static struct eqn_def *eqn_def_find(struct eqn_node *); static void eqn_delim(struct eqn_node *); -static const char *eqn_next(struct eqn_node *, char, size_t *, int); -static const char *eqn_nextrawtok(struct eqn_node *, size_t *); -static const char *eqn_nexttok(struct eqn_node *, size_t *); -static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *); -static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **); +static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); static void eqn_undef(struct eqn_node *); -enum rofferr -eqn_read(struct eqn_node **epp, int ln, - const char *p, int pos, int *offs) +struct eqn_node * +eqn_alloc(struct mparse *parse) { - size_t sz; - struct eqn_node *ep; - enum rofferr er; - - ep = *epp; - - /* - * If we're the terminating mark, unset our equation status and - * validate the full equation. - */ - - if (0 == strncmp(p, ".EN", 3)) { - er = eqn_end(epp); - p += 3; - while (' ' == *p || '\t' == *p) - p++; - if ('\0' == *p) - return er; - mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, - ln, pos, "EN %s", p); - return er; - } - - /* - * Build up the full string, replacing all newlines with regular - * whitespace. - */ - - sz = strlen(p + pos) + 1; - ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); + struct eqn_node *ep; - /* First invocation: nil terminate the string. */ - - if (0 == ep->sz) - *ep->data = '\0'; - - ep->sz += sz; - strlcat(ep->data, p + pos, ep->sz + 1); - strlcat(ep->data, " ", ep->sz + 1); - return ROFF_IGN; + ep = mandoc_calloc(1, sizeof(*ep)); + ep->parse = parse; + ep->gsize = EQN_DEFSIZE; + return ep; } -struct eqn_node * -eqn_alloc(int pos, int line, struct mparse *parse) +void +eqn_reset(struct eqn_node *ep) { - struct eqn_node *p; - - p = mandoc_calloc(1, sizeof(struct eqn_node)); + free(ep->data); + ep->data = ep->start = ep->end = NULL; + ep->sz = ep->toksz = 0; +} - p->parse = parse; - p->eqn.ln = line; - p->eqn.pos = pos; - p->gsize = EQN_DEFSIZE; +void +eqn_read(struct eqn_node *ep, const char *p) +{ + char *cp; - return p; + if (ep->data == NULL) { + ep->sz = strlen(p); + ep->data = mandoc_strdup(p); + } else { + ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); + free(ep->data); + ep->data = cp; + } + ep->sz += 1; } /* * Find the key "key" of the give size within our eqn-defined values. */ static struct eqn_def * -eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) +eqn_def_find(struct eqn_node *ep) { int i; for (i = 0; i < (int)ep->defsz; i++) if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, - ep->defs[i].keysz, key, sz)) + ep->defs[i].keysz, ep->start, ep->toksz)) return &ep->defs[i]; return NULL; } /* - * Get the next token from the input stream using the given quote - * character. - * Optionally make any replacements. + * Parse a token from the input text. The modes are: + * MODE_QUOTED: Use *ep->start as the delimiter; the token ends + * before its next occurence. Do not interpret the token in any + * way and return EQN_TOK_QUOTED. All other modes behave like + * MODE_QUOTED when *ep->start is '"'. + * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; + * otherwise, it ends before the next whitespace or brace. + * Do not interpret the token and return EQN_TOK__MAX. + * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an + * alias created with define. If it is an alias, replace it with + * its string value and reparse. + * MODE_TOK: Like MODE_SUB, but also check the token against the list + * of tokens, and if there is a match, return that token. Otherwise, + * if the token matches a symbol, return EQN_TOK_SYM; if it matches + * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for + * a token match, *ep->start is set to an allocated string that the + * caller is expected to free. + * All modes skip whitespace following the end of the token. */ -static const char * -eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) +static enum eqn_tok +eqn_next(struct eqn_node *ep, enum parse_mode mode) { - char *start, *next; - int q, diff, lim; - size_t ssz, dummy; - struct eqn_def *def; - - if (NULL == sz) - sz = &dummy; - - lim = 0; - ep->rew = ep->cur; -again: - /* Prevent self-definitions. */ - - if (lim >= EQN_NEST_MAX) { - mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, - ep->eqn.ln, ep->eqn.pos, NULL); - return NULL; - } - - ep->cur = ep->rew; - start = &ep->data[(int)ep->cur]; - q = 0; + static int last_len, lim; - if ('\0' == *start) - return NULL; - - if (quote == *start) { - ep->cur++; - q = 1; - } - - start = &ep->data[(int)ep->cur]; - - if ( ! q) { - if ('{' == *start || '}' == *start) - ssz = 1; - else - ssz = strcspn(start + 1, " ^~\"{}\t") + 1; - next = start + (int)ssz; - if ('\0' == *next) - next = NULL; - } else - next = strchr(start, quote); - - if (NULL != next) { - *sz = (size_t)(next - start); - ep->cur += *sz; - if (q) - ep->cur++; - while (' ' == ep->data[(int)ep->cur] || - '\t' == ep->data[(int)ep->cur] || - '^' == ep->data[(int)ep->cur] || - '~' == ep->data[(int)ep->cur]) - ep->cur++; - } else { - if (q) - mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, - ep->eqn.ln, ep->eqn.pos, NULL); - next = strchr(start, '\0'); - *sz = (size_t)(next - start); - ep->cur += *sz; - } - - /* Quotes aren't expanded for values. */ - - if (q || ! repl) - return start; + struct eqn_def *def; + size_t start; + int diff, i, quoted; + enum eqn_tok tok; - if (NULL != (def = eqn_def_find(ep, start, *sz))) { - diff = def->valsz - *sz; + /* + * Reset the recursion counter after advancing + * beyond the end of the previous substitution. + */ + if (ep->end - ep->data >= last_len) + lim = 0; + + ep->start = ep->end; + quoted = mode == MODE_QUOTED; + for (;;) { + switch (*ep->start) { + case '\0': + ep->toksz = 0; + return EQN_TOK_EOF; + case '"': + quoted = 1; + break; + default: + break; + } + if (quoted) { + ep->end = strchr(ep->start + 1, *ep->start); + ep->start++; /* Skip opening quote. */ + if (ep->end == NULL) { + mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, + ep->node->line, ep->node->pos, NULL); + ep->end = strchr(ep->start, '\0'); + } + } else { + ep->end = ep->start + 1; + if (*ep->start != '{' && *ep->start != '}') + ep->end += strcspn(ep->end, " ^~\"{}\t"); + } + ep->toksz = ep->end - ep->start; + if (quoted && *ep->end != '\0') + ep->end++; /* Skip closing quote. */ + while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) + ep->end++; + if (quoted) /* Cannot return, may have to strndup. */ + break; + if (mode == MODE_NOSUB) + return EQN_TOK__MAX; + if ((def = eqn_def_find(ep)) == NULL) + break; + if (++lim > EQN_NEST_MAX) { + mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, + ep->node->line, ep->node->pos, NULL); + return EQN_TOK_EOF; + } - if (def->valsz > *sz) { + /* Replace a defined name with its string value. */ + if ((diff = def->valsz - ep->toksz) > 0) { + start = ep->start - ep->data; ep->sz += diff; ep->data = mandoc_realloc(ep->data, ep->sz + 1); - ep->data[ep->sz] = '\0'; - start = &ep->data[(int)ep->rew]; + ep->start = ep->data + start; } - - diff = def->valsz - *sz; - memmove(start + *sz + diff, start + *sz, - (strlen(start) - *sz) + 1); - memcpy(start, def->val, def->valsz); - lim++; - goto again; + if (diff) + memmove(ep->start + def->valsz, ep->start + ep->toksz, + strlen(ep->start + ep->toksz) + 1); + memcpy(ep->start, def->val, def->valsz); + last_len = ep->start - ep->data + def->valsz; } - - return start; -} - -/* - * Get the next delimited token using the default current quote - * character. - */ -static const char * -eqn_nexttok(struct eqn_node *ep, size_t *sz) -{ - - return eqn_next(ep, '"', sz, 1); -} - -/* - * Get next token without replacement. - */ -static const char * -eqn_nextrawtok(struct eqn_node *ep, size_t *sz) -{ - - return eqn_next(ep, '"', sz, 0); -} - -/* - * Parse a token from the stream of text. - * A token consists of one of the recognised eqn(7) strings. - * Strings are separated by delimiting marks. - * This returns EQN_TOK_EOF when there are no more tokens. - * If the token is an unrecognised string literal, then it returns - * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated - * string. - * This must be later freed with free(3). - */ -static enum eqn_tok -eqn_tok_parse(struct eqn_node *ep, char **p) -{ - const char *start; - size_t i, sz; - int quoted; - - if (NULL != p) - *p = NULL; - - quoted = ep->data[ep->cur] == '"'; - - if (NULL == (start = eqn_nexttok(ep, &sz))) - return EQN_TOK_EOF; - + if (mode != MODE_TOK) + return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; if (quoted) { - if (p != NULL) - *p = mandoc_strndup(start, sz); - return EQN_TOK__MAX; + ep->start = mandoc_strndup(ep->start, ep->toksz); + return EQN_TOK_QUOTED; } - - for (i = 0; i < EQN_TOK__MAX; i++) { - if (NULL == eqn_toks[i]) - continue; - if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) - break; + for (tok = 0; tok < EQN_TOK__MAX; tok++) + if (STRNEQ(ep->start, ep->toksz, + eqn_toks[tok], strlen(eqn_toks[tok]))) + return tok; + + for (i = 0; i < EQNSYM__MAX; i++) { + if (STRNEQ(ep->start, ep->toksz, + eqnsyms[i].str, strlen(eqnsyms[i].str))) { + mandoc_asprintf(&ep->start, + "\\[%s]", eqnsyms[i].sym); + return EQN_TOK_SYM; + } } - - if (i == EQN_TOK__MAX && NULL != p) - *p = mandoc_strndup(start, sz); - - return i; + ep->start = mandoc_strndup(ep->start, ep->toksz); + for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) + if (STRNEQ(ep->start, ep->toksz, + eqn_func[i], strlen(eqn_func[i]))) + return EQN_TOK_FUNC; + return EQN_TOK__MAX; } -static void +void eqn_box_free(struct eqn_box *bp) { @@ -549,6 +494,7 @@ eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) bp->parent = parent; bp->parent->args++; bp->expectargs = UINT_MAX; + bp->font = bp->parent->font; bp->size = ep->gsize; if (NULL != parent->first) { @@ -568,8 +514,7 @@ eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) * The new EQN_SUBEXPR will have a two-child limit. */ static struct eqn_box * -eqn_box_makebinary(struct eqn_node *ep, - enum eqn_post pos, struct eqn_box *parent) +eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) { struct eqn_box *b, *newb; @@ -581,7 +526,6 @@ eqn_box_makebinary(struct eqn_node *ep, parent->last = b->prev; b->prev = NULL; newb = eqn_box_alloc(ep, parent); - newb->pos = pos; newb->type = EQN_SUBEXPR; newb->expectargs = 2; newb->args = 1; @@ -597,20 +541,21 @@ eqn_box_makebinary(struct eqn_node *ep, static void eqn_delim(struct eqn_node *ep) { - const char *start; - size_t sz; - - if ((start = eqn_nextrawtok(ep, &sz)) == NULL) + if (ep->end[0] == '\0' || ep->end[1] == '\0') { mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, "delim"); - else if (strncmp(start, "off", 3) == 0) + ep->node->line, ep->node->pos, "delim"); + if (ep->end[0] != '\0') + ep->end++; + } else if (strncmp(ep->end, "off", 3) == 0) { ep->delim = 0; - else if (strncmp(start, "on", 2) == 0) { + ep->end += 3; + } else if (strncmp(ep->end, "on", 2) == 0) { if (ep->odelim && ep->cdelim) ep->delim = 1; - } else if (start[1] != '\0') { - ep->odelim = start[0]; - ep->cdelim = start[1]; + ep->end += 2; + } else { + ep->odelim = *ep->end++; + ep->cdelim = *ep->end++; ep->delim = 1; } } @@ -621,16 +566,14 @@ eqn_delim(struct eqn_node *ep) static void eqn_undef(struct eqn_node *ep) { - const char *start; struct eqn_def *def; - size_t sz; - if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, "undef"); + ep->node->line, ep->node->pos, "undef"); return; } - if ((def = eqn_def_find(ep, start, sz)) == NULL) + if ((def = eqn_def_find(ep)) == NULL) return; free(def->key); free(def->val); @@ -641,14 +584,12 @@ eqn_undef(struct eqn_node *ep) static void eqn_def(struct eqn_node *ep) { - const char *start; - size_t sz; struct eqn_def *def; int i; - if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, "define"); + ep->node->line, ep->node->pos, "define"); return; } @@ -656,7 +597,7 @@ eqn_def(struct eqn_node *ep) * Search for a key that already exists. * Create a new key if none is found. */ - if (NULL == (def = eqn_def_find(ep, start, sz))) { + if ((def = eqn_def_find(ep)) == NULL) { /* Find holes in string array. */ for (i = 0; i < (int)ep->defsz; i++) if (0 == ep->defs[i].keysz) @@ -671,14 +612,13 @@ eqn_def(struct eqn_node *ep) def = ep->defs + i; free(def->key); - def->key = mandoc_strndup(start, sz); - def->keysz = sz; + def->key = mandoc_strndup(ep->start, ep->toksz); + def->keysz = ep->toksz; } - start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); - if (start == NULL) { + if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, "define %s", def->key); + ep->node->line, ep->node->pos, "define %s", def->key); free(def->key); free(def->val); def->key = def->val = NULL; @@ -686,25 +626,21 @@ eqn_def(struct eqn_node *ep) return; } free(def->val); - def->val = mandoc_strndup(start, sz); - def->valsz = sz; + def->val = mandoc_strndup(ep->start, ep->toksz); + def->valsz = ep->toksz; } -/* - * Recursively parse an eqn(7) expression. - */ -static enum rofferr -eqn_parse(struct eqn_node *ep, struct eqn_box *parent) +void +eqn_parse(struct eqn_node *ep) { - char sym[64]; - struct eqn_box *cur; - const char *start; + struct eqn_box *cur, *nbox, *parent, *split; + const char *cp, *cpn; char *p; - size_t i, sz; - enum eqn_tok tok, subtok; - enum eqn_post pos; + enum eqn_tok tok; + enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; int size; + parent = ep->node->eqn; assert(parent != NULL); /* @@ -713,119 +649,99 @@ eqn_parse(struct eqn_node *ep, struct eqn_box *parent) */ if (ep->data == NULL) - return ROFF_IGN; + return; -next_tok: - tok = eqn_tok_parse(ep, &p); + ep->start = ep->end = ep->data + strspn(ep->data, " ^~"); -this_tok: +next_tok: + tok = eqn_next(ep, MODE_TOK); switch (tok) { - case (EQN_TOK_UNDEF): + case EQN_TOK_UNDEF: eqn_undef(ep); break; - case (EQN_TOK_NDEFINE): - case (EQN_TOK_DEFINE): + case EQN_TOK_NDEFINE: + case EQN_TOK_DEFINE: eqn_def(ep); break; - case (EQN_TOK_TDEFINE): - if (eqn_nextrawtok(ep, NULL) == NULL || - eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL) + case EQN_TOK_TDEFINE: + if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || + eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, "tdefine"); + ep->node->line, ep->node->pos, "tdefine"); break; - case (EQN_TOK_DELIM): + case EQN_TOK_DELIM: eqn_delim(ep); break; - case (EQN_TOK_GFONT): - if (eqn_nextrawtok(ep, NULL) == NULL) + case EQN_TOK_GFONT: + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; - case (EQN_TOK_MARK): - case (EQN_TOK_LINEUP): + case EQN_TOK_MARK: + case EQN_TOK_LINEUP: /* Ignore these. */ break; - case (EQN_TOK_DYAD): - case (EQN_TOK_VEC): - case (EQN_TOK_UNDER): - case (EQN_TOK_BAR): - case (EQN_TOK_TILDE): - case (EQN_TOK_HAT): - case (EQN_TOK_DOT): - case (EQN_TOK_DOTDOT): + case EQN_TOK_DYAD: + case EQN_TOK_VEC: + case EQN_TOK_UNDER: + case EQN_TOK_BAR: + case EQN_TOK_TILDE: + case EQN_TOK_HAT: + case EQN_TOK_DOT: + case EQN_TOK_DOTDOT: if (parent->last == NULL) { mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; cur->text = mandoc_strdup(""); } - parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); - parent->type = EQN_LISTONE; + parent = eqn_box_makebinary(ep, parent); + parent->type = EQN_LIST; parent->expectargs = 1; + parent->font = EQNFONT_ROMAN; switch (tok) { - case (EQN_TOK_DOTDOT): - strlcpy(sym, "\\[ad]", sizeof(sym)); + case EQN_TOK_DOTDOT: + parent->top = mandoc_strdup("\\[ad]"); break; - case (EQN_TOK_VEC): - strlcpy(sym, "\\[->]", sizeof(sym)); + case EQN_TOK_VEC: + parent->top = mandoc_strdup("\\[->]"); break; - case (EQN_TOK_DYAD): - strlcpy(sym, "\\[<>]", sizeof(sym)); + case EQN_TOK_DYAD: + parent->top = mandoc_strdup("\\[<>]"); break; - case (EQN_TOK_TILDE): - strlcpy(sym, "\\[a~]", sizeof(sym)); + case EQN_TOK_TILDE: + parent->top = mandoc_strdup("\\[a~]"); break; - case (EQN_TOK_UNDER): - strlcpy(sym, "\\[ul]", sizeof(sym)); + case EQN_TOK_UNDER: + parent->bottom = mandoc_strdup("\\[ul]"); break; - case (EQN_TOK_BAR): - strlcpy(sym, "\\[rl]", sizeof(sym)); + case EQN_TOK_BAR: + parent->top = mandoc_strdup("\\[rn]"); break; - case (EQN_TOK_DOT): - strlcpy(sym, "\\[a.]", sizeof(sym)); + case EQN_TOK_DOT: + parent->top = mandoc_strdup("\\[a.]"); break; - case (EQN_TOK_HAT): - strlcpy(sym, "\\[ha]", sizeof(sym)); - break; - default: - abort(); - } - - switch (tok) { - case (EQN_TOK_DOTDOT): - case (EQN_TOK_VEC): - case (EQN_TOK_DYAD): - case (EQN_TOK_TILDE): - case (EQN_TOK_BAR): - case (EQN_TOK_DOT): - case (EQN_TOK_HAT): - parent->top = mandoc_strdup(sym); - break; - case (EQN_TOK_UNDER): - parent->bottom = mandoc_strdup(sym); + case EQN_TOK_HAT: + parent->top = mandoc_strdup("\\[ha]"); break; default: abort(); } parent = parent->parent; break; - case (EQN_TOK_FWD): - case (EQN_TOK_BACK): - case (EQN_TOK_DOWN): - case (EQN_TOK_UP): - subtok = eqn_tok_parse(ep, NULL); - if (subtok != EQN_TOK__MAX) { + case EQN_TOK_FWD: + case EQN_TOK_BACK: + case EQN_TOK_DOWN: + case EQN_TOK_UP: + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); - tok = subtok; - goto this_tok; - } + ep->node->line, ep->node->pos, eqn_toks[tok]); break; - case (EQN_TOK_FAT): - case (EQN_TOK_ROMAN): - case (EQN_TOK_ITALIC): - case (EQN_TOK_BOLD): + case EQN_TOK_FAT: + case EQN_TOK_ROMAN: + case EQN_TOK_ITALIC: + case EQN_TOK_BOLD: while (parent->args == parent->expectargs) parent = parent->parent; /* @@ -834,52 +750,54 @@ this_tok: * exactly one of those. */ parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LISTONE; + parent->type = EQN_LIST; parent->expectargs = 1; switch (tok) { - case (EQN_TOK_FAT): + case EQN_TOK_FAT: parent->font = EQNFONT_FAT; break; - case (EQN_TOK_ROMAN): + case EQN_TOK_ROMAN: parent->font = EQNFONT_ROMAN; break; - case (EQN_TOK_ITALIC): + case EQN_TOK_ITALIC: parent->font = EQNFONT_ITALIC; break; - case (EQN_TOK_BOLD): + case EQN_TOK_BOLD: parent->font = EQNFONT_BOLD; break; default: abort(); } break; - case (EQN_TOK_SIZE): - case (EQN_TOK_GSIZE): + case EQN_TOK_SIZE: + case EQN_TOK_GSIZE: /* Accept two values: integral size and a single. */ - if (NULL == (start = eqn_nexttok(ep, &sz))) { + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; } - size = mandoc_strntoi(start, sz, 10); + size = mandoc_strntoi(ep->start, ep->toksz, 10); if (-1 == size) { mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; } if (EQN_TOK_GSIZE == tok) { ep->gsize = size; break; } + while (parent->args == parent->expectargs) + parent = parent->parent; parent = eqn_box_alloc(ep, parent); - parent->type = EQN_LISTONE; + parent->type = EQN_LIST; parent->expectargs = 1; parent->size = size; break; - case (EQN_TOK_FROM): - case (EQN_TOK_TO): - case (EQN_TOK_SUB): - case (EQN_TOK_SUP): + case EQN_TOK_FROM: + case EQN_TOK_TO: + case EQN_TOK_SUB: + case EQN_TOK_SUP: /* * We have a left-right-associative expression. * Repivot under a positional node, open a child scope @@ -887,41 +805,53 @@ this_tok: */ if (parent->last == NULL) { mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; cur->text = mandoc_strdup(""); } - /* Handle the "subsup" and "fromto" positions. */ - if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { + while (parent->expectargs == 1 && parent->args == 1) + parent = parent->parent; + if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { + for (cur = parent; cur != NULL; cur = cur->parent) + if (cur->pos == EQNPOS_SUB || + cur->pos == EQNPOS_SUP || + cur->pos == EQNPOS_SUBSUP || + cur->pos == EQNPOS_SQRT || + cur->pos == EQNPOS_OVER) + break; + if (cur != NULL) + parent = cur->parent; + } + if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { parent->expectargs = 3; parent->pos = EQNPOS_SUBSUP; break; } - if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { + if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { parent->expectargs = 3; parent->pos = EQNPOS_FROMTO; break; } + parent = eqn_box_makebinary(ep, parent); switch (tok) { - case (EQN_TOK_FROM): - pos = EQNPOS_FROM; + case EQN_TOK_FROM: + parent->pos = EQNPOS_FROM; break; - case (EQN_TOK_TO): - pos = EQNPOS_TO; + case EQN_TOK_TO: + parent->pos = EQNPOS_TO; break; - case (EQN_TOK_SUP): - pos = EQNPOS_SUP; + case EQN_TOK_SUP: + parent->pos = EQNPOS_SUP; break; - case (EQN_TOK_SUB): - pos = EQNPOS_SUB; + case EQN_TOK_SUB: + parent->pos = EQNPOS_SUB; break; default: abort(); } - parent = eqn_box_makebinary(ep, pos, parent); break; - case (EQN_TOK_SQRT): + case EQN_TOK_SQRT: while (parent->args == parent->expectargs) parent = parent->parent; /* @@ -934,7 +864,7 @@ this_tok: parent->pos = EQNPOS_SQRT; parent->expectargs = 1; break; - case (EQN_TOK_OVER): + case EQN_TOK_OVER: /* * We have a right-left-associative fraction. * Close out anything that's currently open, then @@ -942,17 +872,20 @@ this_tok: */ if (parent->last == NULL) { mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; cur->text = mandoc_strdup(""); } + while (parent->args == parent->expectargs) + parent = parent->parent; while (EQN_SUBEXPR == parent->type) parent = parent->parent; - parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); + parent = eqn_box_makebinary(ep, parent); + parent->pos = EQNPOS_OVER; break; - case (EQN_TOK_RIGHT): - case (EQN_TOK_BRACE_CLOSE): + case EQN_TOK_RIGHT: + case EQN_TOK_BRACE_CLOSE: /* * Close out the existing brace. * FIXME: this is a shitty sentinel: we should really @@ -960,31 +893,31 @@ this_tok: */ for (cur = parent; cur != NULL; cur = cur->parent) if (cur->type == EQN_LIST && + cur->expectargs > 1 && (tok == EQN_TOK_BRACE_CLOSE || cur->left != NULL)) break; if (cur == NULL) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; } parent = cur; if (EQN_TOK_RIGHT == tok) { - if (NULL == (start = eqn_nexttok(ep, &sz))) { + if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { mandoc_msg(MANDOCERR_REQ_EMPTY, - ep->parse, ep->eqn.ln, - ep->eqn.pos, eqn_toks[tok]); + ep->parse, ep->node->line, + ep->node->pos, eqn_toks[tok]); break; } /* Handling depends on right/left. */ - if (STRNEQ(start, sz, "ceiling", 7)) { - strlcpy(sym, "\\[rc]", sizeof(sym)); - parent->right = mandoc_strdup(sym); - } else if (STRNEQ(start, sz, "floor", 5)) { - strlcpy(sym, "\\[rf]", sizeof(sym)); - parent->right = mandoc_strdup(sym); - } else - parent->right = mandoc_strndup(start, sz); + if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) + parent->right = mandoc_strdup("\\[rc]"); + else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) + parent->right = mandoc_strdup("\\[rf]"); + else + parent->right = + mandoc_strndup(ep->start, ep->toksz); } parent = parent->parent; if (tok == EQN_TOK_BRACE_CLOSE && @@ -992,12 +925,13 @@ this_tok: parent->type == EQN_MATRIX)) parent = parent->parent; /* Close out any "singleton" lists. */ - while (parent->type == EQN_LISTONE && - parent->args == parent->expectargs) + while (parent->type == EQN_LIST && + parent->expectargs == 1 && + parent->args == 1) parent = parent->parent; break; - case (EQN_TOK_BRACE_OPEN): - case (EQN_TOK_LEFT): + case EQN_TOK_BRACE_OPEN: + case EQN_TOK_LEFT: /* * If we already have something in the stack and we're * in an expression, then rewind til we're not any more @@ -1006,65 +940,63 @@ this_tok: while (parent->args == parent->expectargs) parent = parent->parent; if (EQN_TOK_LEFT == tok && - (start = eqn_nexttok(ep, &sz)) == NULL) { + eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; } parent = eqn_box_alloc(ep, parent); parent->type = EQN_LIST; if (EQN_TOK_LEFT == tok) { - if (STRNEQ(start, sz, "ceiling", 7)) { - strlcpy(sym, "\\[lc]", sizeof(sym)); - parent->left = mandoc_strdup(sym); - } else if (STRNEQ(start, sz, "floor", 5)) { - strlcpy(sym, "\\[lf]", sizeof(sym)); - parent->left = mandoc_strdup(sym); - } else - parent->left = mandoc_strndup(start, sz); + if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) + parent->left = mandoc_strdup("\\[lc]"); + else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) + parent->left = mandoc_strdup("\\[lf]"); + else + parent->left = + mandoc_strndup(ep->start, ep->toksz); } break; - case (EQN_TOK_PILE): - case (EQN_TOK_LPILE): - case (EQN_TOK_RPILE): - case (EQN_TOK_CPILE): - case (EQN_TOK_CCOL): - case (EQN_TOK_LCOL): - case (EQN_TOK_RCOL): + case EQN_TOK_PILE: + case EQN_TOK_LPILE: + case EQN_TOK_RPILE: + case EQN_TOK_CPILE: + case EQN_TOK_CCOL: + case EQN_TOK_LCOL: + case EQN_TOK_RCOL: while (parent->args == parent->expectargs) parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_PILE; parent->expectargs = 1; break; - case (EQN_TOK_ABOVE): + case EQN_TOK_ABOVE: for (cur = parent; cur != NULL; cur = cur->parent) if (cur->type == EQN_PILE) break; if (cur == NULL) { mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, - ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); + ep->node->line, ep->node->pos, eqn_toks[tok]); break; } parent = eqn_box_alloc(ep, cur); parent->type = EQN_LIST; break; - case (EQN_TOK_MATRIX): + case EQN_TOK_MATRIX: while (parent->args == parent->expectargs) parent = parent->parent; parent = eqn_box_alloc(ep, parent); parent->type = EQN_MATRIX; parent->expectargs = 1; break; - case (EQN_TOK_EOF): - /* - * End of file! - * TODO: make sure we're not in an open subexpression. - */ - return ROFF_EQN; - default: - assert(tok == EQN_TOK__MAX); - assert(NULL != p); + case EQN_TOK_EOF: + return; + case EQN_TOK__MAX: + case EQN_TOK_FUNC: + case EQN_TOK_QUOTED: + case EQN_TOK_SYM: + p = ep->start; + assert(p != NULL); /* * If we already have something in the stack and we're * in an expression, then rewind til we're not any more. @@ -1073,48 +1005,93 @@ this_tok: parent = parent->parent; cur = eqn_box_alloc(ep, parent); cur->type = EQN_TEXT; - for (i = 0; i < EQNSYM__MAX; i++) - if (0 == strcmp(eqnsyms[i].str, p)) { - (void)snprintf(sym, sizeof(sym), - "\\[%s]", eqnsyms[i].sym); - cur->text = mandoc_strdup(sym); - free(p); + cur->text = p; + switch (tok) { + case EQN_TOK_FUNC: + cur->font = EQNFONT_ROMAN; + break; + case EQN_TOK_QUOTED: + if (cur->font == EQNFONT_NONE) + cur->font = EQNFONT_ITALIC; + break; + case EQN_TOK_SYM: + break; + default: + if (cur->font != EQNFONT_NONE || *p == '\0') break; + cpn = p - 1; + ccln = CCL_LET; + split = NULL; + for (;;) { + /* Advance to next character. */ + cp = cpn++; + ccl = ccln; + ccln = isalpha((unsigned char)*cpn) ? CCL_LET : + isdigit((unsigned char)*cpn) || + (*cpn == '.' && (ccl == CCL_DIG || + isdigit((unsigned char)cpn[1]))) ? + CCL_DIG : CCL_PUN; + /* No boundary before first character. */ + if (cp < p) + continue; + cur->font = ccl == CCL_LET ? + EQNFONT_ITALIC : EQNFONT_ROMAN; + if (*cp == '\\') + mandoc_escape(&cpn, NULL, NULL); + /* No boundary after last character. */ + if (*cpn == '\0') + break; + if (ccln == ccl && *cp != ',' && *cpn != ',') + continue; + /* Boundary found, split the text. */ + if (parent->args == parent->expectargs) { + /* Remove the text from the tree. */ + if (cur->prev == NULL) + parent->first = cur->next; + else + cur->prev->next = NULL; + parent->last = cur->prev; + parent->args--; + /* Set up a list instead. */ + split = eqn_box_alloc(ep, parent); + split->type = EQN_LIST; + /* Insert the word into the list. */ + split->first = split->last = cur; + cur->parent = split; + cur->prev = NULL; + parent = split; + } + /* Append a new text box. */ + nbox = eqn_box_alloc(ep, parent); + nbox->type = EQN_TEXT; + nbox->text = mandoc_strdup(cpn); + /* Truncate the old box. */ + p = mandoc_strndup(cur->text, + cpn - cur->text); + free(cur->text); + cur->text = p; + /* Setup to process the new box. */ + cur = nbox; + p = nbox->text; + cpn = p - 1; + ccln = CCL_LET; } - - if (i == EQNSYM__MAX) - cur->text = p; - /* - * Post-process list status. - */ - while (parent->type == EQN_LISTONE && - parent->args == parent->expectargs) - parent = parent->parent; + if (split != NULL) + parent = split->parent; + break; + } break; + default: + abort(); } goto next_tok; } -enum rofferr -eqn_end(struct eqn_node **epp) -{ - struct eqn_node *ep; - - ep = *epp; - *epp = NULL; - - ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); - ep->eqn.root->expectargs = UINT_MAX; - return eqn_parse(ep, ep->eqn.root); -} - void eqn_free(struct eqn_node *p) { int i; - eqn_box_free(p->eqn.root); - for (i = 0; i < (int)p->defsz; i++) { free(p->defs[i].key); free(p->defs[i].val); diff --git a/usr/src/cmd/mandoc/eqn_html.c b/usr/src/cmd/mandoc/eqn_html.c index b6e7d914b8..51f1442342 100644 --- a/usr/src/cmd/mandoc/eqn_html.c +++ b/usr/src/cmd/mandoc/eqn_html.c @@ -1,4 +1,4 @@ -/* $Id: eqn_html.c,v 1.11 2017/01/17 01:47:51 schwarze Exp $ */ +/* $Id: eqn_html.c,v 1.17 2017/07/14 13:32:35 schwarze Exp $ */ /* * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -20,6 +20,7 @@ #include <sys/types.h> #include <assert.h> +#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -33,7 +34,10 @@ eqn_box(struct html *p, const struct eqn_box *bp) { struct tag *post, *row, *cell, *t; const struct eqn_box *child, *parent; + const char *cp; size_t i, j, rows; + enum htmltag tag; + enum eqn_fontt font; if (NULL == bp) return; @@ -47,7 +51,8 @@ eqn_box(struct html *p, const struct eqn_box *bp) if (EQN_MATRIX == bp->type) { if (NULL == bp->first) goto out; - if (EQN_LIST != bp->first->type) { + if (bp->first->type != EQN_LIST || + bp->first->expectargs == 1) { eqn_box(p, bp->first); goto out; } @@ -87,28 +92,28 @@ eqn_box(struct html *p, const struct eqn_box *bp) } switch (bp->pos) { - case (EQNPOS_TO): + case EQNPOS_TO: post = print_otag(p, TAG_MOVER, ""); break; - case (EQNPOS_SUP): + case EQNPOS_SUP: post = print_otag(p, TAG_MSUP, ""); break; - case (EQNPOS_FROM): + case EQNPOS_FROM: post = print_otag(p, TAG_MUNDER, ""); break; - case (EQNPOS_SUB): + case EQNPOS_SUB: post = print_otag(p, TAG_MSUB, ""); break; - case (EQNPOS_OVER): + case EQNPOS_OVER: post = print_otag(p, TAG_MFRAC, ""); break; - case (EQNPOS_FROMTO): + case EQNPOS_FROMTO: post = print_otag(p, TAG_MUNDEROVER, ""); break; - case (EQNPOS_SUBSUP): + case EQNPOS_SUBSUP: post = print_otag(p, TAG_MSUBSUP, ""); break; - case (EQNPOS_SQRT): + case EQNPOS_SQRT: post = print_otag(p, TAG_MSQRT, ""); break; default: @@ -127,18 +132,68 @@ eqn_box(struct html *p, const struct eqn_box *bp) if (EQN_PILE == bp->type) { assert(NULL == post); - if (bp->first != NULL && bp->first->type == EQN_LIST) + if (bp->first != NULL && + bp->first->type == EQN_LIST && + bp->first->expectargs > 1) post = print_otag(p, TAG_MTABLE, ""); - } else if (bp->type == EQN_LIST && + } else if (bp->type == EQN_LIST && bp->expectargs > 1 && bp->parent && bp->parent->type == EQN_PILE) { assert(NULL == post); post = print_otag(p, TAG_MTR, ""); print_otag(p, TAG_MTD, ""); } - if (NULL != bp->text) { - assert(NULL == post); - post = print_otag(p, TAG_MI, ""); + if (bp->text != NULL) { + assert(post == NULL); + tag = TAG_MI; + cp = bp->text; + if (isdigit((unsigned char)cp[0]) || + (cp[0] == '.' && isdigit((unsigned char)cp[1]))) { + tag = TAG_MN; + while (*++cp != '\0') { + if (*cp != '.' && + isdigit((unsigned char)*cp) == 0) { + tag = TAG_MI; + break; + } + } + } else if (*cp != '\0' && isalpha((unsigned char)*cp) == 0) { + tag = TAG_MO; + while (*cp != '\0') { + if (cp[0] == '\\' && cp[1] != '\0') { + cp++; + mandoc_escape(&cp, NULL, NULL); + } else if (isalnum((unsigned char)*cp)) { + tag = TAG_MI; + break; + } else + cp++; + } + } + font = bp->font; + if (bp->text[0] != '\0' && + (((tag == TAG_MN || tag == TAG_MO) && + font == EQNFONT_ROMAN) || + (tag == TAG_MI && font == (bp->text[1] == '\0' ? + EQNFONT_ITALIC : EQNFONT_ROMAN)))) + font = EQNFONT_NONE; + switch (font) { + case EQNFONT_NONE: + post = print_otag(p, tag, ""); + break; + case EQNFONT_ROMAN: + post = print_otag(p, tag, "?", "fontstyle", "normal"); + break; + case EQNFONT_BOLD: + case EQNFONT_FAT: + post = print_otag(p, tag, "?", "fontweight", "bold"); + break; + case EQNFONT_ITALIC: + post = print_otag(p, tag, "?", "fontstyle", "italic"); + break; + default: + abort(); + } print_text(p, bp->text); } else if (NULL == post) { if (NULL != bp->left || NULL != bp->right) @@ -172,14 +227,17 @@ out: } void -print_eqn(struct html *p, const struct eqn *ep) +print_eqn(struct html *p, const struct eqn_box *bp) { struct tag *t; + if (bp->first == NULL) + return; + t = print_otag(p, TAG_MATH, "c", "eqn"); p->flags |= HTML_NONOSPACE; - eqn_box(p, ep->root); + eqn_box(p, bp); p->flags &= ~HTML_NONOSPACE; print_tagq(p, t); diff --git a/usr/src/cmd/mandoc/eqn_term.c b/usr/src/cmd/mandoc/eqn_term.c index 4358015274..08f4a993ec 100644 --- a/usr/src/cmd/mandoc/eqn_term.c +++ b/usr/src/cmd/mandoc/eqn_term.c @@ -1,4 +1,4 @@ -/* $Id: eqn_term.c,v 1.9 2017/02/12 14:19:01 schwarze Exp $ */ +/* $Id: eqn_term.c,v 1.13 2017/07/08 14:51:04 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -40,10 +40,10 @@ static void eqn_box(struct termp *, const struct eqn_box *); void -term_eqn(struct termp *p, const struct eqn *ep) +term_eqn(struct termp *p, const struct eqn_box *bp) { - eqn_box(p, ep->root); + eqn_box(p, bp); p->flags &= ~TERMP_NOSPACE; } @@ -51,21 +51,40 @@ static void eqn_box(struct termp *p, const struct eqn_box *bp) { const struct eqn_box *child; + int delim; - if (bp->type == EQN_LIST || + /* Delimiters around this box? */ + + if ((bp->type == EQN_LIST && bp->expectargs > 1) || (bp->type == EQN_PILE && (bp->prev || bp->next)) || - (bp->parent != NULL && bp->parent->pos == EQNPOS_SQRT)) { + (bp->parent != NULL && (bp->parent->pos == EQNPOS_SQRT || + /* Diacritic followed by ^ or _. */ + ((bp->top != NULL || bp->bottom != NULL) && + bp->parent->type == EQN_SUBEXPR && + bp->parent->pos != EQNPOS_OVER && bp->next != NULL) || + /* Nested over, sub, sup, from, to. */ + (bp->type == EQN_SUBEXPR && bp->pos != EQNPOS_SQRT && + ((bp->parent->type == EQN_LIST && bp->expectargs == 1) || + (bp->parent->type == EQN_SUBEXPR && + bp->pos != EQNPOS_SQRT)))))) { if (bp->parent->type == EQN_SUBEXPR && bp->prev != NULL) p->flags |= TERMP_NOSPACE; term_word(p, bp->left != NULL ? bp->left : "("); p->flags |= TERMP_NOSPACE; - } + delim = 1; + } else + delim = 0; + + /* Handle Fonts and text. */ + if (bp->font != EQNFONT_NONE) term_fontpush(p, fontmap[(int)bp->font]); if (bp->text != NULL) term_word(p, bp->text); + /* Special box types. */ + if (bp->pos == EQNPOS_SQRT) { term_word(p, "sqrt"); if (bp->first != NULL) { @@ -96,29 +115,25 @@ eqn_box(struct termp *p, const struct eqn_box *bp) } else { child = bp->first; if (bp->type == EQN_MATRIX && - child != NULL && child->type == EQN_LIST) + child != NULL && + child->type == EQN_LIST && + child->expectargs > 1) child = child->first; while (child != NULL) { eqn_box(p, bp->type == EQN_PILE && child->type == EQN_LIST && + child->expectargs > 1 && child->args == 1 ? child->first : child); child = child->next; } } + /* Handle Fonts and diacritics. */ + if (bp->font != EQNFONT_NONE) term_fontpop(p); - if (bp->type == EQN_LIST || - (bp->type == EQN_PILE && (bp->prev || bp->next)) || - (bp->parent != NULL && bp->parent->pos == EQNPOS_SQRT)) { - p->flags |= TERMP_NOSPACE; - term_word(p, bp->right != NULL ? bp->right : ")"); - if (bp->parent->type == EQN_SUBEXPR && bp->next != NULL) - p->flags |= TERMP_NOSPACE; - } - if (bp->top != NULL) { p->flags |= TERMP_NOSPACE; term_word(p, bp->top); @@ -127,4 +142,13 @@ eqn_box(struct termp *p, const struct eqn_box *bp) p->flags |= TERMP_NOSPACE; term_word(p, "_"); } + + /* Right delimiter after this box? */ + + if (delim) { + p->flags |= TERMP_NOSPACE; + term_word(p, bp->right != NULL ? bp->right : ")"); + if (bp->parent->type == EQN_SUBEXPR && bp->next != NULL) + p->flags |= TERMP_NOSPACE; + } } diff --git a/usr/src/cmd/mandoc/html.c b/usr/src/cmd/mandoc/html.c index 40f2cc076b..fc55e881b7 100644 --- a/usr/src/cmd/mandoc/html.c +++ b/usr/src/cmd/mandoc/html.c @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.207 2017/02/05 20:22:04 schwarze Exp $ */ +/* $Id: html.c,v 1.219 2017/07/15 17:57:51 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -28,8 +28,9 @@ #include <string.h> #include <unistd.h> -#include "mandoc.h" #include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" #include "out.h" #include "html.h" #include "manconf.h" @@ -86,6 +87,7 @@ static const struct htmldata htmltags[TAG_MAX] = { {"math", HTML_NLALL | HTML_INDENT}, {"mrow", 0}, {"mi", 0}, + {"mn", 0}, {"mo", 0}, {"msup", 0}, {"msub", 0}, @@ -236,6 +238,28 @@ print_metaf(struct html *h, enum mandoc_esc deco) } } +char * +html_make_id(const struct roff_node *n) +{ + const struct roff_node *nch; + char *buf, *cp; + + for (nch = n->child; nch != NULL; nch = nch->next) + if (nch->type != ROFFT_TEXT) + return NULL; + + buf = NULL; + deroff(&buf, n); + + /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */ + + for (cp = buf; *cp != '\0'; cp++) + if (*cp == ' ') + *cp = '_'; + + return buf; +} + int html_strlen(const char *cp) { @@ -322,16 +346,18 @@ static int print_encode(struct html *h, const char *p, const char *pend, int norecurse) { char numbuf[16]; - size_t sz; - int c, len, nospace; + struct tag *t; const char *seq; + size_t sz; + int c, len, breakline, nospace; enum mandoc_esc esc; - static const char rejs[9] = { '\\', '<', '>', '&', '"', + static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"', ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' }; if (pend == NULL) pend = strchr(p, '\0'); + breakline = 0; nospace = 0; while (p < pend) { @@ -342,14 +368,28 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) } for (sz = strcspn(p, rejs); sz-- && p < pend; p++) - if (*p == ' ') - print_endword(h); - else - print_byte(h, *p); + print_byte(h, *p); + + if (breakline && + (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) { + t = print_otag(h, TAG_DIV, ""); + print_text(h, "\\~"); + print_tagq(h, t); + breakline = 0; + while (p < pend && (*p == ' ' || *p == ASCII_NBRSP)) + p++; + continue; + } if (p >= pend) break; + if (*p == ' ') { + print_endword(h); + p++; + continue; + } + if (print_escape(h, *p++)) continue; @@ -394,6 +434,9 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) if (c <= 0) continue; break; + case ESCAPE_BREAK: + breakline = 1; + continue; case ESCAPE_NOSPACE: if ('\0' == *p) nospace = 1; @@ -410,7 +453,7 @@ print_encode(struct html *h, const char *p, const char *pend, int norecurse) (c > 0x7E && c < 0xA0)) c = 0xFFFD; if (c > 0x7E) { - (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c); + (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c); print_word(h, numbuf); } else if (print_escape(h, c) == 0) print_byte(h, c); @@ -473,7 +516,7 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) print_indent(h); else if ((h->flags & HTML_NOSPACE) == 0) { if (h->flags & HTML_KEEP) - print_word(h, " "); + print_word(h, " "); else { if (h->flags & HTML_PREKEEP) h->flags |= HTML_KEEP; @@ -534,18 +577,25 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) print_byte(h, '='); print_byte(h, '"'); switch (*fmt) { - case 'M': - print_href(h, arg1, arg2, 1); - fmt++; - break; case 'I': print_href(h, arg1, NULL, 0); fmt++; break; + case 'M': + print_href(h, arg1, arg2, 1); + fmt++; + break; case 'R': print_byte(h, '#'); + print_encode(h, arg1, NULL, 1); + fmt++; + break; + case 'T': + print_encode(h, arg1, NULL, 1); + print_word(h, "\" title=\""); + print_encode(h, arg1, NULL, 1); fmt++; - /* FALLTHROUGH */ + break; default: print_encode(h, arg1, NULL, 1); break; @@ -573,19 +623,31 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) case 'u': su = va_arg(ap, struct roffsu *); break; - case 'v': - i = va_arg(ap, int); - su = &mysu; - SCALE_VS_INIT(su, i); - break; case 'w': - case 'W': - if ((arg2 = va_arg(ap, char *)) == NULL) - break; - su = &mysu; - a2width(arg2, su); - if (fmt[-1] == 'W') - su->scale *= -1.0; + if ((arg2 = va_arg(ap, char *)) != NULL) { + su = &mysu; + a2width(arg2, su); + } + if (*fmt == '*') { + if (su != NULL && su->unit == SCALE_EN && + su->scale > 5.9 && su->scale < 6.1) + su = NULL; + fmt++; + } + if (*fmt == '+') { + if (su != NULL) { + /* Make even bold text fit. */ + su->scale *= 1.2; + /* Add padding. */ + su->scale += 3.0; + } + fmt++; + } + if (*fmt == '-') { + if (su != NULL) + su->scale *= -1.0; + fmt++; + } break; default: abort(); @@ -594,9 +656,6 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) /* Second letter: style name. */ switch (*fmt++) { - case 'b': - attr = "margin-bottom"; - break; case 'h': attr = "height"; break; @@ -606,9 +665,6 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...) case 'l': attr = "margin-left"; break; - case 't': - attr = "margin-top"; - break; case 'w': attr = "width"; break; @@ -721,7 +777,7 @@ print_text(struct html *h, const char *word) h->flags |= HTML_KEEP; print_endword(h); } else - print_word(h, " "); + print_word(h, " "); } assert(NULL == h->metaf); @@ -912,7 +968,10 @@ print_word(struct html *h, const char *cp) static void a2width(const char *p, struct roffsu *su) { - if (a2roffsu(p, su, SCALE_MAX) < 2) { + const char *end; + + end = a2roffsu(p, su, SCALE_MAX); + if (end == NULL || *end != '\0') { su->unit = SCALE_EN; su->scale = html_strlen(p); } else if (su->scale < 0.0) diff --git a/usr/src/cmd/mandoc/html.h b/usr/src/cmd/mandoc/html.h index 5be2f82db6..c727eacf5d 100644 --- a/usr/src/cmd/mandoc/html.h +++ b/usr/src/cmd/mandoc/html.h @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.83 2017/02/05 20:22:04 schwarze Exp $ */ +/* $Id: html.h,v 1.87 2017/07/08 14:51:04 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -51,6 +51,7 @@ enum htmltag { TAG_MATH, TAG_MROW, TAG_MI, + TAG_MN, TAG_MO, TAG_MSUP, TAG_MSUB, @@ -112,8 +113,11 @@ struct html { }; +struct roff_node; struct tbl_span; -struct eqn; +struct eqn_box; + +void roff_html_pre(struct html *, const struct roff_node *); void print_gen_decls(struct html *); void print_gen_head(struct html *); @@ -123,8 +127,9 @@ void print_stagq(struct html *, const struct tag *); void print_text(struct html *, const char *); void print_tblclose(struct html *); void print_tbl(struct html *, const struct tbl_span *); -void print_eqn(struct html *, const struct eqn *); +void print_eqn(struct html *, const struct eqn_box *); void print_paragraph(struct html *); void print_endline(struct html *); +char *html_make_id(const struct roff_node *); int html_strlen(const char *); diff --git a/usr/src/cmd/mandoc/lib.c b/usr/src/cmd/mandoc/lib.c index 5295950b09..0474924d73 100644 --- a/usr/src/cmd/mandoc/lib.c +++ b/usr/src/cmd/mandoc/lib.c @@ -1,4 +1,4 @@ -/* $Id: lib.c,v 1.13 2015/10/06 18:32:19 schwarze Exp $ */ +/* $Id: lib.c,v 1.14 2017/06/24 14:38:32 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -20,6 +20,7 @@ #include <string.h> +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "libmdoc.h" diff --git a/usr/src/cmd/mandoc/libman.h b/usr/src/cmd/mandoc/libman.h index 65849602c2..312093dd01 100644 --- a/usr/src/cmd/mandoc/libman.h +++ b/usr/src/cmd/mandoc/libman.h @@ -1,4 +1,4 @@ -/* $Id: libman.h,v 1.79 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: libman.h,v 1.81 2017/04/29 12:45:41 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -17,7 +17,7 @@ */ #define MACRO_PROT_ARGS struct roff_man *man, \ - int tok, \ + enum roff_tok tok, \ int line, \ int ppos, \ int *pos, \ @@ -35,7 +35,6 @@ struct man_macro { extern const struct man_macro *const man_macros; -int man_hash_find(const char *); void man_node_validate(struct roff_man *); void man_state(struct roff_man *, struct roff_node *); void man_unscope(struct roff_man *, const struct roff_node *); diff --git a/usr/src/cmd/mandoc/libmandoc.h b/usr/src/cmd/mandoc/libmandoc.h index 04b3a44565..2cf0762861 100644 --- a/usr/src/cmd/mandoc/libmandoc.h +++ b/usr/src/cmd/mandoc/libmandoc.h @@ -1,7 +1,7 @@ -/* $Id: libmandoc.h,v 1.66 2017/02/18 13:43:52 schwarze Exp $ */ +/* $Id: libmandoc.h,v 1.70 2017/07/08 17:52:49 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -23,8 +23,6 @@ enum rofferr { ROFF_REPARSE, /* re-run main parser on the result */ ROFF_SO, /* include another file */ ROFF_IGN, /* ignore current line */ - ROFF_TBL, /* a table row was successfully parsed */ - ROFF_EQN /* an equation was successfully parsed */ }; struct buf { @@ -34,8 +32,6 @@ struct buf { struct mparse; -struct tbl_span; -struct eqn; struct roff; struct roff_man; @@ -45,16 +41,14 @@ void mandoc_vmsg(enum mandocerr, struct mparse *, int, int, const char *, ...) __attribute__((__format__ (__printf__, 5, 6))); char *mandoc_getarg(struct mparse *, char **, int, int *); -char *mandoc_normdate(struct mparse *, char *, int, int); +char *mandoc_normdate(struct roff_man *, char *, int, int); int mandoc_eos(const char *, size_t); int mandoc_strntoi(const char *, size_t, int); const char *mandoc_a2msec(const char*); -void mdoc_hash_init(void); int mdoc_parseln(struct roff_man *, int, char *, int); void mdoc_endparse(struct roff_man *); -void man_hash_init(void); int man_parseln(struct roff_man *, int, char *, int); void man_endparse(struct roff_man *); @@ -77,6 +71,3 @@ char *roff_strdup(const struct roff *, const char *); int roff_getcontrol(const struct roff *, const char *, int *); int roff_getformat(const struct roff *); - -const struct tbl_span *roff_span(const struct roff *); -const struct eqn *roff_eqn(const struct roff *); diff --git a/usr/src/cmd/mandoc/libmdoc.h b/usr/src/cmd/mandoc/libmdoc.h index ac1521410b..57dff61b4a 100644 --- a/usr/src/cmd/mandoc/libmdoc.h +++ b/usr/src/cmd/mandoc/libmdoc.h @@ -1,7 +1,7 @@ -/* $Id: libmdoc.h,v 1.109 2017/02/16 03:00:23 schwarze Exp $ */ +/* $Id: libmdoc.h,v 1.112 2017/05/30 16:22:03 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,7 +17,7 @@ */ #define MACRO_PROT_ARGS struct roff_man *mdoc, \ - int tok, \ + enum roff_tok tok, \ int line, \ int ppos, \ int *pos, \ @@ -39,7 +39,6 @@ enum margserr { ARGS_EOLN, /* end-of-line */ ARGS_WORD, /* normal word */ ARGS_PUNCT, /* series of punctuation */ - ARGS_QWORD, /* quoted word */ ARGS_PHRASE /* Bl -column phrase */ }; @@ -65,24 +64,24 @@ extern const struct mdoc_macro *const mdoc_macros; void mdoc_macro(MACRO_PROT_ARGS); void mdoc_elem_alloc(struct roff_man *, int, int, - int, struct mdoc_arg *); + enum roff_tok, struct mdoc_arg *); struct roff_node *mdoc_block_alloc(struct roff_man *, int, int, - int, struct mdoc_arg *); -void mdoc_tail_alloc(struct roff_man *, int, int, int); -struct roff_node *mdoc_endbody_alloc(struct roff_man *, int, int, int, - struct roff_node *); + enum roff_tok, struct mdoc_arg *); +void mdoc_tail_alloc(struct roff_man *, int, int, + enum roff_tok); +struct roff_node *mdoc_endbody_alloc(struct roff_man *, int, int, + enum roff_tok, struct roff_node *); void mdoc_node_relink(struct roff_man *, struct roff_node *); void mdoc_node_validate(struct roff_man *); void mdoc_state(struct roff_man *, struct roff_node *); void mdoc_state_reset(struct roff_man *); -int mdoc_hash_find(const char *); const char *mdoc_a2arch(const char *); const char *mdoc_a2att(const char *); const char *mdoc_a2lib(const char *); enum roff_sec mdoc_a2sec(const char *); const char *mdoc_a2st(const char *); -void mdoc_argv(struct roff_man *, int, int, +void mdoc_argv(struct roff_man *, int, enum roff_tok, struct mdoc_arg **, int *, char *); enum margserr mdoc_args(struct roff_man *, int, - int *, char *, int, char **); + int *, char *, enum roff_tok, char **); enum mdelim mdoc_isdelim(const char *); diff --git a/usr/src/cmd/mandoc/libroff.h b/usr/src/cmd/mandoc/libroff.h index 897a55ae61..b6a026d820 100644 --- a/usr/src/cmd/mandoc/libroff.h +++ b/usr/src/cmd/mandoc/libroff.h @@ -1,7 +1,7 @@ -/* $Id: libroff.h,v 1.39 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: libroff.h,v 1.42 2017/07/08 17:52:49 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -38,15 +38,15 @@ struct tbl_node { }; struct eqn_node { - struct eqn eqn; /* syntax tree of this equation */ struct mparse *parse; /* main parser, for error reporting */ - struct eqn_node *next; /* singly linked list of equations */ + struct roff_node *node; /* syntax tree of this equation */ struct eqn_def *defs; /* array of definitions */ char *data; /* source code of this equation */ + char *start; /* first byte of the current token */ + char *end; /* first byte of the next token */ size_t defsz; /* number of definitions */ size_t sz; /* length of the source code */ - size_t cur; /* parse point in the source code */ - size_t rew; /* beginning of the current token */ + size_t toksz; /* length of the current token */ int gsize; /* default point size */ int delim; /* in-line delimiters enabled */ char odelim; /* in-line opening delimiter */ @@ -65,15 +65,16 @@ struct tbl_node *tbl_alloc(int, int, struct mparse *); void tbl_restart(int, int, struct tbl_node *); void tbl_free(struct tbl_node *); void tbl_reset(struct tbl_node *); -enum rofferr tbl_read(struct tbl_node *, int, const char *, int); +void tbl_read(struct tbl_node *, int, const char *, int); void tbl_option(struct tbl_node *, int, const char *, int *); void tbl_layout(struct tbl_node *, int, const char *, int); void tbl_data(struct tbl_node *, int, const char *, int); -int tbl_cdata(struct tbl_node *, int, const char *, int); +void tbl_cdata(struct tbl_node *, int, const char *, int); const struct tbl_span *tbl_span(struct tbl_node *); -int tbl_end(struct tbl_node **); -struct eqn_node *eqn_alloc(int, int, struct mparse *); -enum rofferr eqn_end(struct eqn_node **); +int tbl_end(struct tbl_node *); +struct eqn_node *eqn_alloc(struct mparse *); +void eqn_box_free(struct eqn_box *); void eqn_free(struct eqn_node *); -enum rofferr eqn_read(struct eqn_node **, int, - const char *, int, int *); +void eqn_parse(struct eqn_node *); +void eqn_read(struct eqn_node *, const char *); +void eqn_reset(struct eqn_node *); diff --git a/usr/src/cmd/mandoc/main.c b/usr/src/cmd/mandoc/main.c index 02abaaf791..7f1411a6fb 100644 --- a/usr/src/cmd/mandoc/main.c +++ b/usr/src/cmd/mandoc/main.c @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.283 2017/02/17 14:31:52 schwarze Exp $ */ +/* $Id: main.c,v 1.301 2017/07/26 10:21:55 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2012, 2014-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -43,6 +43,7 @@ #include "mandoc_aux.h" #include "mandoc.h" +#include "mandoc_xr.h" #include "roff.h" #include "mdoc.h" #include "man.h" @@ -56,7 +57,6 @@ enum outmode { OUTMODE_FLN, OUTMODE_LST, OUTMODE_ALL, - OUTMODE_INT, OUTMODE_ONE }; @@ -67,6 +67,7 @@ enum outt { OUTT_TREE, /* -Ttree */ OUTT_MAN, /* -Tman */ OUTT_HTML, /* -Thtml */ + OUTT_MARKDOWN, /* -Tmarkdown */ OUTT_LINT, /* -Tlint */ OUTT_PS, /* -Tps */ OUTT_PDF /* -Tpdf */ @@ -74,25 +75,28 @@ enum outt { struct curparse { struct mparse *mp; - enum mandoclevel wlevel; /* ignore messages below this */ + struct manoutput *outopts; /* output options */ + void *outdata; /* data for output */ + char *os_s; /* operating system for display */ int wstop; /* stop after a file with a warning */ + enum mandocerr mmin; /* ignore messages below this */ + enum mandoc_os os_e; /* check base system conventions */ enum outt outtype; /* which output to use */ - void *outdata; /* data for output */ - struct manoutput *outopts; /* output options */ }; int mandocdb(int, char *[]); +static void check_xr(const char *); static int fs_lookup(const struct manpaths *, size_t ipath, const char *, const char *, const char *, struct manpage **, size_t *); -static void fs_search(const struct mansearch *, +static int fs_search(const struct mansearch *, const struct manpaths *, int, char**, struct manpage **, size_t *); static int koptions(int *, char *); -static int moptions(int *, char *); +static void moptions(int *, char *); static void mmsg(enum mandocerr, enum mandoclevel, const char *, int, int, const char *); static void outdata_alloc(struct curparse *); @@ -107,6 +111,7 @@ static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; static char help_arg[] = "help"; static char *help_argv[] = {help_arg, NULL}; static enum mandoclevel rc; +static FILE *mmsg_stream; int @@ -119,7 +124,7 @@ main(int argc, char *argv[]) struct manpage *res, *resp; const char *progname, *sec, *thisarg; char *conf_file, *defpaths, *auxpaths; - char *defos, *oarg; + char *oarg; unsigned char *uc; size_t i, sz; int prio, best_prio; @@ -149,7 +154,7 @@ main(int argc, char *argv[]) return mandocdb(argc, argv); #if HAVE_PLEDGE - if (pledge("stdio rpath tmppath tty proc exec flock", NULL) == -1) + if (pledge("stdio rpath tmppath tty proc exec", NULL) == -1) err((int)MANDOCLEVEL_SYSERR, "pledge"); #endif @@ -183,18 +188,22 @@ main(int argc, char *argv[]) memset(&curp, 0, sizeof(struct curparse)); curp.outtype = OUTT_LOCALE; - curp.wlevel = MANDOCLEVEL_BADARG; + curp.mmin = MANDOCERR_MAX; curp.outopts = &conf.output; options = MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1; - defos = NULL; + mmsg_stream = stderr; use_pager = 1; tag_files = NULL; show_usage = 0; outmode = OUTMODE_DEF; - while (-1 != (c = getopt(argc, argv, - "aC:cfhI:iK:klM:m:O:S:s:T:VW:w"))) { + while ((c = getopt(argc, argv, + "aC:cfhI:iK:klM:m:O:S:s:T:VW:w")) != -1) { + if (c == 'i' && search.argmode == ARG_EXPR) { + optind--; + break; + } switch (c) { case 'a': outmode = OUTMODE_ALL; @@ -218,14 +227,11 @@ main(int argc, char *argv[]) warnx("-I %s: Bad argument", optarg); return (int)MANDOCLEVEL_BADARG; } - if (defos) { + if (curp.os_s != NULL) { warnx("-I %s: Duplicate argument", optarg); return (int)MANDOCLEVEL_BADARG; } - defos = mandoc_strdup(optarg + 3); - break; - case 'i': - outmode = OUTMODE_INT; + curp.os_s = mandoc_strdup(optarg + 3); break; case 'K': if ( ! koptions(&options, optarg)) @@ -312,7 +318,7 @@ main(int argc, char *argv[]) #if HAVE_PLEDGE if (!use_pager) - if (pledge("stdio rpath flock", NULL) == -1) + if (pledge("stdio rpath", NULL) == -1) err((int)MANDOCLEVEL_SYSERR, "pledge"); #endif @@ -441,11 +447,12 @@ main(int argc, char *argv[]) } #endif - if (search.argmode == ARG_FILE && ! moptions(&options, auxpaths)) - return (int)MANDOCLEVEL_BADARG; + if (search.argmode == ARG_FILE) + moptions(&options, auxpaths); mchars_alloc(); - curp.mp = mparse_alloc(options, curp.wlevel, mmsg, defos); + curp.mp = mparse_alloc(options, curp.mmin, mmsg, + curp.os_e, curp.os_s); /* * Conditionally start up the lookaside buffer before parsing. @@ -471,7 +478,7 @@ main(int argc, char *argv[]) parse(&curp, fd, *argv); else if (resp->form == FORM_SRC) { /* For .so only; ignore failure. */ - chdir(conf.manpath.paths[resp->ipath]); + (void)chdir(conf.manpath.paths[resp->ipath]); parse(&curp, fd, resp->file); } else passthrough(resp->file, fd, @@ -514,6 +521,7 @@ main(int argc, char *argv[]) break; } } + mandoc_xr_free(); mparse_free(curp.mp); mchars_free(); @@ -523,7 +531,7 @@ out: mansearch_free(res, sz); } - free(defos); + free(curp.os_s); /* * When using a pager, finish writing both temporary files, @@ -591,24 +599,22 @@ usage(enum argmode argmode) switch (argmode) { case ARG_FILE: - fputs("usage: mandoc [-acfhkl] [-I os=name] " - "[-K encoding] [-mformat] [-O option]\n" + fputs("usage: mandoc [-ac] [-I os=name] " + "[-K encoding] [-mdoc | -man] [-O options]\n" "\t [-T output] [-W level] [file ...]\n", stderr); break; case ARG_NAME: - fputs("usage: man [-acfhklw] [-C file] [-I os=name] " - "[-K encoding] [-M path] [-m path]\n" - "\t [-O option=value] [-S subsection] [-s section] " - "[-T output] [-W level]\n" - "\t [section] name ...\n", stderr); + fputs("usage: man [-acfhklw] [-C file] [-M path] " + "[-m path] [-S subsection]\n" + "\t [[-s] section] name ...\n", stderr); break; case ARG_WORD: - fputs("usage: whatis [-acfhklw] [-C file] " + fputs("usage: whatis [-afk] [-C file] " "[-M path] [-m path] [-O outkey] [-S arch]\n" "\t [-s section] name ...\n", stderr); break; case ARG_EXPR: - fputs("usage: apropos [-acfhklw] [-C file] " + fputs("usage: apropos [-afk] [-C file] " "[-M path] [-m path] [-O outkey] [-S arch]\n" "\t [-s section] expression ...\n", stderr); break; @@ -659,12 +665,23 @@ fs_lookup(const struct manpaths *paths, size_t ipath, if (globres == 0) file = mandoc_strdup(*globinfo.gl_pathv); globfree(&globinfo); - if (globres != 0) + if (globres == 0) + goto found; + if (res != NULL || ipath + 1 != paths->sz) return 0; + mandoc_asprintf(&file, "%s.%s", name, sec); + globres = access(file, R_OK); + free(file); + return globres != -1; + found: warnx("outdated mandoc.db lacks %s(%s) entry, run %s %s", name, sec, BINM_MAKEWHATIS, paths->paths[ipath]); + if (res == NULL) { + free(file); + return 1; + } *res = mandoc_reallocarray(*res, ++*ressz, sizeof(struct manpage)); page = *res + (*ressz - 1); page->file = file; @@ -677,7 +694,7 @@ found: return 1; } -static void +static int fs_search(const struct mansearch *cfg, const struct manpaths *paths, int argc, char **argv, struct manpage **res, size_t *ressz) { @@ -689,7 +706,8 @@ fs_search(const struct mansearch *cfg, const struct manpaths *paths, assert(cfg->argmode == ARG_NAME); - *res = NULL; + if (res != NULL) + *res = NULL; *ressz = lastsz = 0; while (argc) { for (ipath = 0; ipath < paths->sz; ipath++) { @@ -697,19 +715,20 @@ fs_search(const struct mansearch *cfg, const struct manpaths *paths, if (fs_lookup(paths, ipath, cfg->sec, cfg->arch, *argv, res, ressz) && cfg->firstmatch) - return; + return 1; } else for (isec = 0; isec < nsec; isec++) if (fs_lookup(paths, ipath, sections[isec], cfg->arch, *argv, res, ressz) && cfg->firstmatch) - return; + return 1; } - if (*ressz == lastsz) + if (res != NULL && *ressz == lastsz) warnx("No entry for %s in the manual.", *argv); lastsz = *ressz; argv++; argc--; } + return 0; } static void @@ -746,6 +765,7 @@ parse(struct curparse *curp, int fd, const char *file) if (man == NULL) return; + mandoc_xr_reset(); if (man->macroset == MACROSET_MDOC) { if (curp->outtype != OUTT_TREE || !curp->outopts->noval) mdoc_validate(man); @@ -766,6 +786,9 @@ parse(struct curparse *curp, int fd, const char *file) case OUTT_PS: terminal_mdoc(curp->outdata, man); break; + case OUTT_MARKDOWN: + markdown_mdoc(curp->outdata, man); + break; default: break; } @@ -794,10 +817,47 @@ parse(struct curparse *curp, int fd, const char *file) break; } } + if (curp->mmin < MANDOCERR_STYLE) + check_xr(file); mparse_updaterc(curp->mp, &rc); } static void +check_xr(const char *file) +{ + static struct manpaths paths; + struct mansearch search; + struct mandoc_xr *xr; + char *cp; + size_t sz; + + if (paths.sz == 0) + manpath_base(&paths); + + for (xr = mandoc_xr_get(); xr != NULL; xr = xr->next) { + if (xr->line == -1) + continue; + search.arch = NULL; + search.sec = xr->sec; + search.outkey = NULL; + search.argmode = ARG_NAME; + search.firstmatch = 1; + if (mansearch(&search, &paths, 1, &xr->name, NULL, &sz)) + continue; + if (fs_search(&search, &paths, 1, &xr->name, NULL, &sz)) + continue; + if (xr->count == 1) + mandoc_asprintf(&cp, "Xr %s %s", xr->name, xr->sec); + else + mandoc_asprintf(&cp, "Xr %s %s (%d times)", + xr->name, xr->sec, xr->count); + mmsg(MANDOCERR_XR_BAD, MANDOCLEVEL_STYLE, + file, xr->line, xr->pos + 1, cp); + free(cp); + } +} + +static void outdata_alloc(struct curparse *curp) { switch (curp->outtype) { @@ -915,24 +975,16 @@ koptions(int *options, char *arg) return 1; } -static int +static void moptions(int *options, char *arg) { if (arg == NULL) - /* nothing to do */; - else if (0 == strcmp(arg, "doc")) + return; + if (strcmp(arg, "doc") == 0) *options |= MPARSE_MDOC; - else if (0 == strcmp(arg, "andoc")) - /* nothing to do */; - else if (0 == strcmp(arg, "an")) + else if (strcmp(arg, "an") == 0) *options |= MPARSE_MAN; - else { - warnx("-m %s: Bad argument", arg); - return 0; - } - - return 1; } static int @@ -943,19 +995,20 @@ toptions(struct curparse *curp, char *arg) curp->outtype = OUTT_ASCII; else if (0 == strcmp(arg, "lint")) { curp->outtype = OUTT_LINT; - curp->wlevel = MANDOCLEVEL_WARNING; + curp->mmin = MANDOCERR_BASE; + mmsg_stream = stdout; } else if (0 == strcmp(arg, "tree")) curp->outtype = OUTT_TREE; else if (0 == strcmp(arg, "man")) curp->outtype = OUTT_MAN; else if (0 == strcmp(arg, "html")) curp->outtype = OUTT_HTML; + else if (0 == strcmp(arg, "markdown")) + curp->outtype = OUTT_MARKDOWN; else if (0 == strcmp(arg, "utf8")) curp->outtype = OUTT_UTF8; else if (0 == strcmp(arg, "locale")) curp->outtype = OUTT_LOCALE; - else if (0 == strcmp(arg, "xhtml")) - curp->outtype = OUTT_HTML; else if (0 == strcmp(arg, "ps")) curp->outtype = OUTT_PS; else if (0 == strcmp(arg, "pdf")) @@ -972,15 +1025,19 @@ static int woptions(struct curparse *curp, char *arg) { char *v, *o; - const char *toks[7]; + const char *toks[11]; toks[0] = "stop"; toks[1] = "all"; - toks[2] = "warning"; - toks[3] = "error"; - toks[4] = "unsupp"; - toks[5] = "fatal"; - toks[6] = NULL; + toks[2] = "base"; + toks[3] = "style"; + toks[4] = "warning"; + toks[5] = "error"; + toks[6] = "unsupp"; + toks[7] = "fatal"; + toks[8] = "openbsd"; + toks[9] = "netbsd"; + toks[10] = NULL; while (*arg) { o = arg; @@ -990,23 +1047,36 @@ woptions(struct curparse *curp, char *arg) break; case 1: case 2: - curp->wlevel = MANDOCLEVEL_WARNING; + curp->mmin = MANDOCERR_BASE; break; case 3: - curp->wlevel = MANDOCLEVEL_ERROR; + curp->mmin = MANDOCERR_STYLE; break; case 4: - curp->wlevel = MANDOCLEVEL_UNSUPP; + curp->mmin = MANDOCERR_WARNING; break; case 5: - curp->wlevel = MANDOCLEVEL_BADARG; + curp->mmin = MANDOCERR_ERROR; + break; + case 6: + curp->mmin = MANDOCERR_UNSUPP; + break; + case 7: + curp->mmin = MANDOCERR_MAX; + break; + case 8: + curp->mmin = MANDOCERR_BASE; + curp->os_e = MANDOC_OS_OPENBSD; + break; + case 9: + curp->mmin = MANDOCERR_BASE; + curp->os_e = MANDOC_OS_NETBSD; break; default: warnx("-W %s: Bad argument", o); return 0; } } - return 1; } @@ -1016,21 +1086,21 @@ mmsg(enum mandocerr t, enum mandoclevel lvl, { const char *mparse_msg; - fprintf(stderr, "%s: %s:", getprogname(), + fprintf(mmsg_stream, "%s: %s:", getprogname(), file == NULL ? "<stdin>" : file); if (line) - fprintf(stderr, "%d:%d:", line, col + 1); + fprintf(mmsg_stream, "%d:%d:", line, col + 1); - fprintf(stderr, " %s", mparse_strlevel(lvl)); + fprintf(mmsg_stream, " %s", mparse_strlevel(lvl)); - if (NULL != (mparse_msg = mparse_strerror(t))) - fprintf(stderr, ": %s", mparse_msg); + if ((mparse_msg = mparse_strerror(t)) != NULL) + fprintf(mmsg_stream, ": %s", mparse_msg); if (msg) - fprintf(stderr, ": %s", msg); + fprintf(mmsg_stream, ": %s", msg); - fputc('\n', stderr); + fputc('\n', mmsg_stream); } static pid_t diff --git a/usr/src/cmd/mandoc/main.h b/usr/src/cmd/mandoc/main.h index f12f3e4c3a..f9b8f135f8 100644 --- a/usr/src/cmd/mandoc/main.h +++ b/usr/src/cmd/mandoc/main.h @@ -1,4 +1,4 @@ -/* $Id: main.h,v 1.26 2016/07/15 19:33:01 schwarze Exp $ */ +/* $Id: main.h,v 1.27 2017/03/03 14:23:23 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -49,3 +49,5 @@ void pspdf_free(void *); void terminal_mdoc(void *, const struct roff_man *); void terminal_man(void *, const struct roff_man *); void terminal_sepline(void *); + +void markdown_mdoc(void *, const struct roff_man *); diff --git a/usr/src/cmd/mandoc/man.c b/usr/src/cmd/mandoc/man.c index a2db05fbaa..7a2bcc9688 100644 --- a/usr/src/cmd/mandoc/man.c +++ b/usr/src/cmd/mandoc/man.c @@ -1,7 +1,7 @@ -/* $Id: man.c,v 1.167 2017/01/10 13:47:00 schwarze Exp $ */ +/* $Id: man.c,v 1.176 2017/06/28 12:52:45 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> * * Permission to use, copy, modify, and distribute this software for any @@ -35,21 +35,6 @@ #include "roff_int.h" #include "libman.h" -const char *const __man_macronames[MAN_MAX] = { - "br", "TH", "SH", "SS", - "TP", "LP", "PP", "P", - "IP", "HP", "SM", "SB", - "BI", "IB", "BR", "RB", - "R", "B", "I", "IR", - "RI", "sp", "nf", - "fi", "RE", "RS", "DT", - "UC", "PD", "AT", "in", - "ft", "OP", "EX", "EE", - "UR", "UE", "ll" - }; - -const char * const *man_macronames = __man_macronames; - static void man_descope(struct roff_man *, int, int); static int man_ptext(struct roff_man *, int, char *, int); static int man_pmacro(struct roff_man *, int, char *, int); @@ -91,6 +76,8 @@ static int man_ptext(struct roff_man *man, int line, char *buf, int offs) { int i; + const char *cp, *sp; + char *ep; /* Literal free-form text whitespace is preserved. */ @@ -104,17 +91,36 @@ man_ptext(struct roff_man *man, int line, char *buf, int offs) /* Skip leading whitespace. */ ; /* - * Blank lines are ignored right after headings + * Blank lines are ignored in next line scope + * and right after headings and cancel preceding \c, * but add a single vertical space elsewhere. */ if (buf[i] == '\0') { - /* Allocate a blank entry. */ - if (man->last->tok != MAN_SH && - man->last->tok != MAN_SS) { - roff_elem_alloc(man, line, offs, MAN_sp); - man->next = ROFF_NEXT_SIBLING; + if (man->flags & (MAN_ELINE | MAN_BLINE)) { + mandoc_msg(MANDOCERR_BLK_BLANK, man->parse, + line, 0, NULL); + return 1; + } + if (man->last->tok == MAN_SH || man->last->tok == MAN_SS) + return 1; + switch (man->last->type) { + case ROFFT_TEXT: + sp = man->last->string; + cp = ep = strchr(sp, '\0') - 2; + if (cp < sp || cp[0] != '\\' || cp[1] != 'c') + break; + while (cp > sp && cp[-1] == '\\') + cp--; + if ((ep - cp) % 2) + break; + *ep = '\0'; + return 1; + default: + break; } + roff_elem_alloc(man, line, offs, ROFF_sp); + man->next = ROFF_NEXT_SIBLING; return 1; } @@ -160,26 +166,19 @@ man_pmacro(struct roff_man *man, int ln, char *buf, int offs) { struct roff_node *n; const char *cp; - int tok; - int i, ppos; + size_t sz; + enum roff_tok tok; + int ppos; int bline; - char mac[5]; - - ppos = offs; - - /* - * Copy the first word into a nil-terminated buffer. - * Stop when a space, tab, escape, or eoln is encountered. - */ - - i = 0; - while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) - mac[i++] = buf[offs++]; - - mac[i] = '\0'; - tok = (i > 0 && i < 4) ? man_hash_find(mac) : TOKEN_NONE; + /* Determine the line macro. */ + ppos = offs; + tok = TOKEN_NONE; + for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) + offs++; + if (sz > 0 && sz < 4) + tok = roffhash_find(man->manmac, buf + ppos, sz); if (tok == TOKEN_NONE) { mandoc_msg(MANDOCERR_MACRO, man->parse, ln, ppos, buf + ppos - 1); @@ -203,7 +202,7 @@ man_pmacro(struct roff_man *man, int ln, char *buf, int offs) /* Jump to the next non-whitespace word. */ - while (buf[offs] && buf[offs] == ' ') + while (buf[offs] == ' ') offs++; /* @@ -223,6 +222,20 @@ man_pmacro(struct roff_man *man, int ln, char *buf, int offs) man_breakscope(man, tok); bline = man->flags & MAN_BLINE; + /* + * If the line in next-line scope ends with \c, keep the + * next-line scope open for the subsequent input line. + * That is not at all portable, only groff >= 1.22.4 + * does it, but *if* this weird idiom occurs in a manual + * page, that's very likely what the author intended. + */ + + if (bline) { + cp = strchr(buf + offs, '\0') - 2; + if (cp >= buf && cp[0] == '\\' && cp[1] == 'c') + bline = 0; + } + /* Call to handler... */ assert(man_macros[tok].fp); @@ -266,17 +279,18 @@ man_breakscope(struct roff_man *man, int tok) * Delete the element that is being broken. */ - if (man->flags & MAN_ELINE && (tok == TOKEN_NONE || + if (man->flags & MAN_ELINE && (tok < MAN_TH || ! (man_macros[tok].flags & MAN_NSCOPED))) { n = man->last; - assert(n->type != ROFFT_TEXT); - if (man_macros[n->tok].flags & MAN_NSCOPED) + if (n->type == ROFFT_TEXT) + n = n->parent; + if (n->tok < MAN_TH || + man_macros[n->tok].flags & MAN_NSCOPED) n = n->parent; mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line, n->pos, "%s breaks %s", - tok == TOKEN_NONE ? "TS" : man_macronames[tok], - man_macronames[n->tok]); + roff_name[tok], roff_name[n->tok]); roff_node_delete(man, n); man->flags &= ~MAN_ELINE; @@ -302,12 +316,13 @@ man_breakscope(struct roff_man *man, int tok) * Delete the block that is being broken. */ - if (man->flags & MAN_BLINE && (tok == TOKEN_NONE || + if (man->flags & MAN_BLINE && (tok < MAN_TH || man_macros[tok].flags & MAN_BSCOPE)) { n = man->last; if (n->type == ROFFT_TEXT) n = n->parent; - if ( ! (man_macros[n->tok].flags & MAN_BSCOPE)) + if (n->tok < MAN_TH || + (man_macros[n->tok].flags & MAN_BSCOPE) == 0) n = n->parent; assert(n->type == ROFFT_HEAD); @@ -317,8 +332,7 @@ man_breakscope(struct roff_man *man, int tok) mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line, n->pos, "%s breaks %s", - tok == TOKEN_NONE ? "TS" : man_macronames[tok], - man_macronames[n->tok]); + roff_name[tok], roff_name[n->tok]); roff_node_delete(man, n); man->flags &= ~MAN_BLINE; diff --git a/usr/src/cmd/mandoc/man.h b/usr/src/cmd/mandoc/man.h index 8f63f3b99e..d671f9a9e4 100644 --- a/usr/src/cmd/mandoc/man.h +++ b/usr/src/cmd/mandoc/man.h @@ -1,4 +1,4 @@ -/* $Id: man.h,v 1.77 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: man.h,v 1.78 2017/04/24 23:06:18 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -16,50 +16,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define MAN_br 0 -#define MAN_TH 1 -#define MAN_SH 2 -#define MAN_SS 3 -#define MAN_TP 4 -#define MAN_LP 5 -#define MAN_PP 6 -#define MAN_P 7 -#define MAN_IP 8 -#define MAN_HP 9 -#define MAN_SM 10 -#define MAN_SB 11 -#define MAN_BI 12 -#define MAN_IB 13 -#define MAN_BR 14 -#define MAN_RB 15 -#define MAN_R 16 -#define MAN_B 17 -#define MAN_I 18 -#define MAN_IR 19 -#define MAN_RI 20 -#define MAN_sp 21 -#define MAN_nf 22 -#define MAN_fi 23 -#define MAN_RE 24 -#define MAN_RS 25 -#define MAN_DT 26 -#define MAN_UC 27 -#define MAN_PD 28 -#define MAN_AT 29 -#define MAN_in 30 -#define MAN_ft 31 -#define MAN_OP 32 -#define MAN_EX 33 -#define MAN_EE 34 -#define MAN_UR 35 -#define MAN_UE 36 -#define MAN_ll 37 -#define MAN_MAX 38 - -/* Names of macros. */ -extern const char *const *man_macronames; - - struct roff_man; const struct mparse *man_mparse(const struct roff_man *); diff --git a/usr/src/cmd/mandoc/man_hash.c b/usr/src/cmd/mandoc/man_hash.c deleted file mode 100644 index bb7b4665b3..0000000000 --- a/usr/src/cmd/mandoc/man_hash.c +++ /dev/null @@ -1,103 +0,0 @@ -/* $Id: man_hash.c,v 1.35 2016/07/15 18:03:45 schwarze Exp $ */ -/* - * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#include "config.h" - -#include <sys/types.h> - -#include <assert.h> -#include <ctype.h> -#include <limits.h> -#include <string.h> - -#include "mandoc.h" -#include "roff.h" -#include "man.h" -#include "libmandoc.h" -#include "libman.h" - -#define HASH_DEPTH 6 - -#define HASH_ROW(x) do { \ - if (isupper((unsigned char)(x))) \ - (x) -= 65; \ - else \ - (x) -= 97; \ - (x) *= HASH_DEPTH; \ - } while (/* CONSTCOND */ 0) - -/* - * Lookup table is indexed first by lower-case first letter (plus one - * for the period, which is stored in the last row), then by lower or - * uppercase second letter. Buckets correspond to the index of the - * macro (the integer value of the enum stored as a char to save a bit - * of space). - */ -static unsigned char table[26 * HASH_DEPTH]; - - -void -man_hash_init(void) -{ - int i, j, x; - - if (*table != '\0') - return; - - memset(table, UCHAR_MAX, sizeof(table)); - - for (i = 0; i < (int)MAN_MAX; i++) { - x = man_macronames[i][0]; - - assert(isalpha((unsigned char)x)); - - HASH_ROW(x); - - for (j = 0; j < HASH_DEPTH; j++) - if (UCHAR_MAX == table[x + j]) { - table[x + j] = (unsigned char)i; - break; - } - - assert(j < HASH_DEPTH); - } -} - -int -man_hash_find(const char *tmp) -{ - int x, y, i; - int tok; - - if ('\0' == (x = tmp[0])) - return TOKEN_NONE; - if ( ! (isalpha((unsigned char)x))) - return TOKEN_NONE; - - HASH_ROW(x); - - for (i = 0; i < HASH_DEPTH; i++) { - if (UCHAR_MAX == (y = table[x + i])) - return TOKEN_NONE; - - tok = y; - if (0 == strcmp(tmp, man_macronames[tok])) - return tok; - } - - return TOKEN_NONE; -} diff --git a/usr/src/cmd/mandoc/man_html.c b/usr/src/cmd/mandoc/man_html.c index 9151e4c750..a304b3e4d2 100644 --- a/usr/src/cmd/mandoc/man_html.c +++ b/usr/src/cmd/mandoc/man_html.c @@ -1,4 +1,4 @@ -/* $Id: man_html.c,v 1.133 2017/02/05 18:15:39 schwarze Exp $ */ +/* $Id: man_html.c,v 1.145 2017/06/25 11:42:02 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -26,6 +26,7 @@ #include <string.h> #include "mandoc_aux.h" +#include "mandoc.h" #include "roff.h" #include "man.h" #include "out.h" @@ -65,14 +66,12 @@ static int man_SM_pre(MAN_ARGS); static int man_SS_pre(MAN_ARGS); static int man_UR_pre(MAN_ARGS); static int man_alt_pre(MAN_ARGS); -static int man_br_pre(MAN_ARGS); static int man_ign_pre(MAN_ARGS); static int man_in_pre(MAN_ARGS); static void man_root_post(MAN_ARGS); static void man_root_pre(MAN_ARGS); -static const struct htmlman mans[MAN_MAX] = { - { man_br_pre, NULL }, /* br */ +static const struct htmlman __mans[MAN_MAX - MAN_TH] = { { NULL, NULL }, /* TH */ { man_SH_pre, NULL }, /* SH */ { man_SS_pre, NULL }, /* SS */ @@ -93,7 +92,6 @@ static const struct htmlman mans[MAN_MAX] = { { man_I_pre, NULL }, /* I */ { man_alt_pre, NULL }, /* IR */ { man_alt_pre, NULL }, /* RI */ - { man_br_pre, NULL }, /* sp */ { NULL, NULL }, /* nf */ { NULL, NULL }, /* fi */ { NULL, NULL }, /* RE */ @@ -103,14 +101,15 @@ static const struct htmlman mans[MAN_MAX] = { { man_ign_pre, NULL }, /* PD */ { man_ign_pre, NULL }, /* AT */ { man_in_pre, NULL }, /* in */ - { man_ign_pre, NULL }, /* ft */ { man_OP_pre, NULL }, /* OP */ { NULL, NULL }, /* EX */ { NULL, NULL }, /* EE */ { man_UR_pre, NULL }, /* UR */ { NULL, NULL }, /* UE */ - { man_ign_pre, NULL }, /* ll */ + { man_UR_pre, NULL }, /* MT */ + { NULL, NULL }, /* ME */ }; +static const struct htmlman *const mans = __mans - MAN_TH; /* @@ -233,6 +232,7 @@ print_man_node(MAN_ARGS) case MAN_P: /* reopen .nf in the body. */ case MAN_RS: case MAN_UR: + case MAN_MT: fillmode(h, MAN_fi); break; default: @@ -255,7 +255,8 @@ print_man_node(MAN_ARGS) case ROFFT_TEXT: if (fillmode(h, want_fillmode) == MAN_fi && want_fillmode == MAN_fi && - n->flags & NODE_LINE && *n->string == ' ') + n->flags & NODE_LINE && *n->string == ' ' && + (h->flags & HTML_NONEWLINE) == 0) print_otag(h, TAG_BR, ""); if (*n->string != '\0') break; @@ -304,6 +305,13 @@ print_man_node(MAN_ARGS) print_tblclose(h); t = h->tag; + if (n->tok < ROFF_MAX) { + roff_html_pre(h, n); + child = 0; + break; + } + + assert(n->tok >= MAN_TH && n->tok < MAN_MAX); if (mans[n->tok].pre) child = (*mans[n->tok].pre)(man, n, h); @@ -354,13 +362,9 @@ fillmode(struct html *h, int want) static int a2width(const struct roff_node *n, struct roffsu *su) { - if (n->type != ROFFT_TEXT) return 0; - if (a2roffsu(n->string, su, SCALE_EN)) - return 1; - - return 0; + return a2roffsu(n->string, su, SCALE_EN) != NULL; } static void @@ -409,34 +413,18 @@ man_root_post(MAN_ARGS) print_tagq(h, t); } - -static int -man_br_pre(MAN_ARGS) -{ - struct roffsu su; - - SCALE_VS_INIT(&su, 1); - - if (MAN_sp == n->tok) { - if (NULL != (n = n->child)) - if ( ! a2roffsu(n->string, &su, SCALE_VS)) - su.scale = 1.0; - } else - su.scale = 0.0; - - print_otag(h, TAG_DIV, "suh", &su); - - /* So the div isn't empty: */ - print_text(h, "\\~"); - - return 0; -} - static int man_SH_pre(MAN_ARGS) { - if (n->type == ROFFT_HEAD) - print_otag(h, TAG_H1, "c", "Sh"); + char *id; + + if (n->type == ROFFT_HEAD) { + id = html_make_id(n); + print_otag(h, TAG_H1, "cTi", "Sh", id); + if (id != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + free(id); + } return 1; } @@ -498,8 +486,15 @@ man_SM_pre(MAN_ARGS) static int man_SS_pre(MAN_ARGS) { - if (n->type == ROFFT_HEAD) - print_otag(h, TAG_H2, "c", "Ss"); + char *id; + + if (n->type == ROFFT_HEAD) { + id = html_make_id(n); + print_otag(h, TAG_H2, "cTi", "Ss", id); + if (id != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + free(id); + } return 1; } @@ -652,11 +647,17 @@ man_RS_pre(MAN_ARGS) static int man_UR_pre(MAN_ARGS) { + char *cp; n = n->child; assert(n->type == ROFFT_HEAD); if (n->child != NULL) { assert(n->child->type == ROFFT_TEXT); - print_otag(h, TAG_A, "ch", "Lk", n->child->string); + if (n->tok == MAN_MT) { + mandoc_asprintf(&cp, "mailto:%s", n->child->string); + print_otag(h, TAG_A, "cTh", "Mt", cp); + free(cp); + } else + print_otag(h, TAG_A, "cTh", "Lk", n->child->string); } assert(n->next->type == ROFFT_BODY); diff --git a/usr/src/cmd/mandoc/man_macro.c b/usr/src/cmd/mandoc/man_macro.c index 7fd17c5348..aa8b200196 100644 --- a/usr/src/cmd/mandoc/man_macro.c +++ b/usr/src/cmd/mandoc/man_macro.c @@ -1,7 +1,7 @@ -/* $Id: man_macro.c,v 1.115 2017/01/10 13:47:00 schwarze Exp $ */ +/* $Id: man_macro.c,v 1.123 2017/06/25 11:45:37 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2012, 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2012-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de> * * Permission to use, copy, modify, and distribute this software for any @@ -38,10 +38,9 @@ static void blk_imp(MACRO_PROT_ARGS); static void in_line_eoln(MACRO_PROT_ARGS); static int man_args(struct roff_man *, int, int *, char *, char **); -static void rew_scope(struct roff_man *, int); +static void rew_scope(struct roff_man *, enum roff_tok); -const struct man_macro __man_macros[MAN_MAX] = { - { in_line_eoln, MAN_NSCOPED }, /* br */ +const struct man_macro __man_macros[MAN_MAX - MAN_TH] = { { in_line_eoln, MAN_BSCOPE }, /* TH */ { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SH */ { blk_imp, MAN_BSCOPE | MAN_SCOPED }, /* SS */ @@ -62,7 +61,6 @@ const struct man_macro __man_macros[MAN_MAX] = { { in_line_eoln, MAN_SCOPED | MAN_JOIN }, /* I */ { in_line_eoln, 0 }, /* IR */ { in_line_eoln, 0 }, /* RI */ - { in_line_eoln, MAN_NSCOPED }, /* sp */ { in_line_eoln, MAN_NSCOPED }, /* nf */ { in_line_eoln, MAN_NSCOPED }, /* fi */ { blk_close, MAN_BSCOPE }, /* RE */ @@ -71,17 +69,16 @@ const struct man_macro __man_macros[MAN_MAX] = { { in_line_eoln, 0 }, /* UC */ { in_line_eoln, MAN_NSCOPED }, /* PD */ { in_line_eoln, 0 }, /* AT */ - { in_line_eoln, 0 }, /* in */ - { in_line_eoln, 0 }, /* ft */ + { in_line_eoln, MAN_NSCOPED }, /* in */ { in_line_eoln, 0 }, /* OP */ { in_line_eoln, MAN_BSCOPE }, /* EX */ { in_line_eoln, MAN_BSCOPE }, /* EE */ { blk_exp, MAN_BSCOPE }, /* UR */ { blk_close, MAN_BSCOPE }, /* UE */ - { in_line_eoln, 0 }, /* ll */ + { blk_exp, MAN_BSCOPE }, /* MT */ + { blk_close, MAN_BSCOPE }, /* ME */ }; - -const struct man_macro * const man_macros = __man_macros; +const struct man_macro *const man_macros = __man_macros - MAN_TH; void @@ -100,8 +97,7 @@ man_unscope(struct roff_man *man, const struct roff_node *to) man_macros[n->tok].flags & MAN_SCOPED) { mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line, n->pos, - "EOF breaks %s", - man_macronames[n->tok]); + "EOF breaks %s", roff_name[n->tok]); if (man->flags & MAN_ELINE) man->flags &= ~MAN_ELINE; else { @@ -118,7 +114,7 @@ man_unscope(struct roff_man *man, const struct roff_node *to) man_macros[n->tok].fp == blk_exp) mandoc_msg(MANDOCERR_BLK_NOEND, man->parse, n->line, n->pos, - man_macronames[n->tok]); + roff_name[n->tok]); } /* @@ -150,7 +146,7 @@ man_unscope(struct roff_man *man, const struct roff_node *to) * scopes. When a scope is closed, it must be validated and actioned. */ static void -rew_scope(struct roff_man *man, int tok) +rew_scope(struct roff_man *man, enum roff_tok tok) { struct roff_node *n; @@ -193,7 +189,7 @@ rew_scope(struct roff_man *man, int tok) void blk_close(MACRO_PROT_ARGS) { - int ntok; + enum roff_tok ntok; const struct roff_node *nn; char *p; int nrew, target; @@ -223,6 +219,9 @@ blk_close(MACRO_PROT_ARGS) case MAN_UE: ntok = MAN_UR; break; + case MAN_ME: + ntok = MAN_MT; + break; default: abort(); } @@ -233,7 +232,7 @@ blk_close(MACRO_PROT_ARGS) if (nn == NULL) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, man->parse, - line, ppos, man_macronames[tok]); + line, ppos, roff_name[tok]); rew_scope(man, MAN_PP); } else { line = man->last->line; @@ -241,6 +240,10 @@ blk_close(MACRO_PROT_ARGS) ntok = man->last->tok; man_unscope(man, nn); + if (tok == MAN_RE && nn->head->aux > 0) + roff_setreg(man->roff, "an-margin", + nn->head->aux, '-'); + /* Move a trailing paragraph behind the block. */ if (ntok == MAN_LP || ntok == MAN_PP || ntok == MAN_P) { @@ -262,13 +265,21 @@ blk_exp(MACRO_PROT_ARGS) head = roff_head_alloc(man, line, ppos, tok); la = *pos; - if (man_args(man, line, pos, buf, &p)) + if (man_args(man, line, pos, buf, &p)) { roff_word_alloc(man, line, la, p); + if (tok == MAN_RS) { + if (roff_getreg(man->roff, "an-margin") == 0) + roff_setreg(man->roff, "an-margin", + 7 * 24, '='); + if ((head->aux = strtod(p, NULL) * 24.0) > 0) + roff_setreg(man->roff, "an-margin", + head->aux, '+'); + } + } if (buf[*pos] != '\0') - mandoc_vmsg(MANDOCERR_ARG_EXCESS, - man->parse, line, *pos, "%s ... %s", - man_macronames[tok], buf + *pos); + mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, line, + *pos, "%s ... %s", roff_name[tok], buf + *pos); man_unscope(man, head); roff_body_alloc(man, line, ppos, tok); @@ -331,18 +342,16 @@ in_line_eoln(MACRO_PROT_ARGS) n = man->last; for (;;) { - if (buf[*pos] != '\0' && (tok == MAN_br || - tok == MAN_fi || tok == MAN_nf)) { + if (buf[*pos] != '\0' && (tok == MAN_fi || tok == MAN_nf)) { mandoc_vmsg(MANDOCERR_ARG_SKIP, man->parse, line, *pos, "%s %s", - man_macronames[tok], buf + *pos); + roff_name[tok], buf + *pos); break; } - if (buf[*pos] != '\0' && man->last != n && - (tok == MAN_PD || tok == MAN_ft || tok == MAN_sp)) { + if (buf[*pos] != '\0' && man->last != n && tok == MAN_PD) { mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse, line, *pos, "%s ... %s", - man_macronames[tok], buf + *pos); + roff_name[tok], buf + *pos); break; } la = *pos; diff --git a/usr/src/cmd/mandoc/man_term.c b/usr/src/cmd/mandoc/man_term.c index b2732d4558..8946a05067 100644 --- a/usr/src/cmd/mandoc/man_term.c +++ b/usr/src/cmd/mandoc/man_term.c @@ -1,4 +1,4 @@ -/* $Id: man_term.c,v 1.191 2017/02/15 14:10:08 schwarze Exp $ */ +/* $Id: man_term.c,v 1.209 2017/07/31 15:19:06 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -68,6 +68,7 @@ static void print_bvspace(struct termp *, const struct roff_node *, int); static int pre_B(DECL_ARGS); +static int pre_DT(DECL_ARGS); static int pre_HP(DECL_ARGS); static int pre_I(DECL_ARGS); static int pre_IP(DECL_ARGS); @@ -80,12 +81,9 @@ static int pre_SS(DECL_ARGS); static int pre_TP(DECL_ARGS); static int pre_UR(DECL_ARGS); static int pre_alternate(DECL_ARGS); -static int pre_ft(DECL_ARGS); static int pre_ign(DECL_ARGS); static int pre_in(DECL_ARGS); static int pre_literal(DECL_ARGS); -static int pre_ll(DECL_ARGS); -static int pre_sp(DECL_ARGS); static void post_IP(DECL_ARGS); static void post_HP(DECL_ARGS); @@ -95,8 +93,7 @@ static void post_SS(DECL_ARGS); static void post_TP(DECL_ARGS); static void post_UR(DECL_ARGS); -static const struct termact termacts[MAN_MAX] = { - { pre_sp, NULL, MAN_NOTEXT }, /* br */ +static const struct termact __termacts[MAN_MAX - MAN_TH] = { { NULL, NULL, 0 }, /* TH */ { pre_SH, post_SH, 0 }, /* SH */ { pre_SS, post_SS, 0 }, /* SS */ @@ -117,24 +114,24 @@ static const struct termact termacts[MAN_MAX] = { { pre_I, NULL, 0 }, /* I */ { pre_alternate, NULL, 0 }, /* IR */ { pre_alternate, NULL, 0 }, /* RI */ - { pre_sp, NULL, MAN_NOTEXT }, /* sp */ { pre_literal, NULL, 0 }, /* nf */ { pre_literal, NULL, 0 }, /* fi */ { NULL, NULL, 0 }, /* RE */ { pre_RS, post_RS, 0 }, /* RS */ - { pre_ign, NULL, 0 }, /* DT */ + { pre_DT, NULL, 0 }, /* DT */ { pre_ign, NULL, MAN_NOTEXT }, /* UC */ { pre_PD, NULL, MAN_NOTEXT }, /* PD */ { pre_ign, NULL, 0 }, /* AT */ { pre_in, NULL, MAN_NOTEXT }, /* in */ - { pre_ft, NULL, MAN_NOTEXT }, /* ft */ { pre_OP, NULL, 0 }, /* OP */ { pre_literal, NULL, 0 }, /* EX */ { pre_literal, NULL, 0 }, /* EE */ { pre_UR, post_UR, 0 }, /* UR */ { NULL, NULL, 0 }, /* UE */ - { pre_ll, NULL, MAN_NOTEXT }, /* ll */ + { pre_UR, post_UR, 0 }, /* MT */ + { NULL, NULL, 0 }, /* ME */ }; +static const struct termact *termacts = __termacts - MAN_TH; void @@ -146,9 +143,13 @@ terminal_man(void *arg, const struct roff_man *man) size_t save_defindent; p = (struct termp *)arg; - p->overstep = 0; - p->rmargin = p->maxrmargin = p->defrmargin; - p->tabwidth = term_len(p, 5); + save_defindent = p->defindent; + if (p->synopsisonly == 0 && p->defindent == 0) + p->defindent = 7; + p->tcol->rmargin = p->maxrmargin = p->defrmargin; + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); memset(&mt, 0, sizeof(struct mtermp)); mt.lmargin[mt.lmargincur] = term_len(p, p->defindent); @@ -171,16 +172,13 @@ terminal_man(void *arg, const struct roff_man *man) n = n->next; } } else { - save_defindent = p->defindent; - if (p->defindent == 0) - p->defindent = 7; term_begin(p, print_man_head, print_man_foot, &man->meta); p->flags |= TERMP_NOSPACE; if (n != NULL) print_man_nodelist(p, &mt, n, &man->meta); term_end(p); - p->defindent = save_defindent; } + p->defindent = save_defindent; } /* @@ -219,14 +217,6 @@ pre_ign(DECL_ARGS) } static int -pre_ll(DECL_ARGS) -{ - - term_setwidth(p, n->child != NULL ? n->child->string : NULL); - return 0; -} - -static int pre_I(DECL_ARGS) { @@ -240,7 +230,7 @@ pre_literal(DECL_ARGS) term_newln(p); - if (MAN_nf == n->tok || MAN_EX == n->tok) + if (n->tok == MAN_nf || n->tok == MAN_EX) mt->fl |= MANT_LITERAL; else mt->fl &= ~MANT_LITERAL; @@ -250,9 +240,9 @@ pre_literal(DECL_ARGS) * So in case a second call to term_flushln() is needed, * indentation has to be set up explicitly. */ - if (MAN_HP == n->parent->tok && p->rmargin < p->maxrmargin) { - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; + if (n->parent->tok == MAN_HP && p->tcol->rmargin < p->maxrmargin) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; p->trailspace = 0; p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); p->flags |= TERMP_NOSPACE; @@ -272,7 +262,7 @@ pre_PD(DECL_ARGS) return 0; } assert(n->type == ROFFT_TEXT); - if (a2roffsu(n->string, &su, SCALE_VS)) + if (a2roffsu(n->string, &su, SCALE_VS) != NULL) mt->pardist = term_vspan(p, &su); return 0; } @@ -362,41 +352,6 @@ pre_OP(DECL_ARGS) } static int -pre_ft(DECL_ARGS) -{ - const char *cp; - - if (NULL == n->child) { - term_fontlast(p); - return 0; - } - - cp = n->child->string; - switch (*cp) { - case '4': - case '3': - case 'B': - term_fontrepl(p, TERMFONT_BOLD); - break; - case '2': - case 'I': - term_fontrepl(p, TERMFONT_UNDER); - break; - case 'P': - term_fontlast(p); - break; - case '1': - case 'C': - case 'R': - term_fontrepl(p, TERMFONT_NONE); - break; - default: - break; - } - return 0; -} - -static int pre_in(DECL_ARGS) { struct roffsu su; @@ -406,8 +361,8 @@ pre_in(DECL_ARGS) term_newln(p); - if (NULL == n->child) { - p->offset = mt->offset; + if (n->child == NULL) { + p->tcol->offset = mt->offset; return 0; } @@ -421,71 +376,29 @@ pre_in(DECL_ARGS) else cp--; - if ( ! a2roffsu(++cp, &su, SCALE_EN)) + if (a2roffsu(++cp, &su, SCALE_EN) == NULL) return 0; - v = (term_hspan(p, &su) + 11) / 24; + v = term_hen(p, &su); if (less < 0) - p->offset -= p->offset > v ? v : p->offset; + p->tcol->offset -= p->tcol->offset > v ? v : p->tcol->offset; else if (less > 0) - p->offset += v; + p->tcol->offset += v; else - p->offset = v; - if (p->offset > SHRT_MAX) - p->offset = term_len(p, p->defindent); + p->tcol->offset = v; + if (p->tcol->offset > SHRT_MAX) + p->tcol->offset = term_len(p, p->defindent); return 0; } static int -pre_sp(DECL_ARGS) +pre_DT(DECL_ARGS) { - struct roffsu su; - int i, len; - - if ((NULL == n->prev && n->parent)) { - switch (n->parent->tok) { - case MAN_SH: - case MAN_SS: - case MAN_PP: - case MAN_LP: - case MAN_P: - return 0; - default: - break; - } - } - - if (n->tok == MAN_br) - len = 0; - else if (n->child == NULL) - len = 1; - else { - if ( ! a2roffsu(n->child->string, &su, SCALE_VS)) - su.scale = 1.0; - len = term_vspan(p, &su); - } - - if (len == 0) - term_newln(p); - else if (len < 0) - p->skipvsp -= len; - else - for (i = 0; i < len; i++) - term_vspace(p); - - /* - * Handle an explicit break request in the same way - * as an overflowing line. - */ - - if (p->flags & TERMP_BRIND) { - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; - p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); - } - + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); return 0; } @@ -514,8 +427,8 @@ pre_HP(DECL_ARGS) /* Calculate offset. */ if ((nn = n->parent->head->child) != NULL && - a2roffsu(nn->string, &su, SCALE_EN)) { - len = term_hspan(p, &su) / 24; + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); if (len < 0 && (size_t)(-len) > mt->offset) len = -mt->offset; else if (len > SHRT_MAX) @@ -524,8 +437,8 @@ pre_HP(DECL_ARGS) } else len = mt->lmargin[mt->lmargincur]; - p->offset = mt->offset; - p->rmargin = mt->offset + len; + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; return 1; } @@ -549,8 +462,8 @@ post_HP(DECL_ARGS) p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); p->trailspace = 0; - p->offset = mt->offset; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; break; default: break; @@ -567,7 +480,7 @@ pre_PP(DECL_ARGS) print_bvspace(p, n, mt->pardist); break; default: - p->offset = mt->offset; + p->tcol->offset = mt->offset; break; } @@ -599,8 +512,8 @@ pre_IP(DECL_ARGS) /* Calculate the offset from the optional second argument. */ if ((nn = n->parent->head->child) != NULL && (nn = nn->next) != NULL && - a2roffsu(nn->string, &su, SCALE_EN)) { - len = term_hspan(p, &su) / 24; + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); if (len < 0 && (size_t)(-len) > mt->offset) len = -mt->offset; else if (len > SHRT_MAX) @@ -611,8 +524,8 @@ pre_IP(DECL_ARGS) switch (n->type) { case ROFFT_HEAD: - p->offset = mt->offset; - p->rmargin = mt->offset + len; + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; savelit = MANT_LITERAL & mt->fl; mt->fl &= ~MANT_LITERAL; @@ -625,8 +538,8 @@ pre_IP(DECL_ARGS) return 0; case ROFFT_BODY: - p->offset = mt->offset + len; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset + len; + p->tcol->rmargin = p->maxrmargin; break; default: break; @@ -644,11 +557,11 @@ post_IP(DECL_ARGS) term_flushln(p); p->flags &= ~TERMP_NOBREAK; p->trailspace = 0; - p->rmargin = p->maxrmargin; + p->tcol->rmargin = p->maxrmargin; break; case ROFFT_BODY: term_newln(p); - p->offset = mt->offset; + p->tcol->offset = mt->offset; break; default: break; @@ -681,8 +594,8 @@ pre_TP(DECL_ARGS) if ((nn = n->parent->head->child) != NULL && nn->string != NULL && ! (NODE_LINE & nn->flags) && - a2roffsu(nn->string, &su, SCALE_EN)) { - len = term_hspan(p, &su) / 24; + a2roffsu(nn->string, &su, SCALE_EN) != NULL) { + len = term_hen(p, &su); if (len < 0 && (size_t)(-len) > mt->offset) len = -mt->offset; else if (len > SHRT_MAX) @@ -693,8 +606,8 @@ pre_TP(DECL_ARGS) switch (n->type) { case ROFFT_HEAD: - p->offset = mt->offset; - p->rmargin = mt->offset + len; + p->tcol->offset = mt->offset; + p->tcol->rmargin = mt->offset + len; savelit = MANT_LITERAL & mt->fl; mt->fl &= ~MANT_LITERAL; @@ -713,8 +626,8 @@ pre_TP(DECL_ARGS) mt->fl |= MANT_LITERAL; return 0; case ROFFT_BODY: - p->offset = mt->offset + len; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset + len; + p->tcol->rmargin = p->maxrmargin; p->trailspace = 0; p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP); break; @@ -735,7 +648,7 @@ post_TP(DECL_ARGS) break; case ROFFT_BODY: term_newln(p); - p->offset = mt->offset; + p->tcol->offset = mt->offset; break; default: break; @@ -760,7 +673,7 @@ pre_SS(DECL_ARGS) do { n = n->prev; - } while (n != NULL && n->tok != TOKEN_NONE && + } while (n != NULL && n->tok >= MAN_TH && termacts[n->tok].flags & MAN_NOTEXT); if (n == NULL || (n->tok == MAN_SS && n->body->child == NULL)) break; @@ -770,14 +683,14 @@ pre_SS(DECL_ARGS) break; case ROFFT_HEAD: term_fontrepl(p, TERMFONT_BOLD); - p->offset = term_len(p, 3); - p->rmargin = mt->offset; + p->tcol->offset = term_len(p, 3); + p->tcol->rmargin = mt->offset; p->trailspace = mt->offset; p->flags |= TERMP_NOBREAK | TERMP_BRIND; break; case ROFFT_BODY: - p->offset = mt->offset; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; p->trailspace = 0; p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); break; @@ -822,7 +735,7 @@ pre_SH(DECL_ARGS) do { n = n->prev; - } while (n != NULL && n->tok != TOKEN_NONE && + } while (n != NULL && n->tok >= MAN_TH && termacts[n->tok].flags & MAN_NOTEXT); if (n == NULL || (n->tok == MAN_SH && n->body->child == NULL)) break; @@ -832,14 +745,14 @@ pre_SH(DECL_ARGS) break; case ROFFT_HEAD: term_fontrepl(p, TERMFONT_BOLD); - p->offset = 0; - p->rmargin = mt->offset; + p->tcol->offset = 0; + p->tcol->rmargin = mt->offset; p->trailspace = mt->offset; p->flags |= TERMP_NOBREAK | TERMP_BRIND; break; case ROFFT_BODY: - p->offset = mt->offset; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; p->trailspace = 0; p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); break; @@ -885,16 +798,16 @@ pre_RS(DECL_ARGS) n->aux = SHRT_MAX + 1; if (n->child == NULL) n->aux = mt->lmargin[mt->lmargincur]; - else if (a2roffsu(n->child->string, &su, SCALE_EN)) - n->aux = term_hspan(p, &su) / 24; + else if (a2roffsu(n->child->string, &su, SCALE_EN) != NULL) + n->aux = term_hen(p, &su); if (n->aux < 0 && (size_t)(-n->aux) > mt->offset) n->aux = -mt->offset; else if (n->aux > SHRT_MAX) n->aux = term_len(p, p->defindent); mt->offset += n->aux; - p->offset = mt->offset; - p->rmargin = p->maxrmargin; + p->tcol->offset = mt->offset; + p->tcol->rmargin = p->maxrmargin; if (++mt->lmarginsz < MAXMARGINS) mt->lmargincur = mt->lmarginsz; @@ -918,7 +831,7 @@ post_RS(DECL_ARGS) } mt->offset -= n->parent->head->aux; - p->offset = mt->offset; + p->tcol->offset = mt->offset; if (--mt->lmarginsz < MAXMARGINS) mt->lmargincur = mt->lmarginsz; @@ -951,7 +864,6 @@ post_UR(DECL_ARGS) static void print_man_node(DECL_ARGS) { - size_t rm, rmax; int c; switch (n->type) { @@ -961,10 +873,14 @@ print_man_node(DECL_ARGS) * If we have a space as the first character, break * before printing the line's data. */ - if ('\0' == *n->string) { - term_vspace(p); + if (*n->string == '\0') { + if (p->flags & TERMP_NONEWLINE) + term_newln(p); + else + term_vspace(p); return; - } else if (' ' == *n->string && NODE_LINE & n->flags) + } else if (*n->string == ' ' && n->flags & NODE_LINE && + (p->flags & TERMP_NONEWLINE) == 0) term_newln(p); term_word(p, n->string); @@ -986,6 +902,12 @@ print_man_node(DECL_ARGS) break; } + if (n->tok < ROFF_MAX) { + roff_term_pre(p, n); + return; + } + + assert(n->tok >= MAN_TH && n->tok <= MAN_MAX); if ( ! (MAN_NOTEXT & termacts[n->tok].flags)) term_fontrepl(p, TERMFONT_NONE); @@ -1012,20 +934,17 @@ out: if (mt->fl & MANT_LITERAL && ! (p->flags & (TERMP_NOBREAK | TERMP_NONEWLINE)) && (n->next == NULL || n->next->flags & NODE_LINE)) { - rm = p->rmargin; - rmax = p->maxrmargin; - p->rmargin = p->maxrmargin = TERM_MAXMARGIN; - p->flags |= TERMP_NOSPACE; + p->flags |= TERMP_BRNEVER | TERMP_NOSPACE; if (n->string != NULL && *n->string != '\0') term_flushln(p); else term_newln(p); - if (rm < rmax && n->parent->tok == MAN_HP) { - p->offset = rm; - p->rmargin = rmax; - } else - p->rmargin = rm; - p->maxrmargin = rmax; + p->flags &= ~TERMP_BRNEVER; + if (p->tcol->rmargin < p->maxrmargin && + n->parent->tok == MAN_HP) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + } } if (NODE_EOS & n->flags) p->flags |= TERMP_SENTENCE; @@ -1081,8 +1000,8 @@ print_man_foot(struct termp *p, const struct roff_meta *meta) p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; p->trailspace = 1; - p->offset = 0; - p->rmargin = p->maxrmargin > datelen ? + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin > datelen ? (p->maxrmargin + term_len(p, 1) - datelen) / 2 : 0; if (meta->os) @@ -1091,9 +1010,10 @@ print_man_foot(struct termp *p, const struct roff_meta *meta) /* At the bottom in the middle: manual date. */ - p->offset = p->rmargin; + p->tcol->offset = p->tcol->rmargin; titlen = term_strlen(p, title); - p->rmargin = p->maxrmargin > titlen ? p->maxrmargin - titlen : 0; + p->tcol->rmargin = p->maxrmargin > titlen ? + p->maxrmargin - titlen : 0; p->flags |= TERMP_NOSPACE; term_word(p, meta->date); @@ -1104,8 +1024,8 @@ print_man_foot(struct termp *p, const struct roff_meta *meta) p->flags &= ~TERMP_NOBREAK; p->flags |= TERMP_NOSPACE; p->trailspace = 0; - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; term_word(p, title); term_flushln(p); @@ -1132,8 +1052,8 @@ print_man_head(struct termp *p, const struct roff_meta *meta) p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; p->trailspace = 1; - p->offset = 0; - p->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + p->tcol->offset = 0; + p->tcol->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? (p->maxrmargin - vollen + term_len(p, 1)) / 2 : vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; @@ -1143,9 +1063,9 @@ print_man_head(struct termp *p, const struct roff_meta *meta) /* At the top in the middle: manual volume. */ p->flags |= TERMP_NOSPACE; - p->offset = p->rmargin; - p->rmargin = p->offset + vollen + titlen < p->maxrmargin ? - p->maxrmargin - titlen : p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + vollen + titlen < + p->maxrmargin ? p->maxrmargin - titlen : p->maxrmargin; term_word(p, volume); term_flushln(p); @@ -1154,17 +1074,17 @@ print_man_head(struct termp *p, const struct roff_meta *meta) p->flags &= ~TERMP_NOBREAK; p->trailspace = 0; - if (p->rmargin + titlen <= p->maxrmargin) { + if (p->tcol->rmargin + titlen <= p->maxrmargin) { p->flags |= TERMP_NOSPACE; - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; term_word(p, title); term_flushln(p); } p->flags &= ~TERMP_NOSPACE; - p->offset = 0; - p->rmargin = p->maxrmargin; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; /* * Groff prints three blank lines before the content. diff --git a/usr/src/cmd/mandoc/man_validate.c b/usr/src/cmd/mandoc/man_validate.c index 16d996355e..b3356ccb3d 100644 --- a/usr/src/cmd/mandoc/man_validate.c +++ b/usr/src/cmd/mandoc/man_validate.c @@ -1,7 +1,7 @@ /* $OpenBSD$ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -47,15 +47,14 @@ static void check_text(CHKARGS); static void post_AT(CHKARGS); static void post_IP(CHKARGS); -static void post_vs(CHKARGS); -static void post_ft(CHKARGS); static void post_OP(CHKARGS); static void post_TH(CHKARGS); static void post_UC(CHKARGS); static void post_UR(CHKARGS); +static void post_in(CHKARGS); +static void post_vs(CHKARGS); -static v_check man_valids[MAN_MAX] = { - post_vs, /* br */ +static const v_check __man_valids[MAN_MAX - MAN_TH] = { post_TH, /* TH */ NULL, /* SH */ NULL, /* SS */ @@ -76,7 +75,6 @@ static v_check man_valids[MAN_MAX] = { NULL, /* I */ NULL, /* IR */ NULL, /* RI */ - post_vs, /* sp */ NULL, /* nf */ NULL, /* fi */ NULL, /* RE */ @@ -85,22 +83,23 @@ static v_check man_valids[MAN_MAX] = { post_UC, /* UC */ NULL, /* PD */ post_AT, /* AT */ - NULL, /* in */ - post_ft, /* ft */ + post_in, /* in */ post_OP, /* OP */ NULL, /* EX */ NULL, /* EE */ post_UR, /* UR */ NULL, /* UE */ - NULL, /* ll */ + post_UR, /* MT */ + NULL, /* ME */ }; +static const v_check *man_valids = __man_valids - MAN_TH; void man_node_validate(struct roff_man *man) { struct roff_node *n; - v_check *cp; + const v_check *cp; n = man->last; man->last = man->last->child; @@ -125,6 +124,19 @@ man_node_validate(struct roff_man *man) case ROFFT_TBL: break; default: + if (n->tok < ROFF_MAX) { + switch (n->tok) { + case ROFF_br: + case ROFF_sp: + post_vs(man, n); + break; + default: + roff_validate(man); + break; + } + break; + } + assert(n->tok >= MAN_TH && n->tok < MAN_MAX); cp = man_valids + n->tok; if (*cp) (*cp)(man, n); @@ -158,8 +170,14 @@ check_root(CHKARGS) man->meta.title = mandoc_strdup(""); man->meta.msec = mandoc_strdup(""); man->meta.date = man->quick ? mandoc_strdup("") : - mandoc_normdate(man->parse, NULL, n->line, n->pos); + mandoc_normdate(man, NULL, n->line, n->pos); } + + if (man->meta.os_e && + (man->meta.rcsids & (1 << man->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, man->parse, 0, 0, + man->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); } static void @@ -193,61 +211,19 @@ post_OP(CHKARGS) static void post_UR(CHKARGS) { - if (n->type == ROFFT_HEAD && n->child == NULL) - mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse, - n->line, n->pos, "UR"); + mandoc_msg(MANDOCERR_UR_NOHEAD, man->parse, + n->line, n->pos, roff_name[n->tok]); check_part(man, n); } static void -post_ft(CHKARGS) -{ - char *cp; - int ok; - - if (n->child == NULL) - return; - - ok = 0; - cp = n->child->string; - switch (*cp) { - case '1': - case '2': - case '3': - case '4': - case 'I': - case 'P': - case 'R': - if ('\0' == cp[1]) - ok = 1; - break; - case 'B': - if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) - ok = 1; - break; - case 'C': - if ('W' == cp[1] && '\0' == cp[2]) - ok = 1; - break; - default: - break; - } - - if (0 == ok) { - mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, - n->line, n->pos, "ft %s", cp); - *cp = '\0'; - } -} - -static void check_part(CHKARGS) { if (n->type == ROFFT_BODY && n->child == NULL) mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse, - n->line, n->pos, man_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); } static void @@ -263,14 +239,13 @@ check_par(CHKARGS) if (n->child == NULL) mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, - "%s empty", man_macronames[n->tok]); + "%s empty", roff_name[n->tok]); break; case ROFFT_HEAD: if (n->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, - man->parse, n->line, n->pos, - "%s %s%s", man_macronames[n->tok], - n->child->string, + man->parse, n->line, n->pos, "%s %s%s", + roff_name[n->tok], n->child->string, n->child->next != NULL ? " ..." : ""); break; default: @@ -291,7 +266,7 @@ post_IP(CHKARGS) if (n->parent->head->child == NULL && n->child == NULL) mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, - "%s empty", man_macronames[n->tok]); + "%s empty", roff_name[n->tok]); break; default: break; @@ -356,8 +331,7 @@ post_TH(CHKARGS) if (n && n->string && '\0' != n->string[0]) { man->meta.date = man->quick ? mandoc_strdup(n->string) : - mandoc_normdate(man->parse, n->string, - n->line, n->pos); + mandoc_normdate(man, n->string, n->line, n->pos); } else { man->meta.date = mandoc_strdup(""); mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, @@ -369,8 +343,14 @@ post_TH(CHKARGS) if (n && (n = n->next)) man->meta.os = mandoc_strdup(n->string); - else if (man->defos != NULL) - man->meta.os = mandoc_strdup(man->defos); + else if (man->os_s != NULL) + man->meta.os = mandoc_strdup(man->os_s); + if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { + if (strstr(man->meta.os, "OpenBSD") != NULL) + man->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(man->meta.os, "NetBSD") != NULL) + man->meta.os_e = MANDOC_OS_NETBSD; + } /* TITLE MSEC DATE OS ->VOL<- */ /* If missing, use the default VOL name for MSEC. */ @@ -469,6 +449,22 @@ post_AT(CHKARGS) } static void +post_in(CHKARGS) +{ + char *s; + + if (n->parent->tok != MAN_TP || + n->parent->type != ROFFT_HEAD || + n->child == NULL || + *n->child->string == '+' || + *n->child->string == '-') + return; + mandoc_asprintf(&s, "+%s", n->child->string); + free(n->child->string); + n->child->string = s; +} + +static void post_vs(CHKARGS) { @@ -478,9 +474,12 @@ post_vs(CHKARGS) switch (n->parent->tok) { case MAN_SH: case MAN_SS: + case MAN_PP: + case MAN_LP: + case MAN_P: mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, - "%s after %s", man_macronames[n->tok], - man_macronames[n->parent->tok]); + "%s after %s", roff_name[n->tok], + roff_name[n->parent->tok]); /* FALLTHROUGH */ case TOKEN_NONE: /* diff --git a/usr/src/cmd/mandoc/manconf.h b/usr/src/cmd/mandoc/manconf.h index f5c678e890..b4cd31646c 100644 --- a/usr/src/cmd/mandoc/manconf.h +++ b/usr/src/cmd/mandoc/manconf.h @@ -1,6 +1,6 @@ -/* $OpenBSD$ */ +/* $Id: manconf.h,v 1.5 2017/07/01 09:47:30 schwarze Exp $ */ /* - * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> * * Permission to use, copy, modify, and distribute this software for any @@ -47,3 +47,4 @@ struct manconf { void manconf_parse(struct manconf *, const char *, char *, char *); int manconf_output(struct manoutput *, const char *, int); void manconf_free(struct manconf *); +void manpath_base(struct manpaths *); diff --git a/usr/src/cmd/mandoc/mandoc.c b/usr/src/cmd/mandoc/mandoc.c index d265463b4e..3e16d2c64f 100644 --- a/usr/src/cmd/mandoc/mandoc.c +++ b/usr/src/cmd/mandoc/mandoc.c @@ -1,7 +1,7 @@ -/* $Id: mandoc.c,v 1.98 2015/11/12 22:44:27 schwarze Exp $ */ +/* $Id: mandoc.c,v 1.103 2017/07/03 13:40:19 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -28,8 +28,9 @@ #include <string.h> #include <time.h> -#include "mandoc.h" #include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" #include "libmandoc.h" static int a2time(time_t *, const char *, const char *); @@ -95,6 +96,8 @@ mandoc_escape(const char **end, const char **start, int *sz) case ',': case '/': return ESCAPE_IGNORE; + case 'p': + return ESCAPE_BREAK; /* * The \z escape is supposed to output the following @@ -175,7 +178,17 @@ mandoc_escape(const char **end, const char **start, int *sz) ++*end; return ESCAPE_ERROR; } - gly = ESCAPE_IGNORE; + switch ((*start)[-1]) { + case 'h': + gly = ESCAPE_HORIZ; + break; + case 'l': + gly = ESCAPE_HLINE; + break; + default: + gly = ESCAPE_IGNORE; + break; + } term = **start; *start = ++*end; break; @@ -508,27 +521,38 @@ fail: } char * -mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) +mandoc_normdate(struct roff_man *man, char *in, int ln, int pos) { + char *cp; time_t t; /* No date specified: use today's date. */ if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) { - mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL); + mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, ln, pos, NULL); return time2a(time(NULL)); } /* Valid mdoc(7) date format. */ if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) || - a2time(&t, "%b %d, %Y", in)) - return time2a(t); + a2time(&t, "%b %d, %Y", in)) { + cp = time2a(t); + if (t > time(NULL) + 86400) + mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, + ln, pos, cp); + return cp; + } - /* Do not warn about the legacy man(7) format. */ + /* In man(7), do not warn about the legacy format. */ - if ( ! a2time(&t, "%Y-%m-%d", in)) - mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in); + if (a2time(&t, "%Y-%m-%d", in) == 0) + mandoc_msg(MANDOCERR_DATE_BAD, man->parse, ln, pos, in); + else if (t > time(NULL) + 86400) + mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, ln, pos, in); + else if (man->macroset == MACROSET_MDOC) + mandoc_vmsg(MANDOCERR_DATE_LEGACY, man->parse, + ln, pos, "Dd %s", in); /* Use any non-mdoc(7) date verbatim. */ diff --git a/usr/src/cmd/mandoc/mandoc.h b/usr/src/cmd/mandoc/mandoc.h index a80d6ae7b8..b234cb5ee1 100644 --- a/usr/src/cmd/mandoc/mandoc.h +++ b/usr/src/cmd/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $Id: mandoc.h,v 1.214 2017/01/28 23:30:08 schwarze Exp $ */ +/* $Id: mandoc.h,v 1.245 2017/07/08 14:51:04 schwarze Exp $ */ /* * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -28,7 +28,7 @@ */ enum mandoclevel { MANDOCLEVEL_OK = 0, - MANDOCLEVEL_RESERVED, + MANDOCLEVEL_STYLE, /* style suggestions */ MANDOCLEVEL_WARNING, /* warnings: syntax, whitespace, etc. */ MANDOCLEVEL_ERROR, /* input has been thrown away */ MANDOCLEVEL_UNSUPP, /* input needs unimplemented features */ @@ -44,20 +44,46 @@ enum mandoclevel { enum mandocerr { MANDOCERR_OK, + MANDOCERR_BASE, /* ===== start of base system conventions ===== */ + + MANDOCERR_MDOCDATE, /* Mdocdate found: Dd ... */ + MANDOCERR_MDOCDATE_MISSING, /* Mdocdate missing: Dd ... */ + MANDOCERR_ARCH_BAD, /* unknown architecture: Dt ... arch */ + MANDOCERR_OS_ARG, /* operating system explicitly specified: Os ... */ + MANDOCERR_RCS_MISSING, /* RCS id missing */ + MANDOCERR_XR_BAD, /* referenced manual not found: Xr name sec */ + + MANDOCERR_STYLE, /* ===== start of style suggestions ===== */ + + MANDOCERR_DATE_LEGACY, /* legacy man(7) date format: Dd ... */ + MANDOCERR_TITLE_CASE, /* lower case character in document title */ + MANDOCERR_RCS_REP, /* duplicate RCS id: ... */ + MANDOCERR_SEC_TYPO, /* typo in section name: Sh ... */ + MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */ + MANDOCERR_MACRO_USELESS, /* useless macro: macro */ + MANDOCERR_BX, /* consider using OS macro: macro */ + MANDOCERR_ER_ORDER, /* errnos out of order: Er ... */ + MANDOCERR_ER_REP, /* duplicate errno: Er ... */ + MANDOCERR_DELIM, /* trailing delimiter: macro ... */ + MANDOCERR_DELIM_NB, /* no blank before trailing delimiter: macro ... */ + MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */ + MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */ + MANDOCERR_FUNC, /* function name without markup: name() */ + MANDOCERR_SPACE_EOL, /* whitespace at end of input line */ + MANDOCERR_COMMENT_BAD, /* bad comment style */ + MANDOCERR_WARNING, /* ===== start of warnings ===== */ /* related to the prologue */ MANDOCERR_DT_NOTITLE, /* missing manual title, using UNTITLED: line */ MANDOCERR_TH_NOTITLE, /* missing manual title, using "": [macro] */ - MANDOCERR_TITLE_CASE, /* lower case character in document title */ MANDOCERR_MSEC_MISSING, /* missing manual section, using "": macro */ MANDOCERR_MSEC_BAD, /* unknown manual section: Dt ... section */ MANDOCERR_DATE_MISSING, /* missing date, using today's date */ MANDOCERR_DATE_BAD, /* cannot parse date, using it verbatim: date */ + MANDOCERR_DATE_FUTURE, /* date in the future, using it anyway: date */ MANDOCERR_OS_MISSING, /* missing Os macro, using "" */ - MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */ MANDOCERR_PROLOG_LATE, /* late prologue macro: macro */ - MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */ MANDOCERR_PROLOG_ORDER, /* prologue macros out of order: macros */ /* related to document structure */ @@ -71,9 +97,11 @@ enum mandocerr { MANDOCERR_NAMESEC_BAD, /* bad NAME section content: macro */ MANDOCERR_NAMESEC_PUNCT, /* missing comma before name: Nm name */ MANDOCERR_ND_EMPTY, /* missing description line, using "" */ + MANDOCERR_ND_LATE, /* description line outside NAME section */ MANDOCERR_SEC_ORDER, /* sections out of conventional order: Sh title */ MANDOCERR_SEC_REP, /* duplicate section title: Sh title */ MANDOCERR_SEC_MSEC, /* unexpected section: Sh title for ... only */ + MANDOCERR_XR_SELF, /* cross reference to self: Xr name sec */ MANDOCERR_XR_ORDER, /* unusual Xr order: ... after ... */ MANDOCERR_XR_PUNCT, /* unusual Xr punctuation: ... after ... */ MANDOCERR_AN_MISSING, /* AUTHORS section without An macro */ @@ -87,9 +115,9 @@ enum mandocerr { MANDOCERR_BLK_NEST, /* blocks badly nested: macro ... */ MANDOCERR_BD_NEST, /* nested displays are not portable: macro ... */ MANDOCERR_BL_MOVE, /* moving content out of list: macro */ - MANDOCERR_FI_SKIP, /* fill mode already enabled, skipping: fi */ - MANDOCERR_NF_SKIP, /* fill mode already disabled, skipping: nf */ + MANDOCERR_TA_LINE, /* first macro on line: Ta */ MANDOCERR_BLK_LINE, /* line scope broken: macro breaks macro */ + MANDOCERR_BLK_BLANK, /* skipping blank line in line scope */ /* related to missing arguments */ MANDOCERR_REQ_EMPTY, /* skipping empty request: request */ @@ -104,6 +132,7 @@ enum mandocerr { MANDOCERR_FO_NOHEAD, /* missing function name, using "": Fo */ MANDOCERR_IT_NOHEAD, /* empty head in list item: Bl -type It */ MANDOCERR_IT_NOBODY, /* empty list item: Bl -type It */ + MANDOCERR_IT_NOARG, /* missing argument, using next line: Bl -c It */ MANDOCERR_BF_NOFONT, /* missing font type, using \fR: Bf */ MANDOCERR_BF_BADFONT, /* unknown font type, using \fR: Bf font */ MANDOCERR_PF_SKIP, /* nothing follows prefix: Pf arg */ @@ -115,7 +144,6 @@ enum mandocerr { MANDOCERR_EQN_NOBOX, /* missing eqn box, using "": op */ /* related to bad arguments */ - MANDOCERR_ARG_QUOTE, /* unterminated quoted argument */ MANDOCERR_ARG_REP, /* duplicate argument: macro arg */ MANDOCERR_AN_REP, /* skipping duplicate argument: An -arg */ MANDOCERR_BD_REP, /* skipping duplicate display type: Bd -type */ @@ -125,6 +153,7 @@ enum mandocerr { MANDOCERR_AT_BAD, /* unknown AT&T UNIX version: At version */ MANDOCERR_FA_COMMA, /* comma in function argument: arg */ MANDOCERR_FN_PAREN, /* parenthesis in function name: arg */ + MANDOCERR_LB_BAD, /* unknown library name: Lb ... */ MANDOCERR_RS_BAD, /* invalid content in Rs block: macro */ MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */ MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */ @@ -133,9 +162,7 @@ enum mandocerr { /* related to plain text */ MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */ MANDOCERR_FI_TAB, /* tab in filled text */ - MANDOCERR_SPACE_EOL, /* whitespace at end of input line */ MANDOCERR_EOS, /* new sentence, new line */ - MANDOCERR_COMMENT_BAD, /* bad comment style */ MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */ MANDOCERR_STR_UNDEF, /* undefined string, using "": name */ @@ -161,6 +188,8 @@ enum mandocerr { /* related to document structure and macros */ MANDOCERR_FILE, /* cannot open file */ + MANDOCERR_PROLOG_REP, /* duplicate prologue macro: macro */ + MANDOCERR_DT_LATE, /* skipping late title macro: Dt args */ MANDOCERR_ROFFLOOP, /* input stack limit exceeded, infinite loop? */ MANDOCERR_CHAR_BAD, /* skipping bad character: number */ MANDOCERR_MACRO, /* skipping unknown macro: macro */ @@ -177,6 +206,7 @@ enum mandocerr { MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */ MANDOCERR_BD_NOARG, /* skipping display without arguments: Bd */ MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */ + MANDOCERR_CE_NONUM, /* argument is not numeric, using 1: ce ... */ MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */ MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */ MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */ @@ -234,9 +264,10 @@ enum tbl_cellt { */ struct tbl_cell { struct tbl_cell *next; + char *wstr; /* min width represented as a string */ + size_t width; /* minimum column width */ + size_t spacing; /* to the right of the column */ int vert; /* width of subsequent vertical line */ - enum tbl_cellt pos; - size_t spacing; int col; /* column number, starting from 0 */ int flags; #define TBL_CELL_TALIGN (1 << 0) /* t, T */ @@ -247,6 +278,7 @@ struct tbl_cell { #define TBL_CELL_UP (1 << 5) /* u, U */ #define TBL_CELL_WIGN (1 << 6) /* z, Z */ #define TBL_CELL_WMAX (1 << 7) /* x, X */ + enum tbl_cellt pos; }; /* @@ -274,9 +306,10 @@ enum tbl_datt { */ struct tbl_dat { struct tbl_cell *layout; /* layout cell */ - int spans; /* how many spans follow */ struct tbl_dat *next; char *string; /* data (NULL if not TBL_DATA_DATA) */ + int spans; /* how many spans follow */ + int block; /* T{ text block T} */ enum tbl_datt pos; }; @@ -301,11 +334,9 @@ struct tbl_span { }; enum eqn_boxt { - EQN_ROOT, /* root of parse tree */ EQN_TEXT, /* text (number, variable, whatever) */ EQN_SUBEXPR, /* nested `eqn' subexpression */ EQN_LIST, /* list (braces, etc.) */ - EQN_LISTONE, /* singleton list */ EQN_PILE, /* vertical pile */ EQN_MATRIX /* pile of piles */ }; @@ -371,17 +402,6 @@ struct eqn_box { }; /* - * An equation consists of a tree of expressions starting at a given - * line and position. - */ -struct eqn { - char *name; /* identifier (or NULL) */ - struct eqn_box *root; /* root mathematical expression */ - int ln; /* invocation line */ - int pos; /* invocation position */ -}; - -/* * Parse options. */ #define MPARSE_MDOC 1 /* assume -mdoc */ @@ -391,6 +411,12 @@ struct eqn { #define MPARSE_UTF8 16 /* accept UTF-8 input */ #define MPARSE_LATIN1 32 /* accept ISO-LATIN-1 input */ +enum mandoc_os { + MANDOC_OS_OTHER = 0, + MANDOC_OS_NETBSD, + MANDOC_OS_OPENBSD +}; + enum mandoc_esc { ESCAPE_ERROR = 0, /* bail! unparsable escape */ ESCAPE_IGNORE, /* escape to be ignored */ @@ -403,7 +429,10 @@ enum mandoc_esc { ESCAPE_FONTPREV, /* previous font mode */ ESCAPE_NUMBERED, /* a numbered glyph */ ESCAPE_UNICODE, /* a unicode codepoint */ + ESCAPE_BREAK, /* break the output line */ ESCAPE_NOSPACE, /* suppress space if the last on a line */ + ESCAPE_HORIZ, /* horizontal movement */ + ESCAPE_HLINE, /* horizontal line drawing */ ESCAPE_SKIPCHAR, /* skip the next character */ ESCAPE_OVERSTRIKE /* overstrike all chars in the argument */ }; @@ -423,7 +452,8 @@ const char *mchars_uc2str(int); int mchars_num2uc(const char *, size_t); int mchars_spec2cp(const char *, size_t); const char *mchars_spec2str(const char *, size_t, size_t *); -struct mparse *mparse_alloc(int, enum mandoclevel, mandocmsg, const char *); +struct mparse *mparse_alloc(int, enum mandocerr, mandocmsg, + enum mandoc_os, const char *); void mparse_free(struct mparse *); void mparse_keep(struct mparse *); int mparse_open(struct mparse *, const char *); diff --git a/usr/src/cmd/mandoc/mandoc_aux.c b/usr/src/cmd/mandoc/mandoc_aux.c index cc74b7e720..db593e444c 100644 --- a/usr/src/cmd/mandoc/mandoc_aux.c +++ b/usr/src/cmd/mandoc/mandoc_aux.c @@ -1,7 +1,7 @@ -/* $Id: mandoc_aux.c,v 1.9 2015/11/07 14:22:29 schwarze Exp $ */ +/* $Id: mandoc_aux.c,v 1.10 2017/06/12 19:05:47 schwarze Exp $ */ /* * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -71,7 +71,6 @@ mandoc_malloc(size_t size) void * mandoc_realloc(void *ptr, size_t size) { - ptr = realloc(ptr, size); if (ptr == NULL) err((int)MANDOCLEVEL_SYSERR, NULL); @@ -81,13 +80,21 @@ mandoc_realloc(void *ptr, size_t size) void * mandoc_reallocarray(void *ptr, size_t num, size_t size) { - ptr = reallocarray(ptr, num, size); if (ptr == NULL) err((int)MANDOCLEVEL_SYSERR, NULL); return ptr; } +void * +mandoc_recallocarray(void *ptr, size_t oldnum, size_t num, size_t size) +{ + ptr = recallocarray(ptr, oldnum, num, size); + if (ptr == NULL) + err((int)MANDOCLEVEL_SYSERR, NULL); + return ptr; +} + char * mandoc_strdup(const char *ptr) { diff --git a/usr/src/cmd/mandoc/mandoc_aux.h b/usr/src/cmd/mandoc/mandoc_aux.h index a2425066c1..469e331eb4 100644 --- a/usr/src/cmd/mandoc/mandoc_aux.h +++ b/usr/src/cmd/mandoc/mandoc_aux.h @@ -1,7 +1,7 @@ -/* $Id: mandoc_aux.h,v 1.6 2017/02/17 14:31:52 schwarze Exp $ */ +/* $Id: mandoc_aux.h,v 1.7 2017/06/12 19:05:47 schwarze Exp $ */ /* * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -22,5 +22,6 @@ void *mandoc_calloc(size_t, size_t); void *mandoc_malloc(size_t); void *mandoc_realloc(void *, size_t); void *mandoc_reallocarray(void *, size_t, size_t); +void *mandoc_recallocarray(void *, size_t, size_t, size_t); char *mandoc_strdup(const char *); char *mandoc_strndup(const char *, size_t); diff --git a/usr/src/cmd/mandoc/mandoc_xr.c b/usr/src/cmd/mandoc/mandoc_xr.c new file mode 100644 index 0000000000..da0a7f0cf2 --- /dev/null +++ b/usr/src/cmd/mandoc/mandoc_xr.c @@ -0,0 +1,121 @@ +/* $Id: mandoc_xr.c,v 1.3 2017/07/02 21:18:29 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc_ohash.h" +#include "mandoc_xr.h" + +static struct ohash *xr_hash = NULL; +static struct mandoc_xr *xr_first = NULL; +static struct mandoc_xr *xr_last = NULL; + +static void mandoc_xr_clear(void); + + +static void +mandoc_xr_clear(void) +{ + struct mandoc_xr *xr; + unsigned int slot; + + if (xr_hash == NULL) + return; + for (xr = ohash_first(xr_hash, &slot); xr != NULL; + xr = ohash_next(xr_hash, &slot)) + free(xr); + ohash_delete(xr_hash); +} + +void +mandoc_xr_reset(void) +{ + if (xr_hash == NULL) + xr_hash = mandoc_malloc(sizeof(*xr_hash)); + else + mandoc_xr_clear(); + mandoc_ohash_init(xr_hash, 5, + offsetof(struct mandoc_xr, hashkey)); + xr_first = xr_last = NULL; +} + +int +mandoc_xr_add(const char *sec, const char *name, int line, int pos) +{ + struct mandoc_xr *xr, *oxr; + const char *pend; + size_t ssz, nsz, tsz; + unsigned int slot; + int ret; + uint32_t hv; + + if (xr_hash == NULL) + return 0; + + ssz = strlen(sec) + 1; + nsz = strlen(name) + 1; + tsz = ssz + nsz; + xr = mandoc_malloc(sizeof(*xr) + tsz); + xr->next = NULL; + xr->sec = xr->hashkey; + xr->name = xr->hashkey + ssz; + xr->line = line; + xr->pos = pos; + xr->count = 1; + memcpy(xr->sec, sec, ssz); + memcpy(xr->name, name, nsz); + + pend = xr->hashkey + tsz; + hv = ohash_interval(xr->hashkey, &pend); + slot = ohash_lookup_memory(xr_hash, xr->hashkey, tsz, hv); + if ((oxr = ohash_find(xr_hash, slot)) == NULL) { + ohash_insert(xr_hash, slot, xr); + if (xr_first == NULL) + xr_first = xr; + else + xr_last->next = xr; + xr_last = xr; + return 0; + } + + oxr->count++; + ret = (oxr->line == -1) ^ (xr->line == -1); + if (xr->line == -1) + oxr->line = -1; + free(xr); + return ret; +} + +struct mandoc_xr * +mandoc_xr_get(void) +{ + return xr_first; +} + +void +mandoc_xr_free(void) +{ + mandoc_xr_clear(); + free(xr_hash); + xr_hash = NULL; +} diff --git a/usr/src/cmd/mandoc/mandoc_xr.h b/usr/src/cmd/mandoc/mandoc_xr.h new file mode 100644 index 0000000000..e0c6af0ebf --- /dev/null +++ b/usr/src/cmd/mandoc/mandoc_xr.h @@ -0,0 +1,31 @@ +/* $Id: mandoc_xr.h,v 1.3 2017/07/02 21:18:29 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct mandoc_xr { + struct mandoc_xr *next; + char *sec; + char *name; + int line; /* Or -1 for this page's own names. */ + int pos; + int count; + char hashkey[]; +}; + +void mandoc_xr_reset(void); +int mandoc_xr_add(const char *, const char *, int, int); +struct mandoc_xr *mandoc_xr_get(void); +void mandoc_xr_free(void); diff --git a/usr/src/cmd/mandoc/mandocdb.c b/usr/src/cmd/mandoc/mandocdb.c index 3b26ca9649..26117cf827 100644 --- a/usr/src/cmd/mandoc/mandocdb.c +++ b/usr/src/cmd/mandoc/mandocdb.c @@ -1,4 +1,4 @@ -/* $Id: mandocdb.c,v 1.244 2017/02/17 14:45:55 schwarze Exp $ */ +/* $Id: mandocdb.c,v 1.253 2017/07/28 14:48:25 schwarze Exp $ */ /* * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -183,8 +183,7 @@ static struct ohash names; /* table of all names */ static struct ohash strings; /* table of all strings */ static uint64_t name_mask; -static const struct mdoc_handler mdocs[MDOC_MAX] = { - { NULL, 0, 0 }, /* Ap */ +static const struct mdoc_handler __mdocs[MDOC_MAX - MDOC_Dd] = { { NULL, 0, NODE_NOPRT }, /* Dd */ { NULL, 0, NODE_NOPRT }, /* Dt */ { NULL, 0, NODE_NOPRT }, /* Os */ @@ -200,6 +199,7 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = { { NULL, 0, 0 }, /* It */ { NULL, 0, 0 }, /* Ad */ { NULL, TYPE_An, 0 }, /* An */ + { NULL, 0, 0 }, /* Ap */ { NULL, TYPE_Ar, 0 }, /* Ar */ { NULL, TYPE_Cd, 0 }, /* Cd */ { NULL, TYPE_Cm, 0 }, /* Cm */ @@ -302,12 +302,10 @@ static const struct mdoc_handler mdocs[MDOC_MAX] = { { NULL, 0, 0 }, /* En */ { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ { NULL, 0, 0 }, /* %Q */ - { NULL, 0, 0 }, /* br */ - { NULL, 0, 0 }, /* sp */ { NULL, 0, 0 }, /* %U */ { NULL, 0, 0 }, /* Ta */ - { NULL, 0, 0 }, /* ll */ }; +static const struct mdoc_handler *const mdocs = __mdocs - MDOC_Dd; int @@ -422,7 +420,8 @@ mandocdb(int argc, char *argv[]) exitcode = (int)MANDOCLEVEL_OK; mchars_alloc(); - mp = mparse_alloc(mparse_options, MANDOCLEVEL_BADARG, NULL, NULL); + mp = mparse_alloc(mparse_options, MANDOCERR_MAX, NULL, + MANDOC_OS_OTHER, NULL); mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); @@ -1211,7 +1210,7 @@ mpages_merge(struct dba *dba, struct mparse *mp) } else if (man != NULL && man->macroset == MACROSET_MAN) { man_validate(man); if (*man->meta.msec != '\0' || - *man->meta.msec != '\0') { + *man->meta.title != '\0') { mpage->form = FORM_SRC; mpage->sec = mandoc_strdup(man->meta.msec); mpage->arch = mandoc_strdup(mlink->arch); @@ -1545,25 +1544,26 @@ parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, const struct roff_node *n) { - assert(NULL != n); - for (n = n->child; NULL != n; n = n->next) { - if (n->flags & mdocs[n->tok].taboo) + for (n = n->child; n != NULL; n = n->next) { + if (n->tok == TOKEN_NONE || + n->tok < ROFF_MAX || + n->flags & mdocs[n->tok].taboo) continue; + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); switch (n->type) { case ROFFT_ELEM: case ROFFT_BLOCK: case ROFFT_HEAD: case ROFFT_BODY: case ROFFT_TAIL: - if (NULL != mdocs[n->tok].fp) - if (0 == (*mdocs[n->tok].fp)(mpage, meta, n)) - break; + if (mdocs[n->tok].fp != NULL && + (*mdocs[n->tok].fp)(mpage, meta, n) == 0) + break; if (mdocs[n->tok].mask) putmdockey(mpage, n->child, mdocs[n->tok].mask, mdocs[n->tok].taboo); break; default: - assert(n->type != ROFFT_ROOT); continue; } if (NULL != n->child) @@ -2123,6 +2123,23 @@ dbwrite(struct dba *dba) int status; pid_t child; + /* + * Do not write empty databases, and delete existing ones + * when makewhatis -u causes them to become empty. + */ + + dba_array_start(dba->pages); + if (dba_array_next(dba->pages) == NULL) { + if (unlink(MANDOC_DB) == -1 && errno != ENOENT) + say(MANDOC_DB, "&unlink"); + return; + } + + /* + * Build the database in a temporary file, + * then atomically move it into place. + */ + if (dba_write(MANDOC_DB "~", dba) != -1) { if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { exitcode = (int)MANDOCLEVEL_SYSERR; @@ -2132,6 +2149,11 @@ dbwrite(struct dba *dba) return; } + /* + * We lack write permission and cannot replace the database + * file, but let's at least check whether the data changed. + */ + (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); if (mkdtemp(tfn) == NULL) { exitcode = (int)MANDOCLEVEL_SYSERR; diff --git a/usr/src/cmd/mandoc/manpath.c b/usr/src/cmd/mandoc/manpath.c index f43ace6069..54f7a6b110 100644 --- a/usr/src/cmd/mandoc/manpath.c +++ b/usr/src/cmd/mandoc/manpath.c @@ -1,4 +1,4 @@ -/* $Id: manpath.c,v 1.33 2017/02/10 15:45:28 schwarze Exp $ */ +/* $Id: manpath.c,v 1.35 2017/07/01 09:47:30 schwarze Exp $ */ /* * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> @@ -91,6 +91,13 @@ manconf_parse(struct manconf *conf, const char *file, manpath_parseline(&conf->manpath, defp, 0); } +void +manpath_base(struct manpaths *dirs) +{ + char path_base[] = MANPATH_BASE; + manpath_parseline(dirs, path_base, 0); +} + /* * Parse a FULL pathname from a colon-separated list of arrays. */ @@ -299,7 +306,7 @@ manconf_output(struct manoutput *conf, const char *cp, int fromfile) mandoc_asprintf(&oldval, "%zu", conf->width); break; } - conf->width = strtonum(cp, 58, 1000, &errstr); + conf->width = strtonum(cp, 1, 1000, &errstr); if (errstr == NULL) return 0; warnx("-O width=%s is %s", cp, errstr); diff --git a/usr/src/cmd/mandoc/mansearch.c b/usr/src/cmd/mandoc/mansearch.c index 6e689bd358..0d60c3bed7 100644 --- a/usr/src/cmd/mandoc/mansearch.c +++ b/usr/src/cmd/mandoc/mansearch.c @@ -1,7 +1,7 @@ -/* $OpenBSD: mansearch.c,v 1.50 2016/07/09 15:23:36 schwarze Exp $ */ +/* $Id: mansearch.c,v 1.76 2017/08/02 13:29:04 schwarze Exp $ */ /* * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2013, 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013-2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -67,9 +67,9 @@ static struct ohash *manmerge_term(struct expr *, struct ohash *); static struct ohash *manmerge_or(struct expr *, struct ohash *); static struct ohash *manmerge_and(struct expr *, struct ohash *); static char *buildnames(const struct dbm_page *); -static char *buildoutput(size_t, int32_t); -static size_t lstlen(const char *); -static void lstcat(char *, size_t *, const char *); +static char *buildoutput(size_t, struct dbm_page *); +static size_t lstlen(const char *, size_t); +static void lstcat(char *, size_t *, const char *, const char *); static int lstmatch(const char *, const char *); static struct expr *exprcomp(const struct mansearch *, int, char *[], int *); @@ -104,7 +104,8 @@ mansearch(const struct mansearch *search, } cur = maxres = 0; - *res = NULL; + if (res != NULL) + *res = NULL; outkey = KEY_Nd; if (search->outkey != NULL) @@ -155,7 +156,8 @@ mansearch(const struct mansearch *search, chdir_status = 1; if (dbm_open(MANDOC_DB) == -1) { - warn("%s/%s", paths->paths[i], MANDOC_DB); + if (errno != ENOENT) + warn("%s/%s", paths->paths[i], MANDOC_DB); continue; } @@ -169,9 +171,15 @@ mansearch(const struct mansearch *search, page = dbm_page_get(rp->page); if (lstmatch(search->sec, page->sect) == 0 || - lstmatch(search->arch, page->arch) == 0) + lstmatch(search->arch, page->arch) == 0 || + (search->argmode == ARG_NAME && + rp->bits <= (int32_t)(NAME_SYN & NAME_MASK))) continue; + if (res == NULL) { + cur = 1; + break; + } if (cur + 1 > maxres) { maxres += 1024; *res = mandoc_reallocarray(*res, @@ -181,9 +189,7 @@ mansearch(const struct mansearch *search, mandoc_asprintf(&mpage->file, "%s/%s", paths->paths[i], page->file + 1); mpage->names = buildnames(page); - mpage->output = (int)outkey == KEY_Nd ? - mandoc_strdup(page->desc) : - buildoutput(outkey, page->addr); + mpage->output = buildoutput(outkey, page); mpage->ipath = i; mpage->bits = rp->bits; mpage->sec = *page->sect - '0'; @@ -205,12 +211,13 @@ mansearch(const struct mansearch *search, if (cur && search->firstmatch) break; } - qsort(*res, cur, sizeof(struct manpage), manpage_compare); + if (res != NULL) + qsort(*res, cur, sizeof(struct manpage), manpage_compare); if (chdir_status && getcwd_status && chdir(buf) == -1) warn("%s", buf); exprfree(e); *sz = cur; - return 1; + return res != NULL || cur; } /* @@ -389,13 +396,29 @@ static int manpage_compare(const void *vp1, const void *vp2) { const struct manpage *mp1, *mp2; + const char *cp1, *cp2; + size_t sz1, sz2; int diff; mp1 = vp1; mp2 = vp2; - return (diff = mp2->bits - mp1->bits) ? diff : - (diff = mp1->sec - mp2->sec) ? diff : - strcasecmp(mp1->names, mp2->names); + if ((diff = mp2->bits - mp1->bits) || + (diff = mp1->sec - mp2->sec)) + return diff; + + /* Fall back to alphabetic ordering of names. */ + sz1 = strcspn(mp1->names, "("); + sz2 = strcspn(mp2->names, "("); + if (sz1 < sz2) + sz1 = sz2; + if ((diff = strncasecmp(mp1->names, mp2->names, sz1))) + return diff; + + /* For identical names and sections, prefer arch-dependent. */ + cp1 = strchr(mp1->names + sz1, '/'); + cp2 = strchr(mp2->names + sz2, '/'); + return cp1 != NULL && cp2 != NULL ? strcasecmp(cp1, cp2) : + cp1 != NULL ? -1 : cp2 != NULL ? 1 : 0; } static char * @@ -404,16 +427,16 @@ buildnames(const struct dbm_page *page) char *buf; size_t i, sz; - sz = lstlen(page->name) + 1 + lstlen(page->sect) + - (page->arch == NULL ? 0 : 1 + lstlen(page->arch)) + 2; + sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) + + (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2; buf = mandoc_malloc(sz); i = 0; - lstcat(buf, &i, page->name); + lstcat(buf, &i, page->name, ", "); buf[i++] = '('; - lstcat(buf, &i, page->sect); + lstcat(buf, &i, page->sect, ", "); if (page->arch != NULL) { buf[i++] = '/'; - lstcat(buf, &i, page->arch); + lstcat(buf, &i, page->arch, ", "); } buf[i++] = ')'; buf[i++] = '\0'; @@ -423,43 +446,75 @@ buildnames(const struct dbm_page *page) /* * Count the buffer space needed to print the NUL-terminated - * list of NUL-terminated strings, when printing two separator + * list of NUL-terminated strings, when printing sep separator * characters between strings. */ static size_t -lstlen(const char *cp) +lstlen(const char *cp, size_t sep) { size_t sz; - for (sz = 0;; sz++) { - if (cp[0] == '\0') { - if (cp[1] == '\0') - break; + for (sz = 0; *cp != '\0'; cp++) { + + /* Skip names appearing only in the SYNOPSIS. */ + if (*cp <= (char)(NAME_SYN & NAME_MASK)) { + while (*cp != '\0') + cp++; + continue; + } + + /* Skip name class markers. */ + if (*cp < ' ') + cp++; + + /* Print a separator before each but the first string. */ + if (sz) + sz += sep; + + /* Copy one string. */ + while (*cp != '\0') { sz++; - } else if (cp[0] < ' ') - sz--; - cp++; + cp++; + } } return sz; } /* * Print the NUL-terminated list of NUL-terminated strings - * into the buffer, seperating strings with a comma and a blank. + * into the buffer, seperating strings with sep. */ static void -lstcat(char *buf, size_t *i, const char *cp) +lstcat(char *buf, size_t *i, const char *cp, const char *sep) { - for (;;) { - if (cp[0] == '\0') { - if (cp[1] == '\0') - break; - buf[(*i)++] = ','; - buf[(*i)++] = ' '; - } else if (cp[0] >= ' ') - buf[(*i)++] = cp[0]; - cp++; + const char *s; + size_t i_start; + + for (i_start = *i; *cp != '\0'; cp++) { + + /* Skip names appearing only in the SYNOPSIS. */ + if (*cp <= (char)(NAME_SYN & NAME_MASK)) { + while (*cp != '\0') + cp++; + continue; + } + + /* Skip name class markers. */ + if (*cp < ' ') + cp++; + + /* Print a separator before each but the first string. */ + if (*i > i_start) { + s = sep; + while (*s != '\0') + buf[(*i)++] = *s++; + } + + /* Copy one string. */ + while (*cp != '\0') + buf[(*i)++] = *cp++; } + } /* @@ -482,17 +537,46 @@ lstmatch(const char *want, const char *have) } /* - * Build a list of values taken by the macro im - * in the manual page with big-endian address addr. + * Build a list of values taken by the macro im in the manual page. */ static char * -buildoutput(size_t im, int32_t addr) +buildoutput(size_t im, struct dbm_page *page) { - const char *oldoutput, *sep; + const char *oldoutput, *sep, *input; char *output, *newoutput, *value; + size_t sz, i; + + switch (im) { + case KEY_Nd: + return mandoc_strdup(page->desc); + case KEY_Nm: + input = page->name; + break; + case KEY_sec: + input = page->sect; + break; + case KEY_arch: + input = page->arch; + if (input == NULL) + input = "all\0"; + break; + default: + input = NULL; + break; + } + + if (input != NULL) { + sz = lstlen(input, 3) + 1; + output = mandoc_malloc(sz); + i = 0; + lstcat(output, &i, input, " # "); + output[i++] = '\0'; + assert(i == sz); + return output; + } output = NULL; - dbm_macro_bypage(im - 2, addr); + dbm_macro_bypage(im - 2, page->addr); while ((value = dbm_macro_next()) != NULL) { if (output == NULL) { oldoutput = ""; @@ -642,6 +726,12 @@ exprterm(const struct mansearch *search, int argc, char *argv[], int *argi) return e; } + if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) { + cs = 0; + ++*argi; + } else + cs = 1; + e = mandoc_calloc(1, sizeof(*e)); e->type = EXPR_TERM; e->bits = 0; diff --git a/usr/src/cmd/mandoc/mansearch.h b/usr/src/cmd/mandoc/mansearch.h index 892c6e1e8a..cc4f364f69 100644 --- a/usr/src/cmd/mandoc/mansearch.h +++ b/usr/src/cmd/mandoc/mansearch.h @@ -1,7 +1,7 @@ -/* $Id: mansearch.h,v 1.27 2016/08/01 12:31:00 schwarze Exp $ */ +/* $Id: mansearch.h,v 1.28 2017/04/17 20:05:08 schwarze Exp $ */ /* * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2013, 2014, 2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2013, 2014, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -21,6 +21,9 @@ #define MANDOCDB_VERSION 1 #define MACRO_MAX 36 +#define KEY_arch 0 +#define KEY_sec 1 +#define KEY_Nm 38 #define KEY_Nd 39 #define KEY_MAX 40 diff --git a/usr/src/cmd/mandoc/mdoc.c b/usr/src/cmd/mandoc/mdoc.c index 5be1e7810d..7afcc5d29e 100644 --- a/usr/src/cmd/mandoc/mdoc.c +++ b/usr/src/cmd/mandoc/mdoc.c @@ -1,4 +1,4 @@ -/* $Id: mdoc.c,v 1.260 2017/02/16 03:00:23 schwarze Exp $ */ +/* $Id: mdoc.c,v 1.267 2017/06/17 13:06:16 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -35,41 +35,6 @@ #include "roff_int.h" #include "libmdoc.h" -const char *const __mdoc_macronames[MDOC_MAX + 1] = { - "text", - "Ap", "Dd", "Dt", "Os", - "Sh", "Ss", "Pp", "D1", - "Dl", "Bd", "Ed", "Bl", - "El", "It", "Ad", "An", - "Ar", "Cd", "Cm", "Dv", - "Er", "Ev", "Ex", "Fa", - "Fd", "Fl", "Fn", "Ft", - "Ic", "In", "Li", "Nd", - "Nm", "Op", "Ot", "Pa", - "Rv", "St", "Va", "Vt", - "Xr", "%A", "%B", "%D", - "%I", "%J", "%N", "%O", - "%P", "%R", "%T", "%V", - "Ac", "Ao", "Aq", "At", - "Bc", "Bf", "Bo", "Bq", - "Bsx", "Bx", "Db", "Dc", - "Do", "Dq", "Ec", "Ef", - "Em", "Eo", "Fx", "Ms", - "No", "Ns", "Nx", "Ox", - "Pc", "Pf", "Po", "Pq", - "Qc", "Ql", "Qo", "Qq", - "Re", "Rs", "Sc", "So", - "Sq", "Sm", "Sx", "Sy", - "Tn", "Ux", "Xc", "Xo", - "Fo", "Fc", "Oo", "Oc", - "Bk", "Ek", "Bt", "Hf", - "Fr", "Ud", "Lb", "Lp", - "Lk", "Mt", "Brq", "Bro", - "Brc", "%C", "Es", "En", - "Dx", "%Q", "br", "sp", - "%U", "Ta", "ll", -}; - const char *const __mdoc_argnames[MDOC_ARG_MAX] = { "split", "nosplit", "ragged", "unfilled", "literal", "file", @@ -80,9 +45,7 @@ const char *const __mdoc_argnames[MDOC_ARG_MAX] = { "width", "compact", "std", "filled", "words", "emphasis", "symbolic", "nested", "centered" - }; - -const char * const *mdoc_macronames = __mdoc_macronames + 1; +}; const char * const *mdoc_argnames = __mdoc_argnames; static int mdoc_ptext(struct roff_man *, int, char *, int); @@ -119,13 +82,12 @@ mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) void mdoc_macro(MACRO_PROT_ARGS) { - assert(tok > TOKEN_NONE && tok < MDOC_MAX); - + assert(tok >= MDOC_Dd && tok < MDOC_MAX); (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); } void -mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) +mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) { struct roff_node *p; @@ -135,8 +97,8 @@ mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) } struct roff_node * -mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, - struct roff_node *body) +mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, + enum roff_tok tok, struct roff_node *body) { struct roff_node *p; @@ -153,7 +115,7 @@ mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, struct roff_node * mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, - int tok, struct mdoc_arg *args) + enum roff_tok tok, struct mdoc_arg *args) { struct roff_node *p; @@ -180,7 +142,7 @@ mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, void mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, - int tok, struct mdoc_arg *args) + enum roff_tok tok, struct mdoc_arg *args) { struct roff_node *p; @@ -217,6 +179,7 @@ static int mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) { struct roff_node *n; + const char *cp, *sp; char *c, *ws, *end; n = mdoc->last; @@ -282,16 +245,31 @@ mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, line, (int)(ws-buf), NULL); + /* + * Blank lines are allowed in no-fill mode + * and cancel preceding \c, + * but add a single vertical space elsewhere. + */ + if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { + switch (mdoc->last->type) { + case ROFFT_TEXT: + sp = mdoc->last->string; + cp = end = strchr(sp, '\0') - 2; + if (cp < sp || cp[0] != '\\' || cp[1] != 'c') + break; + while (cp > sp && cp[-1] == '\\') + cp--; + if ((end - cp) % 2) + break; + *end = '\0'; + return 1; + default: + break; + } mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, line, (int)(c - buf), NULL); - - /* - * Insert a `sp' in the case of a blank line. Technically, - * blank lines aren't allowed, but enough manuals assume this - * behaviour that we want to work around it. - */ - roff_elem_alloc(mdoc, line, offs, MDOC_sp); + roff_elem_alloc(mdoc, line, offs, ROFF_sp); mdoc->last->flags |= NODE_VALID | NODE_ENDED; mdoc->next = ROFF_NEXT_SIBLING; return 1; @@ -316,14 +294,20 @@ mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { if (c - buf < offs + 2) continue; - if (end - c < 4) + if (end - c < 3) break; - if (isalpha((unsigned char)c[-2]) && - isalpha((unsigned char)c[-1]) && - c[1] == ' ' && - isupper((unsigned char)(c[2] == ' ' ? c[3] : c[2])) && - (c[-2] != 'n' || c[-1] != 'c') && - (c[-2] != 'v' || c[-1] != 's')) + if (c[1] != ' ' || + isalpha((unsigned char)c[-2]) == 0 || + isalpha((unsigned char)c[-1]) == 0 || + (c[-2] == 'n' && c[-1] == 'c') || + (c[-2] == 'v' && c[-1] == 's')) + continue; + c += 2; + if (*c == ' ') + c++; + if (*c == ' ') + c++; + if (isupper((unsigned char)(*c))) mandoc_msg(MANDOCERR_EOS, mdoc->parse, line, (int)(c - buf), NULL); } @@ -340,25 +324,18 @@ mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) { struct roff_node *n; const char *cp; - int tok; - int i, sv; - char mac[5]; - - sv = offs; + size_t sz; + enum roff_tok tok; + int sv; - /* - * Copy the first word into a nil-terminated buffer. - * Stop when a space, tab, escape, or eoln is encountered. - */ - - i = 0; - while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) - mac[i++] = buf[offs++]; - - mac[i] = '\0'; - - tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; + /* Determine the line macro. */ + sv = offs; + tok = TOKEN_NONE; + for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) + offs++; + if (sz == 2 || sz == 3) + tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); if (tok == TOKEN_NONE) { mandoc_msg(MANDOCERR_MACRO, mdoc->parse, ln, sv, buf + sv - 1); @@ -382,7 +359,7 @@ mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) /* Jump to the next non-whitespace word. */ - while (buf[offs] && ' ' == buf[offs]) + while (buf[offs] == ' ') offs++; /* diff --git a/usr/src/cmd/mandoc/mdoc.h b/usr/src/cmd/mandoc/mdoc.h index ebe4391ef6..1628e0c80c 100644 --- a/usr/src/cmd/mandoc/mdoc.h +++ b/usr/src/cmd/mandoc/mdoc.h @@ -1,4 +1,4 @@ -/* $Id: mdoc.h,v 1.144 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: mdoc.h,v 1.145 2017/04/24 23:06:18 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -16,131 +16,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define MDOC_Ap 0 -#define MDOC_Dd 1 -#define MDOC_Dt 2 -#define MDOC_Os 3 -#define MDOC_Sh 4 -#define MDOC_Ss 5 -#define MDOC_Pp 6 -#define MDOC_D1 7 -#define MDOC_Dl 8 -#define MDOC_Bd 9 -#define MDOC_Ed 10 -#define MDOC_Bl 11 -#define MDOC_El 12 -#define MDOC_It 13 -#define MDOC_Ad 14 -#define MDOC_An 15 -#define MDOC_Ar 16 -#define MDOC_Cd 17 -#define MDOC_Cm 18 -#define MDOC_Dv 19 -#define MDOC_Er 20 -#define MDOC_Ev 21 -#define MDOC_Ex 22 -#define MDOC_Fa 23 -#define MDOC_Fd 24 -#define MDOC_Fl 25 -#define MDOC_Fn 26 -#define MDOC_Ft 27 -#define MDOC_Ic 28 -#define MDOC_In 29 -#define MDOC_Li 30 -#define MDOC_Nd 31 -#define MDOC_Nm 32 -#define MDOC_Op 33 -#define MDOC_Ot 34 -#define MDOC_Pa 35 -#define MDOC_Rv 36 -#define MDOC_St 37 -#define MDOC_Va 38 -#define MDOC_Vt 39 -#define MDOC_Xr 40 -#define MDOC__A 41 -#define MDOC__B 42 -#define MDOC__D 43 -#define MDOC__I 44 -#define MDOC__J 45 -#define MDOC__N 46 -#define MDOC__O 47 -#define MDOC__P 48 -#define MDOC__R 49 -#define MDOC__T 50 -#define MDOC__V 51 -#define MDOC_Ac 52 -#define MDOC_Ao 53 -#define MDOC_Aq 54 -#define MDOC_At 55 -#define MDOC_Bc 56 -#define MDOC_Bf 57 -#define MDOC_Bo 58 -#define MDOC_Bq 59 -#define MDOC_Bsx 60 -#define MDOC_Bx 61 -#define MDOC_Db 62 -#define MDOC_Dc 63 -#define MDOC_Do 64 -#define MDOC_Dq 65 -#define MDOC_Ec 66 -#define MDOC_Ef 67 -#define MDOC_Em 68 -#define MDOC_Eo 69 -#define MDOC_Fx 70 -#define MDOC_Ms 71 -#define MDOC_No 72 -#define MDOC_Ns 73 -#define MDOC_Nx 74 -#define MDOC_Ox 75 -#define MDOC_Pc 76 -#define MDOC_Pf 77 -#define MDOC_Po 78 -#define MDOC_Pq 79 -#define MDOC_Qc 80 -#define MDOC_Ql 81 -#define MDOC_Qo 82 -#define MDOC_Qq 83 -#define MDOC_Re 84 -#define MDOC_Rs 85 -#define MDOC_Sc 86 -#define MDOC_So 87 -#define MDOC_Sq 88 -#define MDOC_Sm 89 -#define MDOC_Sx 90 -#define MDOC_Sy 91 -#define MDOC_Tn 92 -#define MDOC_Ux 93 -#define MDOC_Xc 94 -#define MDOC_Xo 95 -#define MDOC_Fo 96 -#define MDOC_Fc 97 -#define MDOC_Oo 98 -#define MDOC_Oc 99 -#define MDOC_Bk 100 -#define MDOC_Ek 101 -#define MDOC_Bt 102 -#define MDOC_Hf 103 -#define MDOC_Fr 104 -#define MDOC_Ud 105 -#define MDOC_Lb 106 -#define MDOC_Lp 107 -#define MDOC_Lk 108 -#define MDOC_Mt 109 -#define MDOC_Brq 110 -#define MDOC_Bro 111 -#define MDOC_Brc 112 -#define MDOC__C 113 -#define MDOC_Es 114 -#define MDOC_En 115 -#define MDOC_Dx 116 -#define MDOC__Q 117 -#define MDOC_br 118 -#define MDOC_sp 119 -#define MDOC__U 120 -#define MDOC_Ta 121 -#define MDOC_ll 122 -#define MDOC_MAX 123 - enum mdocargt { MDOC_Split, /* -split */ MDOC_Nosplit, /* -nospli */ @@ -274,11 +149,7 @@ union mdoc_data { struct mdoc_rs Rs; }; -/* Names of macros. */ -extern const char *const *mdoc_macronames; - /* Names of macro args. Index is enum mdocargt. */ extern const char *const *mdoc_argnames; - void mdoc_validate(struct roff_man *); diff --git a/usr/src/cmd/mandoc/mdoc_argv.c b/usr/src/cmd/mandoc/mdoc_argv.c index b47c7dbdea..db4c63f0d6 100644 --- a/usr/src/cmd/mandoc/mdoc_argv.c +++ b/usr/src/cmd/mandoc/mdoc_argv.c @@ -1,7 +1,7 @@ -/* $Id: mdoc_argv.c,v 1.109 2016/08/28 16:15:12 schwarze Exp $ */ +/* $Id: mdoc_argv.c,v 1.115 2017/05/30 16:22:03 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2012, 2014-2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -144,8 +144,7 @@ static const enum mdocargt args_Bl[] = { MDOC_ARG_MAX }; -static const struct mdocarg mdocargs[MDOC_MAX] = { - { ARGSFL_DELIM, NULL }, /* Ap */ +static const struct mdocarg __mdocargs[MDOC_MAX - MDOC_Dd] = { { ARGSFL_NONE, NULL }, /* Dd */ { ARGSFL_NONE, NULL }, /* Dt */ { ARGSFL_NONE, NULL }, /* Os */ @@ -161,6 +160,7 @@ static const struct mdocarg mdocargs[MDOC_MAX] = { { ARGSFL_NONE, NULL }, /* It */ { ARGSFL_DELIM, NULL }, /* Ad */ { ARGSFL_DELIM, args_An }, /* An */ + { ARGSFL_DELIM, NULL }, /* Ap */ { ARGSFL_DELIM, NULL }, /* Ar */ { ARGSFL_DELIM, NULL }, /* Cd */ { ARGSFL_DELIM, NULL }, /* Cm */ @@ -263,12 +263,10 @@ static const struct mdocarg mdocargs[MDOC_MAX] = { { ARGSFL_DELIM, NULL }, /* En */ { ARGSFL_DELIM, NULL }, /* Dx */ { ARGSFL_NONE, NULL }, /* %Q */ - { ARGSFL_NONE, NULL }, /* br */ - { ARGSFL_NONE, NULL }, /* sp */ { ARGSFL_NONE, NULL }, /* %U */ { ARGSFL_NONE, NULL }, /* Ta */ - { ARGSFL_NONE, NULL }, /* ll */ }; +static const struct mdocarg *const mdocargs = __mdocargs - MDOC_Dd; /* @@ -277,7 +275,7 @@ static const struct mdocarg mdocargs[MDOC_MAX] = { * Some flags take no argument, some one, some multiple. */ void -mdoc_argv(struct roff_man *mdoc, int line, int tok, +mdoc_argv(struct roff_man *mdoc, int line, enum roff_tok tok, struct mdoc_arg **reta, int *pos, char *buf) { struct mdoc_argv tmpv; @@ -291,6 +289,7 @@ mdoc_argv(struct roff_man *mdoc, int line, int tok, /* Which flags does this macro support? */ + assert(tok >= MDOC_Dd && tok < MDOC_MAX); argtable = mdocargs[tok].argvs; if (argtable == NULL) return; @@ -415,7 +414,7 @@ argn_free(struct mdoc_arg *p, int iarg) enum margserr mdoc_args(struct roff_man *mdoc, int line, int *pos, - char *buf, int tok, char **v) + char *buf, enum roff_tok tok, char **v) { struct roff_node *n; char *v_local; @@ -424,8 +423,6 @@ mdoc_args(struct roff_man *mdoc, int line, int *pos, if (v == NULL) v = &v_local; fl = tok == TOKEN_NONE ? ARGSFL_NONE : mdocargs[tok].flags; - if (tok != MDOC_It) - return args(mdoc, line, pos, buf, fl, v); /* * We know that we're in an `It', so it's reasonable to expect @@ -434,12 +431,15 @@ mdoc_args(struct roff_man *mdoc, int line, int *pos, * safe fall-back into the default behaviour. */ - for (n = mdoc->last; n; n = n->parent) - if (MDOC_Bl == n->tok) - if (LIST_column == n->norm->Bl.type) { + if (tok == MDOC_It) { + for (n = mdoc->last; n != NULL; n = n->parent) { + if (n->tok != MDOC_Bl) + continue; + if (n->norm->Bl.type == LIST_column) fl = ARGSFL_TABSEP; - break; - } + break; + } + } return args(mdoc, line, pos, buf, fl, v); } @@ -555,14 +555,14 @@ args(struct roff_man *mdoc, int line, int *pos, if ( ! (mdoc->flags & MDOC_PHRASE)) mandoc_msg(MANDOCERR_ARG_QUOTE, mdoc->parse, line, *pos, NULL); - return ARGS_QWORD; + return ARGS_WORD; } mdoc->flags &= ~MDOC_PHRASELIT; buf[(*pos)++] = '\0'; if ('\0' == buf[*pos]) - return ARGS_QWORD; + return ARGS_WORD; while (' ' == buf[*pos]) (*pos)++; @@ -571,7 +571,7 @@ args(struct roff_man *mdoc, int line, int *pos, mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, line, *pos, NULL); - return ARGS_QWORD; + return ARGS_WORD; } p = &buf[*pos]; diff --git a/usr/src/cmd/mandoc/mdoc_hash.c b/usr/src/cmd/mandoc/mdoc_hash.c deleted file mode 100644 index cad3c2db1a..0000000000 --- a/usr/src/cmd/mandoc/mdoc_hash.c +++ /dev/null @@ -1,95 +0,0 @@ -/* $Id: mdoc_hash.c,v 1.27 2016/07/15 18:03:45 schwarze Exp $ */ -/* - * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -#include "config.h" - -#include <sys/types.h> - -#include <assert.h> -#include <ctype.h> -#include <limits.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include "mandoc.h" -#include "roff.h" -#include "mdoc.h" -#include "libmandoc.h" -#include "libmdoc.h" - -static unsigned char table[27 * 12]; - - -void -mdoc_hash_init(void) -{ - int i, j, major; - const char *p; - - if (*table != '\0') - return; - - memset(table, UCHAR_MAX, sizeof(table)); - - for (i = 0; i < (int)MDOC_MAX; i++) { - p = mdoc_macronames[i]; - - if (isalpha((unsigned char)p[1])) - major = 12 * (tolower((unsigned char)p[1]) - 97); - else - major = 12 * 26; - - for (j = 0; j < 12; j++) - if (UCHAR_MAX == table[major + j]) { - table[major + j] = (unsigned char)i; - break; - } - - assert(j < 12); - } -} - -int -mdoc_hash_find(const char *p) -{ - int major, i, j; - - if (0 == p[0]) - return TOKEN_NONE; - if ( ! isalpha((unsigned char)p[0]) && '%' != p[0]) - return TOKEN_NONE; - - if (isalpha((unsigned char)p[1])) - major = 12 * (tolower((unsigned char)p[1]) - 97); - else if ('1' == p[1]) - major = 12 * 26; - else - return TOKEN_NONE; - - if (p[2] && p[3]) - return TOKEN_NONE; - - for (j = 0; j < 12; j++) { - if (UCHAR_MAX == (i = table[major + j])) - break; - if (0 == strcmp(p, mdoc_macronames[i])) - return i; - } - - return TOKEN_NONE; -} diff --git a/usr/src/cmd/mandoc/mdoc_html.c b/usr/src/cmd/mandoc/mdoc_html.c index e824514166..0b4b9adf34 100644 --- a/usr/src/cmd/mandoc/mdoc_html.c +++ b/usr/src/cmd/mandoc/mdoc_html.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_html.c,v 1.271 2017/02/16 03:00:23 schwarze Exp $ */ +/* $Id: mdoc_html.c,v 1.294 2017/07/15 17:57:51 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -27,6 +27,7 @@ #include <unistd.h> #include "mandoc_aux.h" +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "out.h" @@ -48,7 +49,7 @@ struct htmlmdoc { void (*post)(MDOC_ARGS); }; -static char *make_id(const struct roff_node *); +static char *cond_id(const struct roff_node *); static void print_mdoc_head(MDOC_ARGS); static void print_mdoc_node(MDOC_ARGS); static void print_mdoc_nodelist(MDOC_ARGS); @@ -108,7 +109,6 @@ static int mdoc_rs_pre(MDOC_ARGS); static int mdoc_sh_pre(MDOC_ARGS); static int mdoc_skip_pre(MDOC_ARGS); static int mdoc_sm_pre(MDOC_ARGS); -static int mdoc_sp_pre(MDOC_ARGS); static int mdoc_ss_pre(MDOC_ARGS); static int mdoc_st_pre(MDOC_ARGS); static int mdoc_sx_pre(MDOC_ARGS); @@ -118,8 +118,7 @@ static int mdoc_vt_pre(MDOC_ARGS); static int mdoc_xr_pre(MDOC_ARGS); static int mdoc_xx_pre(MDOC_ARGS); -static const struct htmlmdoc mdocs[MDOC_MAX] = { - {mdoc_ap_pre, NULL}, /* Ap */ +static const struct htmlmdoc __mdocs[MDOC_MAX - MDOC_Dd] = { {NULL, NULL}, /* Dd */ {NULL, NULL}, /* Dt */ {NULL, NULL}, /* Os */ @@ -135,6 +134,7 @@ static const struct htmlmdoc mdocs[MDOC_MAX] = { {mdoc_it_pre, NULL}, /* It */ {mdoc_ad_pre, NULL}, /* Ad */ {mdoc_an_pre, NULL}, /* An */ + {mdoc_ap_pre, NULL}, /* Ap */ {mdoc_ar_pre, NULL}, /* Ar */ {mdoc_cd_pre, NULL}, /* Cd */ {mdoc_cm_pre, NULL}, /* Cm */ @@ -237,12 +237,10 @@ static const struct htmlmdoc mdocs[MDOC_MAX] = { {mdoc_quote_pre, mdoc_quote_post}, /* En */ {mdoc_xx_pre, NULL}, /* Dx */ {mdoc__x_pre, mdoc__x_post}, /* %Q */ - {mdoc_sp_pre, NULL}, /* br */ - {mdoc_sp_pre, NULL}, /* sp */ {mdoc__x_pre, mdoc__x_post}, /* %U */ {NULL, NULL}, /* Ta */ - {mdoc_skip_pre, NULL}, /* ll */ }; +static const struct htmlmdoc *const mdocs = __mdocs - MDOC_Dd; /* @@ -362,9 +360,9 @@ print_mdoc_node(MDOC_ARGS) * Make sure that if we're in a literal mode already * (i.e., within a <PRE>) don't print the newline. */ - if (' ' == *n->string && NODE_LINE & n->flags) - if ( ! (HTML_LITERAL & h->flags)) - print_otag(h, TAG_BR, ""); + if (*n->string == ' ' && n->flags & NODE_LINE && + (h->flags & (HTML_LITERAL | HTML_NONEWLINE)) == 0) + print_otag(h, TAG_BR, ""); if (NODE_DELIMC & n->flags) h->flags |= HTML_NOSPACE; print_text(h, n->string); @@ -393,7 +391,14 @@ print_mdoc_node(MDOC_ARGS) t = h->tag; } assert(h->tblt == NULL); - if (mdocs[n->tok].pre && (n->end == ENDBODY_NOT || n->child)) + if (n->tok < ROFF_MAX) { + roff_html_pre(h, n); + child = 0; + break; + } + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + if (mdocs[n->tok].pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) child = (*mdocs[n->tok].pre)(meta, n, h); break; } @@ -412,7 +417,9 @@ print_mdoc_node(MDOC_ARGS) case ROFFT_EQN: break; default: - if ( ! mdocs[n->tok].post || n->flags & NODE_ENDED) + if (n->tok < ROFF_MAX || + mdocs[n->tok].post == NULL || + n->flags & NODE_ENDED) break; (*mdocs[n->tok].post)(meta, n, h); if (n->end != ENDBODY_NOT) @@ -477,25 +484,19 @@ mdoc_root_pre(MDOC_ARGS) } static char * -make_id(const struct roff_node *n) +cond_id(const struct roff_node *n) { - const struct roff_node *nch; - char *buf, *cp; - - for (nch = n->child; nch != NULL; nch = nch->next) - if (nch->type != ROFFT_TEXT) - return NULL; - - buf = NULL; - deroff(&buf, n); - - /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */ - - for (cp = buf; *cp != '\0'; cp++) - if (*cp == ' ') - *cp = '_'; - - return buf; + if (n->child != NULL && + n->child->type == ROFFT_TEXT && + (n->prev == NULL || + (n->prev->type == ROFFT_TEXT && + strcmp(n->prev->string, "|") == 0)) && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Xo && + n->parent->parent->prev == NULL && + n->parent->parent->parent->tok == MDOC_It))) + return html_make_id(n); + return NULL; } static int @@ -505,8 +506,10 @@ mdoc_sh_pre(MDOC_ARGS) switch (n->type) { case ROFFT_HEAD: - id = make_id(n); - print_otag(h, TAG_H1, "ci", "Sh", id); + id = html_make_id(n); + print_otag(h, TAG_H1, "cTi", "Sh", id); + if (id != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); free(id); break; case ROFFT_BODY: @@ -527,8 +530,10 @@ mdoc_ss_pre(MDOC_ARGS) if (n->type != ROFFT_HEAD) return 1; - id = make_id(n); - print_otag(h, TAG_H2, "ci", "Ss", id); + id = html_make_id(n); + print_otag(h, TAG_H2, "cTi", "Ss", id); + if (id != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); free(id); return 1; } @@ -536,9 +541,14 @@ mdoc_ss_pre(MDOC_ARGS) static int mdoc_fl_pre(MDOC_ARGS) { - print_otag(h, TAG_B, "c", "Fl"); - print_text(h, "\\-"); + char *id; + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_B, "cTi", "Fl", id); + free(id); + + print_text(h, "\\-"); if (!(n->child == NULL && (n->next == NULL || n->next->type == ROFFT_TEXT || @@ -551,7 +561,12 @@ mdoc_fl_pre(MDOC_ARGS) static int mdoc_cm_pre(MDOC_ARGS) { - print_otag(h, TAG_B, "c", "Cm"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_B, "cTi", "Cm", id); + free(id); return 1; } @@ -564,22 +579,19 @@ mdoc_nd_pre(MDOC_ARGS) /* XXX: this tag in theory can contain block elements. */ print_text(h, "\\(em"); - print_otag(h, TAG_SPAN, "c", "Nd"); + print_otag(h, TAG_SPAN, "cT", "Nd"); return 1; } static int mdoc_nm_pre(MDOC_ARGS) { - struct tag *t; - int len; - switch (n->type) { case ROFFT_HEAD: print_otag(h, TAG_TD, ""); /* FALLTHROUGH */ case ROFFT_ELEM: - print_otag(h, TAG_B, "c", "Nm"); + print_otag(h, TAG_B, "cT", "Nm"); return 1; case ROFFT_BODY: print_otag(h, TAG_TD, ""); @@ -587,21 +599,8 @@ mdoc_nm_pre(MDOC_ARGS) default: break; } - synopsis_pre(h, n); print_otag(h, TAG_TABLE, "c", "Nm"); - - for (len = 0, n = n->head->child; n; n = n->next) - if (n->type == ROFFT_TEXT) - len += html_strlen(n->string); - - if (len == 0 && meta->name != NULL) - len = html_strlen(meta->name); - - t = print_otag(h, TAG_COLGROUP, ""); - print_otag(h, TAG_COL, "shw", len); - print_otag(h, TAG_COL, ""); - print_tagq(h, t); print_otag(h, TAG_TR, ""); return 1; } @@ -613,11 +612,11 @@ mdoc_xr_pre(MDOC_ARGS) return 0; if (h->base_man) - print_otag(h, TAG_A, "chM", "Xr", + print_otag(h, TAG_A, "cThM", "Xr", n->child->string, n->child->next == NULL ? NULL : n->child->next->string); else - print_otag(h, TAG_A, "c", "Xr"); + print_otag(h, TAG_A, "cT", "Xr"); n = n->child; print_text(h, n->string); @@ -646,7 +645,7 @@ mdoc_ns_pre(MDOC_ARGS) static int mdoc_ar_pre(MDOC_ARGS) { - print_otag(h, TAG_VAR, "c", "Ar"); + print_otag(h, TAG_VAR, "cT", "Ar"); return 1; } @@ -666,7 +665,7 @@ mdoc_it_pre(MDOC_ARGS) enum mdoc_list type; bl = n->parent; - while (bl != NULL && bl->tok != MDOC_Bl) + while (bl->tok != MDOC_Bl) bl = bl->parent; type = bl->norm->Bl.type; @@ -716,10 +715,7 @@ mdoc_it_pre(MDOC_ARGS) case ROFFT_HEAD: return 0; case ROFFT_BODY: - if (bl->norm->Bl.comp) - print_otag(h, TAG_LI, "csvt", cattr, 0); - else - print_otag(h, TAG_LI, "c", cattr); + print_otag(h, TAG_LI, "c", cattr); break; default: break; @@ -731,15 +727,12 @@ mdoc_it_pre(MDOC_ARGS) case LIST_ohang: switch (n->type) { case ROFFT_HEAD: - if (bl->norm->Bl.comp) - print_otag(h, TAG_DT, "csvt", cattr, 0); - else - print_otag(h, TAG_DT, "c", cattr); + print_otag(h, TAG_DT, "c", cattr); if (type == LIST_diag) print_otag(h, TAG_B, "c", cattr); break; case ROFFT_BODY: - print_otag(h, TAG_DD, "cswl", cattr, + print_otag(h, TAG_DD, "csw*+l", cattr, bl->norm->Bl.width); break; default: @@ -751,8 +744,9 @@ mdoc_it_pre(MDOC_ARGS) case ROFFT_HEAD: if (h->style != NULL && !bl->norm->Bl.comp && (n->parent->prev == NULL || + n->parent->prev->body == NULL || n->parent->prev->body->child != NULL)) { - t = print_otag(h, TAG_DT, "csWl", + t = print_otag(h, TAG_DT, "csw*+-l", cattr, bl->norm->Bl.width); print_text(h, "\\ "); print_tagq(h, t); @@ -760,7 +754,7 @@ mdoc_it_pre(MDOC_ARGS) print_text(h, "\\ "); print_tagq(h, t); } - print_otag(h, TAG_DT, "csWl", cattr, + print_otag(h, TAG_DT, "csw*+-l", cattr, bl->norm->Bl.width); break; case ROFFT_BODY: @@ -780,10 +774,7 @@ mdoc_it_pre(MDOC_ARGS) case ROFFT_HEAD: break; case ROFFT_BODY: - if (bl->norm->Bl.comp) - print_otag(h, TAG_TD, "csvt", cattr, 0); - else - print_otag(h, TAG_TD, "c", cattr); + print_otag(h, TAG_TD, "c", cattr); break; default: print_otag(h, TAG_TR, "c", cattr); @@ -798,9 +789,9 @@ mdoc_it_pre(MDOC_ARGS) static int mdoc_bl_pre(MDOC_ARGS) { + char cattr[21]; struct tag *t; struct mdoc_bl *bl; - const char *cattr; size_t i; enum htmltag elemtype; @@ -823,7 +814,7 @@ mdoc_bl_pre(MDOC_ARGS) t = print_otag(h, TAG_COLGROUP, ""); for (i = 0; i < bl->ncols - 1; i++) - print_otag(h, TAG_COL, "sww", bl->cols[i]); + print_otag(h, TAG_COL, "sw+w", bl->cols[i]); print_otag(h, TAG_COL, "swW", bl->cols[i]); print_tagq(h, t); return 0; @@ -835,50 +826,52 @@ mdoc_bl_pre(MDOC_ARGS) switch (bl->type) { case LIST_bullet: elemtype = TAG_UL; - cattr = "Bl-bullet"; + (void)strlcpy(cattr, "Bl-bullet", sizeof(cattr)); break; case LIST_dash: case LIST_hyphen: elemtype = TAG_UL; - cattr = "Bl-dash"; + (void)strlcpy(cattr, "Bl-dash", sizeof(cattr)); break; case LIST_item: elemtype = TAG_UL; - cattr = "Bl-item"; + (void)strlcpy(cattr, "Bl-item", sizeof(cattr)); break; case LIST_enum: elemtype = TAG_OL; - cattr = "Bl-enum"; + (void)strlcpy(cattr, "Bl-enum", sizeof(cattr)); break; case LIST_diag: elemtype = TAG_DL; - cattr = "Bl-diag"; + (void)strlcpy(cattr, "Bl-diag", sizeof(cattr)); break; case LIST_hang: elemtype = TAG_DL; - cattr = "Bl-hang"; + (void)strlcpy(cattr, "Bl-hang", sizeof(cattr)); break; case LIST_inset: elemtype = TAG_DL; - cattr = "Bl-inset"; + (void)strlcpy(cattr, "Bl-inset", sizeof(cattr)); break; case LIST_ohang: elemtype = TAG_DL; - cattr = "Bl-ohang"; + (void)strlcpy(cattr, "Bl-ohang", sizeof(cattr)); break; case LIST_tag: - cattr = "Bl-tag"; if (bl->offs) - print_otag(h, TAG_DIV, "cswl", cattr, bl->offs); - print_otag(h, TAG_DL, "cswl", cattr, bl->width); + print_otag(h, TAG_DIV, "cswl", "Bl-tag", bl->offs); + print_otag(h, TAG_DL, "csw*+l", bl->comp ? + "Bl-tag Bl-compact" : "Bl-tag", bl->width); return 1; case LIST_column: elemtype = TAG_TABLE; - cattr = "Bl-column"; + (void)strlcpy(cattr, "Bl-column", sizeof(cattr)); break; default: abort(); } + if (bl->comp) + (void)strlcat(cattr, " Bl-compact", sizeof(cattr)); print_otag(h, elemtype, "cswl", cattr, bl->offs); return 1; } @@ -894,14 +887,14 @@ mdoc_ex_pre(MDOC_ARGS) static int mdoc_st_pre(MDOC_ARGS) { - print_otag(h, TAG_SPAN, "c", "St"); + print_otag(h, TAG_SPAN, "cT", "St"); return 1; } static int mdoc_em_pre(MDOC_ARGS) { - print_otag(h, TAG_I, "c", "Em"); + print_otag(h, TAG_I, "cT", "Em"); return 1; } @@ -924,8 +917,8 @@ mdoc_sx_pre(MDOC_ARGS) { char *id; - id = make_id(n); - print_otag(h, TAG_A, "chR", "Sx", id); + id = html_make_id(n); + print_otag(h, TAG_A, "cThR", "Sx", id); free(id); return 1; } @@ -991,9 +984,9 @@ mdoc_bd_pre(MDOC_ARGS) * anyway, so don't sweat it. */ switch (nn->tok) { + case ROFF_br: + case ROFF_sp: case MDOC_Sm: - case MDOC_br: - case MDOC_sp: case MDOC_Bl: case MDOC_D1: case MDOC_Dl: @@ -1021,7 +1014,7 @@ mdoc_bd_pre(MDOC_ARGS) static int mdoc_pa_pre(MDOC_ARGS) { - print_otag(h, TAG_I, "c", "Pa"); + print_otag(h, TAG_I, "cT", "Pa"); return 1; } @@ -1052,7 +1045,7 @@ mdoc_an_pre(MDOC_ARGS) if (n->sec == SEC_AUTHORS && ! (h->flags & HTML_NOSPLIT)) h->flags |= HTML_SPLIT; - print_otag(h, TAG_SPAN, "c", "An"); + print_otag(h, TAG_SPAN, "cT", "An"); return 1; } @@ -1060,28 +1053,49 @@ static int mdoc_cd_pre(MDOC_ARGS) { synopsis_pre(h, n); - print_otag(h, TAG_B, "c", "Cd"); + print_otag(h, TAG_B, "cT", "Cd"); return 1; } static int mdoc_dv_pre(MDOC_ARGS) { - print_otag(h, TAG_CODE, "c", "Dv"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_CODE, "cTi", "Dv", id); + free(id); return 1; } static int mdoc_ev_pre(MDOC_ARGS) { - print_otag(h, TAG_CODE, "c", "Ev"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_CODE, "cTi", "Ev", id); + free(id); return 1; } static int mdoc_er_pre(MDOC_ARGS) { - print_otag(h, TAG_CODE, "c", "Er"); + char *id; + + id = n->sec == SEC_ERRORS && + (n->parent->tok == MDOC_It || + (n->parent->tok == MDOC_Bq && + n->parent->parent->parent->tok == MDOC_It)) ? + html_make_id(n) : NULL; + + if (id != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_CODE, "cTi", "Er", id); + free(id); return 1; } @@ -1092,12 +1106,12 @@ mdoc_fa_pre(MDOC_ARGS) struct tag *t; if (n->parent->tok != MDOC_Fo) { - print_otag(h, TAG_VAR, "c", "Fa"); + print_otag(h, TAG_VAR, "cT", "Fa"); return 1; } for (nn = n->child; nn; nn = nn->next) { - t = print_otag(h, TAG_VAR, "c", "Fa"); + t = print_otag(h, TAG_VAR, "cT", "Fa"); print_text(h, nn->string); print_tagq(h, t); if (nn->next) { @@ -1128,11 +1142,11 @@ mdoc_fd_pre(MDOC_ARGS) assert(n->type == ROFFT_TEXT); if (strcmp(n->string, "#include")) { - print_otag(h, TAG_B, "c", "Fd"); + print_otag(h, TAG_B, "cT", "Fd"); return 1; } - print_otag(h, TAG_B, "c", "In"); + print_otag(h, TAG_B, "cT", "In"); print_text(h, n->string); if (NULL != (n = n->next)) { @@ -1146,10 +1160,10 @@ mdoc_fd_pre(MDOC_ARGS) cp = strchr(buf, '\0') - 1; if (cp >= buf && (*cp == '>' || *cp == '"')) *cp = '\0'; - t = print_otag(h, TAG_A, "chI", "In", buf); + t = print_otag(h, TAG_A, "cThI", "In", buf); free(buf); } else - t = print_otag(h, TAG_A, "c", "In"); + t = print_otag(h, TAG_A, "cT", "In"); print_text(h, n->string); print_tagq(h, t); @@ -1176,7 +1190,7 @@ mdoc_vt_pre(MDOC_ARGS) } else if (n->type == ROFFT_HEAD) return 0; - print_otag(h, TAG_VAR, "c", "Vt"); + print_otag(h, TAG_VAR, "cT", "Vt"); return 1; } @@ -1184,7 +1198,7 @@ static int mdoc_ft_pre(MDOC_ARGS) { synopsis_pre(h, n); - print_otag(h, TAG_VAR, "c", "Ft"); + print_otag(h, TAG_VAR, "cT", "Ft"); return 1; } @@ -1205,7 +1219,7 @@ mdoc_fn_pre(MDOC_ARGS) ep = strchr(sp, ' '); if (NULL != ep) { - t = print_otag(h, TAG_VAR, "c", "Ft"); + t = print_otag(h, TAG_VAR, "cT", "Ft"); while (ep) { sz = MIN((int)(ep - sp), BUFSIZ - 1); @@ -1218,7 +1232,7 @@ mdoc_fn_pre(MDOC_ARGS) print_tagq(h, t); } - t = print_otag(h, TAG_B, "c", "Fn"); + t = print_otag(h, TAG_B, "cT", "Fn"); if (sp) print_text(h, sp); @@ -1231,10 +1245,10 @@ mdoc_fn_pre(MDOC_ARGS) for (n = n->child->next; n; n = n->next) { if (NODE_SYNPRETTY & n->flags) - t = print_otag(h, TAG_VAR, "css?", "Fa", + t = print_otag(h, TAG_VAR, "cTss?", "Fa", "white-space", "nowrap"); else - t = print_otag(h, TAG_VAR, "c", "Fa"); + t = print_otag(h, TAG_VAR, "cT", "Fa"); print_text(h, n->string); print_tagq(h, t); if (n->next) { @@ -1287,47 +1301,39 @@ mdoc_pp_pre(MDOC_ARGS) } static int -mdoc_sp_pre(MDOC_ARGS) -{ - struct roffsu su; - - SCALE_VS_INIT(&su, 1); - - if (MDOC_sp == n->tok) { - if (NULL != (n = n->child)) { - if ( ! a2roffsu(n->string, &su, SCALE_VS)) - su.scale = 1.0; - else if (su.scale < 0.0) - su.scale = 0.0; - } - } else - su.scale = 0.0; - - print_otag(h, TAG_DIV, "suh", &su); - - /* So the div isn't empty: */ - print_text(h, "\\~"); - - return 0; - -} - -static int mdoc_lk_pre(MDOC_ARGS) { - if (NULL == (n = n->child)) - return 0; - - assert(n->type == ROFFT_TEXT); - - print_otag(h, TAG_A, "ch", "Lk", n->string); + const struct roff_node *link, *descr, *punct; + struct tag *t; - if (NULL == n->next) - print_text(h, n->string); + if ((link = n->child) == NULL) + return 0; - for (n = n->next; n; n = n->next) - print_text(h, n->string); + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link target and link text. */ + descr = link->next; + if (descr == punct) + descr = link; /* no text */ + t = print_otag(h, TAG_A, "cTh", "Lk", link->string); + do { + if (descr->flags & (NODE_DELIMC | NODE_DELIMO)) + h->flags |= HTML_NOSPACE; + print_text(h, descr->string); + descr = descr->next; + } while (descr != punct); + print_tagq(h, t); + /* Trailing punctuation. */ + while (punct != NULL) { + h->flags |= HTML_NOSPACE; + print_text(h, punct->string); + punct = punct->next; + } return 0; } @@ -1341,7 +1347,7 @@ mdoc_mt_pre(MDOC_ARGS) assert(n->type == ROFFT_TEXT); mandoc_asprintf(&cp, "mailto:%s", n->string); - t = print_otag(h, TAG_A, "ch", "Mt", cp); + t = print_otag(h, TAG_A, "cTh", "Mt", cp); print_text(h, n->string); print_tagq(h, t); free(cp); @@ -1369,7 +1375,7 @@ mdoc_fo_pre(MDOC_ARGS) return 0; assert(n->child->string); - t = print_otag(h, TAG_B, "c", "Fn"); + t = print_otag(h, TAG_B, "cT", "Fn"); print_text(h, n->child->string); print_tagq(h, t); return 0; @@ -1393,7 +1399,7 @@ mdoc_in_pre(MDOC_ARGS) struct tag *t; synopsis_pre(h, n); - print_otag(h, TAG_B, "c", "In"); + print_otag(h, TAG_B, "cT", "In"); /* * The first argument of the `In' gets special treatment as @@ -1412,9 +1418,9 @@ mdoc_in_pre(MDOC_ARGS) assert(n->type == ROFFT_TEXT); if (h->base_includes) - t = print_otag(h, TAG_A, "chI", "In", n->string); + t = print_otag(h, TAG_A, "cThI", "In", n->string); else - t = print_otag(h, TAG_A, "c", "In"); + t = print_otag(h, TAG_A, "cT", "In"); print_text(h, n->string); print_tagq(h, t); @@ -1435,14 +1441,19 @@ mdoc_in_pre(MDOC_ARGS) static int mdoc_ic_pre(MDOC_ARGS) { - print_otag(h, TAG_B, "c", "Ic"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_B, "cTi", "Ic", id); + free(id); return 1; } static int mdoc_va_pre(MDOC_ARGS) { - print_otag(h, TAG_VAR, "c", "Va"); + print_otag(h, TAG_VAR, "cT", "Va"); return 1; } @@ -1487,7 +1498,12 @@ mdoc_bf_pre(MDOC_ARGS) static int mdoc_ms_pre(MDOC_ARGS) { - print_otag(h, TAG_B, "c", "Ms"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_B, "cTi", "Ms", id); + free(id); return 1; } @@ -1516,28 +1532,38 @@ mdoc_rs_pre(MDOC_ARGS) if (n->prev && SEC_SEE_ALSO == n->sec) print_paragraph(h); - print_otag(h, TAG_CITE, "c", "Rs"); + print_otag(h, TAG_CITE, "cT", "Rs"); return 1; } static int mdoc_no_pre(MDOC_ARGS) { - print_otag(h, TAG_SPAN, "c", "No"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_SPAN, "ci", "No", id); + free(id); return 1; } static int mdoc_li_pre(MDOC_ARGS) { - print_otag(h, TAG_CODE, "c", "Li"); + char *id; + + if ((id = cond_id(n)) != NULL) + print_otag(h, TAG_A, "chR", "selflink", id); + print_otag(h, TAG_CODE, "ci", "Li", id); + free(id); return 1; } static int mdoc_sy_pre(MDOC_ARGS) { - print_otag(h, TAG_B, "c", "Sy"); + print_otag(h, TAG_B, "cT", "Sy"); return 1; } @@ -1547,7 +1573,7 @@ mdoc_lb_pre(MDOC_ARGS) if (SEC_LIBRARY == n->sec && NODE_LINE & n->flags && n->prev) print_otag(h, TAG_BR, ""); - print_otag(h, TAG_SPAN, "c", "Lb"); + print_otag(h, TAG_SPAN, "cT", "Lb"); return 1; } diff --git a/usr/src/cmd/mandoc/mdoc_macro.c b/usr/src/cmd/mandoc/mdoc_macro.c index 5ab9c41258..b463d03e22 100644 --- a/usr/src/cmd/mandoc/mdoc_macro.c +++ b/usr/src/cmd/mandoc/mdoc_macro.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_macro.c,v 1.217 2017/02/16 09:47:31 schwarze Exp $ */ +/* $Id: mdoc_macro.c,v 1.224 2017/05/30 16:22:03 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -46,21 +46,21 @@ static void phrase_ta(MACRO_PROT_ARGS); static void append_delims(struct roff_man *, int, int *, char *); static void dword(struct roff_man *, int, int, const char *, enum mdelim, int); -static int find_pending(struct roff_man *, int, int, int, - struct roff_node *); +static int find_pending(struct roff_man *, enum roff_tok, + int, int, struct roff_node *); static int lookup(struct roff_man *, int, int, int, const char *); static int macro_or_word(MACRO_PROT_ARGS, int); static void break_intermediate(struct roff_node *, - struct roff_node *); -static int parse_rest(struct roff_man *, int, int, int *, char *); -static int rew_alt(int); -static void rew_elem(struct roff_man *, int); + struct roff_node *); +static int parse_rest(struct roff_man *, enum roff_tok, + int, int *, char *); +static enum roff_tok rew_alt(enum roff_tok); +static void rew_elem(struct roff_man *, enum roff_tok); static void rew_last(struct roff_man *, const struct roff_node *); static void rew_pending(struct roff_man *, const struct roff_node *); -const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { - { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ap */ +const struct mdoc_macro __mdoc_macros[MDOC_MAX - MDOC_Dd] = { { in_line_eoln, MDOC_PROLOGUE }, /* Dd */ { in_line_eoln, MDOC_PROLOGUE }, /* Dt */ { in_line_eoln, MDOC_PROLOGUE }, /* Os */ @@ -76,6 +76,8 @@ const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { blk_full, MDOC_PARSED | MDOC_JOIN }, /* It */ { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ad */ { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* An */ + { in_line_argn, MDOC_CALLABLE | MDOC_PARSED | + MDOC_IGNDELIM | MDOC_JOIN }, /* Ap */ { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Ar */ { in_line, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Cd */ { in_line, MDOC_CALLABLE | MDOC_PARSED }, /* Cm */ @@ -196,14 +198,10 @@ const struct mdoc_macro __mdoc_macros[MDOC_MAX] = { { blk_part_imp, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* En */ { in_line_argn, MDOC_CALLABLE | MDOC_PARSED }, /* Dx */ { in_line_eoln, MDOC_JOIN }, /* %Q */ - { in_line_eoln, 0 }, /* br */ - { in_line_eoln, 0 }, /* sp */ { in_line_eoln, 0 }, /* %U */ { phrase_ta, MDOC_CALLABLE | MDOC_PARSED | MDOC_JOIN }, /* Ta */ - { in_line_eoln, MDOC_PROLOGUE }, /* ll */ }; - -const struct mdoc_macro * const mdoc_macros = __mdoc_macros; +const struct mdoc_macro *const mdoc_macros = __mdoc_macros - MDOC_Dd; /* @@ -225,7 +223,7 @@ mdoc_endparse(struct roff_man *mdoc) if (n->type == ROFFT_BLOCK && mdoc_macros[n->tok].flags & MDOC_EXPLICIT) mandoc_msg(MANDOCERR_BLK_NOEND, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); /* Rewind to the first. */ @@ -240,20 +238,19 @@ mdoc_endparse(struct roff_man *mdoc) static int lookup(struct roff_man *mdoc, int from, int line, int ppos, const char *p) { - int res; + enum roff_tok res; if (mdoc->flags & MDOC_PHRASEQF) { mdoc->flags &= ~MDOC_PHRASEQF; return TOKEN_NONE; } if (from == TOKEN_NONE || mdoc_macros[from].flags & MDOC_PARSED) { - res = mdoc_hash_find(p); + res = roffhash_find(mdoc->mdocmac, p, 0); if (res != TOKEN_NONE) { if (mdoc_macros[res].flags & MDOC_CALLABLE) return res; - if (res != MDOC_br && res != MDOC_sp && res != MDOC_ll) - mandoc_msg(MANDOCERR_MACRO_CALL, - mdoc->parse, line, ppos, p); + mandoc_msg(MANDOCERR_MACRO_CALL, + mdoc->parse, line, ppos, p); } } return TOKEN_NONE; @@ -324,8 +321,8 @@ rew_pending(struct roff_man *mdoc, const struct roff_node *n) * For a block closing macro, return the corresponding opening one. * Otherwise, return the macro itself. */ -static int -rew_alt(int tok) +static enum roff_tok +rew_alt(enum roff_tok tok) { switch (tok) { case MDOC_Ac: @@ -366,7 +363,7 @@ rew_alt(int tok) } static void -rew_elem(struct roff_man *mdoc, int tok) +rew_elem(struct roff_man *mdoc, enum roff_tok tok) { struct roff_node *n; @@ -398,7 +395,7 @@ break_intermediate(struct roff_node *n, struct roff_node *breaker) * the rew_pending() call closing out the sub-block. */ static int -find_pending(struct roff_man *mdoc, int tok, int line, int ppos, +find_pending(struct roff_man *mdoc, enum roff_tok tok, int line, int ppos, struct roff_node *target) { struct roff_node *n; @@ -420,8 +417,8 @@ find_pending(struct roff_man *mdoc, int tok, int line, int ppos, else if ( ! (target->flags & NODE_ENDED)) { mandoc_vmsg(MANDOCERR_BLK_NEST, mdoc->parse, line, ppos, - "%s breaks %s", mdoc_macronames[tok], - mdoc_macronames[n->tok]); + "%s breaks %s", roff_name[tok], + roff_name[n->tok]); mdoc_endbody_alloc(mdoc, line, ppos, tok, target); } @@ -524,7 +521,8 @@ macro_or_word(MACRO_PROT_ARGS, int parsed) mdoc_macros[tok].flags & MDOC_JOIN); return 0; } else { - if (mdoc_macros[tok].fp == in_line_eoln) + if (tok != TOKEN_NONE && + mdoc_macros[tok].fp == in_line_eoln) rew_elem(mdoc, tok); mdoc_macro(mdoc, ntok, line, ppos, pos, buf); if (tok == TOKEN_NONE) @@ -548,7 +546,7 @@ blk_exp_close(MACRO_PROT_ARGS) int j, lastarg, maxargs, nl, pending; enum margserr ac; - int atok, ntok; + enum roff_tok atok, ntok; char *p; nl = MDOC_NEWLINE & mdoc->flags; @@ -633,8 +631,7 @@ blk_exp_close(MACRO_PROT_ARGS) mandoc_vmsg(MANDOCERR_BLK_NEST, mdoc->parse, line, ppos, "%s breaks %s", - mdoc_macronames[atok], - mdoc_macronames[later->tok]); + roff_name[atok], roff_name[later->tok]); endbody = mdoc_endbody_alloc(mdoc, line, ppos, atok, body); @@ -676,14 +673,14 @@ blk_exp_close(MACRO_PROT_ARGS) if (body == NULL) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, mdoc->parse, - line, ppos, mdoc_macronames[tok]); + line, ppos, roff_name[tok]); if (maxargs && endbody == NULL) { /* * Stray .Ec without previous .Eo: * Break the output line, keep the arguments. */ - roff_elem_alloc(mdoc, line, ppos, MDOC_br); - rew_elem(mdoc, MDOC_br); + roff_elem_alloc(mdoc, line, ppos, ROFF_br); + rew_elem(mdoc, ROFF_br); } } else if (endbody == NULL) { rew_last(mdoc, body); @@ -695,7 +692,7 @@ blk_exp_close(MACRO_PROT_ARGS) if (buf[*pos] != '\0') mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, line, ppos, - "%s %s", mdoc_macronames[tok], + "%s %s", roff_name[tok], buf + *pos); if (endbody == NULL && n != NULL) rew_pending(mdoc, n); @@ -716,8 +713,7 @@ blk_exp_close(MACRO_PROT_ARGS) if (ac == ARGS_PUNCT || ac == ARGS_EOLN) break; - ntok = ac == ARGS_QWORD ? TOKEN_NONE : - lookup(mdoc, tok, line, lastarg, p); + ntok = lookup(mdoc, tok, line, lastarg, p); if (ntok == TOKEN_NONE) { dword(mdoc, line, lastarg, p, DELIM_MAX, @@ -752,7 +748,7 @@ static void in_line(MACRO_PROT_ARGS) { int la, scope, cnt, firstarg, mayopen, nc, nl; - int ntok; + enum roff_tok ntok; enum margserr ac; enum mdelim d; struct mdoc_arg *arg; @@ -813,7 +809,7 @@ in_line(MACRO_PROT_ARGS) break; } - ntok = (ac == ARGS_QWORD || (tok == MDOC_Fn && !cnt)) ? + ntok = (tok == MDOC_Fn && !cnt) ? TOKEN_NONE : lookup(mdoc, tok, line, la, p); /* @@ -833,7 +829,7 @@ in_line(MACRO_PROT_ARGS) mdoc_argv_free(arg); mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, line, ppos, - mdoc_macronames[tok]); + roff_name[tok]); } mdoc_macro(mdoc, ntok, line, la, pos, buf); if (nl) @@ -842,14 +838,11 @@ in_line(MACRO_PROT_ARGS) } /* - * Non-quote-enclosed punctuation. Set up our scope, if - * a word; rewind the scope, if a delimiter; then append - * the word. + * Handle punctuation. Set up our scope, if a word; + * rewind the scope, if a delimiter; then append the word. */ - d = ac == ARGS_QWORD ? DELIM_NONE : mdoc_isdelim(p); - - if (DELIM_NONE != d) { + if ((d = mdoc_isdelim(p)) != DELIM_NONE) { /* * If we encounter closing punctuation, no word * has been emitted, no scope is open, and we're @@ -869,11 +862,12 @@ in_line(MACRO_PROT_ARGS) * Close out our scope, if one is open, before * any punctuation. */ - if (scope) + if (scope && tok != MDOC_Lk) { rew_elem(mdoc, tok); - scope = 0; - if (tok == MDOC_Fn) - mayopen = 0; + scope = 0; + if (tok == MDOC_Fn) + mayopen = 0; + } } else if (mayopen && !scope) { mdoc_elem_alloc(mdoc, line, ppos, tok, arg); scope = 1; @@ -881,7 +875,7 @@ in_line(MACRO_PROT_ARGS) } dword(mdoc, line, la, p, d, - MDOC_JOIN & mdoc_macros[tok].flags); + mdoc_macros[tok].flags & MDOC_JOIN); /* * If the first argument is a closing delimiter, @@ -903,8 +897,10 @@ in_line(MACRO_PROT_ARGS) } } - if (scope) + if (scope && tok != MDOC_Lk) { rew_elem(mdoc, tok); + scope = 0; + } /* * If no elements have been collected and we're allowed to have @@ -919,11 +915,13 @@ in_line(MACRO_PROT_ARGS) } else { mdoc_argv_free(arg); mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, - line, ppos, mdoc_macronames[tok]); + line, ppos, roff_name[tok]); } } if (nl) append_delims(mdoc, line, pos, buf); + if (scope) + rew_elem(mdoc, tok); } static void @@ -942,7 +940,7 @@ blk_full(MACRO_PROT_ARGS) if (buf[*pos] == '\0' && (tok == MDOC_Sh || tok == MDOC_Ss)) { mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, - line, ppos, mdoc_macronames[tok]); + line, ppos, roff_name[tok]); return; } @@ -965,7 +963,7 @@ blk_full(MACRO_PROT_ARGS) mandoc_vmsg(MANDOCERR_BLK_BROKEN, mdoc->parse, line, ppos, "It breaks %s", - mdoc_macronames[blk->tok]); + roff_name[blk->tok]); rew_pending(mdoc, blk); } break; @@ -977,9 +975,8 @@ blk_full(MACRO_PROT_ARGS) case MDOC_Ss: mandoc_vmsg(MANDOCERR_BLK_BROKEN, mdoc->parse, line, ppos, - "%s breaks %s", - mdoc_macronames[tok], - mdoc_macronames[n->tok]); + "%s breaks %s", roff_name[tok], + roff_name[n->tok]); rew_pending(mdoc, n); n = mdoc->last; continue; @@ -1005,8 +1002,7 @@ blk_full(MACRO_PROT_ARGS) if (blk != NULL) { mandoc_vmsg(MANDOCERR_BLK_BROKEN, mdoc->parse, line, ppos, - "It breaks %s", - mdoc_macronames[blk->tok]); + "It breaks %s", roff_name[blk->tok]); rew_pending(mdoc, blk); blk = NULL; } @@ -1021,8 +1017,8 @@ blk_full(MACRO_PROT_ARGS) if (tok == MDOC_It && (n == NULL || n->tok != MDOC_Bl)) { mandoc_vmsg(MANDOCERR_IT_STRAY, mdoc->parse, line, ppos, "It %s", buf + *pos); - roff_elem_alloc(mdoc, line, ppos, MDOC_br); - rew_elem(mdoc, MDOC_br); + roff_elem_alloc(mdoc, line, ppos, ROFF_br); + rew_elem(mdoc, ROFF_br); return; } } @@ -1099,7 +1095,7 @@ blk_full(MACRO_PROT_ARGS) if (tok == MDOC_Bd || tok == MDOC_Bk) { mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, line, la, "%s ... %s", - mdoc_macronames[tok], buf + la); + roff_name[tok], buf + la); break; } if (tok == MDOC_Rs) { @@ -1117,7 +1113,6 @@ blk_full(MACRO_PROT_ARGS) if (head == NULL && ac != ARGS_PHRASE && - ac != ARGS_QWORD && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); continue; @@ -1214,8 +1209,7 @@ blk_part_imp(MACRO_PROT_ARGS) if (ac == ARGS_EOLN || ac == ARGS_PUNCT) break; - if (body == NULL && ac != ARGS_QWORD && - mdoc_isdelim(p) == DELIM_OPEN) { + if (body == NULL && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); continue; } @@ -1271,8 +1265,7 @@ blk_part_exp(MACRO_PROT_ARGS) /* Flush out leading punctuation. */ - if (head == NULL && ac != ARGS_QWORD && - mdoc_isdelim(p) == DELIM_OPEN) { + if (head == NULL && mdoc_isdelim(p) == DELIM_OPEN) { dword(mdoc, line, la, p, DELIM_OPEN, 0); continue; } @@ -1307,7 +1300,7 @@ in_line_argn(MACRO_PROT_ARGS) struct mdoc_arg *arg; char *p; enum margserr ac; - int ntok; + enum roff_tok ntok; int state; /* arg#; -1: not yet open; -2: closed */ int la, maxargs, nl; @@ -1371,7 +1364,7 @@ in_line_argn(MACRO_PROT_ARGS) state = -2; } - ntok = (ac == ARGS_QWORD || (tok == MDOC_Pf && state == 0)) ? + ntok = (tok == MDOC_Pf && state == 0) ? TOKEN_NONE : lookup(mdoc, tok, line, la, p); if (ntok != TOKEN_NONE) { @@ -1383,8 +1376,7 @@ in_line_argn(MACRO_PROT_ARGS) break; } - if (ac == ARGS_QWORD || - mdoc_macros[tok].flags & MDOC_IGNDELIM || + if (mdoc_macros[tok].flags & MDOC_IGNDELIM || mdoc_isdelim(p) == DELIM_NONE) { if (state == -1) { mdoc_elem_alloc(mdoc, line, ppos, tok, arg); @@ -1397,12 +1389,12 @@ in_line_argn(MACRO_PROT_ARGS) } dword(mdoc, line, la, p, DELIM_MAX, - MDOC_JOIN & mdoc_macros[tok].flags); + mdoc_macros[tok].flags & MDOC_JOIN); } if (state == -1) { mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, - line, ppos, mdoc_macronames[tok]); + line, ppos, roff_name[tok]); return; } @@ -1430,9 +1422,9 @@ in_line_eoln(MACRO_PROT_ARGS) } if (buf[*pos] == '\0' && - (tok == MDOC_Fd || mdoc_macronames[tok][0] == '%')) { + (tok == MDOC_Fd || *roff_name[tok] == '%')) { mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, - line, ppos, mdoc_macronames[tok]); + line, ppos, roff_name[tok]); return; } @@ -1449,7 +1441,8 @@ in_line_eoln(MACRO_PROT_ARGS) * or until the next macro, call that macro, and return 1. */ static int -parse_rest(struct roff_man *mdoc, int tok, int line, int *pos, char *buf) +parse_rest(struct roff_man *mdoc, enum roff_tok tok, + int line, int *pos, char *buf) { int la; diff --git a/usr/src/cmd/mandoc/mdoc_man.c b/usr/src/cmd/mandoc/mdoc_man.c index 88d39370e8..cf552ce902 100644 --- a/usr/src/cmd/mandoc/mdoc_man.c +++ b/usr/src/cmd/mandoc/mdoc_man.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_man.c,v 1.104 2017/02/17 19:15:41 schwarze Exp $ */ +/* $Id: mdoc_man.c,v 1.122 2017/06/14 22:51:25 schwarze Exp $ */ /* * Copyright (c) 2011-2017 Ingo Schwarze <schwarze@openbsd.org> * @@ -20,6 +20,7 @@ #include <assert.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include "mandoc_aux.h" @@ -32,10 +33,13 @@ #define DECL_ARGS const struct roff_meta *meta, struct roff_node *n +typedef int (*int_fp)(DECL_ARGS); +typedef void (*void_fp)(DECL_ARGS); + struct manact { - int (*cond)(DECL_ARGS); /* DON'T run actions */ - int (*pre)(DECL_ARGS); /* pre-node action */ - void (*post)(DECL_ARGS); /* post-node action */ + int_fp cond; /* DON'T run actions */ + int_fp pre; /* pre-node action */ + void_fp post; /* post-node action */ const char *prefix; /* pre-node string constant */ const char *suffix; /* post-node string constant */ }; @@ -44,6 +48,7 @@ static int cond_body(DECL_ARGS); static int cond_head(DECL_ARGS); static void font_push(char); static void font_pop(void); +static int man_strlen(const char *); static void mid_it(void); static void post__t(DECL_ARGS); static void post_aq(DECL_ARGS); @@ -68,7 +73,6 @@ static void post_nm(DECL_ARGS); static void post_percent(DECL_ARGS); static void post_pf(DECL_ARGS); static void post_sect(DECL_ARGS); -static void post_sp(DECL_ARGS); static void post_vt(DECL_ARGS); static int pre__t(DECL_ARGS); static int pre_an(DECL_ARGS); @@ -78,7 +82,7 @@ static int pre_bd(DECL_ARGS); static int pre_bf(DECL_ARGS); static int pre_bk(DECL_ARGS); static int pre_bl(DECL_ARGS); -static int pre_br(DECL_ARGS); +static void pre_br(DECL_ARGS); static int pre_dl(DECL_ARGS); static int pre_en(DECL_ARGS); static int pre_enc(DECL_ARGS); @@ -91,22 +95,24 @@ static int pre_fd(DECL_ARGS); static int pre_fl(DECL_ARGS); static int pre_fn(DECL_ARGS); static int pre_fo(DECL_ARGS); -static int pre_ft(DECL_ARGS); +static void pre_ft(DECL_ARGS); +static int pre_Ft(DECL_ARGS); static int pre_in(DECL_ARGS); static int pre_it(DECL_ARGS); static int pre_lk(DECL_ARGS); static int pre_li(DECL_ARGS); -static int pre_ll(DECL_ARGS); static int pre_nm(DECL_ARGS); static int pre_no(DECL_ARGS); static int pre_ns(DECL_ARGS); +static void pre_onearg(DECL_ARGS); static int pre_pp(DECL_ARGS); static int pre_rs(DECL_ARGS); static int pre_sm(DECL_ARGS); -static int pre_sp(DECL_ARGS); +static void pre_sp(DECL_ARGS); static int pre_sect(DECL_ARGS); static int pre_sy(DECL_ARGS); static void pre_syn(const struct roff_node *); +static void pre_ta(DECL_ARGS); static int pre_vt(DECL_ARGS); static int pre_xr(DECL_ARGS); static void print_word(const char *); @@ -118,8 +124,20 @@ static void print_width(const struct mdoc_bl *, static void print_count(int *); static void print_node(DECL_ARGS); -static const struct manact manacts[MDOC_MAX + 1] = { - { NULL, pre_ap, NULL, NULL, NULL }, /* Ap */ +static const void_fp roff_manacts[ROFF_MAX] = { + pre_br, /* br */ + pre_onearg, /* ce */ + pre_ft, /* ft */ + pre_onearg, /* ll */ + pre_onearg, /* mc */ + pre_onearg, /* po */ + pre_onearg, /* rj */ + pre_sp, /* sp */ + pre_ta, /* ta */ + pre_onearg, /* ti */ +}; + +static const struct manact __manacts[MDOC_MAX - MDOC_Dd] = { { NULL, NULL, NULL, NULL, NULL }, /* Dd */ { NULL, NULL, NULL, NULL, NULL }, /* Dt */ { NULL, NULL, NULL, NULL, NULL }, /* Os */ @@ -135,6 +153,7 @@ static const struct manact manacts[MDOC_MAX + 1] = { { NULL, pre_it, post_it, NULL, NULL }, /* It */ { NULL, pre_em, post_font, NULL, NULL }, /* Ad */ { NULL, pre_an, NULL, NULL, NULL }, /* An */ + { NULL, pre_ap, NULL, NULL, NULL }, /* Ap */ { NULL, pre_em, post_font, NULL, NULL }, /* Ar */ { NULL, pre_sy, post_font, NULL, NULL }, /* Cd */ { NULL, pre_sy, post_font, NULL, NULL }, /* Cm */ @@ -146,14 +165,14 @@ static const struct manact manacts[MDOC_MAX + 1] = { { NULL, pre_fd, post_fd, NULL, NULL }, /* Fd */ { NULL, pre_fl, post_fl, NULL, NULL }, /* Fl */ { NULL, pre_fn, post_fn, NULL, NULL }, /* Fn */ - { NULL, pre_ft, post_font, NULL, NULL }, /* Ft */ + { NULL, pre_Ft, post_font, NULL, NULL }, /* Ft */ { NULL, pre_sy, post_font, NULL, NULL }, /* Ic */ { NULL, pre_in, post_in, NULL, NULL }, /* In */ { NULL, pre_li, post_font, NULL, NULL }, /* Li */ { cond_head, pre_enc, NULL, "\\- ", NULL }, /* Nd */ { NULL, pre_nm, post_nm, NULL, NULL }, /* Nm */ { cond_body, pre_enc, post_enc, "[", "]" }, /* Op */ - { NULL, pre_ft, post_font, NULL, NULL }, /* Ot */ + { NULL, pre_Ft, post_font, NULL, NULL }, /* Ot */ { NULL, pre_em, post_font, NULL, NULL }, /* Pa */ { NULL, pre_ex, NULL, NULL, NULL }, /* Rv */ { NULL, NULL, NULL, NULL, NULL }, /* St */ @@ -179,8 +198,8 @@ static const struct manact manacts[MDOC_MAX + 1] = { { NULL, pre_bf, post_bf, NULL, NULL }, /* Bf */ { cond_body, pre_enc, post_enc, "[", "]" }, /* Bo */ { cond_body, pre_enc, post_enc, "[", "]" }, /* Bq */ - { NULL, NULL, NULL, NULL, NULL }, /* Bsx */ - { NULL, NULL, NULL, NULL, NULL }, /* Bx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bsx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Bx */ { NULL, pre_skip, NULL, NULL, NULL }, /* Db */ { NULL, NULL, NULL, NULL, NULL }, /* Dc */ { cond_body, pre_enc, post_enc, "\\(Lq", "\\(Rq" }, /* Do */ @@ -189,12 +208,12 @@ static const struct manact manacts[MDOC_MAX + 1] = { { NULL, NULL, NULL, NULL, NULL }, /* Ef */ { NULL, pre_em, post_font, NULL, NULL }, /* Em */ { cond_body, pre_eo, post_eo, NULL, NULL }, /* Eo */ - { NULL, NULL, NULL, NULL, NULL }, /* Fx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Fx */ { NULL, pre_sy, post_font, NULL, NULL }, /* Ms */ { NULL, pre_no, NULL, NULL, NULL }, /* No */ { NULL, pre_ns, NULL, NULL, NULL }, /* Ns */ - { NULL, NULL, NULL, NULL, NULL }, /* Nx */ - { NULL, NULL, NULL, NULL, NULL }, /* Ox */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Nx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Ox */ { NULL, NULL, NULL, NULL, NULL }, /* Pc */ { NULL, NULL, post_pf, NULL, NULL }, /* Pf */ { cond_body, pre_enc, post_enc, "(", ")" }, /* Po */ @@ -235,15 +254,12 @@ static const struct manact manacts[MDOC_MAX + 1] = { { NULL, NULL, post_percent, NULL, NULL }, /* %C */ { NULL, pre_skip, NULL, NULL, NULL }, /* Es */ { cond_body, pre_en, post_en, NULL, NULL }, /* En */ - { NULL, NULL, NULL, NULL, NULL }, /* Dx */ + { NULL, pre_bk, post_bk, NULL, NULL }, /* Dx */ { NULL, NULL, post_percent, NULL, NULL }, /* %Q */ - { NULL, pre_br, NULL, NULL, NULL }, /* br */ - { NULL, pre_sp, post_sp, NULL, NULL }, /* sp */ { NULL, NULL, post_percent, NULL, NULL }, /* %U */ { NULL, NULL, NULL, NULL, NULL }, /* Ta */ - { NULL, pre_ll, post_sp, NULL, NULL }, /* ll */ - { NULL, NULL, NULL, NULL, NULL }, /* ROOT */ }; +static const struct manact *const manacts = __manacts - MDOC_Dd; static int outflags; #define MMAN_spc (1 << 0) /* blank character before next word */ @@ -274,6 +290,49 @@ static struct { } fontqueue; +static int +man_strlen(const char *cp) +{ + size_t rsz; + int skip, sz; + + sz = 0; + skip = 0; + for (;;) { + rsz = strcspn(cp, "\\"); + if (rsz) { + cp += rsz; + if (skip) { + skip = 0; + rsz--; + } + sz += rsz; + } + if ('\0' == *cp) + break; + cp++; + switch (mandoc_escape(&cp, NULL, NULL)) { + case ESCAPE_ERROR: + return sz; + case ESCAPE_UNICODE: + case ESCAPE_NUMBERED: + case ESCAPE_SPECIAL: + case ESCAPE_OVERSTRIKE: + if (skip) + skip = 0; + else + sz++; + break; + case ESCAPE_SKIPCHAR: + skip = 1; + break; + default: + break; + } + } + return sz; +} + static void font_push(char newfont) { @@ -391,7 +450,6 @@ static void print_line(const char *s, int newflags) { - outflags &= ~MMAN_br; outflags |= MMAN_nl; print_word(s); outflags |= newflags; @@ -420,6 +478,7 @@ print_offs(const char *v, int keywords) { char buf[24]; struct roffsu su; + const char *end; int sz; print_line(".RS", MMAN_Bk_susp); @@ -431,8 +490,11 @@ print_offs(const char *v, int keywords) sz = 6; else if (keywords && !strcmp(v, "indent-two")) sz = 12; - else if (a2roffsu(v, &su, SCALE_EN) > 1) { - if (SCALE_EN == su.unit) + else { + end = a2roffsu(v, &su, SCALE_EN); + if (end == NULL || *end != '\0') + sz = man_strlen(v); + else if (SCALE_EN == su.unit) sz = su.scale; else { /* @@ -446,8 +508,7 @@ print_offs(const char *v, int keywords) outflags |= MMAN_nl; return; } - } else - sz = strlen(v); + } /* * We are inside an enclosing list. @@ -469,6 +530,7 @@ print_width(const struct mdoc_bl *bl, const struct roff_node *child) { char buf[24]; struct roffsu su; + const char *end; int numeric, remain, sz, chsz; numeric = 1; @@ -477,21 +539,23 @@ print_width(const struct mdoc_bl *bl, const struct roff_node *child) /* Convert the width into a number (of characters). */ if (bl->width == NULL) sz = (bl->type == LIST_hang) ? 6 : 0; - else if (a2roffsu(bl->width, &su, SCALE_MAX) > 1) { - if (SCALE_EN == su.unit) + else { + end = a2roffsu(bl->width, &su, SCALE_MAX); + if (end == NULL || *end != '\0') + sz = man_strlen(bl->width); + else if (SCALE_EN == su.unit) sz = su.scale; else { sz = 0; numeric = 0; } - } else - sz = strlen(bl->width); + } /* XXX Rough estimation, might have multiple parts. */ if (bl->type == LIST_enum) chsz = (bl->count > 8) + 1; else if (child != NULL && child->type == ROFFT_TEXT) - chsz = strlen(child->string); + chsz = man_strlen(child->string); else chsz = 0; @@ -607,7 +671,11 @@ print_node(DECL_ARGS) outflags &= ~(MMAN_spc | MMAN_spc_force); else if (outflags & MMAN_Sm) outflags |= MMAN_spc; + } else if (n->tok < ROFF_MAX) { + (*roff_manacts[n->tok])(meta, n); + return; } else { + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); /* * Conditionally run the pre-node action handler for a * node. @@ -715,8 +783,7 @@ static int pre__t(DECL_ARGS) { - if (n->parent && MDOC_Rs == n->parent->tok && - n->parent->norm->Rs.quote_T) { + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) { print_word("\\(lq"); outflags &= ~MMAN_spc; } else @@ -728,8 +795,7 @@ static void post__t(DECL_ARGS) { - if (n->parent && MDOC_Rs == n->parent->tok && - n->parent->norm->Rs.quote_T) { + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) { outflags &= ~MMAN_spc; print_word("\\(rq"); } else @@ -926,11 +992,11 @@ post_bf(DECL_ARGS) static int pre_bk(DECL_ARGS) { - switch (n->type) { case ROFFT_BLOCK: return 1; case ROFFT_BODY: + case ROFFT_ELEM: outflags |= MMAN_Bk; return 1; default: @@ -941,9 +1007,18 @@ pre_bk(DECL_ARGS) static void post_bk(DECL_ARGS) { - - if (n->type == ROFFT_BODY) + switch (n->type) { + case ROFFT_ELEM: + while ((n = n->parent) != NULL) + if (n->tok == MDOC_Bk) + return; + /* FALLTHROUGH */ + case ROFFT_BODY: outflags &= ~MMAN_Bk; + break; + default: + break; + } } static int @@ -1013,12 +1088,10 @@ post_bl(DECL_ARGS) } -static int +static void pre_br(DECL_ARGS) { - outflags |= MMAN_br; - return 0; } static int @@ -1263,7 +1336,7 @@ post_fo(DECL_ARGS) } static int -pre_ft(DECL_ARGS) +pre_Ft(DECL_ARGS) { pre_syn(n); @@ -1271,6 +1344,14 @@ pre_ft(DECL_ARGS) return 1; } +static void +pre_ft(DECL_ARGS) +{ + print_line(".ft", 0); + print_word(n->child->string); + outflags |= MMAN_nl; +} + static int pre_in(DECL_ARGS) { @@ -1465,33 +1546,63 @@ post_lb(DECL_ARGS) static int pre_lk(DECL_ARGS) { - const struct roff_node *link, *descr; + const struct roff_node *link, *descr, *punct; + int display; - if (NULL == (link = n->child)) + if ((link = n->child) == NULL) return 0; - if (NULL != (descr = link->next)) { + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. */ + if ((descr = link->next) != NULL && descr != punct) { font_push('I'); - while (NULL != descr) { + while (descr != punct) { print_word(descr->string); descr = descr->next; } - print_word(":"); font_pop(); + print_word(":"); } + /* Link target. */ + display = man_strlen(link->string) >= 26; + if (display) { + print_line(".RS", MMAN_Bk_susp); + print_word("6n"); + outflags |= MMAN_nl; + } font_push('B'); print_word(link->string); font_pop(); + + /* Trailing punctuation. */ + while (punct != NULL) { + print_word(punct->string); + punct = punct->next; + } + if (display) + print_line(".RE", MMAN_nl); return 0; } -static int -pre_ll(DECL_ARGS) +static void +pre_onearg(DECL_ARGS) { - - print_line(".ll", 0); - return 1; + outflags |= MMAN_nl; + print_word("."); + outflags &= ~MMAN_spc; + print_word(roff_name[n->tok]); + if (n->child != NULL) + print_word(n->child->string); + outflags |= MMAN_nl; + if (n->tok == ROFF_ce) + for (n = n->child->next; n != NULL; n = n->next) + print_node(meta, n); } static int @@ -1520,7 +1631,7 @@ pre_nm(DECL_ARGS) if (NULL == n->parent->prev) outflags |= MMAN_sp; print_block(".HP", 0); - printf(" %zun", strlen(name) + 1); + printf(" %dn", man_strlen(name) + 1); outflags |= MMAN_nl; } font_push('B'); @@ -1615,22 +1726,17 @@ pre_sm(DECL_ARGS) return 0; } -static int +static void pre_sp(DECL_ARGS) { - - if (MMAN_PP & outflags) { + if (outflags & MMAN_PP) { outflags &= ~MMAN_PP; print_line(".PP", 0); - } else + } else { print_line(".sp", 0); - return 1; -} - -static void -post_sp(DECL_ARGS) -{ - + if (n->child != NULL) + print_word(n->child->string); + } outflags |= MMAN_nl; } @@ -1642,6 +1748,15 @@ pre_sy(DECL_ARGS) return 1; } +static void +pre_ta(DECL_ARGS) +{ + print_line(".ta", 0); + for (n = n->child; n != NULL; n = n->next) + print_word(n->string); + outflags |= MMAN_nl; +} + static int pre_vt(DECL_ARGS) { diff --git a/usr/src/cmd/mandoc/mdoc_markdown.c b/usr/src/cmd/mandoc/mdoc_markdown.c new file mode 100644 index 0000000000..0b0f184821 --- /dev/null +++ b/usr/src/cmd/mandoc/mdoc_markdown.c @@ -0,0 +1,1569 @@ +/* $Id: mdoc_markdown.c,v 1.23 2017/06/14 01:31:26 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <string.h> + +#include "mandoc_aux.h" +#include "mandoc.h" +#include "roff.h" +#include "mdoc.h" +#include "main.h" + +struct md_act { + int (*cond)(struct roff_node *n); + int (*pre)(struct roff_node *n); + void (*post)(struct roff_node *n); + const char *prefix; /* pre-node string constant */ + const char *suffix; /* post-node string constant */ +}; + +static void md_nodelist(struct roff_node *); +static void md_node(struct roff_node *); +static const char *md_stack(char c); +static void md_preword(void); +static void md_rawword(const char *); +static void md_word(const char *); +static void md_named(const char *); +static void md_char(unsigned char); +static void md_uri(const char *); + +static int md_cond_head(struct roff_node *); +static int md_cond_body(struct roff_node *); + +static int md_pre_raw(struct roff_node *); +static int md_pre_word(struct roff_node *); +static int md_pre_skip(struct roff_node *); +static void md_pre_syn(struct roff_node *); +static int md_pre_An(struct roff_node *); +static int md_pre_Ap(struct roff_node *); +static int md_pre_Bd(struct roff_node *); +static int md_pre_Bk(struct roff_node *); +static int md_pre_Bl(struct roff_node *); +static int md_pre_D1(struct roff_node *); +static int md_pre_Dl(struct roff_node *); +static int md_pre_En(struct roff_node *); +static int md_pre_Eo(struct roff_node *); +static int md_pre_Fa(struct roff_node *); +static int md_pre_Fd(struct roff_node *); +static int md_pre_Fn(struct roff_node *); +static int md_pre_Fo(struct roff_node *); +static int md_pre_In(struct roff_node *); +static int md_pre_It(struct roff_node *); +static int md_pre_Lk(struct roff_node *); +static int md_pre_Mt(struct roff_node *); +static int md_pre_Nd(struct roff_node *); +static int md_pre_Nm(struct roff_node *); +static int md_pre_No(struct roff_node *); +static int md_pre_Ns(struct roff_node *); +static int md_pre_Pp(struct roff_node *); +static int md_pre_Rs(struct roff_node *); +static int md_pre_Sh(struct roff_node *); +static int md_pre_Sm(struct roff_node *); +static int md_pre_Vt(struct roff_node *); +static int md_pre_Xr(struct roff_node *); +static int md_pre__T(struct roff_node *); +static int md_pre_br(struct roff_node *); + +static void md_post_raw(struct roff_node *); +static void md_post_word(struct roff_node *); +static void md_post_pc(struct roff_node *); +static void md_post_Bk(struct roff_node *); +static void md_post_Bl(struct roff_node *); +static void md_post_D1(struct roff_node *); +static void md_post_En(struct roff_node *); +static void md_post_Eo(struct roff_node *); +static void md_post_Fa(struct roff_node *); +static void md_post_Fd(struct roff_node *); +static void md_post_Fl(struct roff_node *); +static void md_post_Fn(struct roff_node *); +static void md_post_Fo(struct roff_node *); +static void md_post_In(struct roff_node *); +static void md_post_It(struct roff_node *); +static void md_post_Lb(struct roff_node *); +static void md_post_Nm(struct roff_node *); +static void md_post_Pf(struct roff_node *); +static void md_post_Vt(struct roff_node *); +static void md_post__T(struct roff_node *); + +static const struct md_act __md_acts[MDOC_MAX - MDOC_Dd] = { + { NULL, NULL, NULL, NULL, NULL }, /* Dd */ + { NULL, NULL, NULL, NULL, NULL }, /* Dt */ + { NULL, NULL, NULL, NULL, NULL }, /* Os */ + { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Sh */ + { NULL, md_pre_Sh, NULL, NULL, NULL }, /* Ss */ + { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Pp */ + { md_cond_body, md_pre_D1, md_post_D1, NULL, NULL }, /* D1 */ + { md_cond_body, md_pre_Dl, md_post_D1, NULL, NULL }, /* Dl */ + { md_cond_body, md_pre_Bd, md_post_D1, NULL, NULL }, /* Bd */ + { NULL, NULL, NULL, NULL, NULL }, /* Ed */ + { md_cond_body, md_pre_Bl, md_post_Bl, NULL, NULL }, /* Bl */ + { NULL, NULL, NULL, NULL, NULL }, /* El */ + { NULL, md_pre_It, md_post_It, NULL, NULL }, /* It */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ad */ + { NULL, md_pre_An, NULL, NULL, NULL }, /* An */ + { NULL, md_pre_Ap, NULL, NULL, NULL }, /* Ap */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Ar */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cd */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Cm */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Dv */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Er */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Ev */ + { NULL, NULL, NULL, NULL, NULL }, /* Ex */ + { NULL, md_pre_Fa, md_post_Fa, NULL, NULL }, /* Fa */ + { NULL, md_pre_Fd, md_post_Fd, "**", "**" }, /* Fd */ + { NULL, md_pre_raw, md_post_Fl, "**-", "**" }, /* Fl */ + { NULL, md_pre_Fn, md_post_Fn, NULL, NULL }, /* Fn */ + { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ft */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ic */ + { NULL, md_pre_In, md_post_In, NULL, NULL }, /* In */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Li */ + { md_cond_head, md_pre_Nd, NULL, NULL, NULL }, /* Nd */ + { NULL, md_pre_Nm, md_post_Nm, "**", "**" }, /* Nm */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Op */ + { NULL, md_pre_Fd, md_post_raw, "*", "*" }, /* Ot */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Pa */ + { NULL, NULL, NULL, NULL, NULL }, /* Rv */ + { NULL, NULL, NULL, NULL, NULL }, /* St */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Va */ + { NULL, md_pre_Vt, md_post_Vt, "*", "*" }, /* Vt */ + { NULL, md_pre_Xr, NULL, NULL, NULL }, /* Xr */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %A */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %B */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %D */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %I */ + { NULL, md_pre_raw, md_post_pc, "*", "*" }, /* %J */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %N */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %O */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %P */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %R */ + { NULL, md_pre__T, md_post__T, NULL, NULL }, /* %T */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %V */ + { NULL, NULL, NULL, NULL, NULL }, /* Ac */ + { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Ao */ + { md_cond_body, md_pre_word, md_post_word, "<", ">" }, /* Aq */ + { NULL, NULL, NULL, NULL, NULL }, /* At */ + { NULL, NULL, NULL, NULL, NULL }, /* Bc */ + { NULL, NULL, NULL, NULL, NULL }, /* Bf XXX not implemented */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bo */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Bq */ + { NULL, NULL, NULL, NULL, NULL }, /* Bsx */ + { NULL, NULL, NULL, NULL, NULL }, /* Bx */ + { NULL, NULL, NULL, NULL, NULL }, /* Db */ + { NULL, NULL, NULL, NULL, NULL }, /* Dc */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Do */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Dq */ + { NULL, NULL, NULL, NULL, NULL }, /* Ec */ + { NULL, NULL, NULL, NULL, NULL }, /* Ef */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Em */ + { md_cond_body, md_pre_Eo, md_post_Eo, NULL, NULL }, /* Eo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fx */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Ms */ + { NULL, md_pre_No, NULL, NULL, NULL }, /* No */ + { NULL, md_pre_Ns, NULL, NULL, NULL }, /* Ns */ + { NULL, NULL, NULL, NULL, NULL }, /* Nx */ + { NULL, NULL, NULL, NULL, NULL }, /* Ox */ + { NULL, NULL, NULL, NULL, NULL }, /* Pc */ + { NULL, NULL, md_post_Pf, NULL, NULL }, /* Pf */ + { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Po */ + { md_cond_body, md_pre_word, md_post_word, "(", ")" }, /* Pq */ + { NULL, NULL, NULL, NULL, NULL }, /* Qc */ + { md_cond_body, md_pre_raw, md_post_raw, "'`", "`'" }, /* Ql */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qo */ + { md_cond_body, md_pre_word, md_post_word, "\"", "\"" }, /* Qq */ + { NULL, NULL, NULL, NULL, NULL }, /* Re */ + { md_cond_body, md_pre_Rs, NULL, NULL, NULL }, /* Rs */ + { NULL, NULL, NULL, NULL, NULL }, /* Sc */ + { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* So */ + { md_cond_body, md_pre_word, md_post_word, "'", "'" }, /* Sq */ + { NULL, md_pre_Sm, NULL, NULL, NULL }, /* Sm */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Sx */ + { NULL, md_pre_raw, md_post_raw, "**", "**" }, /* Sy */ + { NULL, md_pre_raw, md_post_raw, "`", "`" }, /* Tn */ + { NULL, NULL, NULL, NULL, NULL }, /* Ux */ + { NULL, NULL, NULL, NULL, NULL }, /* Xc */ + { NULL, NULL, NULL, NULL, NULL }, /* Xo */ + { NULL, md_pre_Fo, md_post_Fo, "**", "**" }, /* Fo */ + { NULL, NULL, NULL, NULL, NULL }, /* Fc */ + { md_cond_body, md_pre_word, md_post_word, "[", "]" }, /* Oo */ + { NULL, NULL, NULL, NULL, NULL }, /* Oc */ + { NULL, md_pre_Bk, md_post_Bk, NULL, NULL }, /* Bk */ + { NULL, NULL, NULL, NULL, NULL }, /* Ek */ + { NULL, NULL, NULL, NULL, NULL }, /* Bt */ + { NULL, NULL, NULL, NULL, NULL }, /* Hf */ + { NULL, md_pre_raw, md_post_raw, "*", "*" }, /* Fr */ + { NULL, NULL, NULL, NULL, NULL }, /* Ud */ + { NULL, NULL, md_post_Lb, NULL, NULL }, /* Lb */ + { NULL, md_pre_Pp, NULL, NULL, NULL }, /* Lp */ + { NULL, md_pre_Lk, NULL, NULL, NULL }, /* Lk */ + { NULL, md_pre_Mt, NULL, NULL, NULL }, /* Mt */ + { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Brq */ + { md_cond_body, md_pre_word, md_post_word, "{", "}" }, /* Bro */ + { NULL, NULL, NULL, NULL, NULL }, /* Brc */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %C */ + { NULL, md_pre_skip, NULL, NULL, NULL }, /* Es */ + { md_cond_body, md_pre_En, md_post_En, NULL, NULL }, /* En */ + { NULL, NULL, NULL, NULL, NULL }, /* Dx */ + { NULL, NULL, md_post_pc, NULL, NULL }, /* %Q */ + { NULL, md_pre_Lk, md_post_pc, NULL, NULL }, /* %U */ + { NULL, NULL, NULL, NULL, NULL }, /* Ta */ +}; +static const struct md_act *const md_acts = __md_acts - MDOC_Dd; + +static int outflags; +#define MD_spc (1 << 0) /* Blank character before next word. */ +#define MD_spc_force (1 << 1) /* Even before trailing punctuation. */ +#define MD_nonl (1 << 2) /* Prevent linebreak in markdown code. */ +#define MD_nl (1 << 3) /* Break markdown code line. */ +#define MD_br (1 << 4) /* Insert an output line break. */ +#define MD_sp (1 << 5) /* Insert a paragraph break. */ +#define MD_Sm (1 << 6) /* Horizontal spacing mode. */ +#define MD_Bk (1 << 7) /* Word keep mode. */ +#define MD_An_split (1 << 8) /* Author mode is "split". */ +#define MD_An_nosplit (1 << 9) /* Author mode is "nosplit". */ + +static int escflags; /* Escape in generated markdown code: */ +#define ESC_BOL (1 << 0) /* "#*+-" near the beginning of a line. */ +#define ESC_NUM (1 << 1) /* "." after a leading number. */ +#define ESC_HYP (1 << 2) /* "(" immediately after "]". */ +#define ESC_SQU (1 << 4) /* "]" when "[" is open. */ +#define ESC_FON (1 << 5) /* "*" immediately after unrelated "*". */ +#define ESC_EOL (1 << 6) /* " " at the and of a line. */ + +static int code_blocks, quote_blocks, list_blocks; +static int outcount; + +void +markdown_mdoc(void *arg, const struct roff_man *mdoc) +{ + outflags = MD_Sm; + md_word(mdoc->meta.title); + if (mdoc->meta.msec != NULL) { + outflags &= ~MD_spc; + md_word("("); + md_word(mdoc->meta.msec); + md_word(")"); + } + md_word("-"); + md_word(mdoc->meta.vol); + if (mdoc->meta.arch != NULL) { + md_word("("); + md_word(mdoc->meta.arch); + md_word(")"); + } + outflags |= MD_sp; + + md_nodelist(mdoc->first->child); + + outflags |= MD_sp; + md_word(mdoc->meta.os); + md_word("-"); + md_word(mdoc->meta.date); + putchar('\n'); +} + +static void +md_nodelist(struct roff_node *n) +{ + while (n != NULL) { + md_node(n); + n = n->next; + } +} + +static void +md_node(struct roff_node *n) +{ + const struct md_act *act; + int cond, process_children; + + if (n->flags & NODE_NOPRT) + return; + + if (outflags & MD_nonl) + outflags &= ~(MD_nl | MD_sp); + else if (outflags & MD_spc && n->flags & NODE_LINE) + outflags |= MD_nl; + + act = NULL; + cond = 0; + process_children = 1; + n->flags &= ~NODE_ENDED; + + if (n->type == ROFFT_TEXT) { + if (n->flags & NODE_DELIMC) + outflags &= ~(MD_spc | MD_spc_force); + else if (outflags & MD_Sm) + outflags |= MD_spc_force; + md_word(n->string); + if (n->flags & NODE_DELIMO) + outflags &= ~(MD_spc | MD_spc_force); + else if (outflags & MD_Sm) + outflags |= MD_spc; + } else if (n->tok < ROFF_MAX) { + switch (n->tok) { + case ROFF_br: + process_children = md_pre_br(n); + break; + case ROFF_sp: + process_children = md_pre_Pp(n); + break; + default: + process_children = 0; + break; + } + } else { + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + act = md_acts + n->tok; + cond = act->cond == NULL || (*act->cond)(n); + if (cond && act->pre != NULL && + (n->end == ENDBODY_NOT || n->child != NULL)) + process_children = (*act->pre)(n); + } + + if (process_children && n->child != NULL) + md_nodelist(n->child); + + if (n->flags & NODE_ENDED) + return; + + if (cond && act->post != NULL) + (*act->post)(n); + + if (n->end != ENDBODY_NOT) + n->body->flags |= NODE_ENDED; +} + +static const char * +md_stack(char c) +{ + static char *stack; + static size_t sz; + static size_t cur; + + switch (c) { + case '\0': + break; + case (char)-1: + assert(cur); + stack[--cur] = '\0'; + break; + default: + if (cur + 1 >= sz) { + sz += 8; + stack = mandoc_realloc(stack, sz); + } + stack[cur] = c; + stack[++cur] = '\0'; + break; + } + return stack == NULL ? "" : stack; +} + +/* + * Handle vertical and horizontal spacing. + */ +static void +md_preword(void) +{ + const char *cp; + + /* + * If a list block is nested inside a code block or a blockquote, + * blank lines for paragraph breaks no longer work; instead, + * they terminate the list. Work around this markdown issue + * by using mere line breaks instead. + */ + + if (list_blocks && outflags & MD_sp) { + outflags &= ~MD_sp; + outflags |= MD_br; + } + + /* + * End the old line if requested. + * Escape whitespace at the end of the markdown line + * such that it won't look like an output line break. + */ + + if (outflags & MD_sp) + putchar('\n'); + else if (outflags & MD_br) { + putchar(' '); + putchar(' '); + } else if (outflags & MD_nl && escflags & ESC_EOL) + md_named("zwnj"); + + /* Start a new line if necessary. */ + + if (outflags & (MD_nl | MD_br | MD_sp)) { + putchar('\n'); + for (cp = md_stack('\0'); *cp != '\0'; cp++) { + putchar(*cp); + if (*cp == '>') + putchar(' '); + } + outflags &= ~(MD_nl | MD_br | MD_sp); + escflags = ESC_BOL; + outcount = 0; + + /* Handle horizontal spacing. */ + + } else if (outflags & MD_spc) { + if (outflags & MD_Bk) + fputs(" ", stdout); + else + putchar(' '); + escflags &= ~ESC_FON; + outcount++; + } + + outflags &= ~(MD_spc_force | MD_nonl); + if (outflags & MD_Sm) + outflags |= MD_spc; + else + outflags &= ~MD_spc; +} + +/* + * Print markdown syntax elements. + * Can also be used for constant strings when neither escaping + * nor delimiter handling is required. + */ +static void +md_rawword(const char *s) +{ + md_preword(); + + if (*s == '\0') + return; + + if (escflags & ESC_FON) { + escflags &= ~ESC_FON; + if (*s == '*' && !code_blocks) + fputs("‌", stdout); + } + + while (*s != '\0') { + switch(*s) { + case '*': + if (s[1] == '\0') + escflags |= ESC_FON; + break; + case '[': + escflags |= ESC_SQU; + break; + case ']': + escflags |= ESC_HYP; + escflags &= ~ESC_SQU; + break; + default: + break; + } + md_char(*s++); + } + if (s[-1] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; +} + +/* + * Print text and mdoc(7) syntax elements. + */ +static void +md_word(const char *s) +{ + const char *seq, *prevfont, *currfont, *nextfont; + char c; + int bs, sz, uc, breakline; + + /* No spacing before closing delimiters. */ + if (s[0] != '\0' && s[1] == '\0' && + strchr("!),.:;?]", s[0]) != NULL && + (outflags & MD_spc_force) == 0) + outflags &= ~MD_spc; + + md_preword(); + + if (*s == '\0') + return; + + /* No spacing after opening delimiters. */ + if ((s[0] == '(' || s[0] == '[') && s[1] == '\0') + outflags &= ~MD_spc; + + breakline = 0; + prevfont = currfont = ""; + while ((c = *s++) != '\0') { + bs = 0; + switch(c) { + case ASCII_NBRSP: + if (code_blocks) + c = ' '; + else { + md_named("nbsp"); + c = '\0'; + } + break; + case ASCII_HYPH: + bs = escflags & ESC_BOL && !code_blocks; + c = '-'; + break; + case ASCII_BREAK: + continue; + case '#': + case '+': + case '-': + bs = escflags & ESC_BOL && !code_blocks; + break; + case '(': + bs = escflags & ESC_HYP && !code_blocks; + break; + case ')': + bs = escflags & ESC_NUM && !code_blocks; + break; + case '*': + case '[': + case '_': + case '`': + bs = !code_blocks; + break; + case '.': + bs = escflags & ESC_NUM && !code_blocks; + break; + case '<': + if (code_blocks == 0) { + md_named("lt"); + c = '\0'; + } + break; + case '=': + if (escflags & ESC_BOL && !code_blocks) { + md_named("equals"); + c = '\0'; + } + break; + case '>': + if (code_blocks == 0) { + md_named("gt"); + c = '\0'; + } + break; + case '\\': + uc = 0; + nextfont = NULL; + switch (mandoc_escape(&s, &seq, &sz)) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(seq + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(seq, sz); + break; + case ESCAPE_SPECIAL: + uc = mchars_spec2cp(seq, sz); + break; + case ESCAPE_FONTBOLD: + nextfont = "**"; + break; + case ESCAPE_FONTITALIC: + nextfont = "*"; + break; + case ESCAPE_FONTBI: + nextfont = "***"; + break; + case ESCAPE_FONT: + case ESCAPE_FONTROMAN: + nextfont = ""; + break; + case ESCAPE_FONTPREV: + nextfont = prevfont; + break; + case ESCAPE_BREAK: + breakline = 1; + break; + case ESCAPE_NOSPACE: + case ESCAPE_SKIPCHAR: + case ESCAPE_OVERSTRIKE: + /* XXX not implemented */ + /* FALLTHROUGH */ + case ESCAPE_ERROR: + default: + break; + } + if (nextfont != NULL && !code_blocks) { + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } + prevfont = currfont; + currfont = nextfont; + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } + } + if (uc) { + if ((uc < 0x20 && uc != 0x09) || + (uc > 0x7E && uc < 0xA0)) + uc = 0xFFFD; + if (code_blocks) { + seq = mchars_uc2str(uc); + fputs(seq, stdout); + outcount += strlen(seq); + } else { + printf("&#%d;", uc); + outcount++; + } + escflags &= ~ESC_FON; + } + c = '\0'; + break; + case ']': + bs = escflags & ESC_SQU && !code_blocks; + escflags |= ESC_HYP; + break; + default: + break; + } + if (bs) + putchar('\\'); + md_char(c); + if (breakline && + (*s == '\0' || *s == ' ' || *s == ASCII_NBRSP)) { + printf(" \n"); + breakline = 0; + while (*s == ' ' || *s == ASCII_NBRSP) + s++; + } + } + if (*currfont != '\0') { + outflags &= ~MD_spc; + md_rawword(currfont); + } else if (s[-2] == ' ') + escflags |= ESC_EOL; + else + escflags &= ~ESC_EOL; +} + +/* + * Print a single HTML named character reference. + */ +static void +md_named(const char *s) +{ + printf("&%s;", s); + escflags &= ~(ESC_FON | ESC_EOL); + outcount++; +} + +/* + * Print a single raw character and maintain certain escape flags. + */ +static void +md_char(unsigned char c) +{ + if (c != '\0') { + putchar(c); + if (c == '*') + escflags |= ESC_FON; + else + escflags &= ~ESC_FON; + outcount++; + } + if (c != ']') + escflags &= ~ESC_HYP; + if (c == ' ' || c == '\t' || c == '>') + return; + if (isdigit(c) == 0) + escflags &= ~ESC_NUM; + else if (escflags & ESC_BOL) + escflags |= ESC_NUM; + escflags &= ~ESC_BOL; +} + +static int +md_cond_head(struct roff_node *n) +{ + return n->type == ROFFT_HEAD; +} + +static int +md_cond_body(struct roff_node *n) +{ + return n->type == ROFFT_BODY; +} + +static int +md_pre_raw(struct roff_node *n) +{ + const char *prefix; + + if ((prefix = md_acts[n->tok].prefix) != NULL) { + md_rawword(prefix); + outflags &= ~MD_spc; + if (*prefix == '`') + code_blocks++; + } + return 1; +} + +static void +md_post_raw(struct roff_node *n) +{ + const char *suffix; + + if ((suffix = md_acts[n->tok].suffix) != NULL) { + outflags &= ~(MD_spc | MD_nl); + md_rawword(suffix); + if (*suffix == '`') + code_blocks--; + } +} + +static int +md_pre_word(struct roff_node *n) +{ + const char *prefix; + + if ((prefix = md_acts[n->tok].prefix) != NULL) { + md_word(prefix); + outflags &= ~MD_spc; + } + return 1; +} + +static void +md_post_word(struct roff_node *n) +{ + const char *suffix; + + if ((suffix = md_acts[n->tok].suffix) != NULL) { + outflags &= ~(MD_spc | MD_nl); + md_word(suffix); + } +} + +static void +md_post_pc(struct roff_node *n) +{ + md_post_raw(n); + if (n->parent->tok != MDOC_Rs) + return; + if (n->next != NULL) { + md_word(","); + if (n->prev != NULL && + n->prev->tok == n->tok && + n->next->tok == n->tok) + md_word("and"); + } else { + md_word("."); + outflags |= MD_nl; + } +} + +static int +md_pre_skip(struct roff_node *n) +{ + return 0; +} + +static void +md_pre_syn(struct roff_node *n) +{ + if (n->prev == NULL || ! (n->flags & NODE_SYNPRETTY)) + return; + + if (n->prev->tok == n->tok && + n->tok != MDOC_Ft && + n->tok != MDOC_Fo && + n->tok != MDOC_Fn) { + outflags |= MD_br; + return; + } + + switch (n->prev->tok) { + case MDOC_Fd: + case MDOC_Fn: + case MDOC_Fo: + case MDOC_In: + case MDOC_Vt: + outflags |= MD_sp; + break; + case MDOC_Ft: + if (n->tok != MDOC_Fn && n->tok != MDOC_Fo) { + outflags |= MD_sp; + break; + } + /* FALLTHROUGH */ + default: + outflags |= MD_br; + break; + } +} + +static int +md_pre_An(struct roff_node *n) +{ + switch (n->norm->An.auth) { + case AUTH_split: + outflags &= ~MD_An_nosplit; + outflags |= MD_An_split; + return 0; + case AUTH_nosplit: + outflags &= ~MD_An_split; + outflags |= MD_An_nosplit; + return 0; + default: + if (outflags & MD_An_split) + outflags |= MD_br; + else if (n->sec == SEC_AUTHORS && + ! (outflags & MD_An_nosplit)) + outflags |= MD_An_split; + return 1; + } +} + +static int +md_pre_Ap(struct roff_node *n) +{ + outflags &= ~MD_spc; + md_word("'"); + outflags &= ~MD_spc; + return 0; +} + +static int +md_pre_Bd(struct roff_node *n) +{ + switch (n->norm->Bd.type) { + case DISP_unfilled: + case DISP_literal: + return md_pre_Dl(n); + default: + return md_pre_D1(n); + } +} + +static int +md_pre_Bk(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + return 1; + case ROFFT_BODY: + outflags |= MD_Bk; + return 1; + default: + return 0; + } +} + +static void +md_post_Bk(struct roff_node *n) +{ + if (n->type == ROFFT_BODY) + outflags &= ~MD_Bk; +} + +static int +md_pre_Bl(struct roff_node *n) +{ + n->norm->Bl.count = 0; + if (n->norm->Bl.type == LIST_column) + md_pre_Dl(n); + outflags |= MD_sp; + return 1; +} + +static void +md_post_Bl(struct roff_node *n) +{ + n->norm->Bl.count = 0; + if (n->norm->Bl.type == LIST_column) + md_post_D1(n); + outflags |= MD_sp; +} + +static int +md_pre_D1(struct roff_node *n) +{ + /* + * Markdown blockquote syntax does not work inside code blocks. + * The best we can do is fall back to another nested code block. + */ + if (code_blocks) { + md_stack('\t'); + code_blocks++; + } else { + md_stack('>'); + quote_blocks++; + } + outflags |= MD_sp; + return 1; +} + +static void +md_post_D1(struct roff_node *n) +{ + md_stack((char)-1); + if (code_blocks) + code_blocks--; + else + quote_blocks--; + outflags |= MD_sp; +} + +static int +md_pre_Dl(struct roff_node *n) +{ + /* + * Markdown code block syntax does not work inside blockquotes. + * The best we can do is fall back to another nested blockquote. + */ + if (quote_blocks) { + md_stack('>'); + quote_blocks++; + } else { + md_stack('\t'); + code_blocks++; + } + outflags |= MD_sp; + return 1; +} + +static int +md_pre_En(struct roff_node *n) +{ + if (n->norm->Es == NULL || + n->norm->Es->child == NULL) + return 1; + + md_word(n->norm->Es->child->string); + outflags &= ~MD_spc; + return 1; +} + +static void +md_post_En(struct roff_node *n) +{ + if (n->norm->Es == NULL || + n->norm->Es->child == NULL || + n->norm->Es->child->next == NULL) + return; + + outflags &= ~MD_spc; + md_word(n->norm->Es->child->next->string); +} + +static int +md_pre_Eo(struct roff_node *n) +{ + if (n->end == ENDBODY_NOT && + n->parent->head->child == NULL && + n->child != NULL && + n->child->end != ENDBODY_NOT) + md_preword(); + else if (n->end != ENDBODY_NOT ? n->child != NULL : + n->parent->head->child != NULL && (n->child != NULL || + (n->parent->tail != NULL && n->parent->tail->child != NULL))) + outflags &= ~(MD_spc | MD_nl); + return 1; +} + +static void +md_post_Eo(struct roff_node *n) +{ + if (n->end != ENDBODY_NOT) { + outflags |= MD_spc; + return; + } + + if (n->child == NULL && n->parent->head->child == NULL) + return; + + if (n->parent->tail != NULL && n->parent->tail->child != NULL) + outflags &= ~MD_spc; + else + outflags |= MD_spc; +} + +static int +md_pre_Fa(struct roff_node *n) +{ + int am_Fa; + + am_Fa = n->tok == MDOC_Fa; + + if (am_Fa) + n = n->child; + + while (n != NULL) { + md_rawword("*"); + outflags &= ~MD_spc; + md_node(n); + outflags &= ~MD_spc; + md_rawword("*"); + if ((n = n->next) != NULL) + md_word(","); + } + return 0; +} + +static void +md_post_Fa(struct roff_node *n) +{ + if (n->next != NULL && n->next->tok == MDOC_Fa) + md_word(","); +} + +static int +md_pre_Fd(struct roff_node *n) +{ + md_pre_syn(n); + md_pre_raw(n); + return 1; +} + +static void +md_post_Fd(struct roff_node *n) +{ + md_post_raw(n); + outflags |= MD_br; +} + +static void +md_post_Fl(struct roff_node *n) +{ + md_post_raw(n); + if (n->child == NULL && n->next != NULL && + n->next->type != ROFFT_TEXT && !(n->next->flags & NODE_LINE)) + outflags &= ~MD_spc; +} + +static int +md_pre_Fn(struct roff_node *n) +{ + md_pre_syn(n); + + if ((n = n->child) == NULL) + return 0; + + md_rawword("**"); + outflags &= ~MD_spc; + md_node(n); + outflags &= ~MD_spc; + md_rawword("**"); + outflags &= ~MD_spc; + md_word("("); + + if ((n = n->next) != NULL) + md_pre_Fa(n); + return 0; +} + +static void +md_post_Fn(struct roff_node *n) +{ + md_word(")"); + if (n->flags & NODE_SYNPRETTY) { + md_word(";"); + outflags |= MD_sp; + } +} + +static int +md_pre_Fo(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + md_pre_syn(n); + break; + case ROFFT_HEAD: + if (n->child == NULL) + return 0; + md_pre_raw(n); + break; + case ROFFT_BODY: + outflags &= ~(MD_spc | MD_nl); + md_word("("); + break; + default: + break; + } + return 1; +} + +static void +md_post_Fo(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_HEAD: + if (n->child != NULL) + md_post_raw(n); + break; + case ROFFT_BODY: + md_post_Fn(n); + break; + default: + break; + } +} + +static int +md_pre_In(struct roff_node *n) +{ + if (n->flags & NODE_SYNPRETTY) { + md_pre_syn(n); + md_rawword("**"); + outflags &= ~MD_spc; + md_word("#include <"); + } else { + md_word("<"); + outflags &= ~MD_spc; + md_rawword("*"); + } + outflags &= ~MD_spc; + return 1; +} + +static void +md_post_In(struct roff_node *n) +{ + if (n->flags & NODE_SYNPRETTY) { + outflags &= ~MD_spc; + md_rawword(">**"); + outflags |= MD_nl; + } else { + outflags &= ~MD_spc; + md_rawword("*>"); + } +} + +static int +md_pre_It(struct roff_node *n) +{ + struct roff_node *bln; + + switch (n->type) { + case ROFFT_BLOCK: + return 1; + + case ROFFT_HEAD: + bln = n->parent->parent; + if (bln->norm->Bl.comp == 0 && + bln->norm->Bl.type != LIST_column) + outflags |= MD_sp; + outflags |= MD_nl; + + switch (bln->norm->Bl.type) { + case LIST_item: + outflags |= MD_br; + return 0; + case LIST_inset: + case LIST_diag: + case LIST_ohang: + outflags |= MD_br; + return 1; + case LIST_tag: + case LIST_hang: + outflags |= MD_sp; + return 1; + case LIST_bullet: + md_rawword("*\t"); + break; + case LIST_dash: + case LIST_hyphen: + md_rawword("-\t"); + break; + case LIST_enum: + md_preword(); + if (bln->norm->Bl.count < 99) + bln->norm->Bl.count++; + printf("%d.\t", bln->norm->Bl.count); + escflags &= ~ESC_FON; + break; + case LIST_column: + outflags |= MD_br; + return 0; + default: + return 0; + } + outflags &= ~MD_spc; + outflags |= MD_nonl; + outcount = 0; + md_stack('\t'); + if (code_blocks || quote_blocks) + list_blocks++; + return 0; + + case ROFFT_BODY: + bln = n->parent->parent; + switch (bln->norm->Bl.type) { + case LIST_ohang: + outflags |= MD_br; + break; + case LIST_tag: + case LIST_hang: + md_pre_D1(n); + break; + default: + break; + } + return 1; + + default: + return 0; + } +} + +static void +md_post_It(struct roff_node *n) +{ + struct roff_node *bln; + int i, nc; + + if (n->type != ROFFT_BODY) + return; + + bln = n->parent->parent; + switch (bln->norm->Bl.type) { + case LIST_bullet: + case LIST_dash: + case LIST_hyphen: + case LIST_enum: + md_stack((char)-1); + if (code_blocks || quote_blocks) + list_blocks--; + break; + case LIST_tag: + case LIST_hang: + md_post_D1(n); + break; + + case LIST_column: + if (n->next == NULL) + break; + + /* Calculate the array index of the current column. */ + + i = 0; + while ((n = n->prev) != NULL && n->type != ROFFT_HEAD) + i++; + + /* + * If a width was specified for this column, + * subtract what printed, and + * add the same spacing as in mdoc_term.c. + */ + + nc = bln->norm->Bl.ncols; + i = i < nc ? strlen(bln->norm->Bl.cols[i]) - outcount + + (nc < 5 ? 4 : nc == 5 ? 3 : 1) : 1; + if (i < 1) + i = 1; + while (i-- > 0) + putchar(' '); + + outflags &= ~MD_spc; + escflags &= ~ESC_FON; + outcount = 0; + break; + + default: + break; + } +} + +static void +md_post_Lb(struct roff_node *n) +{ + if (n->sec == SEC_LIBRARY) + outflags |= MD_br; +} + +static void +md_uri(const char *s) +{ + while (*s != '\0') { + if (strchr("%()<>", *s) != NULL) { + printf("%%%2.2hhX", *s); + outcount += 3; + } else { + putchar(*s); + outcount++; + } + s++; + } +} + +static int +md_pre_Lk(struct roff_node *n) +{ + const struct roff_node *link, *descr, *punct; + + if ((link = n->child) == NULL) + return 0; + + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. */ + descr = link->next; + if (descr == punct) + descr = link; /* no text */ + md_rawword("["); + outflags &= ~MD_spc; + do { + md_word(descr->string); + descr = descr->next; + } while (descr != punct); + outflags &= ~MD_spc; + + /* Link target. */ + md_rawword("]("); + md_uri(link->string); + outflags &= ~MD_spc; + md_rawword(")"); + + /* Trailing punctuation. */ + while (punct != NULL) { + md_word(punct->string); + punct = punct->next; + } + return 0; +} + +static int +md_pre_Mt(struct roff_node *n) +{ + const struct roff_node *nch; + + md_rawword("["); + outflags &= ~MD_spc; + for (nch = n->child; nch != NULL; nch = nch->next) + md_word(nch->string); + outflags &= ~MD_spc; + md_rawword("](mailto:"); + for (nch = n->child; nch != NULL; nch = nch->next) { + md_uri(nch->string); + if (nch->next != NULL) { + putchar(' '); + outcount++; + } + } + outflags &= ~MD_spc; + md_rawword(")"); + return 0; +} + +static int +md_pre_Nd(struct roff_node *n) +{ + outflags &= ~MD_nl; + outflags |= MD_spc; + md_word("-"); + return 1; +} + +static int +md_pre_Nm(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + outflags |= MD_Bk; + md_pre_syn(n); + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + md_pre_raw(n); + break; + default: + break; + } + return 1; +} + +static void +md_post_Nm(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + outflags &= ~MD_Bk; + break; + case ROFFT_HEAD: + case ROFFT_ELEM: + md_post_raw(n); + break; + default: + break; + } +} + +static int +md_pre_No(struct roff_node *n) +{ + outflags |= MD_spc_force; + return 1; +} + +static int +md_pre_Ns(struct roff_node *n) +{ + outflags &= ~MD_spc; + return 0; +} + +static void +md_post_Pf(struct roff_node *n) +{ + if (n->next != NULL && (n->next->flags & NODE_LINE) == 0) + outflags &= ~MD_spc; +} + +static int +md_pre_Pp(struct roff_node *n) +{ + outflags |= MD_sp; + return 0; +} + +static int +md_pre_Rs(struct roff_node *n) +{ + if (n->sec == SEC_SEE_ALSO) + outflags |= MD_sp; + return 1; +} + +static int +md_pre_Sh(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + if (n->sec == SEC_AUTHORS) + outflags &= ~(MD_An_split | MD_An_nosplit); + break; + case ROFFT_HEAD: + outflags |= MD_sp; + md_rawword(n->tok == MDOC_Sh ? "#" : "##"); + break; + case ROFFT_BODY: + outflags |= MD_sp; + break; + default: + break; + } + return 1; +} + +static int +md_pre_Sm(struct roff_node *n) +{ + if (n->child == NULL) + outflags ^= MD_Sm; + else if (strcmp("on", n->child->string) == 0) + outflags |= MD_Sm; + else + outflags &= ~MD_Sm; + + if (outflags & MD_Sm) + outflags |= MD_spc; + + return 0; +} + +static int +md_pre_Vt(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BLOCK: + md_pre_syn(n); + return 1; + case ROFFT_BODY: + case ROFFT_ELEM: + md_pre_raw(n); + return 1; + default: + return 0; + } +} + +static void +md_post_Vt(struct roff_node *n) +{ + switch (n->type) { + case ROFFT_BODY: + case ROFFT_ELEM: + md_post_raw(n); + break; + default: + break; + } +} + +static int +md_pre_Xr(struct roff_node *n) +{ + n = n->child; + if (n == NULL) + return 0; + md_node(n); + n = n->next; + if (n == NULL) + return 0; + outflags &= ~MD_spc; + md_word("("); + md_node(n); + md_word(")"); + return 0; +} + +static int +md_pre__T(struct roff_node *n) +{ + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) + md_word("\""); + else + md_rawword("*"); + outflags &= ~MD_spc; + return 1; +} + +static void +md_post__T(struct roff_node *n) +{ + outflags &= ~MD_spc; + if (n->parent->tok == MDOC_Rs && n->parent->norm->Rs.quote_T) + md_word("\""); + else + md_rawword("*"); + md_post_pc(n); +} + +static int +md_pre_br(struct roff_node *n) +{ + outflags |= MD_br; + return 0; +} diff --git a/usr/src/cmd/mandoc/mdoc_state.c b/usr/src/cmd/mandoc/mdoc_state.c index 4e376ef073..d9cad18b10 100644 --- a/usr/src/cmd/mandoc/mdoc_state.c +++ b/usr/src/cmd/mandoc/mdoc_state.c @@ -1,6 +1,6 @@ -/* $Id: mdoc_state.c,v 1.4 2017/01/10 13:47:00 schwarze Exp $ */ +/* $Id: mdoc_state.c,v 1.8 2017/05/05 15:17:32 schwarze Exp $ */ /* - * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -16,6 +16,7 @@ */ #include <sys/types.h> +#include <assert.h> #include <stdlib.h> #include <string.h> @@ -35,8 +36,7 @@ static void state_dl(STATE_ARGS); static void state_sh(STATE_ARGS); static void state_sm(STATE_ARGS); -static const state_handler state_handlers[MDOC_MAX] = { - NULL, /* Ap */ +static const state_handler __state_handlers[MDOC_MAX - MDOC_Dd] = { NULL, /* Dd */ NULL, /* Dt */ NULL, /* Os */ @@ -52,6 +52,7 @@ static const state_handler state_handlers[MDOC_MAX] = { NULL, /* It */ NULL, /* Ad */ NULL, /* An */ + NULL, /* Ap */ NULL, /* Ar */ NULL, /* Cd */ NULL, /* Cm */ @@ -154,12 +155,10 @@ static const state_handler state_handlers[MDOC_MAX] = { NULL, /* En */ NULL, /* Dx */ NULL, /* %Q */ - NULL, /* br */ - NULL, /* sp */ NULL, /* %U */ NULL, /* Ta */ - NULL, /* ll */ }; +static const state_handler *const state_handlers = __state_handlers - MDOC_Dd; void @@ -167,9 +166,10 @@ mdoc_state(struct roff_man *mdoc, struct roff_node *n) { state_handler handler; - if (n->tok == TOKEN_NONE) + if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) return; + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); if ( ! (mdoc_macros[n->tok].flags & MDOC_PROLOGUE)) mdoc->flags |= MDOC_PBODY; diff --git a/usr/src/cmd/mandoc/mdoc_term.c b/usr/src/cmd/mandoc/mdoc_term.c index e9ea455a48..4e420c5c21 100644 --- a/usr/src/cmd/mandoc/mdoc_term.c +++ b/usr/src/cmd/mandoc/mdoc_term.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_term.c,v 1.346 2017/02/17 19:15:41 schwarze Exp $ */ +/* $Id: mdoc_term.c,v 1.364 2017/06/14 17:51:15 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -106,7 +106,6 @@ static int termp_ft_pre(DECL_ARGS); static int termp_in_pre(DECL_ARGS); static int termp_it_pre(DECL_ARGS); static int termp_li_pre(DECL_ARGS); -static int termp_ll_pre(DECL_ARGS); static int termp_lk_pre(DECL_ARGS); static int termp_nd_pre(DECL_ARGS); static int termp_nm_pre(DECL_ARGS); @@ -116,7 +115,7 @@ static int termp_rs_pre(DECL_ARGS); static int termp_sh_pre(DECL_ARGS); static int termp_skip_pre(DECL_ARGS); static int termp_sm_pre(DECL_ARGS); -static int termp_sp_pre(DECL_ARGS); +static int termp_pp_pre(DECL_ARGS); static int termp_ss_pre(DECL_ARGS); static int termp_sy_pre(DECL_ARGS); static int termp_tag_pre(DECL_ARGS); @@ -125,14 +124,13 @@ static int termp_vt_pre(DECL_ARGS); static int termp_xr_pre(DECL_ARGS); static int termp_xx_pre(DECL_ARGS); -static const struct termact termacts[MDOC_MAX] = { - { termp_ap_pre, NULL }, /* Ap */ +static const struct termact __termacts[MDOC_MAX - MDOC_Dd] = { { NULL, NULL }, /* Dd */ { NULL, NULL }, /* Dt */ { NULL, NULL }, /* Os */ { termp_sh_pre, termp_sh_post }, /* Sh */ { termp_ss_pre, termp_ss_post }, /* Ss */ - { termp_sp_pre, NULL }, /* Pp */ + { termp_pp_pre, NULL }, /* Pp */ { termp_d1_pre, termp_bl_post }, /* D1 */ { termp_d1_pre, termp_bl_post }, /* Dl */ { termp_bd_pre, termp_bd_post }, /* Bd */ @@ -142,6 +140,7 @@ static const struct termact termacts[MDOC_MAX] = { { termp_it_pre, termp_it_post }, /* It */ { termp_under_pre, NULL }, /* Ad */ { termp_an_pre, NULL }, /* An */ + { termp_ap_pre, NULL }, /* Ap */ { termp_under_pre, NULL }, /* Ar */ { termp_cd_pre, NULL }, /* Cd */ { termp_bold_pre, NULL }, /* Cm */ @@ -233,7 +232,7 @@ static const struct termact termacts[MDOC_MAX] = { { termp_under_pre, NULL }, /* Fr */ { NULL, NULL }, /* Ud */ { NULL, termp_lb_post }, /* Lb */ - { termp_sp_pre, NULL }, /* Lp */ + { termp_pp_pre, NULL }, /* Lp */ { termp_lk_pre, NULL }, /* Lk */ { termp_under_pre, NULL }, /* Mt */ { termp_quote_pre, termp_quote_post }, /* Brq */ @@ -244,15 +243,14 @@ static const struct termact termacts[MDOC_MAX] = { { termp_quote_pre, termp_quote_post }, /* En */ { termp_xx_pre, termp_xx_post }, /* Dx */ { NULL, termp____post }, /* %Q */ - { termp_sp_pre, NULL }, /* br */ - { termp_sp_pre, NULL }, /* sp */ { NULL, termp____post }, /* %U */ { NULL, NULL }, /* Ta */ - { termp_ll_pre, NULL }, /* ll */ }; +static const struct termact *const termacts = __termacts - MDOC_Dd; static int fn_prio; + void terminal_mdoc(void *arg, const struct roff_man *mdoc) { @@ -261,9 +259,10 @@ terminal_mdoc(void *arg, const struct roff_man *mdoc) size_t save_defindent; p = (struct termp *)arg; - p->overstep = 0; - p->rmargin = p->maxrmargin = p->defrmargin; - p->tabwidth = term_len(p, 5); + p->tcol->rmargin = p->maxrmargin = p->defrmargin; + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); n = mdoc->first->child; if (p->synopsisonly) { @@ -317,8 +316,8 @@ print_mdoc_node(DECL_ARGS) return; chld = 1; - offset = p->offset; - rmargin = p->rmargin; + offset = p->tcol->offset; + rmargin = p->tcol->rmargin; n->flags &= ~NODE_ENDED; n->prev_font = p->fonti; @@ -342,7 +341,8 @@ print_mdoc_node(DECL_ARGS) switch (n->type) { case ROFFT_TEXT: - if (' ' == *n->string && NODE_LINE & n->flags) + if (*n->string == ' ' && n->flags & NODE_LINE && + (p->flags & TERMP_NONEWLINE) == 0) term_newln(p); if (NODE_DELIMC & n->flags) p->flags |= TERMP_NOSPACE; @@ -363,7 +363,12 @@ print_mdoc_node(DECL_ARGS) term_tbl(p, n->span); break; default: - if (termacts[n->tok].pre && + if (n->tok < ROFF_MAX) { + roff_term_pre(p, n); + return; + } + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); + if (termacts[n->tok].pre != NULL && (n->end == ENDBODY_NOT || n->child != NULL)) chld = (*termacts[n->tok].pre) (p, &npair, meta, n); @@ -384,7 +389,7 @@ print_mdoc_node(DECL_ARGS) case ROFFT_EQN: break; default: - if ( ! termacts[n->tok].post || NODE_ENDED & n->flags) + if (termacts[n->tok].post == NULL || n->flags & NODE_ENDED) break; (void)(*termacts[n->tok].post)(p, &npair, meta, n); @@ -401,10 +406,9 @@ print_mdoc_node(DECL_ARGS) if (NODE_EOS & n->flags) p->flags |= TERMP_SENTENCE; - if (MDOC_ll != n->tok) { - p->offset = offset; - p->rmargin = rmargin; - } + if (n->type != ROFFT_TEXT) + p->tcol->offset = offset; + p->tcol->rmargin = rmargin; } static void @@ -424,9 +428,9 @@ print_mdoc_foot(struct termp *p, const struct roff_meta *meta) term_vspace(p); - p->offset = 0; + p->tcol->offset = 0; sz = term_strlen(p, meta->date); - p->rmargin = p->maxrmargin > sz ? + p->tcol->rmargin = p->maxrmargin > sz ? (p->maxrmargin + term_len(p, 1) - sz) / 2 : 0; p->trailspace = 1; p->flags |= TERMP_NOSPACE | TERMP_NOBREAK; @@ -434,16 +438,16 @@ print_mdoc_foot(struct termp *p, const struct roff_meta *meta) term_word(p, meta->os); term_flushln(p); - p->offset = p->rmargin; + p->tcol->offset = p->tcol->rmargin; sz = term_strlen(p, meta->os); - p->rmargin = p->maxrmargin > sz ? p->maxrmargin - sz : 0; + p->tcol->rmargin = p->maxrmargin > sz ? p->maxrmargin - sz : 0; p->flags |= TERMP_NOSPACE; term_word(p, meta->date); term_flushln(p); - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; p->trailspace = 0; p->flags &= ~TERMP_NOBREAK; p->flags |= TERMP_NOSPACE; @@ -451,8 +455,8 @@ print_mdoc_foot(struct termp *p, const struct roff_meta *meta) term_word(p, meta->os); term_flushln(p); - p->offset = 0; - p->rmargin = p->maxrmargin; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; p->flags = 0; } @@ -492,8 +496,8 @@ print_mdoc_head(struct termp *p, const struct roff_meta *meta) p->flags |= TERMP_NOBREAK | TERMP_NOSPACE; p->trailspace = 1; - p->offset = 0; - p->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? + p->tcol->offset = 0; + p->tcol->rmargin = 2 * (titlen+1) + vollen < p->maxrmargin ? (p->maxrmargin - vollen + term_len(p, 1)) / 2 : vollen < p->maxrmargin ? p->maxrmargin - vollen : 0; @@ -501,26 +505,26 @@ print_mdoc_head(struct termp *p, const struct roff_meta *meta) term_flushln(p); p->flags |= TERMP_NOSPACE; - p->offset = p->rmargin; - p->rmargin = p->offset + vollen + titlen < p->maxrmargin ? - p->maxrmargin - titlen : p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + vollen + titlen < + p->maxrmargin ? p->maxrmargin - titlen : p->maxrmargin; term_word(p, volume); term_flushln(p); p->flags &= ~TERMP_NOBREAK; p->trailspace = 0; - if (p->rmargin + titlen <= p->maxrmargin) { + if (p->tcol->rmargin + titlen <= p->maxrmargin) { p->flags |= TERMP_NOSPACE; - p->offset = p->rmargin; - p->rmargin = p->maxrmargin; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; term_word(p, title); term_flushln(p); } p->flags &= ~TERMP_NOSPACE; - p->offset = 0; - p->rmargin = p->maxrmargin; + p->tcol->offset = 0; + p->tcol->rmargin = p->maxrmargin; free(title); free(volume); } @@ -529,12 +533,14 @@ static int a2width(const struct termp *p, const char *v) { struct roffsu su; + const char *end; - if (a2roffsu(v, &su, SCALE_MAX) < 2) { + end = a2roffsu(v, &su, SCALE_MAX); + if (end == NULL || *end != '\0') { SCALE_HS_INIT(&su, term_strlen(p, v)); su.scale /= term_strlen(p, "0"); } - return term_hspan(p, &su) / 24; + return term_hen(p, &su); } /* @@ -596,14 +602,6 @@ print_bvspace(struct termp *p, static int -termp_ll_pre(DECL_ARGS) -{ - - term_setwidth(p, n->child != NULL ? n->child->string : NULL); - return 0; -} - -static int termp_it_pre(DECL_ARGS) { struct roffsu su; @@ -653,8 +651,8 @@ termp_it_pre(DECL_ARGS) if (bl->norm->Bl.offs != NULL) { offset = a2width(p, bl->norm->Bl.offs); - if (offset < 0 && (size_t)(-offset) > p->offset) - offset = -p->offset; + if (offset < 0 && (size_t)(-offset) > p->tcol->offset) + offset = -p->tcol->offset; else if (offset > SHRT_MAX) offset = 0; } @@ -688,7 +686,7 @@ termp_it_pre(DECL_ARGS) SCALE_HS_INIT(&su, term_strlen(p, bl->norm->Bl.cols[i])); su.scale /= term_strlen(p, "0"); - offset += term_hspan(p, &su) / 24 + dcol; + offset += term_hen(p, &su) + dcol; } /* @@ -706,7 +704,7 @@ termp_it_pre(DECL_ARGS) */ SCALE_HS_INIT(&su, term_strlen(p, bl->norm->Bl.cols[i])); su.scale /= term_strlen(p, "0"); - width = term_hspan(p, &su) / 24 + dcol; + width = term_hen(p, &su) + dcol; break; default: if (NULL == bl->norm->Bl.width) @@ -718,8 +716,8 @@ termp_it_pre(DECL_ARGS) * handling for column for how this changes. */ width = a2width(p, bl->norm->Bl.width) + term_len(p, 2); - if (width < 0 && (size_t)(-width) > p->offset) - width = -p->offset; + if (width < 0 && (size_t)(-width) > p->tcol->offset) + width = -p->tcol->offset; else if (width > SHRT_MAX) width = 0; break; @@ -768,33 +766,15 @@ termp_it_pre(DECL_ARGS) case LIST_bullet: case LIST_dash: case LIST_hyphen: - /* - * Weird special case. - * Some very narrow lists actually hang. - */ - if (width <= (int)term_len(p, 2)) - p->flags |= TERMP_HANG; - if (n->type != ROFFT_HEAD) - break; - p->flags |= TERMP_NOBREAK; - p->trailspace = 1; + if (n->type == ROFFT_HEAD) { + p->flags |= TERMP_NOBREAK | TERMP_HANG; + p->trailspace = 1; + } else if (width <= (int)term_len(p, 2)) + p->flags |= TERMP_NOPAD; break; case LIST_hang: if (n->type != ROFFT_HEAD) break; - - /* - * This is ugly. If `-hang' is specified and the body - * is a `Bl' or `Bd', then we want basically to nullify - * the "overstep" effect in term_flushln() and treat - * this as a `-ohang' list instead. - */ - if (NULL != n->next && - NULL != n->next->child && - (MDOC_Bl == n->next->child->tok || - MDOC_Bd == n->next->child->tok)) - break; - p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; p->trailspace = 1; break; @@ -806,7 +786,7 @@ termp_it_pre(DECL_ARGS) p->trailspace = 2; if (NULL == n->next || NULL == n->next->child) - p->flags |= TERMP_DANGLE; + p->flags |= TERMP_HANG; break; case LIST_column: if (n->type == ROFFT_HEAD) @@ -837,43 +817,31 @@ termp_it_pre(DECL_ARGS) * necessarily lengthened. Everybody gets the offset. */ - p->offset += offset; + p->tcol->offset += offset; switch (type) { - case LIST_hang: - /* - * Same stipulation as above, regarding `-hang'. We - * don't want to recalculate rmargin and offsets when - * using `Bd' or `Bl' within `-hang' overstep lists. - */ - if (n->type == ROFFT_HEAD && - NULL != n->next && - NULL != n->next->child && - (MDOC_Bl == n->next->child->tok || - MDOC_Bd == n->next->child->tok)) - break; - /* FALLTHROUGH */ case LIST_bullet: case LIST_dash: case LIST_enum: case LIST_hyphen: + case LIST_hang: case LIST_tag: if (n->type == ROFFT_HEAD) - p->rmargin = p->offset + width; + p->tcol->rmargin = p->tcol->offset + width; else - p->offset += width; + p->tcol->offset += width; break; case LIST_column: assert(width); - p->rmargin = p->offset + width; + p->tcol->rmargin = p->tcol->offset + width; /* * XXX - this behaviour is not documented: the * right-most column is filled to the right margin. */ if (n->type == ROFFT_HEAD) break; - if (NULL == n->next && p->rmargin < p->maxrmargin) - p->rmargin = p->maxrmargin; + if (n->next == NULL && p->tcol->rmargin < p->maxrmargin) + p->tcol->rmargin = p->maxrmargin; break; default: break; @@ -923,6 +891,7 @@ termp_it_pre(DECL_ARGS) case LIST_column: if (n->type == ROFFT_HEAD) return 0; + p->minbl = 0; break; default: break; @@ -963,8 +932,7 @@ termp_it_post(DECL_ARGS) * has munged them in the meanwhile. */ - p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND | - TERMP_DANGLE | TERMP_HANG); + p->flags &= ~(TERMP_NOBREAK | TERMP_BRTRSP | TERMP_BRIND | TERMP_HANG); p->trailspace = 0; } @@ -979,7 +947,7 @@ termp_nm_pre(DECL_ARGS) } if (n->type == ROFFT_BODY) { - if (NULL == n->child) + if (n->child == NULL) return 0; p->flags |= TERMP_NOSPACE; cp = NULL; @@ -988,9 +956,10 @@ termp_nm_pre(DECL_ARGS) if (cp == NULL) cp = meta->name; if (cp == NULL) - p->offset += term_len(p, 6); + p->tcol->offset += term_len(p, 6); else - p->offset += term_len(p, 1) + term_strlen(p, cp); + p->tcol->offset += term_len(p, 1) + + term_strlen(p, cp); return 1; } @@ -1001,18 +970,18 @@ termp_nm_pre(DECL_ARGS) synopsis_pre(p, n->parent); if (n->type == ROFFT_HEAD && - NULL != n->next && NULL != n->next->child) { + n->next != NULL && n->next->child != NULL) { p->flags |= TERMP_NOSPACE | TERMP_NOBREAK | TERMP_BRIND; p->trailspace = 1; - p->rmargin = p->offset + term_len(p, 1); - if (NULL == n->child) { - p->rmargin += term_strlen(p, meta->name); - } else if (n->child->type == ROFFT_TEXT) { - p->rmargin += term_strlen(p, n->child->string); - if (n->child->next) + p->tcol->rmargin = p->tcol->offset + term_len(p, 1); + if (n->child == NULL) + p->tcol->rmargin += term_strlen(p, meta->name); + else if (n->child->type == ROFFT_TEXT) { + p->tcol->rmargin += term_strlen(p, n->child->string); + if (n->child->next != NULL) p->flags |= TERMP_HANG; } else { - p->rmargin += term_len(p, 5); + p->tcol->rmargin += term_len(p, 5); p->flags |= TERMP_HANG; } } @@ -1135,8 +1104,14 @@ static void termp_bl_post(DECL_ARGS) { - if (n->type == ROFFT_BLOCK) - term_newln(p); + if (n->type != ROFFT_BLOCK) + return; + term_newln(p); + if (n->tok != MDOC_Bl || n->norm->Bl.type != LIST_column) + return; + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); } static int @@ -1278,7 +1253,10 @@ termp_sh_pre(DECL_ARGS) term_fontpush(p, TERMFONT_BOLD); break; case ROFFT_BODY: - p->offset = term_len(p, p->defindent); + p->tcol->offset = term_len(p, p->defindent); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); switch (n->sec) { case SEC_DESCRIPTION: fn_prio = 0; @@ -1306,7 +1284,7 @@ termp_sh_post(DECL_ARGS) break; case ROFFT_BODY: term_newln(p); - p->offset = 0; + p->tcol->offset = 0; break; default: break; @@ -1328,7 +1306,10 @@ termp_d1_pre(DECL_ARGS) if (n->type != ROFFT_BLOCK) return 1; term_newln(p); - p->offset += term_len(p, p->defindent + 1); + p->tcol->offset += term_len(p, p->defindent + 1); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); return 1; } @@ -1356,8 +1337,8 @@ termp_fn_pre(DECL_ARGS) return 0; if (pretty) { - rmargin = p->rmargin; - p->rmargin = p->offset + term_len(p, 4); + rmargin = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + term_len(p, 4); p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; } @@ -1372,8 +1353,9 @@ termp_fn_pre(DECL_ARGS) if (pretty) { term_flushln(p); p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); - p->offset = p->rmargin; - p->rmargin = rmargin; + p->flags |= TERMP_NOPAD; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = rmargin; } p->flags |= TERMP_NOSPACE; @@ -1434,7 +1416,7 @@ termp_fa_pre(DECL_ARGS) static int termp_bd_pre(DECL_ARGS) { - size_t tabwidth, lm, len, rm, rmax; + size_t lm, len; struct roff_node *nn; int offset; @@ -1450,15 +1432,15 @@ termp_bd_pre(DECL_ARGS) ! strcmp(n->norm->Bd.offs, "left")) /* nothing */; else if ( ! strcmp(n->norm->Bd.offs, "indent")) - p->offset += term_len(p, p->defindent + 1); + p->tcol->offset += term_len(p, p->defindent + 1); else if ( ! strcmp(n->norm->Bd.offs, "indent-two")) - p->offset += term_len(p, (p->defindent + 1) * 2); + p->tcol->offset += term_len(p, (p->defindent + 1) * 2); else { offset = a2width(p, n->norm->Bd.offs); - if (offset < 0 && (size_t)(-offset) > p->offset) - p->offset = 0; + if (offset < 0 && (size_t)(-offset) > p->tcol->offset) + p->tcol->offset = 0; else if (offset < SHRT_MAX) - p->offset += offset; + p->tcol->offset += offset; } /* @@ -1469,29 +1451,29 @@ termp_bd_pre(DECL_ARGS) * lines are allowed. */ - if (DISP_literal != n->norm->Bd.type && - DISP_unfilled != n->norm->Bd.type && - DISP_centered != n->norm->Bd.type) + if (n->norm->Bd.type != DISP_literal && + n->norm->Bd.type != DISP_unfilled && + n->norm->Bd.type != DISP_centered) return 1; - tabwidth = p->tabwidth; - if (DISP_literal == n->norm->Bd.type) - p->tabwidth = term_len(p, 8); - - lm = p->offset; - rm = p->rmargin; - rmax = p->maxrmargin; - p->rmargin = p->maxrmargin = TERM_MAXMARGIN; + if (n->norm->Bd.type == DISP_literal) { + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, "8n"); + } - for (nn = n->child; nn; nn = nn->next) { - if (DISP_centered == n->norm->Bd.type) { + lm = p->tcol->offset; + p->flags |= TERMP_BRNEVER; + for (nn = n->child; nn != NULL; nn = nn->next) { + if (n->norm->Bd.type == DISP_centered) { if (nn->type == ROFFT_TEXT) { len = term_strlen(p, nn->string); - p->offset = len >= rm ? 0 : - lm + len >= rm ? rm - len : - (lm + rm - len) / 2; + p->tcol->offset = len >= p->tcol->rmargin ? + 0 : lm + len >= p->tcol->rmargin ? + p->tcol->rmargin - len : + (lm + p->tcol->rmargin - len) / 2; } else - p->offset = lm; + p->tcol->offset = lm; } print_mdoc_node(p, pair, meta, nn); /* @@ -1500,10 +1482,10 @@ termp_bd_pre(DECL_ARGS) * notion of selective eoln whitespace is pretty dumb * anyway, so don't sweat it. */ + if (nn->tok < ROFF_MAX) + continue; switch (nn->tok) { case MDOC_Sm: - case MDOC_br: - case MDOC_sp: case MDOC_Bl: case MDOC_D1: case MDOC_Dl: @@ -1519,33 +1501,21 @@ termp_bd_pre(DECL_ARGS) term_flushln(p); p->flags |= TERMP_NOSPACE; } - - p->tabwidth = tabwidth; - p->rmargin = rm; - p->maxrmargin = rmax; + p->flags &= ~TERMP_BRNEVER; return 0; } static void termp_bd_post(DECL_ARGS) { - size_t rm, rmax; - if (n->type != ROFFT_BODY) return; - - rm = p->rmargin; - rmax = p->maxrmargin; - if (DISP_literal == n->norm->Bd.type || DISP_unfilled == n->norm->Bd.type) - p->rmargin = p->maxrmargin = TERM_MAXMARGIN; - + p->flags |= TERMP_BRNEVER; p->flags |= TERMP_NOSPACE; term_newln(p); - - p->rmargin = rm; - p->maxrmargin = rmax; + p->flags &= ~TERMP_BRNEVER; } static int @@ -1587,10 +1557,13 @@ termp_ss_pre(DECL_ARGS) break; case ROFFT_HEAD: term_fontpush(p, TERMFONT_BOLD); - p->offset = term_len(p, (p->defindent+1)/2); + p->tcol->offset = term_len(p, (p->defindent+1)/2); break; case ROFFT_BODY: - p->offset = term_len(p, p->defindent); + p->tcol->offset = term_len(p, p->defindent); + term_tab_set(p, NULL); + term_tab_set(p, "T"); + term_tab_set(p, ".5i"); break; default: break; @@ -1650,37 +1623,10 @@ termp_in_post(DECL_ARGS) } static int -termp_sp_pre(DECL_ARGS) +termp_pp_pre(DECL_ARGS) { - struct roffsu su; - int i, len; - - switch (n->tok) { - case MDOC_sp: - if (n->child) { - if ( ! a2roffsu(n->child->string, &su, SCALE_VS)) - su.scale = 1.0; - len = term_vspan(p, &su); - } else - len = 1; - break; - case MDOC_br: - len = 0; - break; - default: - len = 1; - fn_prio = 0; - break; - } - - if (0 == len) - term_newln(p); - else if (len < 0) - p->skipvsp -= len; - else - for (i = 0; i < len; i++) - term_vspace(p); - + fn_prio = 0; + term_vspace(p); return 0; } @@ -1861,8 +1807,8 @@ termp_fo_pre(DECL_ARGS) return 1; } else if (n->type == ROFFT_BODY) { if (pretty) { - rmargin = p->rmargin; - p->rmargin = p->offset + term_len(p, 4); + rmargin = p->tcol->rmargin; + p->tcol->rmargin = p->tcol->offset + term_len(p, 4); p->flags |= TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG; } @@ -1873,8 +1819,9 @@ termp_fo_pre(DECL_ARGS) term_flushln(p); p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND | TERMP_HANG); - p->offset = p->rmargin; - p->rmargin = rmargin; + p->flags |= TERMP_NOPAD; + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = rmargin; } return 1; } @@ -1992,26 +1939,50 @@ termp_li_pre(DECL_ARGS) static int termp_lk_pre(DECL_ARGS) { - const struct roff_node *link, *descr; + const struct roff_node *link, *descr, *punct; + int display; - if (NULL == (link = n->child)) + if ((link = n->child) == NULL) return 0; - if (NULL != (descr = link->next)) { + /* Find beginning of trailing punctuation. */ + punct = n->last; + while (punct != link && punct->flags & NODE_DELIMC) + punct = punct->prev; + punct = punct->next; + + /* Link text. */ + if ((descr = link->next) != NULL && descr != punct) { term_fontpush(p, TERMFONT_UNDER); - while (NULL != descr) { + while (descr != punct) { + if (descr->flags & (NODE_DELIMC | NODE_DELIMO)) + p->flags |= TERMP_NOSPACE; term_word(p, descr->string); descr = descr->next; } + term_fontpop(p); p->flags |= TERMP_NOSPACE; term_word(p, ":"); - term_fontpop(p); } + /* Link target. */ + display = term_strlen(p, link->string) >= 26; + if (display) { + term_newln(p); + p->tcol->offset += term_len(p, p->defindent + 1); + } term_fontpush(p, TERMFONT_BOLD); term_word(p, link->string); term_fontpop(p); + /* Trailing punctuation. */ + while (punct != NULL) { + p->flags |= TERMP_NOSPACE; + term_word(p, punct->string); + punct = punct->next; + } + if (display) + term_newln(p); return 0; } diff --git a/usr/src/cmd/mandoc/mdoc_validate.c b/usr/src/cmd/mandoc/mdoc_validate.c index e58e7a4721..3a9b86f3fd 100644 --- a/usr/src/cmd/mandoc/mdoc_validate.c +++ b/usr/src/cmd/mandoc/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $Id: mdoc_validate.c,v 1.318 2017/02/06 03:44:58 schwarze Exp $ */ +/* $Id: mdoc_validate.c,v 1.352 2017/08/02 13:29:04 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -33,6 +33,7 @@ #include "mandoc_aux.h" #include "mandoc.h" +#include "mandoc_xr.h" #include "roff.h" #include "mdoc.h" #include "libmandoc.h" @@ -56,9 +57,11 @@ static void check_text(struct roff_man *, int, int, char *); static void check_argv(struct roff_man *, struct roff_node *, struct mdoc_argv *); static void check_args(struct roff_man *, struct roff_node *); +static void check_toptext(struct roff_man *, int, int, const char *); static int child_an(const struct roff_node *); -static size_t macro2len(int); -static void rewrite_macro2len(char **); +static size_t macro2len(enum roff_tok); +static void rewrite_macro2len(struct roff_man *, char **); +static int similar(const char *, const char *); static void post_an(POST_ARGS); static void post_an_norm(POST_ARGS); @@ -74,6 +77,8 @@ static void post_bx(POST_ARGS); static void post_defaults(POST_ARGS); static void post_display(POST_ARGS); static void post_dd(POST_ARGS); +static void post_delim(POST_ARGS); +static void post_delim_nb(POST_ARGS); static void post_dt(POST_ARGS); static void post_en(POST_ARGS); static void post_es(POST_ARGS); @@ -105,11 +110,12 @@ static void post_sh_authors(POST_ARGS); static void post_sm(POST_ARGS); static void post_st(POST_ARGS); static void post_std(POST_ARGS); +static void post_sx(POST_ARGS); +static void post_useless(POST_ARGS); static void post_xr(POST_ARGS); static void post_xx(POST_ARGS); -static v_post mdoc_valids[MDOC_MAX] = { - NULL, /* Ap */ +static const v_post __mdoc_valids[MDOC_MAX - MDOC_Dd] = { post_dd, /* Dd */ post_dt, /* Dt */ post_os, /* Os */ @@ -123,32 +129,33 @@ static v_post mdoc_valids[MDOC_MAX] = { post_bl, /* Bl */ NULL, /* El */ post_it, /* It */ - NULL, /* Ad */ + post_delim_nb, /* Ad */ post_an, /* An */ + NULL, /* Ap */ post_defaults, /* Ar */ NULL, /* Cd */ - NULL, /* Cm */ - NULL, /* Dv */ - NULL, /* Er */ - NULL, /* Ev */ + post_delim_nb, /* Cm */ + post_delim_nb, /* Dv */ + post_delim_nb, /* Er */ + post_delim_nb, /* Ev */ post_ex, /* Ex */ post_fa, /* Fa */ NULL, /* Fd */ - NULL, /* Fl */ + post_delim_nb, /* Fl */ post_fn, /* Fn */ - NULL, /* Ft */ - NULL, /* Ic */ - NULL, /* In */ + post_delim_nb, /* Ft */ + post_delim_nb, /* Ic */ + post_delim_nb, /* In */ post_defaults, /* Li */ post_nd, /* Nd */ post_nm, /* Nm */ - NULL, /* Op */ + post_delim_nb, /* Op */ post_obsolete, /* Ot */ post_defaults, /* Pa */ post_rv, /* Rv */ post_st, /* St */ - NULL, /* Va */ - NULL, /* Vt */ + post_delim_nb, /* Va */ + post_delim_nb, /* Vt */ post_xr, /* Xr */ NULL, /* %A */ post_hyph, /* %B */ /* FIXME: can be used outside Rs/Re. */ @@ -162,12 +169,12 @@ static v_post mdoc_valids[MDOC_MAX] = { post_hyph, /* %T */ /* FIXME: can be used outside Rs/Re. */ NULL, /* %V */ NULL, /* Ac */ - NULL, /* Ao */ - NULL, /* Aq */ + post_delim_nb, /* Ao */ + post_delim_nb, /* Aq */ post_at, /* At */ NULL, /* Bc */ post_bf, /* Bf */ - NULL, /* Bo */ + post_delim_nb, /* Bo */ NULL, /* Bq */ post_xx, /* Bsx */ post_bx, /* Bx */ @@ -177,66 +184,64 @@ static v_post mdoc_valids[MDOC_MAX] = { NULL, /* Dq */ NULL, /* Ec */ NULL, /* Ef */ - NULL, /* Em */ + post_delim_nb, /* Em */ NULL, /* Eo */ post_xx, /* Fx */ - NULL, /* Ms */ + post_delim_nb, /* Ms */ NULL, /* No */ post_ns, /* Ns */ post_xx, /* Nx */ post_xx, /* Ox */ NULL, /* Pc */ NULL, /* Pf */ - NULL, /* Po */ - NULL, /* Pq */ + post_delim_nb, /* Po */ + post_delim_nb, /* Pq */ NULL, /* Qc */ - NULL, /* Ql */ - NULL, /* Qo */ - NULL, /* Qq */ + post_delim_nb, /* Ql */ + post_delim_nb, /* Qo */ + post_delim_nb, /* Qq */ NULL, /* Re */ post_rs, /* Rs */ NULL, /* Sc */ - NULL, /* So */ - NULL, /* Sq */ + post_delim_nb, /* So */ + post_delim_nb, /* Sq */ post_sm, /* Sm */ - post_hyph, /* Sx */ - NULL, /* Sy */ - NULL, /* Tn */ + post_sx, /* Sx */ + post_delim_nb, /* Sy */ + post_useless, /* Tn */ post_xx, /* Ux */ NULL, /* Xc */ NULL, /* Xo */ post_fo, /* Fo */ NULL, /* Fc */ - NULL, /* Oo */ + post_delim_nb, /* Oo */ NULL, /* Oc */ post_bk, /* Bk */ NULL, /* Ek */ post_eoln, /* Bt */ - NULL, /* Hf */ + post_obsolete, /* Hf */ post_obsolete, /* Fr */ post_eoln, /* Ud */ post_lb, /* Lb */ post_par, /* Lp */ - NULL, /* Lk */ + post_delim_nb, /* Lk */ post_defaults, /* Mt */ - NULL, /* Brq */ - NULL, /* Bro */ + post_delim_nb, /* Brq */ + post_delim_nb, /* Bro */ NULL, /* Brc */ NULL, /* %C */ post_es, /* Es */ post_en, /* En */ post_xx, /* Dx */ NULL, /* %Q */ - post_par, /* br */ - post_par, /* sp */ NULL, /* %U */ NULL, /* Ta */ - NULL, /* ll */ }; +static const v_post *const mdoc_valids = __mdoc_valids - MDOC_Dd; #define RSORD_MAX 14 /* Number of `Rs' blocks. */ -static const int rsord[RSORD_MAX] = { +static const enum roff_tok rsord[RSORD_MAX] = { MDOC__A, MDOC__T, MDOC__B, @@ -284,7 +289,7 @@ void mdoc_node_validate(struct roff_man *mdoc) { struct roff_node *n; - v_post *p; + const v_post *p; n = mdoc->last; mdoc->last = mdoc->last->child; @@ -303,6 +308,11 @@ mdoc_node_validate(struct roff_man *mdoc) if (n->sec != SEC_SYNOPSIS || (n->parent->tok != MDOC_Cd && n->parent->tok != MDOC_Fd)) check_text(mdoc, n->line, n->pos, n->string); + if (n->parent->tok == MDOC_It || + (n->parent->type == ROFFT_BODY && + (n->parent->tok == MDOC_Sh || + n->parent->tok == MDOC_Ss))) + check_toptext(mdoc, n->line, n->pos, n->string); break; case ROFFT_EQN: case ROFFT_TBL: @@ -326,6 +336,20 @@ mdoc_node_validate(struct roff_man *mdoc) /* Call the macro's postprocessor. */ + if (n->tok < ROFF_MAX) { + switch(n->tok) { + case ROFF_br: + case ROFF_sp: + post_par(mdoc); + break; + default: + roff_validate(mdoc); + break; + } + break; + } + + assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); p = mdoc_valids + n->tok; if (*p) (*p)(mdoc); @@ -371,6 +395,164 @@ check_text(struct roff_man *mdoc, int ln, int pos, char *p) } static void +check_toptext(struct roff_man *mdoc, int ln, int pos, const char *p) +{ + const char *cp, *cpr; + + if (*p == '\0') + return; + + if ((cp = strstr(p, "OpenBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Ox"); + if ((cp = strstr(p, "NetBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Nx"); + if ((cp = strstr(p, "FreeBSD")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Fx"); + if ((cp = strstr(p, "DragonFly")) != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + ln, pos + (cp - p), "Dx"); + + cp = p; + while ((cp = strstr(cp + 1, "()")) != NULL) { + for (cpr = cp - 1; cpr >= p; cpr--) + if (*cpr != '_' && !isalnum((unsigned char)*cpr)) + break; + if ((cpr < p || *cpr == ' ') && cpr + 1 < cp) { + cpr++; + mandoc_vmsg(MANDOCERR_FUNC, mdoc->parse, + ln, pos + (cpr - p), + "%.*s()", (int)(cp - cpr), cpr); + } + } +} + +static void +post_delim(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc; + enum mdelim delim; + enum roff_tok tok; + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc < nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + if (*lc == ')' && (tok == MDOC_Nd || tok == MDOC_Sh || + tok == MDOC_Ss || tok == MDOC_Fo)) + return; + + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void +post_delim_nb(POST_ARGS) +{ + const struct roff_node *nch; + const char *lc, *cp; + int nw; + enum mdelim delim; + enum roff_tok tok; + + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; + nch = mdoc->last->last; + if (nch == NULL || nch->type != ROFFT_TEXT) + return; + lc = strchr(nch->string, '\0') - 1; + if (lc <= nch->string) + return; + delim = mdoc_isdelim(lc); + if (delim == DELIM_NONE || delim == DELIM_OPEN) + return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_Po || tok == MDOC_Pq || + tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + + mandoc_vmsg(MANDOCERR_DELIM_NB, mdoc->parse, + nch->line, nch->pos + (lc - nch->string), + "%s%s %s", roff_name[tok], + nch == mdoc->last->child ? "" : " ...", nch->string); +} + +static void post_bl_norm(POST_ARGS) { struct roff_node *n; @@ -450,7 +632,7 @@ post_bl_norm(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bl -width %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.width = argv->value[0]; break; case MDOC_Offset: @@ -465,7 +647,7 @@ post_bl_norm(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bl -offset %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bl.offs = argv->value[0]; break; default: @@ -518,7 +700,7 @@ post_bl_norm(POST_ARGS) switch (n->norm->Bl.type) { case LIST_tag: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) mandoc_msg(MANDOCERR_BL_NOWIDTH, mdoc->parse, n->line, n->pos, "Bl -tag"); break; @@ -527,19 +709,20 @@ post_bl_norm(POST_ARGS) case LIST_ohang: case LIST_inset: case LIST_item: - if (n->norm->Bl.width) + if (n->norm->Bl.width != NULL) mandoc_vmsg(MANDOCERR_BL_SKIPW, mdoc->parse, wa->line, wa->pos, "Bl -%s", mdoc_argnames[mdoclt]); + n->norm->Bl.width = NULL; break; case LIST_bullet: case LIST_dash: case LIST_hyphen: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "2n"; break; case LIST_enum: - if (NULL == n->norm->Bl.width) + if (n->norm->Bl.width == NULL) n->norm->Bl.width = "3n"; break; default: @@ -592,7 +775,7 @@ post_bd(POST_ARGS) mdoc->parse, argv->line, argv->pos, "Bd -offset %s", argv->value[0]); - rewrite_macro2len(argv->value); + rewrite_macro2len(mdoc, argv->value); n->norm->Bd.offs = argv->value[0]; break; case MDOC_Compact: @@ -659,11 +842,11 @@ post_eoln(POST_ARGS) { struct roff_node *n; + post_useless(mdoc); n = mdoc->last; if (n->child != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, - n->line, n->pos, "%s %s", - mdoc_macronames[n->tok], n->child->string); + mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, n->line, + n->pos, "%s %s", roff_name[n->tok], n->child->string); while (n->child != NULL) roff_node_delete(mdoc, n->child); @@ -745,6 +928,8 @@ post_lb(POST_ARGS) struct roff_node *n; const char *p; + post_delim_nb(mdoc); + n = mdoc->last; assert(n->child->type == ROFFT_TEXT); mdoc->next = ROFF_NEXT_CHILD; @@ -757,6 +942,9 @@ post_lb(POST_ARGS) return; } + mandoc_vmsg(MANDOCERR_LB_BAD, mdoc->parse, n->child->line, + n->child->pos, "Lb %s", n->child->string); + roff_word_alloc(mdoc, n->line, n->pos, "library"); mdoc->last->flags = NODE_NOSRC; roff_word_alloc(mdoc, n->line, n->pos, "\\(Lq"); @@ -811,13 +999,15 @@ post_std(POST_ARGS) { struct roff_node *n; + post_delim(mdoc); + n = mdoc->last; if (n->args && n->args->argc == 1) if (n->args->argv[0].arg == MDOC_Std) return; mandoc_msg(MANDOCERR_ARG_STD, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); } static void @@ -852,7 +1042,17 @@ post_obsolete(POST_ARGS) n = mdoc->last; if (n->type == ROFFT_ELEM || n->type == ROFFT_BLOCK) mandoc_msg(MANDOCERR_MACRO_OBS, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); +} + +static void +post_useless(POST_ARGS) +{ + struct roff_node *n; + + n = mdoc->last; + mandoc_msg(MANDOCERR_MACRO_USELESS, mdoc->parse, + n->line, n->pos, roff_name[n->tok]); } /* @@ -968,7 +1168,8 @@ post_fo(POST_ARGS) "Fo ... %s", n->child->next->string); while (n->child != n->last) roff_node_delete(mdoc, n->last); - } + } else + post_delim(mdoc); post_fname(mdoc); } @@ -992,6 +1193,7 @@ post_fa(POST_ARGS) break; } } + post_delim_nb(mdoc); } static void @@ -1001,6 +1203,10 @@ post_nm(POST_ARGS) n = mdoc->last; + if (n->sec == SEC_NAME && n->child != NULL && + n->child->type == ROFFT_TEXT && mdoc->meta.msec != NULL) + mandoc_xr_add(mdoc->meta.msec, n->child->string, -1, -1); + if (n->last != NULL && (n->last->tok == MDOC_Pp || n->last->tok == MDOC_Lp)) @@ -1014,8 +1220,18 @@ post_nm(POST_ARGS) mandoc_msg(MANDOCERR_NM_NONAME, mdoc->parse, n->line, n->pos, "Nm"); - if ((n->type != ROFFT_ELEM && n->type != ROFFT_HEAD) || - (n->child != NULL && n->child->type == ROFFT_TEXT) || + switch (n->type) { + case ROFFT_ELEM: + post_delim_nb(mdoc); + break; + case ROFFT_HEAD: + post_delim(mdoc); + break; + default: + return; + } + + if ((n->child != NULL && n->child->type == ROFFT_TEXT) || mdoc->meta.name == NULL) return; @@ -1035,9 +1251,15 @@ post_nd(POST_ARGS) if (n->type != ROFFT_BODY) return; + if (n->sec != SEC_NAME) + mandoc_msg(MANDOCERR_ND_LATE, mdoc->parse, + n->line, n->pos, "Nd"); + if (n->child == NULL) mandoc_msg(MANDOCERR_ND_EMPTY, mdoc->parse, n->line, n->pos, "Nd"); + else + post_delim(mdoc); post_hyph(mdoc); } @@ -1056,7 +1278,7 @@ post_display(POST_ARGS) roff_node_delete(mdoc, n); } else if (n->child == NULL) mandoc_msg(MANDOCERR_BLK_EMPTY, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); else if (n->tok == MDOC_D1) post_hyph(mdoc); break; @@ -1079,7 +1301,7 @@ post_display(POST_ARGS) if (np->type == ROFFT_BLOCK && np->tok == MDOC_Bd) { mandoc_vmsg(MANDOCERR_BD_NEST, mdoc->parse, n->line, n->pos, - "%s in Bd", mdoc_macronames[n->tok]); + "%s in Bd", roff_name[n->tok]); break; } } @@ -1094,17 +1316,18 @@ post_defaults(POST_ARGS) { struct roff_node *nn; + if (mdoc->last->child != NULL) { + post_delim_nb(mdoc); + return; + } + /* * The `Ar' defaults to "file ..." if no value is provided as an * argument; the `Mt' and `Pa' macros use "~"; the `Li' just * gets an empty string. */ - if (mdoc->last->child != NULL) - return; - nn = mdoc->last; - switch (nn->tok) { case MDOC_Ar: mdoc->next = ROFF_NEXT_CHILD; @@ -1168,6 +1391,8 @@ post_an(POST_ARGS) if (nch == NULL) mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, np->line, np->pos, "An"); + else + post_delim_nb(mdoc); } else if (nch != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, nch->line, nch->pos, "An ... %s", nch->string); @@ -1195,6 +1420,9 @@ post_xx(POST_ARGS) { struct roff_node *n; const char *os; + char *v; + + post_delim_nb(mdoc); n = mdoc->last; switch (n->tok) { @@ -1209,6 +1437,20 @@ post_xx(POST_ARGS) break; case MDOC_Nx: os = "NetBSD"; + if (n->child == NULL) + break; + v = n->child->string; + if ((v[0] != '0' && v[0] != '1') || v[1] != '.' || + v[2] < '0' || v[2] > '9' || + v[3] < 'a' || v[3] > 'z' || v[4] != '\0') + break; + n->child->flags |= NODE_NOPRT; + mdoc->next = ROFF_NEXT_CHILD; + roff_word_alloc(mdoc, n->child->line, n->child->pos, v); + v = mdoc->last->string; + v[3] = toupper((unsigned char)v[3]); + mdoc->last->flags |= NODE_NOSRC; + mdoc->last = n; break; case MDOC_Ox: os = "OpenBSD"; @@ -1265,25 +1507,40 @@ post_it(POST_ARGS) /* FALLTHROUGH */ case LIST_item: if ((nch = nit->head->child) != NULL) - mandoc_vmsg(MANDOCERR_ARG_SKIP, - mdoc->parse, nit->line, nit->pos, - "It %s", nch->string == NULL ? - mdoc_macronames[nch->tok] : nch->string); + mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, + nit->line, nit->pos, "It %s", + nch->string == NULL ? roff_name[nch->tok] : + nch->string); break; case LIST_column: cols = (int)nbl->norm->Bl.ncols; assert(nit->head->child == NULL); - i = 0; - for (nch = nit->child; nch != NULL; nch = nch->next) - if (nch->type == ROFFT_BODY) - i++; + if (nit->head->next->child == NULL && + nit->head->next->next == NULL) { + mandoc_msg(MANDOCERR_MACRO_EMPTY, mdoc->parse, + nit->line, nit->pos, "It"); + roff_node_delete(mdoc, nit); + break; + } + i = 0; + for (nch = nit->child; nch != NULL; nch = nch->next) { + if (nch->type != ROFFT_BODY) + continue; + if (i++ && nch->flags & NODE_LINE) + mandoc_msg(MANDOCERR_TA_LINE, mdoc->parse, + nch->line, nch->pos, "Ta"); + } if (i < cols || i > cols + 1) mandoc_vmsg(MANDOCERR_BL_COL, mdoc->parse, nit->line, nit->pos, "%d columns, %d cells", cols, i); + else if (nit->head->next->child != NULL && + nit->head->next->child->line > nit->line) + mandoc_msg(MANDOCERR_IT_NOARG, mdoc->parse, + nit->line, nit->pos, "Bl -column It"); break; default: abort(); @@ -1306,7 +1563,7 @@ post_bl_block(POST_ARGS) switch (nc->tok) { case MDOC_Pp: case MDOC_Lp: - case MDOC_br: + case ROFF_br: break; default: nc = NULL; @@ -1315,14 +1572,13 @@ post_bl_block(POST_ARGS) if (ni->next == NULL) { mandoc_msg(MANDOCERR_PAR_MOVE, mdoc->parse, nc->line, nc->pos, - mdoc_macronames[nc->tok]); + roff_name[nc->tok]); mdoc_node_relink(mdoc, nc); } else if (n->norm->Bl.comp == 0 && n->norm->Bl.type != LIST_column) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, nc->line, nc->pos, - "%s before It", - mdoc_macronames[nc->tok]); + "%s before It", roff_name[nc->tok]); roff_node_delete(mdoc, nc); } else break; @@ -1335,17 +1591,17 @@ post_bl_block(POST_ARGS) * If the argument of -offset or -width is a macro, * replace it with the associated default width. */ -void -rewrite_macro2len(char **arg) +static void +rewrite_macro2len(struct roff_man *mdoc, char **arg) { size_t width; - int tok; + enum roff_tok tok; if (*arg == NULL) return; else if ( ! strcmp(*arg, "Ds")) width = 6; - else if ((tok = mdoc_hash_find(*arg)) == TOKEN_NONE) + else if ((tok = roffhash_find(mdoc->mdocmac, *arg, 0)) == TOKEN_NONE) return; else width = macro2len(tok); @@ -1423,6 +1679,8 @@ post_bl(POST_ARGS) struct roff_node *nparent, *nprev; /* of the Bl block */ struct roff_node *nblock, *nbody; /* of the Bl */ struct roff_node *nchild, *nnext; /* of the Bl body */ + const char *prev_Er; + int order; nbody = mdoc->last; switch (nbody->type) { @@ -1486,8 +1744,7 @@ post_bl(POST_ARGS) } mandoc_msg(MANDOCERR_BL_MOVE, mdoc->parse, - nchild->line, nchild->pos, - mdoc_macronames[nchild->tok]); + nchild->line, nchild->pos, roff_name[nchild->tok]); /* * Move the node out of the Bl block. @@ -1524,6 +1781,35 @@ post_bl(POST_ARGS) nchild = nnext; } + + if (mdoc->meta.os_e != MANDOC_OS_NETBSD) + return; + + prev_Er = NULL; + for (nchild = nbody->child; nchild != NULL; nchild = nchild->next) { + if (nchild->tok != MDOC_It) + continue; + if ((nnext = nchild->head->child) == NULL) + continue; + if (nnext->type == ROFFT_BLOCK) + nnext = nnext->body->child; + if (nnext == NULL || nnext->tok != MDOC_Er) + continue; + nnext = nnext->child; + if (prev_Er != NULL) { + order = strcmp(prev_Er, nnext->string); + if (order > 0) + mandoc_vmsg(MANDOCERR_ER_ORDER, + mdoc->parse, nnext->line, nnext->pos, + "Er %s %s (NetBSD)", + prev_Er, nnext->string); + else if (order == 0) + mandoc_vmsg(MANDOCERR_ER_REP, + mdoc->parse, nnext->line, nnext->pos, + "Er %s (NetBSD)", prev_Er); + } + prev_Er = nnext->string; + } } static void @@ -1565,7 +1851,7 @@ post_sm(POST_ARGS) mandoc_vmsg(MANDOCERR_SM_BAD, mdoc->parse, nch->line, nch->pos, - "%s %s", mdoc_macronames[mdoc->last->tok], nch->string); + "%s %s", roff_name[mdoc->last->tok], nch->string); mdoc_node_relink(mdoc, nch); return; } @@ -1573,14 +1859,35 @@ post_sm(POST_ARGS) static void post_root(POST_ARGS) { + const char *openbsd_arch[] = { + "alpha", "amd64", "arm64", "armv7", "hppa", "i386", + "landisk", "loongson", "luna88k", "macppc", "mips64", + "octeon", "sgi", "socppc", "sparc64", NULL + }; + const char *netbsd_arch[] = { + "acorn26", "acorn32", "algor", "alpha", "amiga", + "arc", "atari", + "bebox", "cats", "cesfic", "cobalt", "dreamcast", + "emips", "evbarm", "evbmips", "evbppc", "evbsh3", "evbsh5", + "hp300", "hpcarm", "hpcmips", "hpcsh", "hppa", + "i386", "ibmnws", "luna68k", + "mac68k", "macppc", "mipsco", "mmeye", "mvme68k", "mvmeppc", + "netwinder", "news68k", "newsmips", "next68k", + "pc532", "playstation2", "pmax", "pmppc", "prep", + "sandpoint", "sbmips", "sgimips", "shark", + "sparc", "sparc64", "sun2", "sun3", + "vax", "walnut", "x68k", "x86", "x86_64", "xen", NULL + }; + const char **arches[] = { NULL, netbsd_arch, openbsd_arch }; + struct roff_node *n; + const char **arch; /* Add missing prologue data. */ if (mdoc->meta.date == NULL) - mdoc->meta.date = mdoc->quick ? - mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, 0, 0); + mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : + mandoc_normdate(mdoc, NULL, 0, 0); if (mdoc->meta.title == NULL) { mandoc_msg(MANDOCERR_DT_NOTITLE, @@ -1595,12 +1902,33 @@ post_root(POST_ARGS) mandoc_msg(MANDOCERR_OS_MISSING, mdoc->parse, 0, 0, NULL); mdoc->meta.os = mandoc_strdup(""); + } else if (mdoc->meta.os_e && + (mdoc->meta.rcsids & (1 << mdoc->meta.os_e)) == 0) + mandoc_msg(MANDOCERR_RCS_MISSING, mdoc->parse, 0, 0, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + + if (mdoc->meta.arch != NULL && + (arch = arches[mdoc->meta.os_e]) != NULL) { + while (*arch != NULL && strcmp(*arch, mdoc->meta.arch)) + arch++; + if (*arch == NULL) { + n = mdoc->first->child; + while (n->tok != MDOC_Dt) + n = n->next; + n = n->child->next->next; + mandoc_vmsg(MANDOCERR_ARCH_BAD, + mdoc->parse, n->line, n->pos, + "Dt ... %s %s", mdoc->meta.arch, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "(OpenBSD)" : "(NetBSD)"); + } } /* Check that we begin with a proper `Sh'. */ n = mdoc->first->child; - while (n != NULL && n->tok != TOKEN_NONE && + while (n != NULL && n->tok >= MDOC_Dd && mdoc_macros[n->tok].flags & MDOC_PROLOGUE) n = n->next; @@ -1608,7 +1936,7 @@ post_root(POST_ARGS) mandoc_msg(MANDOCERR_DOC_EMPTY, mdoc->parse, 0, 0, NULL); else if (n->tok != MDOC_Sh) mandoc_msg(MANDOCERR_SEC_BEFORE, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); } static void @@ -1642,9 +1970,8 @@ post_rs(POST_ARGS) break; if (i == RSORD_MAX) { - mandoc_msg(MANDOCERR_RS_BAD, - mdoc->parse, nch->line, nch->pos, - mdoc_macronames[nch->tok]); + mandoc_msg(MANDOCERR_RS_BAD, mdoc->parse, + nch->line, nch->pos, roff_name[nch->tok]); i = -1; } else if (nch->tok == MDOC__J || nch->tok == MDOC__B) np->norm->Rs.quote_T++; @@ -1728,10 +2055,20 @@ post_hyph(POST_ARGS) static void post_ns(POST_ARGS) { + struct roff_node *n; - if (mdoc->last->flags & NODE_LINE) + n = mdoc->last; + if (n->flags & NODE_LINE || + (n->next != NULL && n->next->flags & NODE_DELIMC)) mandoc_msg(MANDOCERR_NS_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, NULL); + n->line, n->pos, NULL); +} + +static void +post_sx(POST_ARGS) +{ + post_delim(mdoc); + post_hyph(mdoc); } static void @@ -1797,7 +2134,7 @@ post_sh_name(POST_ARGS) /* FALLTHROUGH */ default: mandoc_msg(MANDOCERR_NAMESEC_BAD, mdoc->parse, - n->line, n->pos, mdoc_macronames[n->tok]); + n->line, n->pos, roff_name[n->tok]); continue; } break; @@ -1867,7 +2204,7 @@ post_sh_see_also(POST_ARGS) if (isalpha((const unsigned char)*name)) return; lastpunct = n->string; - if (n->next == NULL) + if (n->next == NULL || n->next->tok == MDOC_Rs) mandoc_vmsg(MANDOCERR_XR_PUNCT, mdoc->parse, n->line, n->pos, "%s after %s(%s)", lastpunct, lastname, lastsec); @@ -1894,11 +2231,54 @@ post_sh_authors(POST_ARGS) mdoc->last->line, mdoc->last->pos, NULL); } +/* + * Return an upper bound for the string distance (allowing + * transpositions). Not a full Levenshtein implementation + * because Levenshtein is quadratic in the string length + * and this function is called for every standard name, + * so the check for each custom name would be cubic. + * The following crude heuristics is linear, resulting + * in quadratic behaviour for checking one custom name, + * which does not cause measurable slowdown. + */ +static int +similar(const char *s1, const char *s2) +{ + const int maxdist = 3; + int dist = 0; + + while (s1[0] != '\0' && s2[0] != '\0') { + if (s1[0] == s2[0]) { + s1++; + s2++; + continue; + } + if (++dist > maxdist) + return INT_MAX; + if (s1[1] == s2[1]) { /* replacement */ + s1++; + s2++; + } else if (s1[0] == s2[1] && s1[1] == s2[0]) { + s1 += 2; /* transposition */ + s2 += 2; + } else if (s1[0] == s2[1]) /* insertion */ + s2++; + else if (s1[1] == s2[0]) /* deletion */ + s1++; + else + return INT_MAX; + } + dist += strlen(s1) + strlen(s2); + return dist > maxdist ? INT_MAX : dist; +} + static void post_sh_head(POST_ARGS) { struct roff_node *nch; const char *goodsec; + const char *const *testsec; + int dist, mindist; enum roff_sec sec; /* @@ -1918,7 +2298,7 @@ post_sh_head(POST_ARGS) sec != SEC_CUSTOM ? secnames[sec] : (nch = mdoc->last->child) == NULL ? "" : nch->type == ROFFT_TEXT ? nch->string : - mdoc_macronames[nch->tok]); + roff_name[nch->tok]); /* The SYNOPSIS gets special attention in other areas. */ @@ -1936,8 +2316,25 @@ post_sh_head(POST_ARGS) /* We don't care about custom sections after this. */ - if (sec == SEC_CUSTOM) + if (sec == SEC_CUSTOM) { + if ((nch = mdoc->last->child) == NULL || + nch->type != ROFFT_TEXT || nch->next != NULL) + return; + goodsec = NULL; + mindist = INT_MAX; + for (testsec = secnames + 1; *testsec != NULL; testsec++) { + dist = similar(nch->string, *testsec); + if (dist < mindist) { + goodsec = *testsec; + mindist = dist; + } + } + if (goodsec != NULL) + mandoc_vmsg(MANDOCERR_SEC_TYPO, mdoc->parse, + nch->line, nch->pos, "Sh %s instead of %s", + nch->string, goodsec); return; + } /* * Check whether our non-custom section is being repeated or is @@ -2003,9 +2400,15 @@ post_xr(POST_ARGS) if (nch->next == NULL) { mandoc_vmsg(MANDOCERR_XR_NOSEC, mdoc->parse, n->line, n->pos, "Xr %s", nch->string); - return; + } else { + assert(nch->next == n->last); + if(mandoc_xr_add(nch->next->string, nch->string, + nch->line, nch->pos)) + mandoc_vmsg(MANDOCERR_XR_SELF, mdoc->parse, + nch->line, nch->pos, "Xr %s %s", + nch->string, nch->next->string); } - assert(nch->next == n->last); + post_delim_nb(mdoc); } static void @@ -2014,7 +2417,11 @@ post_ignpar(POST_ARGS) struct roff_node *np; switch (mdoc->last->type) { + case ROFFT_BLOCK: + post_prevpar(mdoc); + return; case ROFFT_HEAD: + post_delim(mdoc); post_hyph(mdoc); return; case ROFFT_BODY: @@ -2027,8 +2434,8 @@ post_ignpar(POST_ARGS) if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, - "%s after %s", mdoc_macronames[np->tok], - mdoc_macronames[mdoc->last->tok]); + "%s after %s", roff_name[np->tok], + roff_name[mdoc->last->tok]); roff_node_delete(mdoc, np); } @@ -2036,8 +2443,8 @@ post_ignpar(POST_ARGS) if (np->tok == MDOC_Pp || np->tok == MDOC_Lp) { mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, np->line, np->pos, "%s at the end of %s", - mdoc_macronames[np->tok], - mdoc_macronames[mdoc->last->tok]); + roff_name[np->tok], + roff_name[mdoc->last->tok]); roff_node_delete(mdoc, np); } } @@ -2060,7 +2467,7 @@ post_prevpar(POST_ARGS) if (n->prev->tok != MDOC_Pp && n->prev->tok != MDOC_Lp && - n->prev->tok != MDOC_br) + n->prev->tok != ROFF_br) return; if (n->tok == MDOC_Bl && n->norm->Bl.comp) return; @@ -2070,9 +2477,8 @@ post_prevpar(POST_ARGS) return; mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - n->prev->line, n->prev->pos, - "%s before %s", mdoc_macronames[n->prev->tok], - mdoc_macronames[n->tok]); + n->prev->line, n->prev->pos, "%s before %s", + roff_name[n->prev->tok], roff_name[n->tok]); roff_node_delete(mdoc, n->prev); } @@ -2082,10 +2488,10 @@ post_par(POST_ARGS) struct roff_node *np; np = mdoc->last; - if (np->tok != MDOC_br && np->tok != MDOC_sp) + if (np->tok != ROFF_br && np->tok != ROFF_sp) post_prevpar(mdoc); - if (np->tok == MDOC_sp) { + if (np->tok == ROFF_sp) { if (np->child != NULL && np->child->next != NULL) mandoc_vmsg(MANDOCERR_ARG_EXCESS, mdoc->parse, np->child->next->line, np->child->next->pos, @@ -2093,21 +2499,20 @@ post_par(POST_ARGS) } else if (np->child != NULL) mandoc_vmsg(MANDOCERR_ARG_SKIP, mdoc->parse, np->line, np->pos, "%s %s", - mdoc_macronames[np->tok], np->child->string); + roff_name[np->tok], np->child->string); if ((np = mdoc->last->prev) == NULL) { np = mdoc->last->parent; if (np->tok != MDOC_Sh && np->tok != MDOC_Ss) return; } else if (np->tok != MDOC_Pp && np->tok != MDOC_Lp && - (mdoc->last->tok != MDOC_br || - (np->tok != MDOC_sp && np->tok != MDOC_br))) + (mdoc->last->tok != ROFF_br || + (np->tok != ROFF_sp && np->tok != ROFF_br))) return; mandoc_vmsg(MANDOCERR_PAR_SKIP, mdoc->parse, - mdoc->last->line, mdoc->last->pos, - "%s after %s", mdoc_macronames[mdoc->last->tok], - mdoc_macronames[np->tok]); + mdoc->last->line, mdoc->last->pos, "%s after %s", + roff_name[mdoc->last->tok], roff_name[np->tok]); roff_node_delete(mdoc, mdoc->last); } @@ -2136,7 +2541,7 @@ post_dd(POST_ARGS) if (n->child == NULL || n->child->string[0] == '\0') { mdoc->meta.date = mdoc->quick ? mandoc_strdup("") : - mandoc_normdate(mdoc->parse, NULL, n->line, n->pos); + mandoc_normdate(mdoc, NULL, n->line, n->pos); return; } @@ -2145,7 +2550,7 @@ post_dd(POST_ARGS) if (mdoc->quick) mdoc->meta.date = datestr; else { - mdoc->meta.date = mandoc_normdate(mdoc->parse, + mdoc->meta.date = mandoc_normdate(mdoc, datestr, n->line, n->pos); free(datestr); } @@ -2251,11 +2656,21 @@ static void post_bx(POST_ARGS) { struct roff_node *n, *nch; + const char *macro; + + post_delim_nb(mdoc); n = mdoc->last; nch = n->child; if (nch != NULL) { + macro = !strcmp(nch->string, "Open") ? "Ox" : + !strcmp(nch->string, "Net") ? "Nx" : + !strcmp(nch->string, "Free") ? "Fx" : + !strcmp(nch->string, "DragonFly") ? "Dx" : NULL; + if (macro != NULL) + mandoc_msg(MANDOCERR_BX, mdoc->parse, + n->line, n->pos, macro); mdoc->last = nch; nch = nch->next; mdoc->next = ROFF_NEXT_SIBLING; @@ -2309,6 +2724,8 @@ post_os(POST_ARGS) mandoc_msg(MANDOCERR_PROLOG_LATE, mdoc->parse, n->line, n->pos, "Os"); + post_delim(mdoc); + /* * Set the operating system by way of the `Os' macro. * The order of precedence is: @@ -2322,11 +2739,11 @@ post_os(POST_ARGS) mdoc->meta.os = NULL; deroff(&mdoc->meta.os, n); if (mdoc->meta.os) - return; + goto out; - if (mdoc->defos) { - mdoc->meta.os = mandoc_strdup(mdoc->defos); - return; + if (mdoc->os_s != NULL) { + mdoc->meta.os = mandoc_strdup(mdoc->os_s); + goto out; } #ifdef OSNAME @@ -2343,6 +2760,44 @@ post_os(POST_ARGS) } mdoc->meta.os = mandoc_strdup(defbuf); #endif /*!OSNAME*/ + +out: + if (mdoc->meta.os_e == MANDOC_OS_OTHER) { + if (strstr(mdoc->meta.os, "OpenBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_OPENBSD; + else if (strstr(mdoc->meta.os, "NetBSD") != NULL) + mdoc->meta.os_e = MANDOC_OS_NETBSD; + } + + /* + * This is the earliest point where we can check + * Mdocdate conventions because we don't know + * the operating system earlier. + */ + + if (n->child != NULL) + mandoc_vmsg(MANDOCERR_OS_ARG, mdoc->parse, + n->child->line, n->child->pos, + "Os %s (%s)", n->child->string, + mdoc->meta.os_e == MANDOC_OS_OPENBSD ? + "OpenBSD" : "NetBSD"); + + while (n->tok != MDOC_Dd) + if ((n = n->prev) == NULL) + return; + if ((n = n->child) == NULL) + return; + if (strncmp(n->string, "$" "Mdocdate", 9)) { + if (mdoc->meta.os_e == MANDOC_OS_OPENBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE_MISSING, + mdoc->parse, n->line, n->pos, + "Dd %s (OpenBSD)", n->string); + } else { + if (mdoc->meta.os_e == MANDOC_OS_NETBSD) + mandoc_vmsg(MANDOCERR_MDOCDATE, + mdoc->parse, n->line, n->pos, + "Dd %s (NetBSD)", n->string); + } } enum roff_sec @@ -2358,7 +2813,7 @@ mdoc_a2sec(const char *p) } static size_t -macro2len(int macro) +macro2len(enum roff_tok macro) { switch (macro) { diff --git a/usr/src/cmd/mandoc/out.c b/usr/src/cmd/mandoc/out.c index aff3558372..b2b643787e 100644 --- a/usr/src/cmd/mandoc/out.c +++ b/usr/src/cmd/mandoc/out.c @@ -1,7 +1,7 @@ -/* $Id: out.c,v 1.62 2015/10/12 00:08:16 schwarze Exp $ */ +/* $Id: out.c,v 1.70 2017/06/27 18:25:02 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +20,7 @@ #include <sys/types.h> #include <assert.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <time.h> @@ -29,9 +30,10 @@ #include "out.h" static void tblcalc_data(struct rofftbl *, struct roffcol *, - const struct tbl_opts *, const struct tbl_dat *); + const struct tbl_opts *, const struct tbl_dat *, + size_t); static void tblcalc_literal(struct rofftbl *, struct roffcol *, - const struct tbl_dat *); + const struct tbl_dat *, size_t); static void tblcalc_number(struct rofftbl *, struct roffcol *, const struct tbl_opts *, const struct tbl_dat *); @@ -40,10 +42,10 @@ static void tblcalc_number(struct rofftbl *, struct roffcol *, * Parse the *src string and store a scaling unit into *dst. * If the string doesn't specify the unit, use the default. * If no default is specified, fail. - * Return 2 on complete success, 1 when a conversion was done, - * but there was trailing garbage, and 0 on total failure. + * Return a pointer to the byte after the last byte used, + * or NULL on total failure. */ -int +const char * a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) { char *endptr; @@ -51,7 +53,7 @@ a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) dst->unit = def == SCALE_MAX ? SCALE_BU : def; dst->scale = strtod(src, &endptr); if (endptr == src) - return 0; + return NULL; switch (*endptr++) { case 'c': @@ -84,17 +86,14 @@ a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) case 'v': dst->unit = SCALE_VS; break; - case '\0': - endptr--; - /* FALLTHROUGH */ default: + endptr--; if (SCALE_MAX == def) - return 0; + return NULL; dst->unit = def; break; } - - return *endptr == '\0' ? 2 : 1; + return endptr; } /* @@ -105,8 +104,9 @@ a2roffsu(const char *src, struct roffsu *dst, enum roffscale def) */ void tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, - size_t totalwidth) + size_t offset, size_t rmargin) { + struct roffsu su; const struct tbl_opts *opts; const struct tbl_dat *dp; struct roffcol *col; @@ -141,13 +141,29 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, if (1 < spans) continue; icol = dp->layout->col; - if (maxcol < icol) - maxcol = icol; + while (maxcol < icol) + tbl->cols[++maxcol].spacing = SIZE_MAX; col = tbl->cols + icol; col->flags |= dp->layout->flags; if (dp->layout->flags & TBL_CELL_WIGN) continue; - tblcalc_data(tbl, col, opts, dp); + if (dp->layout->wstr != NULL && + dp->layout->width == 0 && + a2roffsu(dp->layout->wstr, &su, SCALE_EN) + != NULL) + dp->layout->width = + (*tbl->sulen)(&su, tbl->arg); + if (col->width < dp->layout->width) + col->width = dp->layout->width; + if (dp->layout->spacing != SIZE_MAX && + (col->spacing == SIZE_MAX || + col->spacing < dp->layout->spacing)) + col->spacing = dp->layout->spacing; + tblcalc_data(tbl, col, opts, dp, + dp->block == 0 ? 0 : + dp->layout->width ? dp->layout->width : + rmargin ? (rmargin + sp->opts->cols / 2) + / (sp->opts->cols + 1) : 0); } } @@ -161,6 +177,8 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, ewidth = xwidth = 0; for (icol = 0; icol <= maxcol; icol++) { col = tbl->cols + icol; + if (col->spacing == SIZE_MAX || icol == maxcol) + col->spacing = 3; if (col->flags & TBL_CELL_EQUAL) { necol++; if (ewidth < col->width) @@ -184,7 +202,7 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, continue; if (col->width == ewidth) continue; - if (nxcol && totalwidth) + if (nxcol && rmargin) xwidth += ewidth - col->width; col->width = ewidth; } @@ -196,10 +214,13 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, * Distribute the available width evenly. */ - if (nxcol && totalwidth) { - xwidth = totalwidth - xwidth - 3*maxcol - + if (nxcol && rmargin) { + xwidth += 3*maxcol + (opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) ? 2 : !!opts->lvert + !!opts->rvert); + if (rmargin <= offset + xwidth) + return; + xwidth = rmargin - offset - xwidth; /* * Emulate a bug in GNU tbl width calculation that @@ -232,7 +253,7 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp, static void tblcalc_data(struct rofftbl *tbl, struct roffcol *col, - const struct tbl_opts *opts, const struct tbl_dat *dp) + const struct tbl_opts *opts, const struct tbl_dat *dp, size_t mw) { size_t sz; @@ -249,7 +270,7 @@ tblcalc_data(struct rofftbl *tbl, struct roffcol *col, case TBL_CELL_CENTRE: case TBL_CELL_LEFT: case TBL_CELL_RIGHT: - tblcalc_literal(tbl, col, dp); + tblcalc_literal(tbl, col, dp, mw); break; case TBL_CELL_NUMBER: tblcalc_number(tbl, col, opts, dp); @@ -263,16 +284,35 @@ tblcalc_data(struct rofftbl *tbl, struct roffcol *col, static void tblcalc_literal(struct rofftbl *tbl, struct roffcol *col, - const struct tbl_dat *dp) + const struct tbl_dat *dp, size_t mw) { - size_t sz; - const char *str; - - str = dp->string ? dp->string : ""; - sz = (*tbl->slen)(str, tbl->arg); - - if (col->width < sz) - col->width = sz; + const char *str; /* Beginning of the first line. */ + const char *beg; /* Beginning of the current line. */ + char *end; /* End of the current line. */ + size_t lsz; /* Length of the current line. */ + size_t wsz; /* Length of the current word. */ + + if (dp->string == NULL || *dp->string == '\0') + return; + str = mw ? mandoc_strdup(dp->string) : dp->string; + lsz = 0; + for (beg = str; beg != NULL && *beg != '\0'; beg = end) { + end = mw ? strchr(beg, ' ') : NULL; + if (end != NULL) { + *end++ = '\0'; + while (*end == ' ') + end++; + } + wsz = (*tbl->slen)(beg, tbl->arg); + if (mw && lsz && lsz + 1 + wsz <= mw) + lsz += 1 + wsz; + else + lsz = wsz; + if (col->width < lsz) + col->width = lsz; + } + if (mw) + free((void *)str); } static void diff --git a/usr/src/cmd/mandoc/out.h b/usr/src/cmd/mandoc/out.h index 2c1cf3fe97..f6aceb9c13 100644 --- a/usr/src/cmd/mandoc/out.h +++ b/usr/src/cmd/mandoc/out.h @@ -1,6 +1,7 @@ -/* $Id: out.h,v 1.27 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: out.h,v 1.31 2017/06/27 18:25:02 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -32,6 +33,7 @@ enum roffscale { struct roffcol { size_t width; /* width of cell */ size_t decimal; /* decimal position in cell */ + size_t spacing; /* spacing after the column */ int flags; /* layout flags, see tbl_cell */ }; @@ -40,14 +42,16 @@ struct roffsu { double scale; }; +typedef size_t (*tbl_sulen)(const struct roffsu *, void *); typedef size_t (*tbl_strlen)(const char *, void *); typedef size_t (*tbl_len)(size_t, void *); struct rofftbl { + tbl_sulen sulen; /* calculate scaling unit length */ tbl_strlen slen; /* calculate string length */ tbl_len len; /* produce width of empty space */ struct roffcol *cols; /* master column specifiers */ - void *arg; /* passed to slen and len */ + void *arg; /* passed to sulen, slen, and len */ }; #define SCALE_VS_INIT(p, v) \ @@ -63,6 +67,6 @@ struct rofftbl { struct tbl_span; -int a2roffsu(const char *, struct roffsu *, enum roffscale); +const char *a2roffsu(const char *, struct roffsu *, enum roffscale); void tblcalc(struct rofftbl *tbl, - const struct tbl_span *, size_t); + const struct tbl_span *, size_t, size_t); diff --git a/usr/src/cmd/mandoc/read.c b/usr/src/cmd/mandoc/read.c index 3e5d41161a..1af1b28363 100644 --- a/usr/src/cmd/mandoc/read.c +++ b/usr/src/cmd/mandoc/read.c @@ -1,4 +1,4 @@ -/* $Id: read.c,v 1.161 2017/02/18 17:29:28 schwarze Exp $ */ +/* $Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -24,9 +24,6 @@ #include <assert.h> #include <ctype.h> -#if HAVE_ERR -#include <err.h> -#endif #include <errno.h> #include <fcntl.h> #include <stdarg.h> @@ -42,7 +39,6 @@ #include "mdoc.h" #include "man.h" #include "libmandoc.h" -#include "roff_int.h" #define REPARSE_LIMIT 1000 @@ -53,10 +49,10 @@ struct mparse { const char *file; /* filename of current input file */ struct buf *primary; /* buffer currently being parsed */ struct buf *secondary; /* preprocessed copy of input */ - const char *defos; /* default operating system */ + const char *os_s; /* default operating system */ mandocmsg mmsg; /* warning/error message handler */ enum mandoclevel file_status; /* status of current parse */ - enum mandoclevel wlevel; /* ignore messages below this */ + enum mandocerr mmin; /* ignore messages below this */ int options; /* parser options */ int gzip; /* current input file is gzipped */ int filenc; /* encoding of the current file */ @@ -66,7 +62,7 @@ struct mparse { static void choose_parser(struct mparse *); static void resize_buf(struct buf *, size_t); -static void mparse_buf_r(struct mparse *, struct buf, size_t, int); +static int mparse_buf_r(struct mparse *, struct buf, size_t, int); static int read_whole_file(struct mparse *, const char *, int, struct buf *, int *); static void mparse_end(struct mparse *); @@ -75,7 +71,7 @@ static void mparse_parse_buffer(struct mparse *, struct buf, static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { MANDOCERR_OK, - MANDOCERR_WARNING, + MANDOCERR_OK, MANDOCERR_WARNING, MANDOCERR_ERROR, MANDOCERR_UNSUPP, @@ -86,20 +82,46 @@ static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { static const char * const mandocerrs[MANDOCERR_MAX] = { "ok", + "base system convention", + + "Mdocdate found", + "Mdocdate missing", + "unknown architecture", + "operating system explicitly specified", + "RCS id missing", + "referenced manual not found", + + "generic style suggestion", + + "legacy man(7) date format", + "lower case character in document title", + "duplicate RCS id", + "typo in section name", + "unterminated quoted argument", + "useless macro", + "consider using OS macro", + "errnos out of order", + "duplicate errno", + "trailing delimiter", + "no blank before trailing delimiter", + "fill mode already enabled, skipping", + "fill mode already disabled, skipping", + "function name without markup", + "whitespace at end of input line", + "bad comment style", + "generic warning", /* related to the prologue */ "missing manual title, using UNTITLED", "missing manual title, using \"\"", - "lower case character in document title", "missing manual section, using \"\"", "unknown manual section", "missing date, using today's date", "cannot parse date, using it verbatim", + "date in the future, using it anyway", "missing Os macro, using \"\"", - "duplicate prologue macro", "late prologue macro", - "skipping late title macro", "prologue macros out of order", /* related to document structure */ @@ -113,9 +135,11 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "bad NAME section content", "missing comma before name", "missing description line, using \"\"", + "description line outside NAME section", "sections out of conventional order", "duplicate section title", "unexpected section", + "cross reference to self", "unusual Xr order", "unusual Xr punctuation", "AUTHORS section without An macro", @@ -129,9 +153,9 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "blocks badly nested", "nested displays are not portable", "moving content out of list", - "fill mode already enabled, skipping", - "fill mode already disabled, skipping", + "first macro on line", "line scope broken", + "skipping blank line in line scope", /* related to missing macro arguments */ "skipping empty request", @@ -146,6 +170,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "missing function name, using \"\"", "empty head in list item", "empty list item", + "missing argument, using next line", "missing font type, using \\fR", "unknown font type, using \\fR", "nothing follows prefix", @@ -157,7 +182,6 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "missing eqn box, using \"\"", /* related to bad macro arguments */ - "unterminated quoted argument", "duplicate argument", "skipping duplicate argument", "skipping duplicate display type", @@ -167,6 +191,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "unknown AT&T UNIX version", "comma in function argument", "parenthesis in function name", + "unknown library name", "invalid content in Rs block", "invalid Boolean argument", "unknown font, skipping request", @@ -175,9 +200,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { /* related to plain text */ "blank line in fill mode, using .sp", "tab in filled text", - "whitespace at end of input line", "new sentence, new line", - "bad comment style", "invalid escape sequence", "undefined string, using \"\"", @@ -203,6 +226,8 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { /* related to document structure and macros */ NULL, + "duplicate prologue macro", + "skipping late title macro", "input stack limit exceeded, infinite loop?", "skipping bad character", "skipping unknown macro", @@ -219,6 +244,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { "NOT IMPLEMENTED: Bd -file", "skipping display without arguments", "missing list type, using -item", + "argument is not numeric, using 1", "missing manual name, using \"\"", "uname(3) system call failed, using UNKNOWN", "unknown standard specifier", @@ -240,7 +266,7 @@ static const char * const mandocerrs[MANDOCERR_MAX] = { static const char * const mandoclevels[MANDOCLEVEL_MAX] = { "SUCCESS", - "RESERVED", + "STYLE", "WARNING", "ERROR", "UNSUPP", @@ -292,14 +318,15 @@ choose_parser(struct mparse *curp) } if (format == MPARSE_MDOC) { - mdoc_hash_init(); curp->man->macroset = MACROSET_MDOC; - curp->man->first->tok = TOKEN_NONE; + if (curp->man->mdocmac == NULL) + curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); } else { - man_hash_init(); curp->man->macroset = MACROSET_MAN; - curp->man->first->tok = TOKEN_NONE; + if (curp->man->manmac == NULL) + curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); } + curp->man->first->tok = TOKEN_NONE; } /* @@ -309,15 +336,13 @@ choose_parser(struct mparse *curp) * macros, inline equations, and input line traps) * and indirectly (for .so file inclusion). */ -static void +static int mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) { - const struct tbl_span *span; struct buf ln; const char *save_file; char *cp; size_t pos; /* byte number in the ln buffer */ - size_t j; /* auxiliary byte number in the blk buffer */ enum rofferr rr; int of; int lnn; /* line number in the real file */ @@ -399,79 +424,14 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start) continue; } - /* Trailing backslash = a plain char. */ - - if (blk.buf[i] != '\\' || i + 1 == blk.sz) { - ln.buf[pos++] = blk.buf[i++]; - continue; - } - - /* - * Found escape and at least one other character. - * When it's a newline character, skip it. - * When there is a carriage return in between, - * skip that one as well. - */ - - if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz && - '\n' == blk.buf[i + 2]) - ++i; - if ('\n' == blk.buf[i + 1]) { - i += 2; - ++lnn; - continue; - } - - if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) { - j = i; - i += 2; - /* Comment, skip to end of line */ - for (; i < blk.sz; ++i) { - if (blk.buf[i] != '\n') - continue; - if (blk.buf[i - 1] == ' ' || - blk.buf[i - 1] == '\t') - mandoc_msg( - MANDOCERR_SPACE_EOL, - curp, curp->line, - pos + i-1 - j, NULL); - ++i; - ++lnn; - break; - } - - /* Backout trailing whitespaces */ - for (; pos > 0; --pos) { - if (ln.buf[pos - 1] != ' ') - break; - if (pos > 2 && ln.buf[pos - 2] == '\\') - break; - } - break; - } - - /* Catch escaped bogus characters. */ - - c = (unsigned char) blk.buf[i+1]; - - if ( ! (isascii(c) && - (isgraph(c) || isblank(c)))) { - mandoc_vmsg(MANDOCERR_CHAR_BAD, curp, - curp->line, pos, "0x%x", c); - i += 2; - ln.buf[pos++] = '?'; - continue; - } - - /* Some other escape sequence, copy & cont. */ - - ln.buf[pos++] = blk.buf[i++]; ln.buf[pos++] = blk.buf[i++]; } - if (pos >= ln.sz) + if (pos + 1 >= ln.sz) resize_buf(&ln, 256); + if (i == blk.sz || blk.buf[i] == '\0') + ln.buf[pos++] = '\n'; ln.buf[pos] = '\0'; /* @@ -510,13 +470,16 @@ rerun: switch (rr) { case ROFF_REPARSE: - if (REPARSE_LIMIT >= ++curp->reparse_count) - mparse_buf_r(curp, ln, of, 0); - else + if (++curp->reparse_count > REPARSE_LIMIT) mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, pos, NULL); - pos = 0; - continue; + else if (mparse_buf_r(curp, ln, of, 0) == 1 || + start == 1) { + pos = 0; + continue; + } + free(ln.buf); + return 0; case ROFF_APPEND: pos = strlen(ln.buf); continue; @@ -530,7 +493,7 @@ rerun: (i >= blk.sz || blk.buf[i] == '\0')) { curp->sodest = mandoc_strdup(ln.buf + of); free(ln.buf); - return; + return 1; } /* * We remove `so' clauses from our lookaside @@ -566,21 +529,7 @@ rerun: if (curp->man->macroset == MACROSET_NONE) choose_parser(curp); - /* - * Lastly, push down into the parsers themselves. - * If libroff returns ROFF_TBL, then add it to the - * currently open parse. Since we only get here if - * there does exist data (see tbl_data.c), we're - * guaranteed that something's been allocated. - * Do the same for ROFF_EQN. - */ - - if (rr == ROFF_TBL) - while ((span = roff_span(curp->roff)) != NULL) - roff_addtbl(curp->man, span); - else if (rr == ROFF_EQN) - roff_addeqn(curp->man, roff_eqn(curp->roff)); - else if ((curp->man->macroset == MACROSET_MDOC ? + if ((curp->man->macroset == MACROSET_MDOC ? mdoc_parseln(curp->man, curp->line, ln.buf, of) : man_parseln(curp->man, curp->line, ln.buf, of)) == 2) break; @@ -596,6 +545,7 @@ rerun: } free(ln.buf); + return 1; } static int @@ -607,8 +557,11 @@ read_whole_file(struct mparse *curp, const char *file, int fd, size_t off; ssize_t ssz; - if (fstat(fd, &st) == -1) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + if (fstat(fd, &st) == -1) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "fstat: %s", strerror(errno)); + return 0; + } /* * If we're a regular file, try just reading in the whole entry @@ -630,8 +583,11 @@ read_whole_file(struct mparse *curp, const char *file, int fd, } if (curp->gzip) { - if ((gz = gzdopen(fd, "rb")) == NULL) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + if ((gz = gzdopen(fd, "rb")) == NULL) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "gzdopen: %s", strerror(errno)); + return 0; + } } else gz = NULL; @@ -660,8 +616,11 @@ read_whole_file(struct mparse *curp, const char *file, int fd, fb->sz = off; return 1; } - if (ssz == -1) - err((int)MANDOCLEVEL_SYSERR, "%s", file); + if (ssz == -1) { + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, + "read: %s", strerror(errno)); + break; + } off += (size_t)ssz; } @@ -797,29 +756,32 @@ mparse_open(struct mparse *curp, const char *file) } struct mparse * -mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg, - const char *defos) +mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg, + enum mandoc_os os_e, const char *os_s) { struct mparse *curp; curp = mandoc_calloc(1, sizeof(struct mparse)); curp->options = options; - curp->wlevel = wlevel; + curp->mmin = mmin; curp->mmsg = mmsg; - curp->defos = defos; + curp->os_s = os_s; curp->roff = roff_alloc(curp, options); - curp->man = roff_man_alloc( curp->roff, curp, curp->defos, + curp->man = roff_man_alloc(curp->roff, curp, curp->os_s, curp->options & MPARSE_QUICK ? 1 : 0); if (curp->options & MPARSE_MDOC) { - mdoc_hash_init(); curp->man->macroset = MACROSET_MDOC; + if (curp->man->mdocmac == NULL) + curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX); } else if (curp->options & MPARSE_MAN) { - man_hash_init(); curp->man->macroset = MACROSET_MAN; + if (curp->man->manmac == NULL) + curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX); } curp->man->first->tok = TOKEN_NONE; + curp->man->meta.os_e = os_e; return curp; } @@ -843,6 +805,8 @@ void mparse_free(struct mparse *curp) { + roffhash_free(curp->man->mdocmac); + roffhash_free(curp->man->manmac); roff_man_free(curp->man); roff_free(curp->roff); if (curp->secondary) @@ -893,13 +857,13 @@ mandoc_msg(enum mandocerr er, struct mparse *m, { enum mandoclevel level; + if (er < m->mmin && er != MANDOCERR_FILE) + return; + level = MANDOCLEVEL_UNSUPP; while (er < mandoclimits[level]) level--; - if (level < m->wlevel && er != MANDOCERR_FILE) - return; - if (m->mmsg) (*m->mmsg)(er, level, m->file, ln, col, msg); diff --git a/usr/src/cmd/mandoc/roff.c b/usr/src/cmd/mandoc/roff.c index ad55d320e4..e2e498da0c 100644 --- a/usr/src/cmd/mandoc/roff.c +++ b/usr/src/cmd/mandoc/roff.c @@ -1,4 +1,4 @@ -/* $Id: roff.c,v 1.289 2017/02/17 03:03:03 schwarze Exp $ */ +/* $Id: roff.c,v 1.324 2017/07/14 17:16:16 schwarze Exp $ */ /* * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -22,12 +22,15 @@ #include <assert.h> #include <ctype.h> #include <limits.h> +#include <stddef.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "mandoc.h" #include "mandoc_aux.h" +#include "mandoc_ohash.h" #include "roff.h" #include "libmandoc.h" #include "roff_int.h" @@ -36,254 +39,15 @@ /* Maximum number of string expansions per line, to break infinite loops. */ #define EXPAND_LIMIT 1000 -/* --- data types --------------------------------------------------------- */ +/* Types of definitions of macros and strings. */ +#define ROFFDEF_USER (1 << 1) /* User-defined. */ +#define ROFFDEF_PRE (1 << 2) /* Predefined. */ +#define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ +#define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ +#define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ + ROFFDEF_REN | ROFFDEF_STD) -enum rofft { - ROFF_ab, - ROFF_ad, - ROFF_af, - ROFF_aln, - ROFF_als, - ROFF_am, - ROFF_am1, - ROFF_ami, - ROFF_ami1, - ROFF_as, - ROFF_as1, - ROFF_asciify, - ROFF_backtrace, - ROFF_bd, - ROFF_bleedat, - ROFF_blm, - ROFF_box, - ROFF_boxa, - ROFF_bp, - ROFF_BP, - /* MAN_br, MDOC_br */ - ROFF_break, - ROFF_breakchar, - ROFF_brnl, - ROFF_brp, - ROFF_brpnl, - ROFF_c2, - ROFF_cc, - ROFF_ce, - ROFF_cf, - ROFF_cflags, - ROFF_ch, - ROFF_char, - ROFF_chop, - ROFF_class, - ROFF_close, - ROFF_CL, - ROFF_color, - ROFF_composite, - ROFF_continue, - ROFF_cp, - ROFF_cropat, - ROFF_cs, - ROFF_cu, - ROFF_da, - ROFF_dch, - ROFF_Dd, - ROFF_de, - ROFF_de1, - ROFF_defcolor, - ROFF_dei, - ROFF_dei1, - ROFF_device, - ROFF_devicem, - ROFF_di, - ROFF_do, - ROFF_ds, - ROFF_ds1, - ROFF_dwh, - ROFF_dt, - ROFF_ec, - ROFF_ecr, - ROFF_ecs, - ROFF_el, - ROFF_em, - ROFF_EN, - ROFF_eo, - ROFF_EP, - ROFF_EQ, - ROFF_errprint, - ROFF_ev, - ROFF_evc, - ROFF_ex, - ROFF_fallback, - ROFF_fam, - ROFF_fc, - ROFF_fchar, - ROFF_fcolor, - ROFF_fdeferlig, - ROFF_feature, - /* MAN_fi; ignored in mdoc(7) */ - ROFF_fkern, - ROFF_fl, - ROFF_flig, - ROFF_fp, - ROFF_fps, - ROFF_fschar, - ROFF_fspacewidth, - ROFF_fspecial, - /* MAN_ft; ignored in mdoc(7) */ - ROFF_ftr, - ROFF_fzoom, - ROFF_gcolor, - ROFF_hc, - ROFF_hcode, - ROFF_hidechar, - ROFF_hla, - ROFF_hlm, - ROFF_hpf, - ROFF_hpfa, - ROFF_hpfcode, - ROFF_hw, - ROFF_hy, - ROFF_hylang, - ROFF_hylen, - ROFF_hym, - ROFF_hypp, - ROFF_hys, - ROFF_ie, - ROFF_if, - ROFF_ig, - /* MAN_in; ignored in mdoc(7) */ - ROFF_index, - ROFF_it, - ROFF_itc, - ROFF_IX, - ROFF_kern, - ROFF_kernafter, - ROFF_kernbefore, - ROFF_kernpair, - ROFF_lc, - ROFF_lc_ctype, - ROFF_lds, - ROFF_length, - ROFF_letadj, - ROFF_lf, - ROFF_lg, - ROFF_lhang, - ROFF_linetabs, - /* MAN_ll, MDOC_ll */ - ROFF_lnr, - ROFF_lnrf, - ROFF_lpfx, - ROFF_ls, - ROFF_lsm, - ROFF_lt, - ROFF_mc, - ROFF_mediasize, - ROFF_minss, - ROFF_mk, - ROFF_mso, - ROFF_na, - ROFF_ne, - /* MAN_nf; ignored in mdoc(7) */ - ROFF_nh, - ROFF_nhychar, - ROFF_nm, - ROFF_nn, - ROFF_nop, - ROFF_nr, - ROFF_nrf, - ROFF_nroff, - ROFF_ns, - ROFF_nx, - ROFF_open, - ROFF_opena, - ROFF_os, - ROFF_output, - ROFF_padj, - ROFF_papersize, - ROFF_pc, - ROFF_pev, - ROFF_pi, - ROFF_PI, - ROFF_pl, - ROFF_pm, - ROFF_pn, - ROFF_pnr, - ROFF_po, - ROFF_ps, - ROFF_psbb, - ROFF_pshape, - ROFF_pso, - ROFF_ptr, - ROFF_pvs, - ROFF_rchar, - ROFF_rd, - ROFF_recursionlimit, - ROFF_return, - ROFF_rfschar, - ROFF_rhang, - ROFF_rj, - ROFF_rm, - ROFF_rn, - ROFF_rnn, - ROFF_rr, - ROFF_rs, - ROFF_rt, - ROFF_schar, - ROFF_sentchar, - ROFF_shc, - ROFF_shift, - ROFF_sizes, - ROFF_so, - /* MAN_sp, MDOC_sp */ - ROFF_spacewidth, - ROFF_special, - ROFF_spreadwarn, - ROFF_ss, - ROFF_sty, - ROFF_substring, - ROFF_sv, - ROFF_sy, - ROFF_T_, - ROFF_ta, - ROFF_tc, - ROFF_TE, - ROFF_TH, - ROFF_ti, - ROFF_tkf, - ROFF_tl, - ROFF_tm, - ROFF_tm1, - ROFF_tmc, - ROFF_tr, - ROFF_track, - ROFF_transchar, - ROFF_trf, - ROFF_trimat, - ROFF_trin, - ROFF_trnt, - ROFF_troff, - ROFF_TS, - ROFF_uf, - ROFF_ul, - ROFF_unformat, - ROFF_unwatch, - ROFF_unwatchn, - ROFF_vpt, - ROFF_vs, - ROFF_warn, - ROFF_warnscale, - ROFF_watch, - ROFF_watchlength, - ROFF_watchn, - ROFF_wh, - ROFF_while, - ROFF_write, - ROFF_writec, - ROFF_writem, - ROFF_xflag, - ROFF_cblock, - ROFF_USERDEF, - ROFF_MAX -}; +/* --- data types --------------------------------------------------------- */ /* * An incredibly-simple string buffer. @@ -311,21 +75,31 @@ struct roffreg { struct roffreg *next; }; +/* + * Association of request and macro names with token IDs. + */ +struct roffreq { + enum roff_tok tok; + char name[]; +}; + struct roff { struct mparse *parse; /* parse point */ + struct roff_man *man; /* mdoc or man parser */ struct roffnode *last; /* leaf of stack */ int *rstack; /* stack of inverted `ie' values */ + struct ohash *reqtab; /* request lookup table */ struct roffreg *regtab; /* number registers */ struct roffkv *strtab; /* user-defined strings & macros */ + struct roffkv *rentab; /* renamed strings & macros */ struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ struct roffstr *xtab; /* single-byte trans table (`tr') */ const char *current_string; /* value of last called user macro */ struct tbl_node *first_tbl; /* first table parsed */ struct tbl_node *last_tbl; /* last table parsed */ struct tbl_node *tbl; /* current table being parsed */ - struct eqn_node *last_eqn; /* last equation parsed */ - struct eqn_node *first_eqn; /* first equation parsed */ - struct eqn_node *eqn; /* current equation being parsed */ + struct eqn_node *last_eqn; /* equation parser */ + struct eqn_node *eqn; /* active equation parser */ int eqn_inline; /* current equation is inline */ int options; /* parse options */ int rstacksz; /* current size limit of rstack */ @@ -333,10 +107,11 @@ struct roff { int format; /* current file in mdoc or man format */ int argc; /* number of args of the last macro */ char control; /* control character */ + char escape; /* escape character */ }; struct roffnode { - enum rofft tok; /* type of node */ + enum roff_tok tok; /* type of node */ struct roffnode *parent; /* up one in stack */ int line; /* parse line */ int col; /* parse col */ @@ -347,7 +122,7 @@ struct roffnode { }; #define ROFF_ARGS struct roff *r, /* parse ctx */ \ - enum rofft tok, /* tok of macro */ \ + enum roff_tok tok, /* tok of macro */ \ struct buf *buf, /* input buffer */ \ int ln, /* parse line */ \ int ppos, /* original pos in buffer */ \ @@ -357,13 +132,11 @@ struct roffnode { typedef enum rofferr (*roffproc)(ROFF_ARGS); struct roffmac { - const char *name; /* macro name */ roffproc proc; /* process new macro */ roffproc text; /* process as child text of macro */ roffproc sub; /* process as child of macro */ int flags; #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ - struct roffmac *next; }; struct predef { @@ -376,16 +149,16 @@ struct predef { /* --- function prototypes ------------------------------------------------ */ -static enum rofft roffhash_find(const char *, size_t); -static void roffhash_init(void); static void roffnode_cleanscope(struct roff *); static void roffnode_pop(struct roff *); -static void roffnode_push(struct roff *, enum rofft, +static void roffnode_push(struct roff *, enum roff_tok, const char *, int, int); +static void roff_addtbl(struct roff_man *, struct tbl_node *); +static enum rofferr roff_als(ROFF_ARGS); static enum rofferr roff_block(ROFF_ARGS); static enum rofferr roff_block_text(ROFF_ARGS); static enum rofferr roff_block_sub(ROFF_ARGS); -static enum rofferr roff_brp(ROFF_ARGS); +static enum rofferr roff_br(ROFF_ARGS); static enum rofferr roff_cblock(ROFF_ARGS); static enum rofferr roff_cc(ROFF_ARGS); static void roff_ccond(struct roff *, int, int); @@ -393,6 +166,8 @@ static enum rofferr roff_cond(ROFF_ARGS); static enum rofferr roff_cond_text(ROFF_ARGS); static enum rofferr roff_cond_sub(ROFF_ARGS); static enum rofferr roff_ds(ROFF_ARGS); +static enum rofferr roff_ec(ROFF_ARGS); +static enum rofferr roff_eo(ROFF_ARGS); static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); static int roff_evalcond(struct roff *r, int, char *, int *); static int roff_evalnum(struct roff *, int, @@ -411,7 +186,7 @@ static int roff_getregn(const struct roff *, static int roff_getregro(const struct roff *, const char *name); static const char *roff_getstrn(const struct roff *, - const char *, size_t); + const char *, size_t, int *); static int roff_hasregn(const struct roff *, const char *, size_t); static enum rofferr roff_insec(ROFF_ARGS); @@ -419,12 +194,17 @@ static enum rofferr roff_it(ROFF_ARGS); static enum rofferr roff_line_ignore(ROFF_ARGS); static void roff_man_alloc1(struct roff_man *); static void roff_man_free1(struct roff_man *); +static enum rofferr roff_manyarg(ROFF_ARGS); static enum rofferr roff_nr(ROFF_ARGS); -static enum rofft roff_parse(struct roff *, char *, int *, +static enum rofferr roff_onearg(ROFF_ARGS); +static enum roff_tok roff_parse(struct roff *, char *, int *, int, int); -static enum rofferr roff_parsetext(struct buf *, int, int *); +static enum rofferr roff_parsetext(struct roff *, struct buf *, + int, int *); +static enum rofferr roff_renamed(ROFF_ARGS); static enum rofferr roff_res(struct roff *, struct buf *, int, int); static enum rofferr roff_rm(ROFF_ARGS); +static enum rofferr roff_rn(ROFF_ARGS); static enum rofferr roff_rr(ROFF_ARGS); static void roff_setstr(struct roff *, const char *, const char *, int); @@ -433,7 +213,6 @@ static void roff_setstrn(struct roffkv **, const char *, static enum rofferr roff_so(ROFF_ARGS); static enum rofferr roff_tr(ROFF_ARGS); static enum rofferr roff_Dd(ROFF_ARGS); -static enum rofferr roff_TH(ROFF_ARGS); static enum rofferr roff_TE(ROFF_ARGS); static enum rofferr roff_TS(ROFF_ARGS); static enum rofferr roff_EQ(ROFF_ARGS); @@ -444,287 +223,360 @@ static enum rofferr roff_userdef(ROFF_ARGS); /* --- constant data ------------------------------------------------------ */ -/* See roffhash_find() */ - -#define ASCII_HI 126 -#define ASCII_LO 33 -#define HASHWIDTH (ASCII_HI - ASCII_LO + 1) - #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ -static struct roffmac *hash[HASHWIDTH]; - -static struct roffmac roffs[ROFF_MAX] = { - { "ab", roff_unsupp, NULL, NULL, 0, NULL }, - { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, - { "af", roff_line_ignore, NULL, NULL, 0, NULL }, - { "aln", roff_unsupp, NULL, NULL, 0, NULL }, - { "als", roff_unsupp, NULL, NULL, 0, NULL }, - { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "as", roff_ds, NULL, NULL, 0, NULL }, - { "as1", roff_ds, NULL, NULL, 0, NULL }, - { "asciify", roff_unsupp, NULL, NULL, 0, NULL }, - { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL }, - { "bd", roff_line_ignore, NULL, NULL, 0, NULL }, - { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL }, - { "blm", roff_unsupp, NULL, NULL, 0, NULL }, - { "box", roff_unsupp, NULL, NULL, 0, NULL }, - { "boxa", roff_unsupp, NULL, NULL, 0, NULL }, - { "bp", roff_line_ignore, NULL, NULL, 0, NULL }, - { "BP", roff_unsupp, NULL, NULL, 0, NULL }, - { "break", roff_unsupp, NULL, NULL, 0, NULL }, - { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL }, - { "brnl", roff_line_ignore, NULL, NULL, 0, NULL }, - { "brp", roff_brp, NULL, NULL, 0, NULL }, - { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL }, - { "c2", roff_unsupp, NULL, NULL, 0, NULL }, - { "cc", roff_cc, NULL, NULL, 0, NULL }, - { "ce", roff_line_ignore, NULL, NULL, 0, NULL }, - { "cf", roff_insec, NULL, NULL, 0, NULL }, - { "cflags", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ch", roff_line_ignore, NULL, NULL, 0, NULL }, - { "char", roff_unsupp, NULL, NULL, 0, NULL }, - { "chop", roff_unsupp, NULL, NULL, 0, NULL }, - { "class", roff_line_ignore, NULL, NULL, 0, NULL }, - { "close", roff_insec, NULL, NULL, 0, NULL }, - { "CL", roff_unsupp, NULL, NULL, 0, NULL }, - { "color", roff_line_ignore, NULL, NULL, 0, NULL }, - { "composite", roff_unsupp, NULL, NULL, 0, NULL }, - { "continue", roff_unsupp, NULL, NULL, 0, NULL }, - { "cp", roff_line_ignore, NULL, NULL, 0, NULL }, - { "cropat", roff_line_ignore, NULL, NULL, 0, NULL }, - { "cs", roff_line_ignore, NULL, NULL, 0, NULL }, - { "cu", roff_line_ignore, NULL, NULL, 0, NULL }, - { "da", roff_unsupp, NULL, NULL, 0, NULL }, - { "dch", roff_unsupp, NULL, NULL, 0, NULL }, - { "Dd", roff_Dd, NULL, NULL, 0, NULL }, - { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL }, - { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "device", roff_unsupp, NULL, NULL, 0, NULL }, - { "devicem", roff_unsupp, NULL, NULL, 0, NULL }, - { "di", roff_unsupp, NULL, NULL, 0, NULL }, - { "do", roff_unsupp, NULL, NULL, 0, NULL }, - { "ds", roff_ds, NULL, NULL, 0, NULL }, - { "ds1", roff_ds, NULL, NULL, 0, NULL }, - { "dwh", roff_unsupp, NULL, NULL, 0, NULL }, - { "dt", roff_unsupp, NULL, NULL, 0, NULL }, - { "ec", roff_unsupp, NULL, NULL, 0, NULL }, - { "ecr", roff_unsupp, NULL, NULL, 0, NULL }, - { "ecs", roff_unsupp, NULL, NULL, 0, NULL }, - { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, - { "em", roff_unsupp, NULL, NULL, 0, NULL }, - { "EN", roff_EN, NULL, NULL, 0, NULL }, - { "eo", roff_unsupp, NULL, NULL, 0, NULL }, - { "EP", roff_unsupp, NULL, NULL, 0, NULL }, - { "EQ", roff_EQ, NULL, NULL, 0, NULL }, - { "errprint", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ev", roff_unsupp, NULL, NULL, 0, NULL }, - { "evc", roff_unsupp, NULL, NULL, 0, NULL }, - { "ex", roff_unsupp, NULL, NULL, 0, NULL }, - { "fallback", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fam", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fc", roff_unsupp, NULL, NULL, 0, NULL }, - { "fchar", roff_unsupp, NULL, NULL, 0, NULL }, - { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL }, - { "feature", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fkern", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fl", roff_line_ignore, NULL, NULL, 0, NULL }, - { "flig", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fp", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fps", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fschar", roff_unsupp, NULL, NULL, 0, NULL }, - { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ftr", roff_line_ignore, NULL, NULL, 0, NULL }, - { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL }, - { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hc", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hcode", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hla", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hlm", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hpf", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hw", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hylang", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hylen", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hym", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hypp", roff_line_ignore, NULL, NULL, 0, NULL }, - { "hys", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, - { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, - { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, - { "index", roff_unsupp, NULL, NULL, 0, NULL }, - { "it", roff_it, NULL, NULL, 0, NULL }, - { "itc", roff_unsupp, NULL, NULL, 0, NULL }, - { "IX", roff_line_ignore, NULL, NULL, 0, NULL }, - { "kern", roff_line_ignore, NULL, NULL, 0, NULL }, - { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL }, - { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL }, - { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL }, - { "lc", roff_unsupp, NULL, NULL, 0, NULL }, - { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL }, - { "lds", roff_unsupp, NULL, NULL, 0, NULL }, - { "length", roff_unsupp, NULL, NULL, 0, NULL }, - { "letadj", roff_line_ignore, NULL, NULL, 0, NULL }, - { "lf", roff_insec, NULL, NULL, 0, NULL }, - { "lg", roff_line_ignore, NULL, NULL, 0, NULL }, - { "lhang", roff_line_ignore, NULL, NULL, 0, NULL }, - { "linetabs", roff_unsupp, NULL, NULL, 0, NULL }, - { "lnr", roff_unsupp, NULL, NULL, 0, NULL }, - { "lnrf", roff_unsupp, NULL, NULL, 0, NULL }, - { "lpfx", roff_unsupp, NULL, NULL, 0, NULL }, - { "ls", roff_line_ignore, NULL, NULL, 0, NULL }, - { "lsm", roff_unsupp, NULL, NULL, 0, NULL }, - { "lt", roff_line_ignore, NULL, NULL, 0, NULL }, - { "mc", roff_line_ignore, NULL, NULL, 0, NULL }, - { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL }, - { "minss", roff_line_ignore, NULL, NULL, 0, NULL }, - { "mk", roff_line_ignore, NULL, NULL, 0, NULL }, - { "mso", roff_insec, NULL, NULL, 0, NULL }, - { "na", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, - { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, - { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL }, - { "nm", roff_unsupp, NULL, NULL, 0, NULL }, - { "nn", roff_unsupp, NULL, NULL, 0, NULL }, - { "nop", roff_unsupp, NULL, NULL, 0, NULL }, - { "nr", roff_nr, NULL, NULL, 0, NULL }, - { "nrf", roff_unsupp, NULL, NULL, 0, NULL }, - { "nroff", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, - { "nx", roff_insec, NULL, NULL, 0, NULL }, - { "open", roff_insec, NULL, NULL, 0, NULL }, - { "opena", roff_insec, NULL, NULL, 0, NULL }, - { "os", roff_line_ignore, NULL, NULL, 0, NULL }, - { "output", roff_unsupp, NULL, NULL, 0, NULL }, - { "padj", roff_line_ignore, NULL, NULL, 0, NULL }, - { "papersize", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pc", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pev", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pi", roff_insec, NULL, NULL, 0, NULL }, - { "PI", roff_unsupp, NULL, NULL, 0, NULL }, - { "pl", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pm", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pn", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pnr", roff_line_ignore, NULL, NULL, 0, NULL }, - { "po", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, - { "psbb", roff_unsupp, NULL, NULL, 0, NULL }, - { "pshape", roff_unsupp, NULL, NULL, 0, NULL }, - { "pso", roff_insec, NULL, NULL, 0, NULL }, - { "ptr", roff_line_ignore, NULL, NULL, 0, NULL }, - { "pvs", roff_line_ignore, NULL, NULL, 0, NULL }, - { "rchar", roff_unsupp, NULL, NULL, 0, NULL }, - { "rd", roff_line_ignore, NULL, NULL, 0, NULL }, - { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL }, - { "return", roff_unsupp, NULL, NULL, 0, NULL }, - { "rfschar", roff_unsupp, NULL, NULL, 0, NULL }, - { "rhang", roff_line_ignore, NULL, NULL, 0, NULL }, - { "rj", roff_line_ignore, NULL, NULL, 0, NULL }, - { "rm", roff_rm, NULL, NULL, 0, NULL }, - { "rn", roff_unsupp, NULL, NULL, 0, NULL }, - { "rnn", roff_unsupp, NULL, NULL, 0, NULL }, - { "rr", roff_rr, NULL, NULL, 0, NULL }, - { "rs", roff_line_ignore, NULL, NULL, 0, NULL }, - { "rt", roff_line_ignore, NULL, NULL, 0, NULL }, - { "schar", roff_unsupp, NULL, NULL, 0, NULL }, - { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL }, - { "shc", roff_line_ignore, NULL, NULL, 0, NULL }, - { "shift", roff_unsupp, NULL, NULL, 0, NULL }, - { "sizes", roff_line_ignore, NULL, NULL, 0, NULL }, - { "so", roff_so, NULL, NULL, 0, NULL }, - { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, - { "special", roff_line_ignore, NULL, NULL, 0, NULL }, - { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ss", roff_line_ignore, NULL, NULL, 0, NULL }, - { "sty", roff_line_ignore, NULL, NULL, 0, NULL }, - { "substring", roff_unsupp, NULL, NULL, 0, NULL }, - { "sv", roff_line_ignore, NULL, NULL, 0, NULL }, - { "sy", roff_insec, NULL, NULL, 0, NULL }, - { "T&", roff_T_, NULL, NULL, 0, NULL }, - { "ta", roff_unsupp, NULL, NULL, 0, NULL }, - { "tc", roff_unsupp, NULL, NULL, 0, NULL }, - { "TE", roff_TE, NULL, NULL, 0, NULL }, - { "TH", roff_TH, NULL, NULL, 0, NULL }, - { "ti", roff_unsupp, NULL, NULL, 0, NULL }, - { "tkf", roff_line_ignore, NULL, NULL, 0, NULL }, - { "tl", roff_unsupp, NULL, NULL, 0, NULL }, - { "tm", roff_line_ignore, NULL, NULL, 0, NULL }, - { "tm1", roff_line_ignore, NULL, NULL, 0, NULL }, - { "tmc", roff_line_ignore, NULL, NULL, 0, NULL }, - { "tr", roff_tr, NULL, NULL, 0, NULL }, - { "track", roff_line_ignore, NULL, NULL, 0, NULL }, - { "transchar", roff_line_ignore, NULL, NULL, 0, NULL }, - { "trf", roff_insec, NULL, NULL, 0, NULL }, - { "trimat", roff_line_ignore, NULL, NULL, 0, NULL }, - { "trin", roff_unsupp, NULL, NULL, 0, NULL }, - { "trnt", roff_unsupp, NULL, NULL, 0, NULL }, - { "troff", roff_line_ignore, NULL, NULL, 0, NULL }, - { "TS", roff_TS, NULL, NULL, 0, NULL }, - { "uf", roff_line_ignore, NULL, NULL, 0, NULL }, - { "ul", roff_line_ignore, NULL, NULL, 0, NULL }, - { "unformat", roff_unsupp, NULL, NULL, 0, NULL }, - { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL }, - { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL }, - { "vpt", roff_line_ignore, NULL, NULL, 0, NULL }, - { "vs", roff_line_ignore, NULL, NULL, 0, NULL }, - { "warn", roff_line_ignore, NULL, NULL, 0, NULL }, - { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL }, - { "watch", roff_line_ignore, NULL, NULL, 0, NULL }, - { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL }, - { "watchn", roff_line_ignore, NULL, NULL, 0, NULL }, - { "wh", roff_unsupp, NULL, NULL, 0, NULL }, - { "while", roff_unsupp, NULL, NULL, 0, NULL }, - { "write", roff_insec, NULL, NULL, 0, NULL }, - { "writec", roff_insec, NULL, NULL, 0, NULL }, - { "writem", roff_insec, NULL, NULL, 0, NULL }, - { "xflag", roff_line_ignore, NULL, NULL, 0, NULL }, - { ".", roff_cblock, NULL, NULL, 0, NULL }, - { NULL, roff_userdef, NULL, NULL, 0, NULL }, +const char *__roff_name[MAN_MAX + 1] = { + "br", "ce", "ft", "ll", + "mc", "po", "rj", "sp", + "ta", "ti", NULL, + "ab", "ad", "af", "aln", + "als", "am", "am1", "ami", + "ami1", "as", "as1", "asciify", + "backtrace", "bd", "bleedat", "blm", + "box", "boxa", "bp", "BP", + "break", "breakchar", "brnl", "brp", + "brpnl", "c2", "cc", + "cf", "cflags", "ch", "char", + "chop", "class", "close", "CL", + "color", "composite", "continue", "cp", + "cropat", "cs", "cu", "da", + "dch", "Dd", "de", "de1", + "defcolor", "dei", "dei1", "device", + "devicem", "di", "do", "ds", + "ds1", "dwh", "dt", "ec", + "ecr", "ecs", "el", "em", + "EN", "eo", "EP", "EQ", + "errprint", "ev", "evc", "ex", + "fallback", "fam", "fc", "fchar", + "fcolor", "fdeferlig", "feature", "fkern", + "fl", "flig", "fp", "fps", + "fschar", "fspacewidth", "fspecial", "ftr", + "fzoom", "gcolor", "hc", "hcode", + "hidechar", "hla", "hlm", "hpf", + "hpfa", "hpfcode", "hw", "hy", + "hylang", "hylen", "hym", "hypp", + "hys", "ie", "if", "ig", + "index", "it", "itc", "IX", + "kern", "kernafter", "kernbefore", "kernpair", + "lc", "lc_ctype", "lds", "length", + "letadj", "lf", "lg", "lhang", + "linetabs", "lnr", "lnrf", "lpfx", + "ls", "lsm", "lt", + "mediasize", "minss", "mk", "mso", + "na", "ne", "nh", "nhychar", + "nm", "nn", "nop", "nr", + "nrf", "nroff", "ns", "nx", + "open", "opena", "os", "output", + "padj", "papersize", "pc", "pev", + "pi", "PI", "pl", "pm", + "pn", "pnr", "ps", + "psbb", "pshape", "pso", "ptr", + "pvs", "rchar", "rd", "recursionlimit", + "return", "rfschar", "rhang", + "rm", "rn", "rnn", "rr", + "rs", "rt", "schar", "sentchar", + "shc", "shift", "sizes", "so", + "spacewidth", "special", "spreadwarn", "ss", + "sty", "substring", "sv", "sy", + "T&", "tc", "TE", + "TH", "tkf", "tl", + "tm", "tm1", "tmc", "tr", + "track", "transchar", "trf", "trimat", + "trin", "trnt", "troff", "TS", + "uf", "ul", "unformat", "unwatch", + "unwatchn", "vpt", "vs", "warn", + "warnscale", "watch", "watchlength", "watchn", + "wh", "while", "write", "writec", + "writem", "xflag", ".", NULL, + NULL, "text", + "Dd", "Dt", "Os", "Sh", + "Ss", "Pp", "D1", "Dl", + "Bd", "Ed", "Bl", "El", + "It", "Ad", "An", "Ap", + "Ar", "Cd", "Cm", "Dv", + "Er", "Ev", "Ex", "Fa", + "Fd", "Fl", "Fn", "Ft", + "Ic", "In", "Li", "Nd", + "Nm", "Op", "Ot", "Pa", + "Rv", "St", "Va", "Vt", + "Xr", "%A", "%B", "%D", + "%I", "%J", "%N", "%O", + "%P", "%R", "%T", "%V", + "Ac", "Ao", "Aq", "At", + "Bc", "Bf", "Bo", "Bq", + "Bsx", "Bx", "Db", "Dc", + "Do", "Dq", "Ec", "Ef", + "Em", "Eo", "Fx", "Ms", + "No", "Ns", "Nx", "Ox", + "Pc", "Pf", "Po", "Pq", + "Qc", "Ql", "Qo", "Qq", + "Re", "Rs", "Sc", "So", + "Sq", "Sm", "Sx", "Sy", + "Tn", "Ux", "Xc", "Xo", + "Fo", "Fc", "Oo", "Oc", + "Bk", "Ek", "Bt", "Hf", + "Fr", "Ud", "Lb", "Lp", + "Lk", "Mt", "Brq", "Bro", + "Brc", "%C", "Es", "En", + "Dx", "%Q", "%U", "Ta", + NULL, + "TH", "SH", "SS", "TP", + "LP", "PP", "P", "IP", + "HP", "SM", "SB", "BI", + "IB", "BR", "RB", "R", + "B", "I", "IR", "RI", + "nf", "fi", + "RE", "RS", "DT", "UC", + "PD", "AT", "in", + "OP", "EX", "EE", "UR", + "UE", "MT", "ME", NULL }; - -/* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */ -const char *const __mdoc_reserved[] = { - "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", - "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", - "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", - "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", - "Dt", "Dv", "Dx", "D1", - "Ec", "Ed", "Ef", "Ek", "El", "Em", - "En", "Eo", "Er", "Es", "Ev", "Ex", - "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", - "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", - "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", - "Oc", "Oo", "Op", "Os", "Ot", "Ox", - "Pa", "Pc", "Pf", "Po", "Pp", "Pq", - "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv", - "Sc", "Sh", "Sm", "So", "Sq", - "Ss", "St", "Sx", "Sy", - "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", - "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", - "%P", "%Q", "%R", "%T", "%U", "%V", - NULL -}; - -/* not currently implemented: BT DE DS ME MT PT SY TQ YS */ -const char *const __man_reserved[] = { - "AT", "B", "BI", "BR", "DT", - "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR", - "LP", "OP", "P", "PD", "PP", - "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", - "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR", - NULL +const char *const *roff_name = __roff_name; + +static struct roffmac roffs[TOKEN_NONE] = { + { roff_br, NULL, NULL, 0 }, /* br */ + { roff_onearg, NULL, NULL, 0 }, /* ce */ + { roff_onearg, NULL, NULL, 0 }, /* ft */ + { roff_onearg, NULL, NULL, 0 }, /* ll */ + { roff_onearg, NULL, NULL, 0 }, /* mc */ + { roff_onearg, NULL, NULL, 0 }, /* po */ + { roff_onearg, NULL, NULL, 0 }, /* rj */ + { roff_onearg, NULL, NULL, 0 }, /* sp */ + { roff_manyarg, NULL, NULL, 0 }, /* ta */ + { roff_onearg, NULL, NULL, 0 }, /* ti */ + { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ + { roff_unsupp, NULL, NULL, 0 }, /* ab */ + { roff_line_ignore, NULL, NULL, 0 }, /* ad */ + { roff_line_ignore, NULL, NULL, 0 }, /* af */ + { roff_unsupp, NULL, NULL, 0 }, /* aln */ + { roff_als, NULL, NULL, 0 }, /* als */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ + { roff_ds, NULL, NULL, 0 }, /* as */ + { roff_ds, NULL, NULL, 0 }, /* as1 */ + { roff_unsupp, NULL, NULL, 0 }, /* asciify */ + { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ + { roff_line_ignore, NULL, NULL, 0 }, /* bd */ + { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ + { roff_unsupp, NULL, NULL, 0 }, /* blm */ + { roff_unsupp, NULL, NULL, 0 }, /* box */ + { roff_unsupp, NULL, NULL, 0 }, /* boxa */ + { roff_line_ignore, NULL, NULL, 0 }, /* bp */ + { roff_unsupp, NULL, NULL, 0 }, /* BP */ + { roff_unsupp, NULL, NULL, 0 }, /* break */ + { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ + { roff_br, NULL, NULL, 0 }, /* brp */ + { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ + { roff_unsupp, NULL, NULL, 0 }, /* c2 */ + { roff_cc, NULL, NULL, 0 }, /* cc */ + { roff_insec, NULL, NULL, 0 }, /* cf */ + { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ + { roff_line_ignore, NULL, NULL, 0 }, /* ch */ + { roff_unsupp, NULL, NULL, 0 }, /* char */ + { roff_unsupp, NULL, NULL, 0 }, /* chop */ + { roff_line_ignore, NULL, NULL, 0 }, /* class */ + { roff_insec, NULL, NULL, 0 }, /* close */ + { roff_unsupp, NULL, NULL, 0 }, /* CL */ + { roff_line_ignore, NULL, NULL, 0 }, /* color */ + { roff_unsupp, NULL, NULL, 0 }, /* composite */ + { roff_unsupp, NULL, NULL, 0 }, /* continue */ + { roff_line_ignore, NULL, NULL, 0 }, /* cp */ + { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ + { roff_line_ignore, NULL, NULL, 0 }, /* cs */ + { roff_line_ignore, NULL, NULL, 0 }, /* cu */ + { roff_unsupp, NULL, NULL, 0 }, /* da */ + { roff_unsupp, NULL, NULL, 0 }, /* dch */ + { roff_Dd, NULL, NULL, 0 }, /* Dd */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ + { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ + { roff_unsupp, NULL, NULL, 0 }, /* device */ + { roff_unsupp, NULL, NULL, 0 }, /* devicem */ + { roff_unsupp, NULL, NULL, 0 }, /* di */ + { roff_unsupp, NULL, NULL, 0 }, /* do */ + { roff_ds, NULL, NULL, 0 }, /* ds */ + { roff_ds, NULL, NULL, 0 }, /* ds1 */ + { roff_unsupp, NULL, NULL, 0 }, /* dwh */ + { roff_unsupp, NULL, NULL, 0 }, /* dt */ + { roff_ec, NULL, NULL, 0 }, /* ec */ + { roff_unsupp, NULL, NULL, 0 }, /* ecr */ + { roff_unsupp, NULL, NULL, 0 }, /* ecs */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ + { roff_unsupp, NULL, NULL, 0 }, /* em */ + { roff_EN, NULL, NULL, 0 }, /* EN */ + { roff_eo, NULL, NULL, 0 }, /* eo */ + { roff_unsupp, NULL, NULL, 0 }, /* EP */ + { roff_EQ, NULL, NULL, 0 }, /* EQ */ + { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ + { roff_unsupp, NULL, NULL, 0 }, /* ev */ + { roff_unsupp, NULL, NULL, 0 }, /* evc */ + { roff_unsupp, NULL, NULL, 0 }, /* ex */ + { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ + { roff_line_ignore, NULL, NULL, 0 }, /* fam */ + { roff_unsupp, NULL, NULL, 0 }, /* fc */ + { roff_unsupp, NULL, NULL, 0 }, /* fchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ + { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ + { roff_line_ignore, NULL, NULL, 0 }, /* feature */ + { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ + { roff_line_ignore, NULL, NULL, 0 }, /* fl */ + { roff_line_ignore, NULL, NULL, 0 }, /* flig */ + { roff_line_ignore, NULL, NULL, 0 }, /* fp */ + { roff_line_ignore, NULL, NULL, 0 }, /* fps */ + { roff_unsupp, NULL, NULL, 0 }, /* fschar */ + { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ + { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ + { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ + { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ + { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ + { roff_line_ignore, NULL, NULL, 0 }, /* hc */ + { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ + { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ + { roff_line_ignore, NULL, NULL, 0 }, /* hla */ + { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ + { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ + { roff_line_ignore, NULL, NULL, 0 }, /* hw */ + { roff_line_ignore, NULL, NULL, 0 }, /* hy */ + { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ + { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ + { roff_line_ignore, NULL, NULL, 0 }, /* hym */ + { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ + { roff_line_ignore, NULL, NULL, 0 }, /* hys */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ + { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ + { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ + { roff_unsupp, NULL, NULL, 0 }, /* index */ + { roff_it, NULL, NULL, 0 }, /* it */ + { roff_unsupp, NULL, NULL, 0 }, /* itc */ + { roff_line_ignore, NULL, NULL, 0 }, /* IX */ + { roff_line_ignore, NULL, NULL, 0 }, /* kern */ + { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ + { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ + { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ + { roff_unsupp, NULL, NULL, 0 }, /* lc */ + { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ + { roff_unsupp, NULL, NULL, 0 }, /* lds */ + { roff_unsupp, NULL, NULL, 0 }, /* length */ + { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ + { roff_insec, NULL, NULL, 0 }, /* lf */ + { roff_line_ignore, NULL, NULL, 0 }, /* lg */ + { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ + { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ + { roff_unsupp, NULL, NULL, 0 }, /* lnr */ + { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ + { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ + { roff_line_ignore, NULL, NULL, 0 }, /* ls */ + { roff_unsupp, NULL, NULL, 0 }, /* lsm */ + { roff_line_ignore, NULL, NULL, 0 }, /* lt */ + { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ + { roff_line_ignore, NULL, NULL, 0 }, /* minss */ + { roff_line_ignore, NULL, NULL, 0 }, /* mk */ + { roff_insec, NULL, NULL, 0 }, /* mso */ + { roff_line_ignore, NULL, NULL, 0 }, /* na */ + { roff_line_ignore, NULL, NULL, 0 }, /* ne */ + { roff_line_ignore, NULL, NULL, 0 }, /* nh */ + { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ + { roff_unsupp, NULL, NULL, 0 }, /* nm */ + { roff_unsupp, NULL, NULL, 0 }, /* nn */ + { roff_unsupp, NULL, NULL, 0 }, /* nop */ + { roff_nr, NULL, NULL, 0 }, /* nr */ + { roff_unsupp, NULL, NULL, 0 }, /* nrf */ + { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ + { roff_line_ignore, NULL, NULL, 0 }, /* ns */ + { roff_insec, NULL, NULL, 0 }, /* nx */ + { roff_insec, NULL, NULL, 0 }, /* open */ + { roff_insec, NULL, NULL, 0 }, /* opena */ + { roff_line_ignore, NULL, NULL, 0 }, /* os */ + { roff_unsupp, NULL, NULL, 0 }, /* output */ + { roff_line_ignore, NULL, NULL, 0 }, /* padj */ + { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ + { roff_line_ignore, NULL, NULL, 0 }, /* pc */ + { roff_line_ignore, NULL, NULL, 0 }, /* pev */ + { roff_insec, NULL, NULL, 0 }, /* pi */ + { roff_unsupp, NULL, NULL, 0 }, /* PI */ + { roff_line_ignore, NULL, NULL, 0 }, /* pl */ + { roff_line_ignore, NULL, NULL, 0 }, /* pm */ + { roff_line_ignore, NULL, NULL, 0 }, /* pn */ + { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ + { roff_line_ignore, NULL, NULL, 0 }, /* ps */ + { roff_unsupp, NULL, NULL, 0 }, /* psbb */ + { roff_unsupp, NULL, NULL, 0 }, /* pshape */ + { roff_insec, NULL, NULL, 0 }, /* pso */ + { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ + { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ + { roff_unsupp, NULL, NULL, 0 }, /* rchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* rd */ + { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ + { roff_unsupp, NULL, NULL, 0 }, /* return */ + { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ + { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ + { roff_rm, NULL, NULL, 0 }, /* rm */ + { roff_rn, NULL, NULL, 0 }, /* rn */ + { roff_unsupp, NULL, NULL, 0 }, /* rnn */ + { roff_rr, NULL, NULL, 0 }, /* rr */ + { roff_line_ignore, NULL, NULL, 0 }, /* rs */ + { roff_line_ignore, NULL, NULL, 0 }, /* rt */ + { roff_unsupp, NULL, NULL, 0 }, /* schar */ + { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ + { roff_line_ignore, NULL, NULL, 0 }, /* shc */ + { roff_unsupp, NULL, NULL, 0 }, /* shift */ + { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ + { roff_so, NULL, NULL, 0 }, /* so */ + { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ + { roff_line_ignore, NULL, NULL, 0 }, /* special */ + { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ + { roff_line_ignore, NULL, NULL, 0 }, /* ss */ + { roff_line_ignore, NULL, NULL, 0 }, /* sty */ + { roff_unsupp, NULL, NULL, 0 }, /* substring */ + { roff_line_ignore, NULL, NULL, 0 }, /* sv */ + { roff_insec, NULL, NULL, 0 }, /* sy */ + { roff_T_, NULL, NULL, 0 }, /* T& */ + { roff_unsupp, NULL, NULL, 0 }, /* tc */ + { roff_TE, NULL, NULL, 0 }, /* TE */ + { roff_Dd, NULL, NULL, 0 }, /* TH */ + { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ + { roff_unsupp, NULL, NULL, 0 }, /* tl */ + { roff_line_ignore, NULL, NULL, 0 }, /* tm */ + { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ + { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ + { roff_tr, NULL, NULL, 0 }, /* tr */ + { roff_line_ignore, NULL, NULL, 0 }, /* track */ + { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ + { roff_insec, NULL, NULL, 0 }, /* trf */ + { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ + { roff_unsupp, NULL, NULL, 0 }, /* trin */ + { roff_unsupp, NULL, NULL, 0 }, /* trnt */ + { roff_line_ignore, NULL, NULL, 0 }, /* troff */ + { roff_TS, NULL, NULL, 0 }, /* TS */ + { roff_line_ignore, NULL, NULL, 0 }, /* uf */ + { roff_line_ignore, NULL, NULL, 0 }, /* ul */ + { roff_unsupp, NULL, NULL, 0 }, /* unformat */ + { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ + { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ + { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ + { roff_line_ignore, NULL, NULL, 0 }, /* vs */ + { roff_line_ignore, NULL, NULL, 0 }, /* warn */ + { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ + { roff_line_ignore, NULL, NULL, 0 }, /* watch */ + { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ + { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ + { roff_unsupp, NULL, NULL, 0 }, /* wh */ + { roff_unsupp, NULL, NULL, 0 }, /* while */ + { roff_insec, NULL, NULL, 0 }, /* write */ + { roff_insec, NULL, NULL, 0 }, /* writec */ + { roff_insec, NULL, NULL, 0 }, /* writem */ + { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ + { roff_cblock, NULL, NULL, 0 }, /* . */ + { roff_renamed, NULL, NULL, 0 }, + { roff_userdef, NULL, NULL, 0 } }; /* Array of injected predefined strings. */ @@ -733,65 +585,66 @@ static const struct predef predefs[PREDEFS_MAX] = { #include "predefs.in" }; -/* See roffhash_find() */ -#define ROFF_HASH(p) (p[0] - ASCII_LO) - +static int roffce_lines; /* number of input lines to center */ +static struct roff_node *roffce_node; /* active request */ static int roffit_lines; /* number of lines to delay */ static char *roffit_macro; /* nil-terminated macro line */ /* --- request table ------------------------------------------------------ */ -static void -roffhash_init(void) +struct ohash * +roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) { - struct roffmac *n; - int buc, i; - - for (i = 0; i < (int)ROFF_USERDEF; i++) { - assert(roffs[i].name[0] >= ASCII_LO); - assert(roffs[i].name[0] <= ASCII_HI); + struct ohash *htab; + struct roffreq *req; + enum roff_tok tok; + size_t sz; + unsigned int slot; - buc = ROFF_HASH(roffs[i].name); + htab = mandoc_malloc(sizeof(*htab)); + mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); - if (NULL != (n = hash[buc])) { - for ( ; n->next; n = n->next) - /* Do nothing. */ ; - n->next = &roffs[i]; - } else - hash[buc] = &roffs[i]; + for (tok = mintok; tok < maxtok; tok++) { + if (roff_name[tok] == NULL) + continue; + sz = strlen(roff_name[tok]); + req = mandoc_malloc(sizeof(*req) + sz + 1); + req->tok = tok; + memcpy(req->name, roff_name[tok], sz + 1); + slot = ohash_qlookup(htab, req->name); + ohash_insert(htab, slot, req); } + return htab; } -/* - * Look up a roff token by its name. Returns ROFF_MAX if no macro by - * the nil-terminated string name could be found. - */ -static enum rofft -roffhash_find(const char *p, size_t s) +void +roffhash_free(struct ohash *htab) { - int buc; - struct roffmac *n; + struct roffreq *req; + unsigned int slot; - /* - * libroff has an extremely simple hashtable, for the time - * being, which simply keys on the first character, which must - * be printable, then walks a chain. It works well enough until - * optimised. - */ - - if (p[0] < ASCII_LO || p[0] > ASCII_HI) - return ROFF_MAX; - - buc = ROFF_HASH(p); + if (htab == NULL) + return; + for (req = ohash_first(htab, &slot); req != NULL; + req = ohash_next(htab, &slot)) + free(req); + ohash_delete(htab); + free(htab); +} - if (NULL == (n = hash[buc])) - return ROFF_MAX; - for ( ; n; n = n->next) - if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) - return (enum rofft)(n - roffs); +enum roff_tok +roffhash_find(struct ohash *htab, const char *name, size_t sz) +{ + struct roffreq *req; + const char *end; - return ROFF_MAX; + if (sz) { + end = name + sz; + req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); + } else + req = ohash_find(htab, ohash_qlookup(htab, name)); + return req == NULL ? TOKEN_NONE : req->tok; } /* --- stack of request blocks -------------------------------------------- */ @@ -819,7 +672,7 @@ roffnode_pop(struct roff *r) * removed with roffnode_pop(). */ static void -roffnode_push(struct roff *r, enum rofft tok, const char *name, +roffnode_push(struct roff *r, enum roff_tok tok, const char *name, int line, int col) { struct roffnode *p; @@ -842,7 +695,6 @@ static void roff_free1(struct roff *r) { struct tbl_node *tbl; - struct eqn_node *e; int i; while (NULL != (tbl = r->first_tbl)) { @@ -851,11 +703,9 @@ roff_free1(struct roff *r) } r->first_tbl = r->last_tbl = r->tbl = NULL; - while (NULL != (e = r->first_eqn)) { - r->first_eqn = e->next; - eqn_free(e); - } - r->first_eqn = r->last_eqn = r->eqn = NULL; + if (r->last_eqn != NULL) + eqn_free(r->last_eqn); + r->last_eqn = r->eqn = NULL; while (r->last) roffnode_pop(r); @@ -869,8 +719,9 @@ roff_free1(struct roff *r) r->regtab = NULL; roff_freestr(r->strtab); + roff_freestr(r->rentab); roff_freestr(r->xmbtab); - r->strtab = r->xmbtab = NULL; + r->strtab = r->rentab = r->xmbtab = NULL; if (r->xtab) for (i = 0; i < 128; i++) @@ -882,17 +733,21 @@ roff_free1(struct roff *r) void roff_reset(struct roff *r) { - roff_free1(r); r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); - r->control = 0; + r->control = '\0'; + r->escape = '\\'; + roffce_lines = 0; + roffce_node = NULL; + roffit_lines = 0; + roffit_macro = NULL; } void roff_free(struct roff *r) { - roff_free1(r); + roffhash_free(r->reqtab); free(r); } @@ -903,12 +758,11 @@ roff_alloc(struct mparse *parse, int options) r = mandoc_calloc(1, sizeof(struct roff)); r->parse = parse; + r->reqtab = roffhash_alloc(0, ROFF_USERDEF); r->options = options; r->format = options & (MPARSE_MDOC | MPARSE_MAN); r->rstackpos = -1; - - roffhash_init(); - + r->escape = '\\'; return r; } @@ -962,16 +816,17 @@ roff_man_free(struct roff_man *man) struct roff_man * roff_man_alloc(struct roff *roff, struct mparse *parse, - const char *defos, int quick) + const char *os_s, int quick) { struct roff_man *man; man = mandoc_calloc(1, sizeof(*man)); man->parse = parse; man->roff = roff; - man->defos = defos; + man->os_s = os_s; man->quick = quick; roff_man_alloc1(man); + roff->man = man; return man; } @@ -1125,31 +980,21 @@ roff_body_alloc(struct roff_man *man, int line, int pos, int tok) return n; } -void -roff_addeqn(struct roff_man *man, const struct eqn *eqn) -{ - struct roff_node *n; - - n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE); - n->eqn = eqn; - if (eqn->ln > man->last->line) - n->flags |= NODE_LINE; - roff_node_append(man, n); - man->next = ROFF_NEXT_SIBLING; -} - -void -roff_addtbl(struct roff_man *man, const struct tbl_span *tbl) +static void +roff_addtbl(struct roff_man *man, struct tbl_node *tbl) { struct roff_node *n; + const struct tbl_span *span; if (man->macroset == MACROSET_MAN) - man_breakscope(man, TOKEN_NONE); - n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); - n->span = tbl; - roff_node_append(man, n); - n->flags |= NODE_VALID | NODE_ENDED; - man->next = ROFF_NEXT_SIBLING; + man_breakscope(man, ROFF_TS); + while ((span = tbl_span(tbl)) != NULL) { + n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); + n->span = span; + roff_node_append(man, n); + n->flags |= NODE_VALID | NODE_ENDED; + man->next = ROFF_NEXT_SIBLING; + } } void @@ -1197,6 +1042,8 @@ roff_node_free(struct roff_node *n) mdoc_argv_free(n->args); if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) free(n->norm); + if (n->eqn != NULL) + eqn_box_free(n->eqn); free(n->string); free(n); } @@ -1236,7 +1083,7 @@ deroff(char **dest, const struct roff_node *n) /* Skip trailing backslash. */ sz = strlen(cp); - if (cp[sz - 1] == '\\') + if (sz > 0 && cp[sz - 1] == '\\') sz--; /* Skip trailing whitespace. */ @@ -1284,27 +1131,104 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) int expand_count; /* to avoid infinite loops */ int npos; /* position in numeric expression */ int arg_complete; /* argument not interrupted by eol */ + int done; /* no more input available */ + int deftype; /* type of definition to paste */ + int rcsid; /* kind of RCS id seen */ char term; /* character terminating the escape */ - expand_count = 0; + /* Search forward for comments. */ + + done = 0; start = buf->buf + pos; - stesc = strchr(start, '\0') - 1; - while (stesc-- > start) { + for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) { + if (stesc[0] != r->escape || stesc[1] == '\0') + continue; + stesc++; + if (*stesc != '"' && *stesc != '#') + continue; + + /* Comment found, look for RCS id. */ + + rcsid = 0; + if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) { + rcsid = 1 << MANDOC_OS_OPENBSD; + cp += 8; + } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) { + rcsid = 1 << MANDOC_OS_NETBSD; + cp += 7; + } + if (cp != NULL && + isalnum((unsigned char)*cp) == 0 && + strchr(cp, '$') != NULL) { + if (r->man->meta.rcsids & rcsid) + mandoc_msg(MANDOCERR_RCS_REP, r->parse, + ln, stesc + 1 - buf->buf, stesc + 1); + r->man->meta.rcsids |= rcsid; + } + + /* Handle trailing whitespace. */ + + cp = strchr(stesc--, '\0') - 1; + if (*cp == '\n') { + done = 1; + cp--; + } + if (*cp == ' ' || *cp == '\t') + mandoc_msg(MANDOCERR_SPACE_EOL, r->parse, + ln, cp - buf->buf, NULL); + while (stesc > start && stesc[-1] == ' ') + stesc--; + *stesc = '\0'; + break; + } + if (stesc == start) + return ROFF_CONT; + stesc--; + + /* Notice the end of the input. */ + + if (*stesc == '\n') { + *stesc-- = '\0'; + done = 1; + } + + expand_count = 0; + while (stesc >= start) { /* Search backwards for the next backslash. */ - if (*stesc != '\\') + if (*stesc != r->escape) { + if (*stesc == '\\') { + *stesc = '\0'; + buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s", + buf->buf, stesc + 1) + 1; + start = nbuf + pos; + stesc = nbuf + (stesc - buf->buf); + free(buf->buf); + buf->buf = nbuf; + } + stesc--; continue; + } /* If it is escaped, skip it. */ for (cp = stesc - 1; cp >= start; cp--) - if (*cp != '\\') + if (*cp != r->escape) break; if ((stesc - cp) % 2 == 0) { - stesc = (char *)cp; + while (stesc > cp) + *stesc-- = '\\'; continue; + } else if (stesc[1] != '\0') { + *stesc = '\\'; + } else { + *stesc-- = '\0'; + if (done) + continue; + else + return ROFF_APPEND; } /* Decide whether to expand or to check only. */ @@ -1330,6 +1254,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) mandoc_vmsg(MANDOCERR_ESC_BAD, r->parse, ln, (int)(stesc - buf->buf), "%.*s", (int)(cp - stesc), stesc); + stesc--; continue; } @@ -1407,8 +1332,10 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) switch (stesc[1]) { case '*': - if (arg_complete) - res = roff_getstrn(r, stnam, naml); + if (arg_complete) { + deftype = ROFFDEF_USER | ROFFDEF_PRE; + res = roff_getstrn(r, stnam, naml, &deftype); + } break; case 'B': npos = 0; @@ -1463,7 +1390,7 @@ roff_res(struct roff *r, struct buf *buf, int ln, int pos) * Process text streams. */ static enum rofferr -roff_parsetext(struct buf *buf, int pos, int *offs) +roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs) { size_t sz; const char *start; @@ -1485,6 +1412,16 @@ roff_parsetext(struct buf *buf, int pos, int *offs) } else if (roffit_lines > 1) --roffit_lines; + if (roffce_node != NULL && buf->buf[pos] != '\0') { + if (roffce_lines < 1) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; + } else + roffce_lines--; + } + /* Convert all breakable hyphens into ASCII_HYPH. */ start = p = buf->buf + pos; @@ -1521,7 +1458,7 @@ roff_parsetext(struct buf *buf, int pos, int *offs) enum rofferr roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) { - enum rofft t; + enum roff_tok t; enum rofferr e; int pos; /* parse point */ int spos; /* saved parse point for messages */ @@ -1544,7 +1481,7 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) /* Expand some escape sequences. */ e = roff_res(r, buf, ln, pos); - if (e == ROFF_IGN) + if (e == ROFF_IGN || e == ROFF_APPEND) return e; assert(e == ROFF_CONT); @@ -1560,18 +1497,22 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) if (r->last != NULL && ! ctl) { t = r->last->tok; - assert(roffs[t].text); e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); - assert(e == ROFF_IGN || e == ROFF_CONT); - if (e != ROFF_CONT) + if (e == ROFF_IGN) return e; + assert(e == ROFF_CONT); + } + if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) { + eqn_read(r->eqn, buf->buf + ppos); + return ROFF_IGN; + } + if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) { + tbl_read(r->tbl, ln, buf->buf, ppos); + roff_addtbl(r->man, r->tbl); + return ROFF_IGN; } - if (r->eqn != NULL) - return eqn_read(&r->eqn, ln, buf->buf, ppos, offs); - if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) - return tbl_read(r->tbl, ln, buf->buf, ppos); if ( ! ctl) - return roff_parsetext(buf, pos, offs); + return roff_parsetext(r, buf, pos, offs); /* Skip empty request lines. */ @@ -1590,7 +1531,6 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) if (r->last) { t = r->last->tok; - assert(roffs[t].sub); return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); } @@ -1601,16 +1541,30 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) /* Tables ignore most macros. */ - if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) { + if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS || + t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) { mandoc_msg(MANDOCERR_TBLMACRO, r->parse, ln, pos, buf->buf + spos); - if (t == ROFF_TS) + if (t != TOKEN_NONE) return ROFF_IGN; while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') pos++; - while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ') + while (buf->buf[pos] == ' ') pos++; - return tbl_read(r->tbl, ln, buf->buf, pos); + tbl_read(r->tbl, ln, buf->buf, pos); + roff_addtbl(r->man, r->tbl); + return ROFF_IGN; + } + + /* For now, let high level macros abort .ce mode. */ + + if (ctl && roffce_node != NULL && + (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ || + t == ROFF_TH || t == ROFF_TS)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + roffce_lines = 0; + roffce_node = NULL; } /* @@ -1618,34 +1572,34 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) * Let the standard macro set parsers handle it. */ - if (t == ROFF_MAX) + if (t == TOKEN_NONE) return ROFF_CONT; /* Execute a roff request or a user defined macro. */ - assert(roffs[t].proc); - return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); + return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs); } void roff_endparse(struct roff *r) { - - if (r->last) + if (r->last != NULL) mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, r->last->line, r->last->col, - roffs[r->last->tok].name); + roff_name[r->last->tok]); - if (r->eqn) { + if (r->eqn != NULL) { mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, - r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ"); - eqn_end(&r->eqn); + r->eqn->node->line, r->eqn->node->pos, "EQ"); + eqn_parse(r->eqn); + r->eqn = NULL; } - if (r->tbl) { + if (r->tbl != NULL) { mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, r->tbl->line, r->tbl->pos, "TS"); - tbl_end(&r->tbl); + tbl_end(r->tbl); + r->tbl = NULL; } } @@ -1653,28 +1607,38 @@ roff_endparse(struct roff *r) * Parse a roff node's type from the input buffer. This must be in the * form of ".foo xxx" in the usual way. */ -static enum rofft +static enum roff_tok roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) { char *cp; const char *mac; size_t maclen; - enum rofft t; + int deftype; + enum roff_tok t; cp = buf + *pos; if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) - return ROFF_MAX; + return TOKEN_NONE; mac = cp; maclen = roff_getname(r, &cp, ln, ppos); - t = (r->current_string = roff_getstrn(r, mac, maclen)) - ? ROFF_USERDEF : roffhash_find(mac, maclen); - - if (ROFF_MAX != t) + deftype = ROFFDEF_USER | ROFFDEF_REN; + r->current_string = roff_getstrn(r, mac, maclen, &deftype); + switch (deftype) { + case ROFFDEF_USER: + t = ROFF_USERDEF; + break; + case ROFFDEF_REN: + t = ROFF_RENAMED; + break; + default: + t = roffhash_find(r->reqtab, mac, maclen); + break; + } + if (t != TOKEN_NONE) *pos = cp - buf; - return t; } @@ -1766,9 +1730,10 @@ roff_ccond(struct roff *r, int ln, int ppos) static enum rofferr roff_block(ROFF_ARGS) { - const char *name; - char *iname, *cp; - size_t namesz; + const char *name, *value; + char *call, *cp, *iname, *rname; + size_t csz, namesz, rsz; + int deftype; /* Ignore groff compatibility mode for now. */ @@ -1796,7 +1761,9 @@ roff_block(ROFF_ARGS) /* Resolve the macro name argument if it is indirect. */ if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { - if ((name = roff_getstrn(r, iname, namesz)) == NULL) { + deftype = ROFFDEF_USER; + name = roff_getstrn(r, iname, namesz, &deftype); + if (name == NULL) { mandoc_vmsg(MANDOCERR_STR_UNDEF, r->parse, ln, (int)(iname - buf->buf), "%.*s", (int)namesz, iname); @@ -1808,7 +1775,7 @@ roff_block(ROFF_ARGS) if (namesz == 0 && tok != ROFF_ig) { mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, - ln, ppos, roffs[tok].name); + ln, ppos, roff_name[tok]); return ROFF_IGN; } @@ -1820,8 +1787,37 @@ roff_block(ROFF_ARGS) * appended from roff_block_text() in multiline mode. */ - if (tok == ROFF_de || tok == ROFF_dei) + if (tok == ROFF_de || tok == ROFF_dei) { roff_setstrn(&r->strtab, name, namesz, "", 0, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + } else if (tok == ROFF_am || tok == ROFF_ami) { + deftype = ROFFDEF_ANY; + value = roff_getstrn(r, iname, namesz, &deftype); + switch (deftype) { /* Before appending, ... */ + case ROFFDEF_PRE: /* copy predefined to user-defined. */ + roff_setstrn(&r->strtab, name, namesz, + value, strlen(value), 0); + break; + case ROFFDEF_REN: /* call original standard macro. */ + csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", + (int)strlen(value), value); + roff_setstrn(&r->strtab, name, namesz, call, csz, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); + free(call); + break; + case ROFFDEF_STD: /* rename and call standard macro. */ + rsz = mandoc_asprintf(&rname, "__%s_renamed", name); + roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); + csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", + (int)rsz, rname); + roff_setstrn(&r->strtab, name, namesz, call, csz, 0); + free(call); + free(rname); + break; + default: + break; + } + } if (*cp == '\0') return ROFF_IGN; @@ -1834,7 +1830,9 @@ roff_block(ROFF_ARGS) /* Resolve the end marker if it is indirect. */ if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { - if ((name = roff_getstrn(r, iname, namesz)) == NULL) { + deftype = ROFFDEF_USER; + name = roff_getstrn(r, iname, namesz, &deftype); + if (name == NULL) { mandoc_vmsg(MANDOCERR_STR_UNDEF, r->parse, ln, (int)(iname - buf->buf), "%.*s", (int)namesz, iname); @@ -1849,7 +1847,7 @@ roff_block(ROFF_ARGS) if (*cp != '\0') mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, - ln, pos, ".%s ... %s", roffs[tok].name, cp); + ln, pos, ".%s ... %s", roff_name[tok], cp); return ROFF_IGN; } @@ -1857,7 +1855,7 @@ roff_block(ROFF_ARGS) static enum rofferr roff_block_sub(ROFF_ARGS) { - enum rofft t; + enum roff_tok t; int i, j; /* @@ -1886,7 +1884,7 @@ roff_block_sub(ROFF_ARGS) pos = i; if (roff_parse(r, buf->buf, &pos, ln, ppos) != - ROFF_MAX) + TOKEN_NONE) return ROFF_RERUN; return ROFF_IGN; } @@ -1905,7 +1903,6 @@ roff_block_sub(ROFF_ARGS) return ROFF_IGN; } - assert(roffs[t].proc); return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); } @@ -1922,24 +1919,12 @@ roff_block_text(ROFF_ARGS) static enum rofferr roff_cond_sub(ROFF_ARGS) { - enum rofft t; + enum roff_tok t; char *ep; int rr; rr = r->last->rule; roffnode_cleanscope(r); - t = roff_parse(r, buf->buf, &pos, ln, ppos); - - /* - * Fully handle known macros when they are structurally - * required or when the conditional evaluated to true. - */ - - if ((t != ROFF_MAX) && - (rr || roffs[t].flags & ROFFMAC_STRUCT)) { - assert(roffs[t].proc); - return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); - } /* * If `\}' occurs on a macro line without a preceding macro, @@ -1953,14 +1938,29 @@ roff_cond_sub(ROFF_ARGS) /* Always check for the closing delimiter `\}'. */ while ((ep = strchr(ep, '\\')) != NULL) { - if (*(++ep) == '}') { - *ep = '&'; - roff_ccond(r, ln, ep - buf->buf - 1); - } - if (*ep != '\0') + switch (ep[1]) { + case '}': + memmove(ep, ep + 2, strlen(ep + 2) + 1); + roff_ccond(r, ln, ep - buf->buf); + break; + case '\0': ++ep; + break; + default: + ep += 2; + break; + } } - return rr ? ROFF_CONT : ROFF_IGN; + + /* + * Fully handle known macros when they are structurally + * required or when the conditional evaluated to true. + */ + + t = roff_parse(r, buf->buf, &pos, ln, ppos); + return t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) + ? (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : rr + ? ROFF_CONT : ROFF_IGN; } static enum rofferr @@ -2110,7 +2110,7 @@ roff_evalcond(struct roff *r, int ln, char *v, int *pos) { char *cp, *name; size_t sz; - int number, savepos, wanttrue; + int deftype, number, savepos, istrue, wanttrue; if ('!' == v[*pos]) { wanttrue = 0; @@ -2126,17 +2126,29 @@ roff_evalcond(struct roff *r, int ln, char *v, int *pos) (*pos)++; return wanttrue; case 'c': - case 'd': case 'e': case 't': case 'v': (*pos)++; return !wanttrue; + case 'd': case 'r': - cp = name = v + ++*pos; - sz = roff_getname(r, &cp, ln, *pos); + cp = v + *pos + 1; + while (*cp == ' ') + cp++; + name = cp; + sz = roff_getname(r, &cp, ln, cp - v); + if (sz == 0) + istrue = 0; + else if (v[*pos] == 'r') + istrue = roff_hasregn(r, name, sz); + else { + deftype = ROFFDEF_ANY; + roff_getstrn(r, name, sz, &deftype); + istrue = !!deftype; + } *pos = cp - v; - return (sz && roff_hasregn(r, name, sz)) == wanttrue; + return istrue == wanttrue; default: break; } @@ -2162,7 +2174,7 @@ roff_insec(ROFF_ARGS) { mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, - ln, ppos, roffs[tok].name); + ln, ppos, roff_name[tok]); return ROFF_IGN; } @@ -2171,7 +2183,7 @@ roff_unsupp(ROFF_ARGS) { mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, - ln, ppos, roffs[tok].name); + ln, ppos, roff_name[tok]); return ROFF_IGN; } @@ -2244,7 +2256,7 @@ roff_cond(ROFF_ARGS) if (buf->buf[pos] == '\0') mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, - ln, ppos, roffs[tok].name); + ln, ppos, roff_name[tok]); r->last->endspan = 1; @@ -2288,6 +2300,7 @@ roff_ds(ROFF_ARGS) /* The rest is the value. */ roff_setstrn(&r->strtab, name, namesz, string, strlen(string), ROFF_as == tok); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); return ROFF_IGN; } @@ -2699,6 +2712,7 @@ roff_rm(ROFF_ARGS) name = cp; namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); if (name[namesz] == '\\') break; } @@ -2737,46 +2751,49 @@ roff_it(ROFF_ARGS) static enum rofferr roff_Dd(ROFF_ARGS) { - const char *const *cp; - - if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0) - for (cp = __mdoc_reserved; *cp; cp++) - roff_setstr(r, *cp, NULL, 0); - - if (r->format == 0) - r->format = MPARSE_MDOC; - - return ROFF_CONT; -} - -static enum rofferr -roff_TH(ROFF_ARGS) -{ - const char *const *cp; - - if ((r->options & MPARSE_QUICK) == 0) - for (cp = __man_reserved; *cp; cp++) - roff_setstr(r, *cp, NULL, 0); - - if (r->format == 0) - r->format = MPARSE_MAN; + int mask; + enum roff_tok t, te; + switch (tok) { + case ROFF_Dd: + tok = MDOC_Dd; + te = MDOC_MAX; + if (r->format == 0) + r->format = MPARSE_MDOC; + mask = MPARSE_MDOC | MPARSE_QUICK; + break; + case ROFF_TH: + tok = MAN_TH; + te = MAN_MAX; + if (r->format == 0) + r->format = MPARSE_MAN; + mask = MPARSE_QUICK; + break; + default: + abort(); + } + if ((r->options & mask) == 0) + for (t = tok; t < te; t++) + roff_setstr(r, roff_name[t], NULL, 0); return ROFF_CONT; } static enum rofferr roff_TE(ROFF_ARGS) { - - if (NULL == r->tbl) + if (r->tbl == NULL) { mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "TE"); - else if ( ! tbl_end(&r->tbl)) { + return ROFF_IGN; + } + if (tbl_end(r->tbl) == 0) { + r->tbl = NULL; free(buf->buf); buf->buf = mandoc_strdup(".sp"); buf->sz = 4; return ROFF_REPARSE; } + r->tbl = NULL; return ROFF_IGN; } @@ -2788,7 +2805,7 @@ roff_T_(ROFF_ARGS) mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "T&"); else - tbl_restart(ppos, ln, r->tbl); + tbl_restart(ln, ppos, r->tbl); return ROFF_IGN; } @@ -2862,20 +2879,25 @@ roff_eqndelim(struct roff *r, struct buf *buf, int pos) static enum rofferr roff_EQ(ROFF_ARGS) { - struct eqn_node *e; - - assert(r->eqn == NULL); - e = eqn_alloc(ppos, ln, r->parse); + struct roff_node *n; - if (r->last_eqn) { - r->last_eqn->next = e; - e->delim = r->last_eqn->delim; - e->odelim = r->last_eqn->odelim; - e->cdelim = r->last_eqn->cdelim; - } else - r->first_eqn = r->last_eqn = e; + if (r->man->macroset == MACROSET_MAN) + man_breakscope(r->man, ROFF_EQ); + n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); + if (ln > r->man->last->line) + n->flags |= NODE_LINE; + n->eqn = mandoc_calloc(1, sizeof(*n->eqn)); + n->eqn->expectargs = UINT_MAX; + roff_node_append(r->man, n); + r->man->next = ROFF_NEXT_SIBLING; - r->eqn = r->last_eqn = e; + assert(r->eqn == NULL); + if (r->last_eqn == NULL) + r->last_eqn = eqn_alloc(r->parse); + else + eqn_reset(r->last_eqn); + r->eqn = r->last_eqn; + r->eqn->node = n; if (buf->buf[pos] != '\0') mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, @@ -2887,39 +2909,156 @@ roff_EQ(ROFF_ARGS) static enum rofferr roff_EN(ROFF_ARGS) { - - mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); + if (r->eqn != NULL) { + eqn_parse(r->eqn); + r->eqn = NULL; + } else + mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, + "EN %s", buf->buf + pos); return ROFF_IGN; } static enum rofferr roff_TS(ROFF_ARGS) { - struct tbl_node *tbl; - - if (r->tbl) { + if (r->tbl != NULL) { mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, ln, ppos, "TS breaks TS"); - tbl_end(&r->tbl); + tbl_end(r->tbl); } - - tbl = tbl_alloc(ppos, ln, r->parse); - + r->tbl = tbl_alloc(ppos, ln, r->parse); if (r->last_tbl) - r->last_tbl->next = tbl; + r->last_tbl->next = r->tbl; else - r->first_tbl = r->last_tbl = tbl; + r->first_tbl = r->tbl; + r->last_tbl = r->tbl; + return ROFF_IGN; +} + +static enum rofferr +roff_onearg(ROFF_ARGS) +{ + struct roff_node *n; + char *cp; + int npos; + + if (r->man->flags & (MAN_BLINE | MAN_ELINE) && + (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || + tok == ROFF_ti)) + man_breakscope(r->man, tok); + + if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { + r->man->last = roffce_node; + r->man->next = ROFF_NEXT_SIBLING; + } + + roff_elem_alloc(r->man, ln, ppos, tok); + n = r->man->last; - r->tbl = r->last_tbl = tbl; + cp = buf->buf + pos; + if (*cp != '\0') { + while (*cp != '\0' && *cp != ' ') + cp++; + while (*cp == ' ') + *cp++ = '\0'; + if (*cp != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, + r->parse, ln, cp - buf->buf, + "%s ... %s", roff_name[tok], cp); + roff_word_alloc(r->man, ln, pos, buf->buf + pos); + } + + if (tok == ROFF_ce || tok == ROFF_rj) { + if (r->man->last->type == ROFFT_ELEM) { + roff_word_alloc(r->man, ln, pos, "1"); + r->man->last->flags |= NODE_NOSRC; + } + npos = 0; + if (roff_evalnum(r, ln, r->man->last->string, &npos, + &roffce_lines, 0) == 0) { + mandoc_vmsg(MANDOCERR_CE_NONUM, + r->parse, ln, pos, "ce %s", buf->buf + pos); + roffce_lines = 1; + } + if (roffce_lines < 1) { + r->man->last = r->man->last->parent; + roffce_node = NULL; + roffce_lines = 0; + } else + roffce_node = r->man->last->parent; + } else { + n->flags |= NODE_VALID | NODE_ENDED; + r->man->last = n; + } + n->flags |= NODE_LINE; + r->man->next = ROFF_NEXT_SIBLING; return ROFF_IGN; } static enum rofferr -roff_brp(ROFF_ARGS) +roff_manyarg(ROFF_ARGS) { + struct roff_node *n; + char *sp, *ep; - buf->buf[pos - 1] = '\0'; - return ROFF_CONT; + roff_elem_alloc(r->man, ln, ppos, tok); + n = r->man->last; + + for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { + while (*ep != '\0' && *ep != ' ') + ep++; + while (*ep == ' ') + *ep++ = '\0'; + roff_word_alloc(r->man, ln, sp - buf->buf, sp); + } + + n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; + r->man->last = n; + r->man->next = ROFF_NEXT_SIBLING; + return ROFF_IGN; +} + +static enum rofferr +roff_als(ROFF_ARGS) +{ + char *oldn, *newn, *end, *value; + size_t oldsz, newsz, valsz; + + newn = oldn = buf->buf + pos; + if (*newn == '\0') + return ROFF_IGN; + + newsz = roff_getname(r, &oldn, ln, pos); + if (newn[newsz] == '\\' || *oldn == '\0') + return ROFF_IGN; + + end = oldn; + oldsz = roff_getname(r, &end, ln, oldn - buf->buf); + if (oldsz == 0) + return ROFF_IGN; + + valsz = mandoc_asprintf(&value, ".%.*s \\$*\\\"\n", + (int)oldsz, oldn); + roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + free(value); + return ROFF_IGN; +} + +static enum rofferr +roff_br(ROFF_ARGS) +{ + if (r->man->flags & (MAN_BLINE | MAN_ELINE)) + man_breakscope(r->man, ROFF_br); + roff_elem_alloc(r->man, ln, ppos, ROFF_br); + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, + "%s %s", roff_name[tok], buf->buf + pos); + r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; + r->man->next = ROFF_NEXT_SIBLING; + return ROFF_IGN; } static enum rofferr @@ -2930,7 +3069,7 @@ roff_cc(ROFF_ARGS) p = buf->buf + pos; if (*p == '\0' || (r->control = *p++) == '.') - r->control = 0; + r->control = '\0'; if (*p != '\0') mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, @@ -2940,6 +3079,33 @@ roff_cc(ROFF_ARGS) } static enum rofferr +roff_ec(ROFF_ARGS) +{ + const char *p; + + p = buf->buf + pos; + if (*p == '\0') + r->escape = '\\'; + else { + r->escape = *p; + if (*++p != '\0') + mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, + ln, p - buf->buf, "ec ... %s", p); + } + return ROFF_IGN; +} + +static enum rofferr +roff_eo(ROFF_ARGS) +{ + r->escape = '\0'; + if (buf->buf[pos] != '\0') + mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, + ln, pos, "eo %s", buf->buf + pos); + return ROFF_IGN; +} + +static enum rofferr roff_tr(ROFF_ARGS) { const char *p, *first, *second; @@ -3002,6 +3168,56 @@ roff_tr(ROFF_ARGS) } static enum rofferr +roff_rn(ROFF_ARGS) +{ + const char *value; + char *oldn, *newn, *end; + size_t oldsz, newsz; + int deftype; + + oldn = newn = buf->buf + pos; + if (*oldn == '\0') + return ROFF_IGN; + + oldsz = roff_getname(r, &newn, ln, pos); + if (oldn[oldsz] == '\\' || *newn == '\0') + return ROFF_IGN; + + end = newn; + newsz = roff_getname(r, &end, ln, newn - buf->buf); + if (newsz == 0) + return ROFF_IGN; + + deftype = ROFFDEF_ANY; + value = roff_getstrn(r, oldn, oldsz, &deftype); + switch (deftype) { + case ROFFDEF_USER: + roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_PRE: + roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_REN: + roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); + roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + break; + case ROFFDEF_STD: + roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + break; + default: + roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); + roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); + break; + } + return ROFF_IGN; +} + +static enum rofferr roff_so(ROFF_ARGS) { char *name, *cp; @@ -3036,9 +3252,9 @@ roff_so(ROFF_ARGS) static enum rofferr roff_userdef(ROFF_ARGS) { - const char *arg[9], *ap; + const char *arg[16], *ap; char *cp, *n1, *n2; - int i, ib, ie; + int expand_count, i, ib, ie; size_t asz, rsz; /* @@ -3048,7 +3264,7 @@ roff_userdef(ROFF_ARGS) r->argc = 0; cp = buf->buf + pos; - for (i = 0; i < 9; i++) { + for (i = 0; i < 16; i++) { if (*cp == '\0') arg[i] = ""; else { @@ -3062,8 +3278,9 @@ roff_userdef(ROFF_ARGS) */ buf->sz = strlen(r->current_string) + 1; - n1 = cp = mandoc_malloc(buf->sz); + n1 = n2 = cp = mandoc_malloc(buf->sz); memcpy(n1, r->current_string, buf->sz); + expand_count = 0; while (*cp != '\0') { /* Scan ahead for the next argument invocation. */ @@ -3083,6 +3300,20 @@ roff_userdef(ROFF_ARGS) cp -= 2; /* + * Prevent infinite recursion. + */ + + if (cp >= n2) + expand_count = 1; + else if (++expand_count > EXPAND_LIMIT) { + mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, + ln, (int)(cp - n1), NULL); + free(buf->buf); + buf->buf = n1; + return ROFF_IGN; + } + + /* * Determine the size of the expanded argument, * taking escaping of quotes into account. */ @@ -3160,6 +3391,22 @@ roff_userdef(ROFF_ARGS) ROFF_REPARSE : ROFF_APPEND; } +/* + * Calling a high-level macro that was renamed with .rn. + * r->current_string has already been set up by roff_parse(). + */ +static enum rofferr +roff_renamed(ROFF_ARGS) +{ + char *nbuf; + + buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, + buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; + free(buf->buf); + buf->buf = nbuf; + return ROFF_CONT; +} + static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { @@ -3209,9 +3456,12 @@ static void roff_setstr(struct roff *r, const char *name, const char *string, int append) { + size_t namesz; - roff_setstrn(&r->strtab, name, strlen(name), string, + namesz = strlen(name); + roff_setstrn(&r->strtab, name, namesz, string, string ? strlen(string) : 0, append); + roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); } static void @@ -3287,21 +3537,63 @@ roff_setstrn(struct roffkv **r, const char *name, size_t namesz, } static const char * -roff_getstrn(const struct roff *r, const char *name, size_t len) -{ - const struct roffkv *n; - int i; - - for (n = r->strtab; n; n = n->next) - if (0 == strncmp(name, n->key.p, len) && - '\0' == n->key.p[(int)len]) - return n->val.p; - - for (i = 0; i < PREDEFS_MAX; i++) - if (0 == strncmp(name, predefs[i].name, len) && - '\0' == predefs[i].name[(int)len]) - return predefs[i].str; - +roff_getstrn(const struct roff *r, const char *name, size_t len, + int *deftype) +{ + const struct roffkv *n; + int i; + enum roff_tok tok; + + if (*deftype & ROFFDEF_USER) { + for (n = r->strtab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) == 0 && + n->key.p[len] == '\0' && + n->val.p != NULL) { + *deftype = ROFFDEF_USER; + return n->val.p; + } + } + } + if (*deftype & ROFFDEF_PRE) { + for (i = 0; i < PREDEFS_MAX; i++) { + if (strncmp(name, predefs[i].name, len) == 0 && + predefs[i].name[len] == '\0') { + *deftype = ROFFDEF_PRE; + return predefs[i].str; + } + } + } + if (*deftype & ROFFDEF_REN) { + for (n = r->rentab; n != NULL; n = n->next) { + if (strncmp(name, n->key.p, len) == 0 && + n->key.p[len] == '\0' && + n->val.p != NULL) { + *deftype = ROFFDEF_REN; + return n->val.p; + } + } + } + if (*deftype & ROFFDEF_STD) { + if (r->man->macroset != MACROSET_MAN) { + for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) == 0 && + roff_name[tok][len] == '\0') { + *deftype = ROFFDEF_STD; + return NULL; + } + } + } + if (r->man->macroset != MACROSET_MDOC) { + for (tok = MAN_TH; tok < MAN_MAX; tok++) { + if (strncmp(name, roff_name[tok], len) == 0 && + roff_name[tok][len] == '\0') { + *deftype = ROFFDEF_STD; + return NULL; + } + } + } + } + *deftype = 0; return NULL; } @@ -3320,20 +3612,6 @@ roff_freestr(struct roffkv *r) /* --- accessors and utility functions ------------------------------------ */ -const struct tbl_span * -roff_span(const struct roff *r) -{ - - return r->tbl ? tbl_span(r->tbl) : NULL; -} - -const struct eqn * -roff_eqn(const struct roff *r) -{ - - return r->last_eqn ? &r->last_eqn->eqn : NULL; -} - /* * Duplicate an input string, making the appropriate character * conversations (as stipulated by `tr') along the way. @@ -3451,9 +3729,9 @@ roff_getcontrol(const struct roff *r, const char *cp, int *ppos) pos = *ppos; - if (0 != r->control && cp[pos] == r->control) + if (r->control != '\0' && cp[pos] == r->control) pos++; - else if (0 != r->control) + else if (r->control != '\0') return 0; else if ('\\' == cp[pos] && '.' == cp[pos + 1]) pos += 2; diff --git a/usr/src/cmd/mandoc/roff.h b/usr/src/cmd/mandoc/roff.h index b87cf132b7..8b28d59609 100644 --- a/usr/src/cmd/mandoc/roff.h +++ b/usr/src/cmd/mandoc/roff.h @@ -1,4 +1,4 @@ -/* $Id: roff.h,v 1.40 2017/02/16 03:00:23 schwarze Exp $ */ +/* $Id: roff.h,v 1.58 2017/07/08 14:51:05 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -16,6 +16,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +struct ohash; struct mdoc_arg; union mdoc_data; @@ -64,6 +65,413 @@ enum roff_type { ROFFT_EQN }; +enum roff_tok { + ROFF_br = 0, + ROFF_ce, + ROFF_ft, + ROFF_ll, + ROFF_mc, + ROFF_po, + ROFF_rj, + ROFF_sp, + ROFF_ta, + ROFF_ti, + ROFF_MAX, + ROFF_ab, + ROFF_ad, + ROFF_af, + ROFF_aln, + ROFF_als, + ROFF_am, + ROFF_am1, + ROFF_ami, + ROFF_ami1, + ROFF_as, + ROFF_as1, + ROFF_asciify, + ROFF_backtrace, + ROFF_bd, + ROFF_bleedat, + ROFF_blm, + ROFF_box, + ROFF_boxa, + ROFF_bp, + ROFF_BP, + ROFF_break, + ROFF_breakchar, + ROFF_brnl, + ROFF_brp, + ROFF_brpnl, + ROFF_c2, + ROFF_cc, + ROFF_cf, + ROFF_cflags, + ROFF_ch, + ROFF_char, + ROFF_chop, + ROFF_class, + ROFF_close, + ROFF_CL, + ROFF_color, + ROFF_composite, + ROFF_continue, + ROFF_cp, + ROFF_cropat, + ROFF_cs, + ROFF_cu, + ROFF_da, + ROFF_dch, + ROFF_Dd, + ROFF_de, + ROFF_de1, + ROFF_defcolor, + ROFF_dei, + ROFF_dei1, + ROFF_device, + ROFF_devicem, + ROFF_di, + ROFF_do, + ROFF_ds, + ROFF_ds1, + ROFF_dwh, + ROFF_dt, + ROFF_ec, + ROFF_ecr, + ROFF_ecs, + ROFF_el, + ROFF_em, + ROFF_EN, + ROFF_eo, + ROFF_EP, + ROFF_EQ, + ROFF_errprint, + ROFF_ev, + ROFF_evc, + ROFF_ex, + ROFF_fallback, + ROFF_fam, + ROFF_fc, + ROFF_fchar, + ROFF_fcolor, + ROFF_fdeferlig, + ROFF_feature, + /* MAN_fi; ignored in mdoc(7) */ + ROFF_fkern, + ROFF_fl, + ROFF_flig, + ROFF_fp, + ROFF_fps, + ROFF_fschar, + ROFF_fspacewidth, + ROFF_fspecial, + ROFF_ftr, + ROFF_fzoom, + ROFF_gcolor, + ROFF_hc, + ROFF_hcode, + ROFF_hidechar, + ROFF_hla, + ROFF_hlm, + ROFF_hpf, + ROFF_hpfa, + ROFF_hpfcode, + ROFF_hw, + ROFF_hy, + ROFF_hylang, + ROFF_hylen, + ROFF_hym, + ROFF_hypp, + ROFF_hys, + ROFF_ie, + ROFF_if, + ROFF_ig, + /* MAN_in; ignored in mdoc(7) */ + ROFF_index, + ROFF_it, + ROFF_itc, + ROFF_IX, + ROFF_kern, + ROFF_kernafter, + ROFF_kernbefore, + ROFF_kernpair, + ROFF_lc, + ROFF_lc_ctype, + ROFF_lds, + ROFF_length, + ROFF_letadj, + ROFF_lf, + ROFF_lg, + ROFF_lhang, + ROFF_linetabs, + ROFF_lnr, + ROFF_lnrf, + ROFF_lpfx, + ROFF_ls, + ROFF_lsm, + ROFF_lt, + ROFF_mediasize, + ROFF_minss, + ROFF_mk, + ROFF_mso, + ROFF_na, + ROFF_ne, + /* MAN_nf; ignored in mdoc(7) */ + ROFF_nh, + ROFF_nhychar, + ROFF_nm, + ROFF_nn, + ROFF_nop, + ROFF_nr, + ROFF_nrf, + ROFF_nroff, + ROFF_ns, + ROFF_nx, + ROFF_open, + ROFF_opena, + ROFF_os, + ROFF_output, + ROFF_padj, + ROFF_papersize, + ROFF_pc, + ROFF_pev, + ROFF_pi, + ROFF_PI, + ROFF_pl, + ROFF_pm, + ROFF_pn, + ROFF_pnr, + ROFF_ps, + ROFF_psbb, + ROFF_pshape, + ROFF_pso, + ROFF_ptr, + ROFF_pvs, + ROFF_rchar, + ROFF_rd, + ROFF_recursionlimit, + ROFF_return, + ROFF_rfschar, + ROFF_rhang, + ROFF_rm, + ROFF_rn, + ROFF_rnn, + ROFF_rr, + ROFF_rs, + ROFF_rt, + ROFF_schar, + ROFF_sentchar, + ROFF_shc, + ROFF_shift, + ROFF_sizes, + ROFF_so, + ROFF_spacewidth, + ROFF_special, + ROFF_spreadwarn, + ROFF_ss, + ROFF_sty, + ROFF_substring, + ROFF_sv, + ROFF_sy, + ROFF_T_, + ROFF_tc, + ROFF_TE, + ROFF_TH, + ROFF_tkf, + ROFF_tl, + ROFF_tm, + ROFF_tm1, + ROFF_tmc, + ROFF_tr, + ROFF_track, + ROFF_transchar, + ROFF_trf, + ROFF_trimat, + ROFF_trin, + ROFF_trnt, + ROFF_troff, + ROFF_TS, + ROFF_uf, + ROFF_ul, + ROFF_unformat, + ROFF_unwatch, + ROFF_unwatchn, + ROFF_vpt, + ROFF_vs, + ROFF_warn, + ROFF_warnscale, + ROFF_watch, + ROFF_watchlength, + ROFF_watchn, + ROFF_wh, + ROFF_while, + ROFF_write, + ROFF_writec, + ROFF_writem, + ROFF_xflag, + ROFF_cblock, + ROFF_RENAMED, + ROFF_USERDEF, + TOKEN_NONE, + MDOC_Dd, + MDOC_Dt, + MDOC_Os, + MDOC_Sh, + MDOC_Ss, + MDOC_Pp, + MDOC_D1, + MDOC_Dl, + MDOC_Bd, + MDOC_Ed, + MDOC_Bl, + MDOC_El, + MDOC_It, + MDOC_Ad, + MDOC_An, + MDOC_Ap, + MDOC_Ar, + MDOC_Cd, + MDOC_Cm, + MDOC_Dv, + MDOC_Er, + MDOC_Ev, + MDOC_Ex, + MDOC_Fa, + MDOC_Fd, + MDOC_Fl, + MDOC_Fn, + MDOC_Ft, + MDOC_Ic, + MDOC_In, + MDOC_Li, + MDOC_Nd, + MDOC_Nm, + MDOC_Op, + MDOC_Ot, + MDOC_Pa, + MDOC_Rv, + MDOC_St, + MDOC_Va, + MDOC_Vt, + MDOC_Xr, + MDOC__A, + MDOC__B, + MDOC__D, + MDOC__I, + MDOC__J, + MDOC__N, + MDOC__O, + MDOC__P, + MDOC__R, + MDOC__T, + MDOC__V, + MDOC_Ac, + MDOC_Ao, + MDOC_Aq, + MDOC_At, + MDOC_Bc, + MDOC_Bf, + MDOC_Bo, + MDOC_Bq, + MDOC_Bsx, + MDOC_Bx, + MDOC_Db, + MDOC_Dc, + MDOC_Do, + MDOC_Dq, + MDOC_Ec, + MDOC_Ef, + MDOC_Em, + MDOC_Eo, + MDOC_Fx, + MDOC_Ms, + MDOC_No, + MDOC_Ns, + MDOC_Nx, + MDOC_Ox, + MDOC_Pc, + MDOC_Pf, + MDOC_Po, + MDOC_Pq, + MDOC_Qc, + MDOC_Ql, + MDOC_Qo, + MDOC_Qq, + MDOC_Re, + MDOC_Rs, + MDOC_Sc, + MDOC_So, + MDOC_Sq, + MDOC_Sm, + MDOC_Sx, + MDOC_Sy, + MDOC_Tn, + MDOC_Ux, + MDOC_Xc, + MDOC_Xo, + MDOC_Fo, + MDOC_Fc, + MDOC_Oo, + MDOC_Oc, + MDOC_Bk, + MDOC_Ek, + MDOC_Bt, + MDOC_Hf, + MDOC_Fr, + MDOC_Ud, + MDOC_Lb, + MDOC_Lp, + MDOC_Lk, + MDOC_Mt, + MDOC_Brq, + MDOC_Bro, + MDOC_Brc, + MDOC__C, + MDOC_Es, + MDOC_En, + MDOC_Dx, + MDOC__Q, + MDOC__U, + MDOC_Ta, + MDOC_MAX, + MAN_TH, + MAN_SH, + MAN_SS, + MAN_TP, + MAN_LP, + MAN_PP, + MAN_P, + MAN_IP, + MAN_HP, + MAN_SM, + MAN_SB, + MAN_BI, + MAN_IB, + MAN_BR, + MAN_RB, + MAN_R, + MAN_B, + MAN_I, + MAN_IR, + MAN_RI, + MAN_nf, + MAN_fi, + MAN_RE, + MAN_RS, + MAN_DT, + MAN_UC, + MAN_PD, + MAN_AT, + MAN_in, + MAN_OP, + MAN_EX, + MAN_EE, + MAN_UR, + MAN_UE, + MAN_MT, + MAN_ME, + MAN_MAX +}; + enum roff_next { ROFF_NEXT_SIBLING = 0, ROFF_NEXT_CHILD @@ -91,11 +499,9 @@ struct roff_node { union mdoc_data *norm; /* Normalized arguments. */ char *string; /* TEXT */ const struct tbl_span *span; /* TBL */ - const struct eqn *eqn; /* EQN */ + struct eqn_box *eqn; /* EQN */ int line; /* Input file line number. */ int pos; /* Input file column number. */ - int tok; /* Request or macro ID. */ -#define TOKEN_NONE (-1) /* No request or macro. */ int flags; #define NODE_VALID (1 << 0) /* Has been validated. */ #define NODE_ENDED (1 << 1) /* Gone past body end mark. */ @@ -109,6 +515,7 @@ struct roff_node { #define NODE_NOPRT (1 << 9) /* Shall not print anything. */ int prev_font; /* Before entering this node. */ int aux; /* Decoded node data, type-dependent. */ + enum roff_tok tok; /* Request or macro ID. */ enum roff_type type; /* AST node type. */ enum roff_sec sec; /* Current named section. */ enum mdoc_endbody end; /* BODY */ @@ -123,13 +530,17 @@ struct roff_meta { char *name; /* Leading manual name. */ char *date; /* Normalized date. */ int hasbody; /* Document is not empty. */ + int rcsids; /* Bits indexed by enum mandoc_os. */ + enum mandoc_os os_e; /* Operating system. */ }; struct roff_man { struct roff_meta meta; /* Document meta-data. */ struct mparse *parse; /* Parse pointer. */ struct roff *roff; /* Roff parser state data. */ - const char *defos; /* Default operating system. */ + struct ohash *mdocmac; /* Mdoc macro lookup table. */ + struct ohash *manmac; /* Man macro lookup table. */ + const char *os_s; /* Default operating system. */ struct roff_node *first; /* The first node parsed. */ struct roff_node *last; /* The last node parsed. */ struct roff_node *last_es; /* The most recent Es node. */ @@ -158,5 +569,11 @@ struct roff_man { enum roff_next next; /* Where to put the next node. */ }; +extern const char *const *roff_name; + void deroff(char **, const struct roff_node *); +struct ohash *roffhash_alloc(enum roff_tok, enum roff_tok); +enum roff_tok roffhash_find(struct ohash *, const char *, size_t); +void roffhash_free(struct ohash *); +void roff_validate(struct roff_man *); diff --git a/usr/src/cmd/mandoc/roff_html.c b/usr/src/cmd/mandoc/roff_html.c new file mode 100644 index 0000000000..53ae6d7ca2 --- /dev/null +++ b/usr/src/cmd/mandoc/roff_html.c @@ -0,0 +1,96 @@ +/* $Id: roff_html.c,v 1.11 2017/06/24 14:38:33 schwarze Exp $ */ +/* + * Copyright (c) 2010 Kristaps Dzonsons <kristaps@bsd.lv> + * Copyright (c) 2014, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stddef.h> + +#include "mandoc.h" +#include "roff.h" +#include "out.h" +#include "html.h" + +#define ROFF_HTML_ARGS struct html *h, const struct roff_node *n + +typedef void (*roff_html_pre_fp)(ROFF_HTML_ARGS); + +static void roff_html_pre_br(ROFF_HTML_ARGS); +static void roff_html_pre_ce(ROFF_HTML_ARGS); +static void roff_html_pre_sp(ROFF_HTML_ARGS); + +static const roff_html_pre_fp roff_html_pre_acts[ROFF_MAX] = { + roff_html_pre_br, /* br */ + roff_html_pre_ce, /* ce */ + NULL, /* ft */ + NULL, /* ll */ + NULL, /* mc */ + NULL, /* po */ + roff_html_pre_ce, /* rj */ + roff_html_pre_sp, /* sp */ + NULL, /* ta */ + NULL, /* ti */ +}; + + +void +roff_html_pre(struct html *h, const struct roff_node *n) +{ + assert(n->tok < ROFF_MAX); + if (roff_html_pre_acts[n->tok] != NULL) + (*roff_html_pre_acts[n->tok])(h, n); +} + +static void +roff_html_pre_br(ROFF_HTML_ARGS) +{ + struct tag *t; + + t = print_otag(h, TAG_DIV, ""); + print_text(h, "\\~"); /* So the div isn't empty. */ + print_tagq(h, t); +} + +static void +roff_html_pre_ce(ROFF_HTML_ARGS) +{ + for (n = n->child->next; n != NULL; n = n->next) { + if (n->type == ROFFT_TEXT) { + if (n->flags & NODE_LINE) + roff_html_pre_br(h, n); + print_text(h, n->string); + } else + roff_html_pre(h, n); + } + roff_html_pre_br(h, n); +} + +static void +roff_html_pre_sp(ROFF_HTML_ARGS) +{ + struct roffsu su; + + SCALE_VS_INIT(&su, 1); + if ((n = n->child) != NULL) { + if (a2roffsu(n->string, &su, SCALE_VS) == NULL) + su.scale = 1.0; + else if (su.scale < 0.0) + su.scale = 0.0; + } + print_otag(h, TAG_DIV, "suh", &su); + print_text(h, "\\~"); /* So the div isn't empty. */ +} diff --git a/usr/src/cmd/mandoc/roff_int.h b/usr/src/cmd/mandoc/roff_int.h index 5567b758de..48996dce53 100644 --- a/usr/src/cmd/mandoc/roff_int.h +++ b/usr/src/cmd/mandoc/roff_int.h @@ -1,4 +1,4 @@ -/* $Id: roff_int.h,v 1.7 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: roff_int.h,v 1.9 2017/07/08 17:52:50 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -25,8 +25,6 @@ void roff_elem_alloc(struct roff_man *, int, int, int); struct roff_node *roff_block_alloc(struct roff_man *, int, int, int); struct roff_node *roff_head_alloc(struct roff_man *, int, int, int); struct roff_node *roff_body_alloc(struct roff_man *, int, int, int); -void roff_addeqn(struct roff_man *, const struct eqn *); -void roff_addtbl(struct roff_man *, const struct tbl_span *); void roff_node_unlink(struct roff_man *, struct roff_node *); void roff_node_free(struct roff_node *); void roff_node_delete(struct roff_man *, struct roff_node *); diff --git a/usr/src/cmd/mandoc/roff_term.c b/usr/src/cmd/mandoc/roff_term.c new file mode 100644 index 0000000000..b5ec764963 --- /dev/null +++ b/usr/src/cmd/mandoc/roff_term.c @@ -0,0 +1,248 @@ +/* $Id: roff_term.c,v 1.14 2017/06/24 14:38:33 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stddef.h> + +#include "mandoc.h" +#include "roff.h" +#include "out.h" +#include "term.h" + +#define ROFF_TERM_ARGS struct termp *p, const struct roff_node *n + +typedef void (*roff_term_pre_fp)(ROFF_TERM_ARGS); + +static void roff_term_pre_br(ROFF_TERM_ARGS); +static void roff_term_pre_ce(ROFF_TERM_ARGS); +static void roff_term_pre_ft(ROFF_TERM_ARGS); +static void roff_term_pre_ll(ROFF_TERM_ARGS); +static void roff_term_pre_mc(ROFF_TERM_ARGS); +static void roff_term_pre_po(ROFF_TERM_ARGS); +static void roff_term_pre_sp(ROFF_TERM_ARGS); +static void roff_term_pre_ta(ROFF_TERM_ARGS); +static void roff_term_pre_ti(ROFF_TERM_ARGS); + +static const roff_term_pre_fp roff_term_pre_acts[ROFF_MAX] = { + roff_term_pre_br, /* br */ + roff_term_pre_ce, /* ce */ + roff_term_pre_ft, /* ft */ + roff_term_pre_ll, /* ll */ + roff_term_pre_mc, /* mc */ + roff_term_pre_po, /* po */ + roff_term_pre_ce, /* rj */ + roff_term_pre_sp, /* sp */ + roff_term_pre_ta, /* ta */ + roff_term_pre_ti, /* ti */ +}; + + +void +roff_term_pre(struct termp *p, const struct roff_node *n) +{ + assert(n->tok < ROFF_MAX); + (*roff_term_pre_acts[n->tok])(p, n); +} + +static void +roff_term_pre_br(ROFF_TERM_ARGS) +{ + term_newln(p); + if (p->flags & TERMP_BRIND) { + p->tcol->offset = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin; + p->flags &= ~(TERMP_NOBREAK | TERMP_BRIND); + } +} + +static void +roff_term_pre_ce(ROFF_TERM_ARGS) +{ + const struct roff_node *nc1, *nc2; + size_t len, lm; + + roff_term_pre_br(p, n); + lm = p->tcol->offset; + nc1 = n->child->next; + while (nc1 != NULL) { + nc2 = nc1; + len = 0; + do { + if (nc2->type == ROFFT_TEXT) { + if (len) + len++; + len += term_strlen(p, nc2->string); + } + nc2 = nc2->next; + } while (nc2 != NULL && (nc2->type != ROFFT_TEXT || + (nc2->flags & NODE_LINE) == 0)); + p->tcol->offset = len >= p->tcol->rmargin ? 0 : + lm + len >= p->tcol->rmargin ? p->tcol->rmargin - len : + n->tok == ROFF_rj ? p->tcol->rmargin - len : + (lm + p->tcol->rmargin - len) / 2; + while (nc1 != nc2) { + if (nc1->type == ROFFT_TEXT) + term_word(p, nc1->string); + else + roff_term_pre(p, nc1); + nc1 = nc1->next; + } + p->flags |= TERMP_NOSPACE; + term_flushln(p); + } + p->tcol->offset = lm; +} + +static void +roff_term_pre_ft(ROFF_TERM_ARGS) +{ + switch (*n->child->string) { + case '4': + case '3': + case 'B': + term_fontrepl(p, TERMFONT_BOLD); + break; + case '2': + case 'I': + term_fontrepl(p, TERMFONT_UNDER); + break; + case 'P': + term_fontlast(p); + break; + case '1': + case 'C': + case 'R': + term_fontrepl(p, TERMFONT_NONE); + break; + default: + break; + } +} + +static void +roff_term_pre_ll(ROFF_TERM_ARGS) +{ + term_setwidth(p, n->child != NULL ? n->child->string : NULL); +} + +static void +roff_term_pre_mc(ROFF_TERM_ARGS) +{ + if (p->col) { + p->flags |= TERMP_NOBREAK; + term_flushln(p); + p->flags &= ~(TERMP_NOBREAK | TERMP_NOSPACE); + } + if (n->child != NULL) { + p->mc = n->child->string; + p->flags |= TERMP_NEWMC; + } else + p->flags |= TERMP_ENDMC; +} + +static void +roff_term_pre_po(ROFF_TERM_ARGS) +{ + struct roffsu su; + static int po, polast; + int ponew; + + if (n->child != NULL && + a2roffsu(n->child->string, &su, SCALE_EM) != NULL) { + ponew = term_hen(p, &su); + if (*n->child->string == '+' || + *n->child->string == '-') + ponew += po; + } else + ponew = polast; + polast = po; + po = ponew; + + ponew = po - polast + (int)p->tcol->offset; + p->tcol->offset = ponew > 0 ? ponew : 0; +} + +static void +roff_term_pre_sp(ROFF_TERM_ARGS) +{ + struct roffsu su; + int len; + + if (n->child != NULL) { + if (a2roffsu(n->child->string, &su, SCALE_VS) == NULL) + su.scale = 1.0; + len = term_vspan(p, &su); + } else + len = 1; + + if (len < 0) + p->skipvsp -= len; + else + while (len--) + term_vspace(p); + + roff_term_pre_br(p, n); +} + +static void +roff_term_pre_ta(ROFF_TERM_ARGS) +{ + term_tab_set(p, NULL); + for (n = n->child; n != NULL; n = n->next) + term_tab_set(p, n->string); +} + +static void +roff_term_pre_ti(ROFF_TERM_ARGS) +{ + struct roffsu su; + const char *cp; + int len, sign; + + roff_term_pre_br(p, n); + + if (n->child == NULL) + return; + cp = n->child->string; + if (*cp == '+') { + sign = 1; + cp++; + } else if (*cp == '-') { + sign = -1; + cp++; + } else + sign = 0; + + if (a2roffsu(cp, &su, SCALE_EM) == NULL) + return; + len = term_hen(p, &su); + + if (sign == 0) { + p->ti = len - p->tcol->offset; + p->tcol->offset = len; + } else if (sign == 1) { + p->ti = len; + p->tcol->offset += len; + } else if ((size_t)len < p->tcol->offset) { + p->ti = -len; + p->tcol->offset -= len; + } else { + p->ti = -p->tcol->offset; + p->tcol->offset = 0; + } +} diff --git a/usr/src/cmd/mandoc/roff_validate.c b/usr/src/cmd/mandoc/roff_validate.c new file mode 100644 index 0000000000..801e931485 --- /dev/null +++ b/usr/src/cmd/mandoc/roff_validate.c @@ -0,0 +1,97 @@ +/* $Id: roff_validate.c,v 1.9 2017/06/14 22:51:25 schwarze Exp $ */ +/* + * Copyright (c) 2010, 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <assert.h> +#include <stddef.h> + +#include "mandoc.h" +#include "roff.h" +#include "libmandoc.h" +#include "roff_int.h" + +#define ROFF_VALID_ARGS struct roff_man *man, struct roff_node *n + +typedef void (*roff_valid_fp)(ROFF_VALID_ARGS); + +static void roff_valid_ft(ROFF_VALID_ARGS); + +static const roff_valid_fp roff_valids[ROFF_MAX] = { + NULL, /* br */ + NULL, /* ce */ + roff_valid_ft, /* ft */ + NULL, /* ll */ + NULL, /* mc */ + NULL, /* po */ + NULL, /* rj */ + NULL, /* sp */ + NULL, /* ta */ + NULL, /* ti */ +}; + + +void +roff_validate(struct roff_man *man) +{ + struct roff_node *n; + + n = man->last; + assert(n->tok < ROFF_MAX); + if (roff_valids[n->tok] != NULL) + (*roff_valids[n->tok])(man, n); +} + +static void +roff_valid_ft(ROFF_VALID_ARGS) +{ + char *cp; + + if (n->child == NULL) { + man->next = ROFF_NEXT_CHILD; + roff_word_alloc(man, n->line, n->pos, "P"); + man->last = n; + return; + } + + cp = n->child->string; + switch (*cp) { + case '1': + case '2': + case '3': + case '4': + case 'I': + case 'P': + case 'R': + if (cp[1] == '\0') + return; + break; + case 'B': + if (cp[1] == '\0' || (cp[1] == 'I' && cp[2] == '\0')) + return; + break; + case 'C': + if (cp[1] == 'W' && cp[2] == '\0') + return; + break; + default: + break; + } + + mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, + n->line, n->pos, "ft %s", cp); + roff_node_delete(man, n); +} diff --git a/usr/src/cmd/mandoc/st.c b/usr/src/cmd/mandoc/st.c index 02868f08d2..d166566ece 100644 --- a/usr/src/cmd/mandoc/st.c +++ b/usr/src/cmd/mandoc/st.c @@ -1,4 +1,4 @@ -/* $Id: st.c,v 1.13 2015/10/06 18:32:20 schwarze Exp $ */ +/* $Id: st.c,v 1.14 2017/06/24 14:38:33 schwarze Exp $ */ /* * Copyright (c) 2009 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -20,6 +20,7 @@ #include <string.h> +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "libmdoc.h" diff --git a/usr/src/cmd/mandoc/st.in b/usr/src/cmd/mandoc/st.in index e70680f3ab..6087c768fb 100644 --- a/usr/src/cmd/mandoc/st.in +++ b/usr/src/cmd/mandoc/st.in @@ -1,4 +1,4 @@ -/* $Id: st.in,v 1.28 2015/02/17 20:37:17 schwarze Exp $ */ +/* $Id: st.in,v 1.29 2017/06/24 13:49:29 schwarze Exp $ */ /* * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> * @@ -34,7 +34,6 @@ LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(LqPOSIX.1\\(Rq)") LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(LqPOSIX.1\\(Rq)") LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(LqPOSIX.1\\(Rq)") LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(LqPOSIX.1\\(Rq)") -LINE("-p1003.1-2013", "IEEE Std 1003.1-2008/Cor 1-2013 (\\(LqPOSIX.1\\(Rq)") LINE("-p1003.1", "IEEE Std 1003.1 (\\(LqPOSIX.1\\(Rq)") LINE("-p1003.1b", "IEEE Std 1003.1b (\\(LqPOSIX.1b\\(Rq)") LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(LqPOSIX.1b\\(Rq)") diff --git a/usr/src/cmd/mandoc/tbl.c b/usr/src/cmd/mandoc/tbl.c index ed6fbd876a..3fb8e52a4c 100644 --- a/usr/src/cmd/mandoc/tbl.c +++ b/usr/src/cmd/mandoc/tbl.c @@ -1,4 +1,4 @@ -/* $Id: tbl.c,v 1.40 2015/10/06 18:32:20 schwarze Exp $ */ +/* $Id: tbl.c,v 1.42 2017/07/08 17:52:50 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> @@ -31,7 +31,7 @@ #include "libroff.h" -enum rofferr +void tbl_read(struct tbl_node *tbl, int ln, const char *p, int pos) { const char *cp; @@ -66,7 +66,7 @@ tbl_read(struct tbl_node *tbl, int ln, const char *p, int pos) if (*cp == ';') { tbl_option(tbl, ln, p, &pos); if (p[pos] == '\0') - return ROFF_IGN; + return; } } @@ -75,15 +75,14 @@ tbl_read(struct tbl_node *tbl, int ln, const char *p, int pos) switch (tbl->part) { case TBL_PART_LAYOUT: tbl_layout(tbl, ln, p, pos); - return ROFF_IGN; + break; case TBL_PART_CDATA: - return tbl_cdata(tbl, ln, p, pos) ? ROFF_TBL : ROFF_IGN; + tbl_cdata(tbl, ln, p, pos); + break; default: + tbl_data(tbl, ln, p, pos); break; } - - tbl_data(tbl, ln, p, pos); - return ROFF_TBL; } struct tbl_node * @@ -114,6 +113,7 @@ tbl_free(struct tbl_node *tbl) while (rp->first != NULL) { cp = rp->first; rp->first = cp->next; + free(cp->wstr); free(cp); } free(rp); @@ -159,14 +159,10 @@ tbl_span(struct tbl_node *tbl) } int -tbl_end(struct tbl_node **tblp) +tbl_end(struct tbl_node *tbl) { - struct tbl_node *tbl; struct tbl_span *sp; - tbl = *tblp; - *tblp = NULL; - if (tbl->part == TBL_PART_CDATA) mandoc_msg(MANDOCERR_TBLDATA_BLK, tbl->parse, tbl->line, tbl->pos, "TE"); diff --git a/usr/src/cmd/mandoc/tbl_data.c b/usr/src/cmd/mandoc/tbl_data.c index 40b756a05e..ae1906ef73 100644 --- a/usr/src/cmd/mandoc/tbl_data.c +++ b/usr/src/cmd/mandoc/tbl_data.c @@ -1,7 +1,7 @@ -/* $Id: tbl_data.c,v 1.41 2015/10/06 18:32:20 schwarze Exp $ */ +/* $Id: tbl_data.c,v 1.45 2017/07/08 17:52:50 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -51,17 +51,26 @@ getdata(struct tbl_node *tbl, struct tbl_span *dp, cp = cp->next; /* - * Stop processing when we reach the end of the available layout - * cells. This means that we have extra input. + * If the current layout row is out of cells, allocate + * a new cell if another row of the table has at least + * this number of columns, or discard the input if we + * are beyond the last column of the table as a whole. */ if (cp == NULL) { - mandoc_msg(MANDOCERR_TBLDATA_EXTRA, tbl->parse, - ln, *pos, p + *pos); - /* Skip to the end... */ - while (p[*pos]) - (*pos)++; - return; + if (dp->layout->last->col + 1 < dp->opts->cols) { + cp = mandoc_calloc(1, sizeof(*cp)); + cp->pos = TBL_CELL_LEFT; + dp->layout->last->next = cp; + cp->col = dp->layout->last->col + 1; + dp->layout->last = cp; + } else { + mandoc_msg(MANDOCERR_TBLDATA_EXTRA, tbl->parse, + ln, *pos, p + *pos); + while (p[*pos]) + (*pos)++; + return; + } } dat = mandoc_calloc(1, sizeof(*dat)); @@ -119,7 +128,7 @@ getdata(struct tbl_node *tbl, struct tbl_span *dp, tbl->parse, ln, sv, dat->string); } -int +void tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) { struct tbl_dat *dat; @@ -134,16 +143,17 @@ tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) pos++; while (p[pos] != '\0') getdata(tbl, tbl->last_span, ln, p, &pos); - return 1; + return; } else if (p[pos] == '\0') { tbl->part = TBL_PART_DATA; - return 1; + return; } /* Fallthrough: T} is part of a word. */ } dat->pos = TBL_DATA_DATA; + dat->block = 1; if (dat->string != NULL) { sz = strlen(p + pos) + strlen(dat->string) + 2; @@ -156,8 +166,6 @@ tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) if (dat->layout->pos == TBL_CELL_DOWN) mandoc_msg(MANDOCERR_TBLDATA_SPAN, tbl->parse, ln, pos, dat->string); - - return 0; } static struct tbl_span * @@ -184,58 +192,50 @@ newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) void tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos) { - struct tbl_span *dp; struct tbl_row *rp; + struct tbl_cell *cp; + struct tbl_span *sp; - /* - * Choose a layout row: take the one following the last parsed - * span's. If that doesn't exist, use the last parsed span's. - * If there's no last parsed span, use the first row. Lastly, - * if the last span was a horizontal line, use the same layout - * (it doesn't "consume" the layout). - */ - - if (tbl->last_span != NULL) { - if (tbl->last_span->pos == TBL_SPAN_DATA) { - for (rp = tbl->last_span->layout->next; - rp != NULL && rp->first != NULL; - rp = rp->next) { - switch (rp->first->pos) { - case TBL_CELL_HORIZ: - dp = newspan(tbl, ln, rp); - dp->pos = TBL_SPAN_HORIZ; - continue; - case TBL_CELL_DHORIZ: - dp = newspan(tbl, ln, rp); - dp->pos = TBL_SPAN_DHORIZ; - continue; - default: - break; - } - break; - } - } else - rp = tbl->last_span->layout; - - if (rp == NULL) - rp = tbl->last_span->layout; - } else - rp = tbl->first_row; - - assert(rp); + rp = (sp = tbl->last_span) == NULL ? tbl->first_row : + sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ? + sp->layout->next : sp->layout; - dp = newspan(tbl, ln, rp); + assert(rp != NULL); if ( ! strcmp(p, "_")) { - dp->pos = TBL_SPAN_HORIZ; + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_HORIZ; return; } else if ( ! strcmp(p, "=")) { - dp->pos = TBL_SPAN_DHORIZ; + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DHORIZ; return; } - dp->pos = TBL_SPAN_DATA; + /* + * If the layout row contains nothing but horizontal lines, + * allocate an empty span for it and assign the current span + * to the next layout row accepting data. + */ + + while (rp->next != NULL) { + if (rp->last->col + 1 < tbl->opts.cols) + break; + for (cp = rp->first; cp != NULL; cp = cp->next) + if (cp->pos != TBL_CELL_HORIZ && + cp->pos != TBL_CELL_DHORIZ) + break; + if (cp != NULL) + break; + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DATA; + rp = rp->next; + } + + /* Process a real data row. */ + sp = newspan(tbl, ln, rp); + sp->pos = TBL_SPAN_DATA; while (p[pos] != '\0') - getdata(tbl, dp, ln, p, &pos); + getdata(tbl, sp, ln, p, &pos); } diff --git a/usr/src/cmd/mandoc/tbl_html.c b/usr/src/cmd/mandoc/tbl_html.c index 4dff29370a..d59f1635d0 100644 --- a/usr/src/cmd/mandoc/tbl_html.c +++ b/usr/src/cmd/mandoc/tbl_html.c @@ -1,4 +1,4 @@ -/* $Id: tbl_html.c,v 1.20 2017/02/05 18:15:39 schwarze Exp $ */ +/* $Id: tbl_html.c,v 1.23 2017/07/31 16:14:10 schwarze Exp $ */ /* * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -31,22 +31,51 @@ static void html_tblopen(struct html *, const struct tbl_span *); static size_t html_tbl_len(size_t, void *); static size_t html_tbl_strlen(const char *, void *); +static size_t html_tbl_sulen(const struct roffsu *, void *); static size_t html_tbl_len(size_t sz, void *arg) { - return sz; } static size_t html_tbl_strlen(const char *p, void *arg) { - return strlen(p); } +static size_t +html_tbl_sulen(const struct roffsu *su, void *arg) +{ + if (su->scale < 0.0) + return 0; + + switch (su->unit) { + case SCALE_FS: /* 2^16 basic units */ + return su->scale * 65536.0 / 24.0; + case SCALE_IN: /* 10 characters per inch */ + return su->scale * 10.0; + case SCALE_CM: /* 2.54 cm per inch */ + return su->scale * 10.0 / 2.54; + case SCALE_PC: /* 6 pica per inch */ + case SCALE_VS: + return su->scale * 10.0 / 6.0; + case SCALE_EN: + case SCALE_EM: + return su->scale; + case SCALE_PT: /* 12 points per pica */ + return su->scale * 10.0 / 6.0 / 12.0; + case SCALE_BU: /* 24 basic units per character */ + return su->scale / 24.0; + case SCALE_MM: /* 1/1000 inch */ + return su->scale / 100.0; + default: + abort(); + } +} + static void html_tblopen(struct html *h, const struct tbl_span *sp) { @@ -56,7 +85,8 @@ html_tblopen(struct html *h, const struct tbl_span *sp) if (h->tbl.cols == NULL) { h->tbl.len = html_tbl_len; h->tbl.slen = html_tbl_strlen; - tblcalc(&h->tbl, sp, 0); + h->tbl.sulen = html_tbl_sulen; + tblcalc(&h->tbl, sp, 0, 0); } assert(NULL == h->tblt); diff --git a/usr/src/cmd/mandoc/tbl_layout.c b/usr/src/cmd/mandoc/tbl_layout.c index c0eafbddbf..42fc0e8296 100644 --- a/usr/src/cmd/mandoc/tbl_layout.c +++ b/usr/src/cmd/mandoc/tbl_layout.c @@ -1,7 +1,7 @@ -/* $Id: tbl_layout.c,v 1.41 2015/10/12 00:08:16 schwarze Exp $ */ +/* $Id: tbl_layout.c,v 1.44 2017/06/27 18:25:02 schwarze Exp $ */ /* * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2012, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +20,7 @@ #include <sys/types.h> #include <ctype.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <time.h> @@ -62,6 +63,7 @@ mods(struct tbl_node *tbl, struct tbl_cell *cp, int ln, const char *p, int *pos) { char *endptr; + size_t sz; mod: while (p[*pos] == ' ' || p[*pos] == '\t') @@ -127,7 +129,22 @@ mod: case 'u': cp->flags |= TBL_CELL_UP; goto mod; - case 'w': /* XXX for now, ignore minimal column width */ + case 'w': + sz = 0; + if (p[*pos] == '(') { + (*pos)++; + while (p[*pos + sz] != '\0' && p[*pos + sz] != ')') + sz++; + } else + while (isdigit((unsigned char)p[*pos + sz])) + sz++; + if (sz) { + free(cp->wstr); + cp->wstr = mandoc_strndup(p + *pos, sz); + *pos += sz; + if (p[*pos] == ')') + (*pos)++; + } goto mod; case 'x': cp->flags |= TBL_CELL_WMAX; @@ -282,6 +299,8 @@ tbl_layout(struct tbl_node *tbl, int ln, const char *p, int pos) tbl->parse, ln, pos, NULL); cell_alloc(tbl, tbl->first_row, TBL_CELL_LEFT); + if (tbl->opts.lvert < tbl->first_row->vert) + tbl->opts.lvert = tbl->first_row->vert; return; } @@ -339,6 +358,7 @@ cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos) struct tbl_cell *p, *pp; p = mandoc_calloc(1, sizeof(*p)); + p->spacing = SIZE_MAX; p->pos = pos; if ((pp = rp->last) != NULL) { diff --git a/usr/src/cmd/mandoc/tbl_term.c b/usr/src/cmd/mandoc/tbl_term.c index eceaa4b60f..c154a0e9b9 100644 --- a/usr/src/cmd/mandoc/tbl_term.c +++ b/usr/src/cmd/mandoc/tbl_term.c @@ -1,7 +1,7 @@ -/* $Id: tbl_term.c,v 1.43 2015/10/12 00:08:16 schwarze Exp $ */ +/* $Id: tbl_term.c,v 1.57 2017/07/31 16:14:10 schwarze Exp $ */ /* * Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011, 2012, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011,2012,2014,2015,2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -28,10 +28,15 @@ #include "out.h" #include "term.h" +#define IS_HORIZ(cp) ((cp)->pos == TBL_CELL_HORIZ || \ + (cp)->pos == TBL_CELL_DHORIZ) + static size_t term_tbl_len(size_t, void *); static size_t term_tbl_strlen(const char *, void *); +static size_t term_tbl_sulen(const struct roffsu *, void *); static void tbl_char(struct termp *, char, size_t); static void tbl_data(struct termp *, const struct tbl_opts *, + const struct tbl_cell *, const struct tbl_dat *, const struct roffcol *); static void tbl_literal(struct termp *, const struct tbl_dat *, @@ -44,37 +49,39 @@ static void tbl_word(struct termp *, const struct tbl_dat *); static size_t -term_tbl_strlen(const char *p, void *arg) +term_tbl_sulen(const struct roffsu *su, void *arg) { + int i; + i = term_hen((const struct termp *)arg, su); + return i > 0 ? i : 0; +} + +static size_t +term_tbl_strlen(const char *p, void *arg) +{ return term_strlen((const struct termp *)arg, p); } static size_t term_tbl_len(size_t sz, void *arg) { - return term_len((const struct termp *)arg, sz); } void term_tbl(struct termp *tp, const struct tbl_span *sp) { - const struct tbl_cell *cp; + const struct tbl_cell *cp, *cpn, *cpp; const struct tbl_dat *dp; static size_t offset; - size_t rmargin, maxrmargin, tsz; - int ic, horiz, spans, vert; - - rmargin = tp->rmargin; - maxrmargin = tp->maxrmargin; - - tp->rmargin = tp->maxrmargin = TERM_MAXMARGIN; + size_t coloff, tsz; + int ic, horiz, spans, vert, more; + char fc; /* Inhibit printing of spaces: we do padding ourselves. */ - tp->flags |= TERMP_NONOSPACE; - tp->flags |= TERMP_NOSPACE; + tp->flags |= TERMP_NOSPACE | TERMP_NONOSPACE; /* * The first time we're invoked for a given table block, @@ -84,165 +91,413 @@ term_tbl(struct termp *tp, const struct tbl_span *sp) if (tp->tbl.cols == NULL) { tp->tbl.len = term_tbl_len; tp->tbl.slen = term_tbl_strlen; + tp->tbl.sulen = term_tbl_sulen; tp->tbl.arg = tp; - tblcalc(&tp->tbl, sp, rmargin - tp->offset); + tblcalc(&tp->tbl, sp, tp->tcol->offset, tp->tcol->rmargin); + + /* Tables leak .ta settings to subsequent text. */ + + term_tab_set(tp, NULL); + coloff = sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) || + sp->opts->lvert; + for (ic = 0; ic < sp->opts->cols; ic++) { + coloff += tp->tbl.cols[ic].width; + term_tab_iset(coloff); + coloff += tp->tbl.cols[ic].spacing; + } /* Center the table as a whole. */ - offset = tp->offset; + offset = tp->tcol->offset; if (sp->opts->opts & TBL_OPT_CENTRE) { tsz = sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) ? 2 : !!sp->opts->lvert + !!sp->opts->rvert; - for (ic = 0; ic < sp->opts->cols; ic++) - tsz += tp->tbl.cols[ic].width + 3; - tsz -= 3; - if (offset + tsz > rmargin) + for (ic = 0; ic + 1 < sp->opts->cols; ic++) + tsz += tp->tbl.cols[ic].width + + tp->tbl.cols[ic].spacing; + if (sp->opts->cols) + tsz += tp->tbl.cols[sp->opts->cols - 1].width; + if (offset + tsz > tp->tcol->rmargin) tsz -= 1; - tp->offset = (offset + rmargin > tsz) ? - (offset + rmargin - tsz) / 2 : 0; + tp->tcol->offset = offset + tp->tcol->rmargin > tsz ? + (offset + tp->tcol->rmargin - tsz) / 2 : 0; } /* Horizontal frame at the start of boxed tables. */ if (sp->opts->opts & TBL_OPT_DBOX) - tbl_hrule(tp, sp, 2); + tbl_hrule(tp, sp, 3); if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) - tbl_hrule(tp, sp, 1); + tbl_hrule(tp, sp, 2); } - /* Vertical frame at the start of each row. */ + /* Set up the columns. */ - horiz = sp->pos == TBL_SPAN_HORIZ || sp->pos == TBL_SPAN_DHORIZ; + tp->flags |= TERMP_MULTICOL; + horiz = 0; + switch (sp->pos) { + case TBL_SPAN_HORIZ: + case TBL_SPAN_DHORIZ: + horiz = 1; + term_setcol(tp, 1); + break; + case TBL_SPAN_DATA: + term_setcol(tp, sp->opts->cols + 2); + coloff = tp->tcol->offset; - if (sp->layout->vert || - (sp->prev != NULL && sp->prev->layout->vert) || - sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX)) - term_word(tp, horiz ? "+" : "|"); - else if (sp->opts->lvert) - tbl_char(tp, horiz ? '-' : ASCII_NBRSP, 1); + /* Set up a column for a left vertical frame. */ - /* - * Now print the actual data itself depending on the span type. - * Match data cells to column numbers. - */ + if (sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX) || + sp->opts->lvert) + coloff++; + tp->tcol->rmargin = coloff; + + /* Set up the data columns. */ - if (sp->pos == TBL_SPAN_DATA) { - cp = sp->layout->first; dp = sp->first; spans = 0; for (ic = 0; ic < sp->opts->cols; ic++) { + if (spans == 0) { + tp->tcol++; + tp->tcol->offset = coloff; + } + coloff += tp->tbl.cols[ic].width; + tp->tcol->rmargin = coloff; + if (ic + 1 < sp->opts->cols) + coloff += tp->tbl.cols[ic].spacing; + if (spans) { + spans--; + continue; + } + if (dp == NULL) + continue; + spans = dp->spans; + if (ic || sp->layout->first->pos != TBL_CELL_SPAN) + dp = dp->next; + } - /* - * Remeber whether we need a vertical bar - * after this cell. - */ + /* Set up a column for a right vertical frame. */ - vert = cp == NULL ? 0 : cp->vert; + tp->tcol++; + tp->tcol->offset = coloff + 1; + tp->tcol->rmargin = tp->maxrmargin; - /* - * Print the data and advance to the next cell. - */ + /* Spans may have reduced the number of columns. */ - if (spans == 0) { - tbl_data(tp, sp->opts, dp, tp->tbl.cols + ic); + tp->lasttcol = tp->tcol - tp->tcols; + + /* Fill the buffers for all data columns. */ + + tp->tcol = tp->tcols; + cp = cpn = sp->layout->first; + dp = sp->first; + spans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + if (cpn != NULL) { + cp = cpn; + cpn = cpn->next; + } + if (spans) { + spans--; + continue; + } + tp->tcol++; + tp->col = 0; + tbl_data(tp, sp->opts, cp, dp, tp->tbl.cols + ic); + if (dp == NULL) + continue; + spans = dp->spans; + if (cp->pos != TBL_CELL_SPAN) + dp = dp->next; + } + break; + } + + do { + /* Print the vertical frame at the start of each row. */ + + tp->tcol = tp->tcols; + fc = '\0'; + if (sp->layout->vert || + (sp->next != NULL && sp->next->layout->vert && + sp->next->pos == TBL_SPAN_DATA) || + (sp->prev != NULL && sp->prev->layout->vert && + (horiz || (IS_HORIZ(sp->layout->first) && + !IS_HORIZ(sp->prev->layout->first)))) || + sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX)) + fc = horiz || IS_HORIZ(sp->layout->first) ? '+' : '|'; + else if (horiz && sp->opts->lvert) + fc = '-'; + if (fc != '\0') { + (*tp->advance)(tp, tp->tcols->offset); + (*tp->letter)(tp, fc); + tp->viscol = tp->tcol->offset + 1; + } + + /* Print the data cells. */ + + more = 0; + if (horiz) { + tbl_hrule(tp, sp, 0); + term_flushln(tp); + } else { + cp = sp->layout->first; + cpn = sp->next == NULL ? NULL : + sp->next->layout->first; + cpp = sp->prev == NULL ? NULL : + sp->prev->layout->first; + dp = sp->first; + spans = 0; + for (ic = 0; ic < sp->opts->cols; ic++) { + + /* + * Figure out whether to print a + * vertical line after this cell + * and advance to next layout cell. + */ + + if (cp != NULL) { + vert = cp->vert; + switch (cp->pos) { + case TBL_CELL_HORIZ: + fc = '-'; + break; + case TBL_CELL_DHORIZ: + fc = '='; + break; + default: + fc = ' '; + break; + } + } else { + vert = 0; + fc = ' '; + } + if (cpp != NULL) { + if (vert == 0 && + cp != NULL && + ((IS_HORIZ(cp) && + !IS_HORIZ(cpp)) || + (cp->next != NULL && + cpp->next != NULL && + IS_HORIZ(cp->next) && + !IS_HORIZ(cpp->next)))) + vert = cpp->vert; + cpp = cpp->next; + } + if (vert == 0 && + sp->opts->opts & TBL_OPT_ALLBOX) + vert = 1; + if (cpn != NULL) { + if (vert == 0) + vert = cpn->vert; + cpn = cpn->next; + } + if (cp != NULL) + cp = cp->next; + + /* + * Skip later cells in a span, + * figure out whether to start a span, + * and advance to next data cell. + */ + + if (spans) { + spans--; + continue; + } if (dp != NULL) { spans = dp->spans; - dp = dp->next; + if (ic || sp->layout->first->pos + != TBL_CELL_SPAN) + dp = dp->next; } - } else - spans--; - if (cp != NULL) - cp = cp->next; - /* - * Separate columns, except in the middle - * of spans and after the last cell. - */ + /* + * Print one line of text in the cell + * and remember whether there is more. + */ + + tp->tcol++; + if (tp->tcol->col < tp->tcol->lastcol) + term_flushln(tp); + if (tp->tcol->col < tp->tcol->lastcol) + more = 1; + + /* + * Vertical frames between data cells, + * but not after the last column. + */ + + if (fc == ' ' && ((vert == 0 && + (cp == NULL || !IS_HORIZ(cp))) || + tp->tcol + 1 == tp->tcols + tp->lasttcol)) + continue; + + if (tp->viscol < tp->tcol->rmargin) { + (*tp->advance)(tp, tp->tcol->rmargin + - tp->viscol); + tp->viscol = tp->tcol->rmargin; + } + while (tp->viscol < tp->tcol->rmargin + + tp->tbl.cols[ic].spacing / 2) { + (*tp->letter)(tp, fc); + tp->viscol++; + } - if (ic + 1 == sp->opts->cols || spans) - continue; + if (tp->tcol + 1 == tp->tcols + tp->lasttcol) + continue; + + if (fc == ' ' && cp != NULL) { + switch (cp->pos) { + case TBL_CELL_HORIZ: + fc = '-'; + break; + case TBL_CELL_DHORIZ: + fc = '='; + break; + default: + break; + } + } + if (tp->tbl.cols[ic].spacing) { + (*tp->letter)(tp, fc == ' ' ? '|' : + vert ? '+' : fc); + tp->viscol++; + } - tbl_char(tp, ASCII_NBRSP, 1); - if (vert > 0) - tbl_char(tp, '|', vert); - if (vert < 2) - tbl_char(tp, ASCII_NBRSP, 2 - vert); + if (fc != ' ') { + if (cp != NULL && + cp->pos == TBL_CELL_HORIZ) + fc = '-'; + else if (cp != NULL && + cp->pos == TBL_CELL_DHORIZ) + fc = '='; + else + fc = ' '; + } + if (tp->tbl.cols[ic].spacing > 2 && + (vert > 1 || fc != ' ')) { + (*tp->letter)(tp, fc == ' ' ? '|' : + vert > 1 ? '+' : fc); + tp->viscol++; + } + } } - } else if (horiz) - tbl_hrule(tp, sp, 0); - - /* Vertical frame at the end of each row. */ - if (sp->layout->last->vert || - (sp->prev != NULL && sp->prev->layout->last->vert) || - (sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX))) - term_word(tp, horiz ? "+" : " |"); - else if (sp->opts->rvert) - tbl_char(tp, horiz ? '-' : ASCII_NBRSP, 1); - term_flushln(tp); + /* Print the vertical frame at the end of each row. */ + + fc = '\0'; + if ((sp->layout->last->vert && + sp->layout->last->col + 1 == sp->opts->cols) || + (sp->next != NULL && + sp->next->layout->last->vert && + sp->next->layout->last->col + 1 == sp->opts->cols) || + (sp->prev != NULL && + sp->prev->layout->last->vert && + sp->prev->layout->last->col + 1 == sp->opts->cols && + (horiz || (IS_HORIZ(sp->layout->last) && + !IS_HORIZ(sp->prev->layout->last)))) || + (sp->opts->opts & (TBL_OPT_BOX | TBL_OPT_DBOX))) + fc = horiz || IS_HORIZ(sp->layout->last) ? '+' : '|'; + else if (horiz && sp->opts->rvert) + fc = '-'; + if (fc != '\0') { + if (horiz == 0 && (IS_HORIZ(sp->layout->last) == 0 || + sp->layout->last->col + 1 < sp->opts->cols)) { + tp->tcol++; + (*tp->advance)(tp, + tp->tcol->offset > tp->viscol ? + tp->tcol->offset - tp->viscol : 1); + } + (*tp->letter)(tp, fc); + } + (*tp->endline)(tp); + tp->viscol = 0; + } while (more); /* - * If we're the last row, clean up after ourselves: clear the - * existing table configuration and set it to NULL. + * Clean up after this row. If it is the last line + * of the table, print the box line and clean up + * column data; otherwise, print the allbox line. */ + term_setcol(tp, 1); + tp->flags &= ~TERMP_MULTICOL; + tp->tcol->rmargin = tp->maxrmargin; if (sp->next == NULL) { if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) { - tbl_hrule(tp, sp, 1); + tbl_hrule(tp, sp, 2); tp->skipvsp = 1; } if (sp->opts->opts & TBL_OPT_DBOX) { - tbl_hrule(tp, sp, 2); + tbl_hrule(tp, sp, 3); tp->skipvsp = 2; } assert(tp->tbl.cols); free(tp->tbl.cols); tp->tbl.cols = NULL; - tp->offset = offset; - } + tp->tcol->offset = offset; + } else if (horiz == 0 && sp->opts->opts & TBL_OPT_ALLBOX && + (sp->next == NULL || sp->next->pos == TBL_SPAN_DATA || + sp->next->next != NULL)) + tbl_hrule(tp, sp, 1); tp->flags &= ~TERMP_NONOSPACE; - tp->rmargin = rmargin; - tp->maxrmargin = maxrmargin; } /* * Kinds of horizontal rulers: * 0: inside the table (single or double line with crossings) - * 1: inner frame (single line with crossings and ends) - * 2: outer frame (single line without crossings with ends) + * 1: inside the table (single or double line with crossings and ends) + * 2: inner frame (single line with crossings and ends) + * 3: outer frame (single line without crossings with ends) */ static void tbl_hrule(struct termp *tp, const struct tbl_span *sp, int kind) { - const struct tbl_cell *c1, *c2; + const struct tbl_cell *cp, *cpn, *cpp; + const struct roffcol *col; int vert; char line, cross; - line = (kind == 0 && TBL_SPAN_DHORIZ == sp->pos) ? '=' : '-'; - cross = (kind < 2) ? '+' : '-'; + line = (kind < 2 && TBL_SPAN_DHORIZ == sp->pos) ? '=' : '-'; + cross = (kind < 3) ? '+' : '-'; if (kind) term_word(tp, "+"); - c1 = sp->layout->first; - c2 = sp->prev == NULL ? NULL : sp->prev->layout->first; - if (c2 == c1) - c2 = NULL; + cp = sp->layout->first; + cpp = kind || sp->prev == NULL ? NULL : sp->prev->layout->first; + if (cpp == cp) + cpp = NULL; + cpn = kind > 1 || sp->next == NULL ? NULL : sp->next->layout->first; + if (cpn == cp) + cpn = NULL; for (;;) { - tbl_char(tp, line, tp->tbl.cols[c1->col].width + 1); - vert = c1->vert; - if ((c1 = c1->next) == NULL) + col = tp->tbl.cols + cp->col; + tbl_char(tp, line, col->width + col->spacing / 2); + vert = cp->vert; + if ((cp = cp->next) == NULL) break; - if (c2 != NULL) { - if (vert < c2->vert) - vert = c2->vert; - c2 = c2->next; + if (cpp != NULL) { + if (vert < cpp->vert) + vert = cpp->vert; + cpp = cpp->next; } - if (vert) - tbl_char(tp, cross, vert); - if (vert < 2) - tbl_char(tp, line, 2 - vert); + if (cpn != NULL) { + if (vert < cpn->vert) + vert = cpn->vert; + cpn = cpn->next; + } + if (sp->opts->opts & TBL_OPT_ALLBOX && !vert) + vert = 1; + if (col->spacing) + tbl_char(tp, vert ? cross : line, 1); + if (col->spacing > 2) + tbl_char(tp, vert > 1 ? cross : line, 1); + if (col->spacing > 4) + tbl_char(tp, line, (col->spacing - 3) / 2); } if (kind) { term_word(tp, "+"); @@ -252,18 +507,25 @@ tbl_hrule(struct termp *tp, const struct tbl_span *sp, int kind) static void tbl_data(struct termp *tp, const struct tbl_opts *opts, - const struct tbl_dat *dp, - const struct roffcol *col) + const struct tbl_cell *cp, const struct tbl_dat *dp, + const struct roffcol *col) { - - if (dp == NULL) { - tbl_char(tp, ASCII_NBRSP, col->width); + switch (cp->pos) { + case TBL_CELL_HORIZ: + tbl_char(tp, '-', col->width); return; + case TBL_CELL_DHORIZ: + tbl_char(tp, '=', col->width); + return; + default: + break; } + if (dp == NULL) + return; + switch (dp->pos) { case TBL_DATA_NONE: - tbl_char(tp, ASCII_NBRSP, col->width); return; case TBL_DATA_HORIZ: case TBL_DATA_NHORIZ: @@ -277,13 +539,7 @@ tbl_data(struct termp *tp, const struct tbl_opts *opts, break; } - switch (dp->layout->pos) { - case TBL_CELL_HORIZ: - tbl_char(tp, '-', col->width); - break; - case TBL_CELL_DHORIZ: - tbl_char(tp, '=', col->width); - break; + switch (cp->pos) { case TBL_CELL_LONG: case TBL_CELL_CENTRE: case TBL_CELL_LEFT: @@ -294,7 +550,7 @@ tbl_data(struct termp *tp, const struct tbl_opts *opts, tbl_number(tp, opts, dp, col); break; case TBL_CELL_DOWN: - tbl_char(tp, ASCII_NBRSP, col->width); + case TBL_CELL_SPAN: break; default: abort(); diff --git a/usr/src/cmd/mandoc/term.c b/usr/src/cmd/mandoc/term.c index 1217d473ca..f67fcf9d95 100644 --- a/usr/src/cmd/mandoc/term.c +++ b/usr/src/cmd/mandoc/term.c @@ -1,4 +1,4 @@ -/* $Id: term.c,v 1.259 2017/01/08 18:16:58 schwarze Exp $ */ +/* $Id: term.c,v 1.274 2017/07/28 14:25:48 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org> @@ -32,17 +32,31 @@ #include "main.h" static size_t cond_width(const struct termp *, int, int *); -static void adjbuf(struct termp *p, size_t); +static void adjbuf(struct termp_col *, size_t); static void bufferc(struct termp *, char); static void encode(struct termp *, const char *, size_t); static void encode1(struct termp *, int); +static void endline(struct termp *); void -term_free(struct termp *p) +term_setcol(struct termp *p, size_t maxtcol) { + if (maxtcol > p->maxtcol) { + p->tcols = mandoc_recallocarray(p->tcols, + p->maxtcol, maxtcol, sizeof(*p->tcols)); + p->maxtcol = maxtcol; + } + p->lasttcol = maxtcol - 1; + p->tcol = p->tcols; +} - free(p->buf); +void +term_free(struct termp *p) +{ + for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) + free(p->tcol->buf); + free(p->tcols); free(p->fontq); free(p); } @@ -84,69 +98,54 @@ term_end(struct termp *p) * to be broken, start the next line at the right margin instead * of at the offset. Used together with TERMP_NOBREAK for the tags * in various kinds of tagged lists. - * - TERMP_DANGLE: Do not break the output line at the right margin, + * - TERMP_HANG: Do not break the output line at the right margin, * append the next chunk after it even if this one is too long. * To be used together with TERMP_NOBREAK. - * - TERMP_HANG: Like TERMP_DANGLE, and also suppress padding before - * the next chunk if this column is not full. + * - TERMP_NOPAD: Start writing at the current position, + * do not pad with blank characters up to the offset. */ void term_flushln(struct termp *p) { - size_t i; /* current input position in p->buf */ - int ntab; /* number of tabs to prepend */ size_t vis; /* current visual position on output */ size_t vbl; /* number of blanks to prepend to output */ size_t vend; /* end of word visual position on output */ size_t bp; /* visual right border position */ size_t dv; /* temporary for visual pos calculations */ - size_t j; /* temporary loop index for p->buf */ + size_t j; /* temporary loop index for p->tcol->buf */ size_t jhy; /* last hyph before overflow w/r/t j */ size_t maxvis; /* output position of visible boundary */ - - /* - * First, establish the maximum columns of "visible" content. - * This is usually the difference between the right-margin and - * an indentation, but can be, for tagged lists or columns, a - * small set of values. - * - * The following unsigned-signed subtractions look strange, - * but they are actually correct. If the int p->overstep - * is negative, it gets sign extended. Subtracting that - * very large size_t effectively adds a small number to dv. - */ - dv = p->rmargin > p->offset ? p->rmargin - p->offset : 0; - maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; - - if (p->flags & TERMP_NOBREAK) { - dv = p->maxrmargin > p->offset ? - p->maxrmargin - p->offset : 0; - bp = (int)dv > p->overstep ? - dv - (size_t)p->overstep : 0; - } else - bp = maxvis; - - /* - * Calculate the required amount of padding. - */ - vbl = p->offset + p->overstep > p->viscol ? - p->offset + p->overstep - p->viscol : 0; - + int ntab; /* number of tabs to prepend */ + int breakline; /* after this word */ + + vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ? + 0 : p->tcol->offset - p->viscol; + if (p->minbl && vbl < p->minbl) + vbl = p->minbl; + maxvis = p->tcol->rmargin > p->viscol + vbl ? + p->tcol->rmargin - p->viscol - vbl : 0; + bp = !(p->flags & TERMP_NOBREAK) ? maxvis : + p->maxrmargin > p->viscol + vbl ? + p->maxrmargin - p->viscol - vbl : 0; vis = vend = 0; - i = 0; - while (i < p->col) { + if ((p->flags & TERMP_MULTICOL) == 0) + p->tcol->col = 0; + while (p->tcol->col < p->tcol->lastcol) { + /* * Handle literal tab characters: collapse all * subsequent tabs into a single huge set of spaces. */ + ntab = 0; - while (i < p->col && '\t' == p->buf[i]) { - vend = (vis / p->tabwidth + 1) * p->tabwidth; + while (p->tcol->col < p->tcol->lastcol && + p->tcol->buf[p->tcol->col] == '\t') { + vend = term_tab_next(vis); vbl += vend - vis; vis = vend; ntab++; - i++; + p->tcol->col++; } /* @@ -156,84 +155,98 @@ term_flushln(struct termp *p) * space is printed according to regular spacing rules). */ - for (j = i, jhy = 0; j < p->col; j++) { - if (' ' == p->buf[j] || '\t' == p->buf[j]) + jhy = 0; + breakline = 0; + for (j = p->tcol->col; j < p->tcol->lastcol; j++) { + if (p->tcol->buf[j] == '\n') { + if ((p->flags & TERMP_BRIND) == 0) + breakline = 1; + continue; + } + if (p->tcol->buf[j] == ' ' || p->tcol->buf[j] == '\t') break; /* Back over the last printed character. */ - if (8 == p->buf[j]) { + if (p->tcol->buf[j] == '\b') { assert(j); - vend -= (*p->width)(p, p->buf[j - 1]); + vend -= (*p->width)(p, p->tcol->buf[j - 1]); continue; } /* Regular word. */ /* Break at the hyphen point if we overrun. */ if (vend > vis && vend < bp && - (ASCII_HYPH == p->buf[j] || - ASCII_BREAK == p->buf[j])) + (p->tcol->buf[j] == ASCII_HYPH|| + p->tcol->buf[j] == ASCII_BREAK)) jhy = j; /* * Hyphenation now decided, put back a real * hyphen such that we get the correct width. */ - if (ASCII_HYPH == p->buf[j]) - p->buf[j] = '-'; + if (p->tcol->buf[j] == ASCII_HYPH) + p->tcol->buf[j] = '-'; - vend += (*p->width)(p, p->buf[j]); + vend += (*p->width)(p, p->tcol->buf[j]); } /* * Find out whether we would exceed the right margin. * If so, break to the next line. */ - if (vend > bp && 0 == jhy && vis > 0) { + + if (vend > bp && jhy == 0 && vis > 0 && + (p->flags & TERMP_BRNEVER) == 0) { + if (p->flags & TERMP_MULTICOL) + return; + + endline(p); vend -= vis; - (*p->endline)(p); - p->viscol = 0; - if (TERMP_BRIND & p->flags) { - vbl = p->rmargin; - vend += p->rmargin; - vend -= p->offset; - } else - vbl = p->offset; - /* use pending tabs on the new line */ + /* Use pending tabs on the new line. */ - if (0 < ntab) - vbl += ntab * p->tabwidth; + vbl = 0; + while (ntab--) + vbl = term_tab_next(vbl); - /* - * Remove the p->overstep width. - * Again, if p->overstep is negative, - * sign extension does the right thing. - */ + /* Re-establish indentation. */ - bp += (size_t)p->overstep; - p->overstep = 0; + if (p->flags & TERMP_BRIND) + vbl += p->tcol->rmargin; + else + vbl += p->tcol->offset; + maxvis = p->tcol->rmargin > vbl ? + p->tcol->rmargin - vbl : 0; + bp = !(p->flags & TERMP_NOBREAK) ? maxvis : + p->maxrmargin > vbl ? p->maxrmargin - vbl : 0; } - /* Write out the [remaining] word. */ - for ( ; i < p->col; i++) { - if (vend > bp && jhy > 0 && i > jhy) + /* + * Write out the rest of the word. + */ + + for ( ; p->tcol->col < p->tcol->lastcol; p->tcol->col++) { + if (vend > bp && jhy > 0 && p->tcol->col > jhy) break; - if ('\t' == p->buf[i]) + if (p->tcol->buf[p->tcol->col] == '\n') + continue; + if (p->tcol->buf[p->tcol->col] == '\t') break; - if (' ' == p->buf[i]) { - j = i; - while (i < p->col && ' ' == p->buf[i]) - i++; - dv = (i - j) * (*p->width)(p, ' '); + if (p->tcol->buf[p->tcol->col] == ' ') { + j = p->tcol->col; + while (p->tcol->col < p->tcol->lastcol && + p->tcol->buf[p->tcol->col] == ' ') + p->tcol->col++; + dv = (p->tcol->col - j) * (*p->width)(p, ' '); vbl += dv; vend += dv; break; } - if (ASCII_NBRSP == p->buf[i]) { + if (p->tcol->buf[p->tcol->col] == ASCII_NBRSP) { vbl += (*p->width)(p, ' '); continue; } - if (ASCII_BREAK == p->buf[i]) + if (p->tcol->buf[p->tcol->col] == ASCII_BREAK) continue; /* @@ -247,61 +260,83 @@ term_flushln(struct termp *p) vbl = 0; } - (*p->letter)(p, p->buf[i]); - if (8 == p->buf[i]) - p->viscol -= (*p->width)(p, p->buf[i-1]); + (*p->letter)(p, p->tcol->buf[p->tcol->col]); + if (p->tcol->buf[p->tcol->col] == '\b') + p->viscol -= (*p->width)(p, + p->tcol->buf[p->tcol->col - 1]); else - p->viscol += (*p->width)(p, p->buf[i]); + p->viscol += (*p->width)(p, + p->tcol->buf[p->tcol->col]); } vis = vend; + + if (breakline == 0) + continue; + + /* Explicitly requested output line break. */ + + if (p->flags & TERMP_MULTICOL) + return; + + endline(p); + breakline = 0; + vis = vend = 0; + + /* Re-establish indentation. */ + + vbl = p->tcol->offset; + maxvis = p->tcol->rmargin > vbl ? + p->tcol->rmargin - vbl : 0; + bp = !(p->flags & TERMP_NOBREAK) ? maxvis : + p->maxrmargin > vbl ? p->maxrmargin - vbl : 0; } /* * If there was trailing white space, it was not printed; * so reset the cursor position accordingly. */ + if (vis > vbl) vis -= vbl; else vis = 0; - p->col = 0; - p->overstep = 0; - p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE); + p->col = p->tcol->col = p->tcol->lastcol = 0; + p->minbl = p->trailspace; + p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD); - if ( ! (TERMP_NOBREAK & p->flags)) { - p->viscol = 0; - (*p->endline)(p); - return; - } - - if (TERMP_HANG & p->flags) { - p->overstep += (int)(p->offset + vis - p->rmargin + - p->trailspace * (*p->width)(p, ' ')); - - /* - * If we have overstepped the margin, temporarily move - * it to the right and flag the rest of the line to be - * shorter. - * If there is a request to keep the columns together, - * allow negative overstep when the column is not full. - */ - if (p->trailspace && p->overstep < 0) - p->overstep = 0; - return; - - } else if (TERMP_DANGLE & p->flags) + if (p->flags & TERMP_MULTICOL) return; /* Trailing whitespace is significant in some columns. */ + if (vis && vbl && (TERMP_BRTRSP & p->flags)) vis += vbl; /* If the column was overrun, break the line. */ - if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) { - (*p->endline)(p); - p->viscol = 0; + if ((p->flags & TERMP_NOBREAK) == 0 || + ((p->flags & TERMP_HANG) == 0 && + vis + p->trailspace * (*p->width)(p, ' ') > maxvis)) + endline(p); +} + +static void +endline(struct termp *p) +{ + if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) { + p->mc = NULL; + p->flags &= ~TERMP_ENDMC; + } + if (p->mc != NULL) { + if (p->viscol && p->maxrmargin >= p->viscol) + (*p->advance)(p, p->maxrmargin - p->viscol + 1); + p->flags |= TERMP_NOBUF | TERMP_NOSPACE; + term_word(p, p->mc); + p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC); } + p->viscol = 0; + p->minbl = 0; + (*p->endline)(p); } /* @@ -314,7 +349,7 @@ term_newln(struct termp *p) { p->flags |= TERMP_NOSPACE; - if (p->col || p->viscol) + if (p->tcol->lastcol || p->viscol) term_flushln(p); } @@ -330,6 +365,7 @@ term_vspace(struct termp *p) term_newln(p); p->viscol = 0; + p->minbl = 0; if (0 < p->skipvsp) p->skipvsp--; else @@ -397,30 +433,31 @@ term_fontpop(struct termp *p) void term_word(struct termp *p, const char *word) { + struct roffsu su; const char nbrsp[2] = { ASCII_NBRSP, 0 }; const char *seq, *cp; int sz, uc; - size_t ssz; + size_t csz, lsz, ssz; enum mandoc_esc esc; - if ( ! (TERMP_NOSPACE & p->flags)) { - if ( ! (TERMP_KEEP & p->flags)) { - bufferc(p, ' '); - if (TERMP_SENTENCE & p->flags) + if ((p->flags & TERMP_NOBUF) == 0) { + if ((p->flags & TERMP_NOSPACE) == 0) { + if ((p->flags & TERMP_KEEP) == 0) { bufferc(p, ' '); - } else - bufferc(p, ASCII_NBRSP); + if (p->flags & TERMP_SENTENCE) + bufferc(p, ' '); + } else + bufferc(p, ASCII_NBRSP); + } + if (p->flags & TERMP_PREKEEP) + p->flags |= TERMP_KEEP; + if (p->flags & TERMP_NONOSPACE) + p->flags |= TERMP_NOSPACE; + else + p->flags &= ~TERMP_NOSPACE; + p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); + p->skipvsp = 0; } - if (TERMP_PREKEEP & p->flags) - p->flags |= TERMP_KEEP; - - if ( ! (p->flags & TERMP_NONOSPACE)) - p->flags &= ~TERMP_NOSPACE; - else - p->flags |= TERMP_NOSPACE; - - p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); - p->skipvsp = 0; while ('\0' != *word) { if ('\\' != *word) { @@ -479,12 +516,88 @@ term_word(struct termp *p, const char *word) case ESCAPE_FONTPREV: term_fontlast(p); continue; + case ESCAPE_BREAK: + bufferc(p, '\n'); + continue; case ESCAPE_NOSPACE: if (p->flags & TERMP_BACKAFTER) p->flags &= ~TERMP_BACKAFTER; else if (*word == '\0') p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); continue; + case ESCAPE_HORIZ: + if (*seq == '|') { + seq++; + uc = -p->col; + } else + uc = 0; + if (a2roffsu(seq, &su, SCALE_EM) == NULL) + continue; + uc += term_hen(p, &su); + if (uc > 0) + while (uc-- > 0) + bufferc(p, ASCII_NBRSP); + else if (p->col > (size_t)(-uc)) + p->col += uc; + else { + uc += p->col; + p->col = 0; + if (p->tcol->offset > (size_t)(-uc)) { + p->ti += uc; + p->tcol->offset += uc; + } else { + p->ti -= p->tcol->offset; + p->tcol->offset = 0; + } + } + continue; + case ESCAPE_HLINE: + if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL) + continue; + uc = term_hen(p, &su); + if (uc <= 0) { + if (p->tcol->rmargin <= p->tcol->offset) + continue; + lsz = p->tcol->rmargin - p->tcol->offset; + } else + lsz = uc; + if (*cp == seq[-1]) + uc = -1; + else if (*cp == '\\') { + seq = cp + 1; + esc = mandoc_escape(&seq, &cp, &sz); + switch (esc) { + case ESCAPE_UNICODE: + uc = mchars_num2uc(cp + 1, sz - 1); + break; + case ESCAPE_NUMBERED: + uc = mchars_num2char(cp, sz); + break; + case ESCAPE_SPECIAL: + uc = mchars_spec2cp(cp, sz); + break; + default: + uc = -1; + break; + } + } else + uc = *cp; + if (uc < 0x20 || (uc > 0x7E && uc < 0xA0)) + uc = '_'; + if (p->enc == TERMENC_ASCII) { + cp = ascii_uc2str(uc); + csz = term_strlen(p, cp); + ssz = strlen(cp); + } else + csz = (*p->width)(p, uc); + while (lsz >= csz) { + if (p->enc == TERMENC_ASCII) + encode(p, cp, ssz); + else + encode1(p, uc); + lsz -= csz; + } + continue; case ESCAPE_SKIPCHAR: p->flags |= TERMP_BACKAFTER; continue; @@ -504,10 +617,12 @@ term_word(struct termp *p, const char *word) } } /* Trim trailing backspace/blank pair. */ - if (p->col > 2 && - (p->buf[p->col - 1] == ' ' || - p->buf[p->col - 1] == '\t')) - p->col -= 2; + if (p->tcol->lastcol > 2 && + (p->tcol->buf[p->tcol->lastcol - 1] == ' ' || + p->tcol->buf[p->tcol->lastcol - 1] == '\t')) + p->tcol->lastcol -= 2; + if (p->col > p->tcol->lastcol) + p->col = p->tcol->lastcol; continue; default: continue; @@ -532,25 +647,28 @@ term_word(struct termp *p, const char *word) } static void -adjbuf(struct termp *p, size_t sz) +adjbuf(struct termp_col *c, size_t sz) { - - if (0 == p->maxcols) - p->maxcols = 1024; - while (sz >= p->maxcols) - p->maxcols <<= 2; - - p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int)); + if (c->maxcols == 0) + c->maxcols = 1024; + while (c->maxcols <= sz) + c->maxcols <<= 2; + c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf)); } static void bufferc(struct termp *p, char c) { - - if (p->col + 1 >= p->maxcols) - adjbuf(p, p->col + 1); - - p->buf[p->col++] = c; + if (p->flags & TERMP_NOBUF) { + (*p->letter)(p, c); + return; + } + if (p->col + 1 >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 1); + if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) + p->tcol->buf[p->col] = c; + if (p->tcol->lastcol < ++p->col) + p->tcol->lastcol = p->col; } /* @@ -563,31 +681,40 @@ encode1(struct termp *p, int c) { enum termfont f; - if (p->col + 7 >= p->maxcols) - adjbuf(p, p->col + 7); + if (p->flags & TERMP_NOBUF) { + (*p->letter)(p, c); + return; + } + + if (p->col + 7 >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 7); f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? p->fontq[p->fonti] : TERMFONT_NONE; if (p->flags & TERMP_BACKBEFORE) { - if (p->buf[p->col - 1] == ' ' || p->buf[p->col - 1] == '\t') + if (p->tcol->buf[p->col - 1] == ' ' || + p->tcol->buf[p->col - 1] == '\t') p->col--; else - p->buf[p->col++] = 8; + p->tcol->buf[p->col++] = '\b'; p->flags &= ~TERMP_BACKBEFORE; } - if (TERMFONT_UNDER == f || TERMFONT_BI == f) { - p->buf[p->col++] = '_'; - p->buf[p->col++] = 8; + if (f == TERMFONT_UNDER || f == TERMFONT_BI) { + p->tcol->buf[p->col++] = '_'; + p->tcol->buf[p->col++] = '\b'; } - if (TERMFONT_BOLD == f || TERMFONT_BI == f) { - if (ASCII_HYPH == c) - p->buf[p->col++] = '-'; + if (f == TERMFONT_BOLD || f == TERMFONT_BI) { + if (c == ASCII_HYPH) + p->tcol->buf[p->col++] = '-'; else - p->buf[p->col++] = c; - p->buf[p->col++] = 8; + p->tcol->buf[p->col++] = c; + p->tcol->buf[p->col++] = '\b'; } - p->buf[p->col++] = c; + if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) + p->tcol->buf[p->col] = c; + if (p->tcol->lastcol < ++p->col) + p->tcol->lastcol = p->col; if (p->flags & TERMP_BACKAFTER) { p->flags |= TERMP_BACKBEFORE; p->flags &= ~TERMP_BACKAFTER; @@ -599,15 +726,24 @@ encode(struct termp *p, const char *word, size_t sz) { size_t i; - if (p->col + 2 + (sz * 5) >= p->maxcols) - adjbuf(p, p->col + 2 + (sz * 5)); + if (p->flags & TERMP_NOBUF) { + for (i = 0; i < sz; i++) + (*p->letter)(p, word[i]); + return; + } + + if (p->col + 2 + (sz * 5) >= p->tcol->maxcols) + adjbuf(p->tcol, p->col + 2 + (sz * 5)); for (i = 0; i < sz; i++) { if (ASCII_HYPH == word[i] || isgraph((unsigned char)word[i])) encode1(p, word[i]); else { - p->buf[p->col++] = word[i]; + if (p->tcol->lastcol <= p->col || + (word[i] != ' ' && word[i] != ASCII_NBRSP)) + p->tcol->buf[p->col] = word[i]; + p->col++; /* * Postpone the effect of \z while handling @@ -621,6 +757,8 @@ encode(struct termp *p, const char *word, size_t sz) } } } + if (p->tcol->lastcol < p->col) + p->tcol->lastcol = p->col; } void @@ -644,7 +782,7 @@ term_setwidth(struct termp *p, const char *wstr) default: break; } - if (a2roffsu(wstr, &su, SCALE_MAX)) + if (a2roffsu(wstr, &su, SCALE_MAX) != NULL) width = term_hspan(p, &su); else iop = 0; @@ -833,7 +971,7 @@ term_vspan(const struct termp *p, const struct roffsu *su) } /* - * Convert a scaling width to basic units, rounding down. + * Convert a scaling width to basic units, rounding towards 0. */ int term_hspan(const struct termp *p, const struct roffsu *su) @@ -841,3 +979,17 @@ term_hspan(const struct termp *p, const struct roffsu *su) return (*p->hspan)(p, su); } + +/* + * Convert a scaling width to basic units, rounding to closest. + */ +int +term_hen(const struct termp *p, const struct roffsu *su) +{ + int bu; + + if ((bu = (*p->hspan)(p, su)) >= 0) + return (bu + 11) / 24; + else + return -((-bu + 11) / 24); +} diff --git a/usr/src/cmd/mandoc/term.h b/usr/src/cmd/mandoc/term.h index fabc117d64..493191d7d3 100644 --- a/usr/src/cmd/mandoc/term.h +++ b/usr/src/cmd/mandoc/term.h @@ -1,7 +1,7 @@ -/* $Id: term.h,v 1.118 2015/11/07 14:01:16 schwarze Exp $ */ +/* $Id: term.h,v 1.130 2017/07/08 14:51:05 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -36,9 +36,10 @@ enum termfont { TERMFONT__MAX }; -#define TERM_MAXMARGIN 100000 /* FIXME */ - +struct eqn_box; struct roff_meta; +struct roff_node; +struct tbl_span; struct termp; typedef void (*term_margin)(struct termp *, const struct roff_meta *); @@ -48,24 +49,33 @@ struct termp_tbl { int decimal; /* decimal point position */ }; +struct termp_col { + int *buf; /* Output buffer. */ + size_t maxcols; /* Allocated bytes in buf. */ + size_t lastcol; /* Last byte in buf. */ + size_t col; /* Byte in buf to be written. */ + size_t rmargin; /* Current right margin. */ + size_t offset; /* Current left margin. */ +}; + struct termp { - enum termtype type; - struct rofftbl tbl; /* table configuration */ - int synopsisonly; /* print the synopsis only */ - int mdocstyle; /* imitate mdoc(7) output */ + struct rofftbl tbl; /* Table configuration. */ + struct termp_col *tcols; /* Array of table columns. */ + struct termp_col *tcol; /* Current table column. */ + size_t maxtcol; /* Allocated table columns. */ + size_t lasttcol; /* Last column currently used. */ size_t line; /* Current output line number. */ size_t defindent; /* Default indent for text. */ size_t defrmargin; /* Right margin of the device. */ size_t lastrmargin; /* Right margin before the last ll. */ - size_t rmargin; /* Current right margin. */ size_t maxrmargin; /* Max right margin. */ - size_t maxcols; /* Max size of buf. */ - size_t offset; /* Margin offest. */ - size_t tabwidth; /* Distance of tab positions. */ - size_t col; /* Bytes in buf. */ + size_t col; /* Byte position in buf. */ size_t viscol; /* Chars on current line. */ - size_t trailspace; /* See termp_flushln(). */ - int overstep; /* See termp_flushln(). */ + size_t trailspace; /* See term_flushln(). */ + size_t minbl; /* Minimum blanks before next field. */ + int synopsisonly; /* Print the synopsis only. */ + int mdocstyle; /* Imitate mdoc(7) output. */ + int ti; /* Temporary indent for one line. */ int skipvsp; /* Vertical space to skip. */ int flags; #define TERMP_SENTENCE (1 << 0) /* Space before a sentence. */ @@ -79,12 +89,17 @@ struct termp { #define TERMP_NOBREAK (1 << 8) /* See term_flushln(). */ #define TERMP_BRTRSP (1 << 9) /* See term_flushln(). */ #define TERMP_BRIND (1 << 10) /* See term_flushln(). */ -#define TERMP_DANGLE (1 << 11) /* See term_flushln(). */ -#define TERMP_HANG (1 << 12) /* See term_flushln(). */ +#define TERMP_HANG (1 << 11) /* See term_flushln(). */ +#define TERMP_NOPAD (1 << 12) /* See term_flushln(). */ #define TERMP_NOSPLIT (1 << 13) /* Do not break line before .An. */ #define TERMP_SPLIT (1 << 14) /* Break line before .An. */ #define TERMP_NONEWLINE (1 << 15) /* No line break in nofill mode. */ - int *buf; /* Output buffer. */ +#define TERMP_BRNEVER (1 << 16) /* Don't even break at maxrmargin. */ +#define TERMP_NOBUF (1 << 17) /* Bypass output buffer. */ +#define TERMP_NEWMC (1 << 18) /* No .mc printed yet. */ +#define TERMP_ENDMC (1 << 19) /* Next break ends .mc mode. */ +#define TERMP_MULTICOL (1 << 20) /* Multiple column mode. */ + enum termtype type; /* Terminal, PS, or PDF. */ enum termenc enc; /* Type of encoding. */ enum termfont fontl; /* Last font set. */ enum termfont *fontq; /* Symmetric fonts. */ @@ -102,18 +117,19 @@ struct termp { int (*hspan)(const struct termp *, const struct roffsu *); const void *argf; /* arg for headf/footf */ + const char *mc; /* Margin character. */ struct termp_ps *ps; }; -struct tbl_span; -struct eqn; - const char *ascii_uc2str(int); -void term_eqn(struct termp *, const struct eqn *); +void roff_term_pre(struct termp *, const struct roff_node *); + +void term_eqn(struct termp *, const struct eqn_box *); void term_tbl(struct termp *, const struct tbl_span *); void term_free(struct termp *); +void term_setcol(struct termp *, size_t); void term_newln(struct termp *); void term_vspace(struct termp *); void term_word(struct termp *, const char *); @@ -124,10 +140,15 @@ void term_end(struct termp *); void term_setwidth(struct termp *, const char *); int term_hspan(const struct termp *, const struct roffsu *); +int term_hen(const struct termp *, const struct roffsu *); int term_vspan(const struct termp *, const struct roffsu *); size_t term_strlen(const struct termp *, const char *); size_t term_len(const struct termp *, size_t); +void term_tab_set(const struct termp *, const char *); +void term_tab_iset(size_t); +size_t term_tab_next(size_t); + void term_fontpush(struct termp *, enum termfont); void term_fontpop(struct termp *); void term_fontpopq(struct termp *, int); diff --git a/usr/src/cmd/mandoc/term_ascii.c b/usr/src/cmd/mandoc/term_ascii.c index df5ff13901..e819c0ef8a 100644 --- a/usr/src/cmd/mandoc/term_ascii.c +++ b/usr/src/cmd/mandoc/term_ascii.c @@ -1,7 +1,7 @@ -/* $Id: term_ascii.c,v 1.54 2016/07/31 09:29:13 schwarze Exp $ */ +/* $Id: term_ascii.c,v 1.58 2017/06/14 14:24:20 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -65,13 +65,14 @@ ascii_init(enum termenc enc, const struct manoutput *outopts) #endif struct termp *p; - p = mandoc_calloc(1, sizeof(struct termp)); + p = mandoc_calloc(1, sizeof(*p)); + p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol)); + p->maxtcol = 1; p->line = 1; - p->tabwidth = 5; p->defrmargin = p->lastrmargin = 78; p->fontq = mandoc_reallocarray(NULL, - (p->fontsz = 8), sizeof(enum termfont)); + (p->fontsz = 8), sizeof(*p->fontq)); p->fontq[0] = p->fontl = TERMFONT_NONE; p->begin = ascii_begin; @@ -149,7 +150,7 @@ ascii_setwidth(struct termp *p, int iop, int width) { width /= 24; - p->rmargin = p->defrmargin; + p->tcol->rmargin = p->defrmargin; if (iop > 0) p->defrmargin += width; else if (iop == 0) @@ -158,8 +159,8 @@ ascii_setwidth(struct termp *p, int iop, int width) p->defrmargin -= width; else p->defrmargin = 0; - p->lastrmargin = p->rmargin; - p->rmargin = p->maxrmargin = p->defrmargin; + p->lastrmargin = p->tcol->rmargin; + p->tcol->rmargin = p->maxrmargin = p->defrmargin; } void @@ -216,6 +217,8 @@ ascii_endline(struct termp *p) { p->line++; + p->tcol->offset -= p->ti; + p->ti = 0; putchar('\n'); } @@ -290,7 +293,7 @@ ascii_uc2str(int uc) "<80>", "<81>", "<82>", "<83>", "<84>", "<85>", "<86>", "<87>", "<88>", "<89>", "<8A>", "<8B>", "<8C>", "<8D>", "<8E>", "<8F>", "<90>", "<91>", "<92>", "<93>", "<94>", "<95>", "<96>", "<97>", - "<99>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", + "<98>", "<99>", "<9A>", "<9B>", "<9C>", "<9D>", "<9E>", "<9F>", nbrsp, "!", "/\bc", "GBP", "o\bx", "=\bY", "|", "<sec>", "\"", "(C)", "_\ba", "<<", "~", "", "(R)", "-", "<deg>","+-", "2", "3", "'", ",\bu", "<par>",".", @@ -370,6 +373,8 @@ locale_endline(struct termp *p) { p->line++; + p->tcol->offset -= p->ti; + p->ti = 0; putwchar(L'\n'); } diff --git a/usr/src/cmd/mandoc/term_ps.c b/usr/src/cmd/mandoc/term_ps.c index 696ff22435..9638ae4cb9 100644 --- a/usr/src/cmd/mandoc/term_ps.c +++ b/usr/src/cmd/mandoc/term_ps.c @@ -1,7 +1,7 @@ -/* $Id: term_ps.c,v 1.83 2017/02/17 14:31:52 schwarze Exp $ */ +/* $Id: term_ps.c,v 1.85 2017/06/07 17:38:26 schwarze Exp $ */ /* * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> - * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org> + * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -538,12 +538,15 @@ pspdf_alloc(const struct manoutput *outopts) size_t marginx, marginy, lineheight; const char *pp; - p = mandoc_calloc(1, sizeof(struct termp)); + p = mandoc_calloc(1, sizeof(*p)); + p->tcol = p->tcols = mandoc_calloc(1, sizeof(*p->tcol)); + p->maxtcol = 1; + p->enc = TERMENC_ASCII; p->fontq = mandoc_reallocarray(NULL, - (p->fontsz = 8), sizeof(enum termfont)); + (p->fontsz = 8), sizeof(*p->fontq)); p->fontq[0] = p->fontl = TERMFONT_NONE; - p->ps = mandoc_calloc(1, sizeof(struct termp_ps)); + p->ps = mandoc_calloc(1, sizeof(*p->ps)); p->advance = ps_advance; p->begin = ps_begin; @@ -1219,6 +1222,9 @@ ps_endline(struct termp *p) } ps_closepage(p); + + p->tcol->offset -= p->ti; + p->ti = 0; } static void diff --git a/usr/src/cmd/mandoc/term_tab.c b/usr/src/cmd/mandoc/term_tab.c new file mode 100644 index 0000000000..5251a8425a --- /dev/null +++ b/usr/src/cmd/mandoc/term_tab.c @@ -0,0 +1,128 @@ +/* $OpenBSD: term.c,v 1.119 2017/01/08 18:08:44 schwarze Exp $ */ +/* + * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> + +#include <stddef.h> + +#include "mandoc_aux.h" +#include "out.h" +#include "term.h" + +struct tablist { + size_t *t; /* Allocated array of tab positions. */ + size_t s; /* Allocated number of positions. */ + size_t n; /* Currently used number of positions. */ +}; + +static struct { + struct tablist a; /* All tab positions for lookup. */ + struct tablist p; /* Periodic tab positions to add. */ + size_t d; /* Default tab width in units of n. */ +} tabs; + + +void +term_tab_set(const struct termp *p, const char *arg) +{ + static int recording_period; + + struct roffsu su; + struct tablist *tl; + size_t pos; + int add; + + /* Special arguments: clear all tabs or switch lists. */ + + if (arg == NULL) { + tabs.a.n = tabs.p.n = 0; + recording_period = 0; + if (tabs.d == 0) { + a2roffsu(".8i", &su, SCALE_IN); + tabs.d = term_hen(p, &su); + } + return; + } + if (arg[0] == 'T' && arg[1] == '\0') { + recording_period = 1; + return; + } + + /* Parse the sign, the number, and the unit. */ + + if (*arg == '+') { + add = 1; + arg++; + } else + add = 0; + if (a2roffsu(arg, &su, SCALE_EM) == NULL) + return; + + /* Select the list, and extend it if it is full. */ + + tl = recording_period ? &tabs.p : &tabs.a; + if (tl->n >= tl->s) { + tl->s += 8; + tl->t = mandoc_reallocarray(tl->t, tl->s, sizeof(*tl->t)); + } + + /* Append the new position. */ + + pos = term_hen(p, &su); + tl->t[tl->n] = pos; + if (add && tl->n) + tl->t[tl->n] += tl->t[tl->n - 1]; + tl->n++; +} + +/* + * Simplified version without a parser, + * never incremental, never periodic, for use by tbl(7). + */ +void +term_tab_iset(size_t inc) +{ + if (tabs.a.n >= tabs.a.s) { + tabs.a.s += 8; + tabs.a.t = mandoc_reallocarray(tabs.a.t, tabs.a.s, + sizeof(*tabs.a.t)); + } + tabs.a.t[tabs.a.n++] = inc; +} + +size_t +term_tab_next(size_t prev) +{ + size_t i, j; + + for (i = 0;; i++) { + if (i == tabs.a.n) { + if (tabs.p.n == 0) + return prev; + tabs.a.n += tabs.p.n; + if (tabs.a.s < tabs.a.n) { + tabs.a.s = tabs.a.n; + tabs.a.t = mandoc_reallocarray(tabs.a.t, + tabs.a.s, sizeof(*tabs.a.t)); + } + for (j = 0; j < tabs.p.n; j++) + tabs.a.t[i + j] = tabs.p.t[j] + + (i ? tabs.a.t[i - 1] : 0); + } + if (prev < tabs.a.t[i]) + return tabs.a.t[i]; + } +} diff --git a/usr/src/cmd/mandoc/tree.c b/usr/src/cmd/mandoc/tree.c index dd36ff594e..7d18b9d9e6 100644 --- a/usr/src/cmd/mandoc/tree.c +++ b/usr/src/cmd/mandoc/tree.c @@ -1,4 +1,4 @@ -/* $Id: tree.c,v 1.73 2017/02/10 15:45:28 schwarze Exp $ */ +/* $Id: tree.c,v 1.77 2017/07/08 14:51:05 schwarze Exp $ */ /* * Copyright (c) 2008, 2009, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> @@ -129,23 +129,23 @@ print_mdoc(const struct roff_node *n, int indent) p = n->string; break; case ROFFT_BODY: - p = mdoc_macronames[n->tok]; + p = roff_name[n->tok]; break; case ROFFT_HEAD: - p = mdoc_macronames[n->tok]; + p = roff_name[n->tok]; break; case ROFFT_TAIL: - p = mdoc_macronames[n->tok]; + p = roff_name[n->tok]; break; case ROFFT_ELEM: - p = mdoc_macronames[n->tok]; + p = roff_name[n->tok]; if (n->args) { argv = n->args->argv; argc = n->args->argc; } break; case ROFFT_BLOCK: - p = mdoc_macronames[n->tok]; + p = roff_name[n->tok]; if (n->args) { argv = n->args->argv; argc = n->args->argc; @@ -202,7 +202,7 @@ print_mdoc(const struct roff_node *n, int indent) } if (n->eqn) - print_box(n->eqn->root->first, indent + 4); + print_box(n->eqn->first, indent + 4); if (n->child) print_mdoc(n->child, indent + (n->type == ROFFT_BLOCK ? 2 : 4)); @@ -257,7 +257,7 @@ print_man(const struct roff_node *n, int indent) case ROFFT_BLOCK: case ROFFT_HEAD: case ROFFT_BODY: - p = man_macronames[n->tok]; + p = roff_name[n->tok]; break; case ROFFT_ROOT: p = "root"; @@ -287,7 +287,7 @@ print_man(const struct roff_node *n, int indent) } if (n->eqn) - print_box(n->eqn->root->first, indent + 4); + print_box(n->eqn->first, indent + 4); if (n->child) print_man(n->child, indent + (n->type == ROFFT_BLOCK ? 2 : 4)); @@ -313,10 +313,6 @@ print_box(const struct eqn_box *ep, int indent) t = NULL; switch (ep->type) { - case EQN_ROOT: - t = "eqn-root"; - break; - case EQN_LISTONE: case EQN_LIST: t = "eqn-list"; break; |