diff options
Diffstat (limited to 'usr/src/cmd/man/makewhatis.c')
| -rw-r--r-- | usr/src/cmd/man/makewhatis.c | 837 | 
1 files changed, 837 insertions, 0 deletions
| diff --git a/usr/src/cmd/man/makewhatis.c b/usr/src/cmd/man/makewhatis.c new file mode 100644 index 0000000000..c5428e4633 --- /dev/null +++ b/usr/src/cmd/man/makewhatis.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) 2002 John Rochester + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer, + *    in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + *    derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright 2014 Garrett D'Amore <garrett@damore.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> + +#include <ctype.h> +#include <dirent.h> +#include <err.h> +#include <signal.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "man.h" +#include "stringlist.h" + + +/* Information collected about each man page in a section */ +struct page_info { +	char	*filename; +	char	*name; +	char	*suffix; +	ino_t	inode; +}; + +/* An expanding string */ +struct sbuf { +	char	*content;	/* the start of the buffer */ +	char	*end;		/* just past the end of the content */ +	char	*last;		/* the last allocated character */ +}; + +/* Remove the last amount characters from the sbuf */ +#define	sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) +/* Return the length of the sbuf content */ +#define	sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) + +typedef char *edited_copy(char *from, char *to, int length); + +/* + * While the whatis line is being formed, it is stored in whatis_proto. + * When finished, it is reformatted into whatis_final and then appended + * to whatis_lines. + */ +static struct sbuf	*whatis_proto; +static struct sbuf	*whatis_final; +static stringlist	*whatis_lines;	/* collected output lines */ + +static char tempfile[MAXPATHLEN];	/* path of temporary file, if any */ + +#define	MDOC_COMMANDS	"ArDvErEvFlLiNmPa" + + +/* Free a struct page_info and its content */ +static void +free_page_info(struct page_info *info) +{ + +	free(info->filename); +	free(info->name); +	free(info->suffix); +	free(info); +} + +/* + * Allocate and fill in a new struct page_info given the + * name of the man section directory and the dirent of the file. + * If the file is not a man page, return NULL. + */ +static struct page_info * +new_page_info(char *dir, struct dirent *dirent) +{ +	struct page_info *info; +	int		basename_length; +	char		*suffix; +	struct stat	st; + +	if ((info = malloc(sizeof (struct page_info))) == NULL) +		err(1, "malloc"); +	basename_length = strlen(dirent->d_name); +	suffix = &dirent->d_name[basename_length]; +	if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1) +		err(1, "asprintf"); +	for (;;) { +		if (--suffix == dirent->d_name || !isalnum(*suffix)) { +			if (*suffix == '.') +				break; +			free(info->filename); +			free(info); +			return (NULL); +		} +	} +	*suffix++ = '\0'; +	info->name = strdup(dirent->d_name); +	info->suffix = strdup(suffix); +	if (stat(info->filename, &st) < 0) { +		warn("%s", info->filename); +		free_page_info(info); +		return (NULL); +	} +	if (!S_ISREG(st.st_mode)) { +		free_page_info(info); +		return (NULL); +	} +	info->inode = st.st_ino; +	return (info); +} + +/* + * Reset sbuf length to 0. + */ +static void +sbuf_clear(struct sbuf *sbuf) +{ + +	sbuf->end = sbuf->content; +} + +/* + * Allocate a new sbuf. + */ +static struct sbuf * +new_sbuf(void) +{ +	struct sbuf	*sbuf; + +	if ((sbuf = malloc(sizeof (struct sbuf))) == NULL) +		err(1, "malloc"); +	if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL) +		err(1, "malloc"); +	sbuf->last = sbuf->content + LINE_ALLOC - 1; +	sbuf_clear(sbuf); + +	return (sbuf); +} + +/* + * Ensure that there is enough room in the sbuf + * for nchars more characters. + */ +static void +sbuf_need(struct sbuf *sbuf, int nchars) +{ +	char *new_content; +	size_t size, cntsize; +	size_t grow = 128; + +	while (grow < nchars) { +		grow += 128;	/* we grow in chunks of 128 bytes */ +	} + +	/* Grow if the buffer isn't big enough */ +	if (sbuf->end + nchars > sbuf->last) { +		size = sbuf->last + 1 - sbuf->content; +		size += grow; +		cntsize = sbuf->end - sbuf->content; + +		if ((new_content = realloc(sbuf->content, size)) == NULL) { +			perror("realloc"); +			if (tempfile[0] != '\0') +				(void) unlink(tempfile); +			exit(1); +		} +		sbuf->content = new_content; +		sbuf->end = new_content + cntsize; +		sbuf->last = new_content + size - 1; +	} +} + +/* + * Append a string of a given length to the sbuf. + */ +static void +sbuf_append(struct sbuf *sbuf, const char *text, int length) +{ +	if (length > 0) { +		sbuf_need(sbuf, length); +		(void) memcpy(sbuf->end, text, length); +		sbuf->end += length; +	} +} + +/* + * Append a null-terminated string to the sbuf. + */ +static void +sbuf_append_str(struct sbuf *sbuf, char *text) +{ + +	sbuf_append(sbuf, text, strlen(text)); +} + +/* + * Append an edited null-terminated string to the sbuf. + */ +static void +sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) +{ +	int	length; + +	if ((length = strlen(text)) > 0) { +		sbuf_need(sbuf, length); +		sbuf->end = copy(text, sbuf->end, length); +	} +} + +/* + * Strip any of a set of chars from the end of the sbuf. + */ +static void +sbuf_strip(struct sbuf *sbuf, const char *set) +{ + +	while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) +		sbuf->end--; +} + +/* + * Return the null-terminated string built by the sbuf. + */ +static char * +sbuf_content(struct sbuf *sbuf) +{ + +	*sbuf->end = '\0'; +	return (sbuf->content); +} + +/* + * Return true if no man page exists in the directory with + * any of the names in the stringlist. + */ +static int +no_page_exists(char *dir, stringlist *names, char *suffix) +{ +	char	path[MAXPATHLEN]; +	char	*suffixes[] = { "", ".gz", ".bz2", NULL }; +	size_t	i; +	int	j; + +	for (i = 0; i < names->sl_cur; i++) { +		for (j = 0; suffixes[j] != NULL; j++) { +			(void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s", +			    dir, names->sl_str[i], suffix, suffixes[j]); +			if (access(path, F_OK) == 0) { +				return (0); +			} +		} +	} +	return (1); +} + +/* ARGSUSED sig */ +static void +trap_signal(int sig) +{ + +	if (tempfile[0] != '\0') +		(void) unlink(tempfile); + +	exit(1); +} + +/* + * Attempt to open an output file. + * Return NULL if unsuccessful. + */ +static FILE * +open_output(char *name) +{ +	FILE	*output; + +	whatis_lines = sl_init(); +	(void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name); +	name = tempfile; +	if ((output = fopen(name, "w")) == NULL) { +		warn("%s", name); +		return (NULL); +	} +	return (output); +} + +static int +linesort(const void *a, const void *b) +{ + +	return (strcmp((*(const char * const *)a), (*(const char * const *)b))); +} + +/* + * Write the unique sorted lines to the output file. + */ +static void +finish_output(FILE *output, char *name) +{ +	size_t	i; +	char	*prev = NULL; + +	qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *), +	    linesort); +	for (i = 0; i < whatis_lines->sl_cur; i++) { +		char *line = whatis_lines->sl_str[i]; +		if (i > 0 && strcmp(line, prev) == 0) +			continue; +		prev = line; +		(void) fputs(line, output); +		(void) putc('\n', output); +	} +	(void) fclose(output); +	sl_free(whatis_lines, 1); +	(void) rename(tempfile, name); +	(void) unlink(tempfile); +} + +static FILE * +open_whatis(char *mandir) +{ +	char	filename[MAXPATHLEN]; + +	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); +	return (open_output(filename)); +} + +static void +finish_whatis(FILE *output, char *mandir) +{ +	char	filename[MAXPATHLEN]; + +	(void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); +	finish_output(output, filename); +} + +/* + * Remove trailing spaces from a string, returning a pointer to just + * beyond the new last character. + */ +static char * +trim_rhs(char *str) +{ +	char	*rhs; + +	rhs = &str[strlen(str)]; +	while (--rhs > str && isspace(*rhs)) +		; +	*++rhs = '\0'; +	return (rhs); +} + +/* + * Return a pointer to the next non-space character in the string. + */ +static char * +skip_spaces(char *s) +{ + +	while (*s != '\0' && isspace(*s)) +		s++; + +	return (s); +} + +/* + * Return whether the line is of one of the forms: + *	.Sh NAME + *	.Sh "NAME" + *	etc. + * assuming that section_start is ".Sh". + */ +static int +name_section_line(char *line, const char *section_start) +{ +	char		*rhs; + +	if (strncmp(line, section_start, 3) != 0) +		return (0); +	line = skip_spaces(line + 3); +	rhs = trim_rhs(line); +	if (*line == '"') { +		line++; +		if (*--rhs == '"') +			*rhs = '\0'; +	} +	if (strcmp(line, "NAME") == 0) +		return (1); + +	return (0); +} + +/* + * Copy characters while removing the most common nroff/troff markup: + *	\(em, \(mi, \s[+-N], \& + *	\fF, \f(fo, \f[font] + *	\*s, \*(st, \*[stringvar] + */ +static char * +de_nroff_copy(char *from, char *to, int fromlen) +{ +	char	*from_end = &from[fromlen]; + +	while (from < from_end) { +		switch (*from) { +		case '\\': +			switch (*++from) { +			case '(': +				if (strncmp(&from[1], "em", 2) == 0 || +				    strncmp(&from[1], "mi", 2) == 0) { +					from += 3; +					continue; +				} +				break; +			case 's': +				if (*++from == '-') +					from++; +				while (isdigit(*from)) +					from++; +				continue; +			case 'f': +			case '*': +				if (*++from == '(') { +					from += 3; +				} else if (*from == '[') { +					while (*++from != ']' && +					    from < from_end) +						; +					from++; +				} else { +					from++; +				} +				continue; +			case '&': +				from++; +				continue; +			} +			break; +		} +		*to++ = *from++; +	} +	return (to); +} + +/* + * Append a string with the nroff formatting removed. + */ +static void +add_nroff(char *text) +{ + +	sbuf_append_edited(whatis_proto, text, de_nroff_copy); +} + +/* + * Appends "name(suffix), " to whatis_final + */ +static void +add_whatis_name(char *name, char *suffix) +{ + +	if (*name != '\0') { +		sbuf_append_str(whatis_final, name); +		sbuf_append(whatis_final, "(", 1); +		sbuf_append_str(whatis_final, suffix); +		sbuf_append(whatis_final, "), ", 3); +	} +} + +/* + * Processes an old-style man(7) line. This ignores commands with only + * a single number argument. + */ +static void +process_man_line(char *line) +{ +	char	*p; + +	if (*line == '.') { +		while (isalpha(*++line)) +			; +		p = line = skip_spaces(line); +		while (*p != '\0') { +			if (!isdigit(*p)) +				break; +			p++; +		} +		if (*p == '\0') +			return; +	} else +		line = skip_spaces(line); +	if (*line != '\0') { +		add_nroff(line); +		sbuf_append(whatis_proto, " ", 1); +	} +} + +/* + * Processes a new-style mdoc(7) line. + */ +static void +process_mdoc_line(char *line) +{ +	int	xref; +	int	arg = 0; +	char	*line_end = &line[strlen(line)]; +	int	orig_length = sbuf_length(whatis_proto); +	char	*next; + +	if (*line == '\0') +		return; +	if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { +		add_nroff(skip_spaces(line)); +		sbuf_append(whatis_proto, " ", 1); +		return; +	} +	xref = strncmp(line, ".Xr", 3) == 0; +	line += 3; +	while ((line = skip_spaces(line)) < line_end) { +		if (*line == '"') { +			next = ++line; +			for (;;) { +				next = strchr(next, '"'); +				if (next == NULL) +					break; +				(void) memmove(next, next + 1, strlen(next)); +				line_end--; +				if (*next != '"') +					break; +				next++; +			} +		} else { +			next = strpbrk(line, " \t"); +		} +		if (next != NULL) +			*next++ = '\0'; +		else +			next = line_end; +		if (isupper(*line) && islower(line[1]) && line[2] == '\0') { +			if (strcmp(line, "Ns") == 0) { +				arg = 0; +				line = next; +				continue; +			} +			if (strstr(line, MDOC_COMMANDS) != NULL) { +				line = next; +				continue; +			} +		} +		if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { +			if (xref) { +				sbuf_append(whatis_proto, "(", 1); +				add_nroff(line); +				sbuf_append(whatis_proto, ")", 1); +				xref = 0; +			} else { +				sbuf_append(whatis_proto, " ", 1); +			} +		} +		add_nroff(line); +		arg++; +		line = next; +	} +	if (sbuf_length(whatis_proto) > orig_length) +		sbuf_append(whatis_proto, " ", 1); +} + +/* + * Collect a list of comma-separated names from the text. + */ +static void +collect_names(stringlist *names, char *text) +{ +	char	*arg; + +	for (;;) { +		arg = text; +		text = strchr(text, ','); +		if (text != NULL) +			*text++ = '\0'; +		(void) sl_add(names, arg); +		if (text == NULL) +			return; +		if (*text == ' ') +			text++; +	} +} + +enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; + +/* + * Process a man page source into a single whatis line and add it + * to whatis_lines. + */ +static void +process_page(struct page_info *page, char *section_dir) +{ +	FILE		*fp; +	stringlist	*names; +	char		*descr; +	int		state = STATE_UNKNOWN; +	size_t		i; +	char		*line = NULL; +	size_t		linecap = 0; + +	sbuf_clear(whatis_proto); +	if ((fp = fopen(page->filename, "r")) == NULL) { +		warn("%s", page->filename); +		return; +	} +	while (getline(&line, &linecap, fp) > 0) { +		/* Skip comments */ +		if (strncmp(line, ".\\\"", 3) == 0) +			continue; +		switch (state) { +		/* Haven't reached the NAME section yet */ +		case STATE_UNKNOWN: +			if (name_section_line(line, ".SH")) +				state = STATE_MANSTYLE; +			else if (name_section_line(line, ".Sh")) +				state = STATE_MDOCNAME; +			continue; +		/* Inside an old-style .SH NAME section */ +		case STATE_MANSTYLE: +			if (strncmp(line, ".SH", 3) == 0 || +			    strncmp(line, ".SS", 3) == 0) +				break; +			(void) trim_rhs(line); +			if (strcmp(line, ".") == 0) +				continue; +			if (strncmp(line, ".IX", 3) == 0) { +				line += 3; +				line = skip_spaces(line); +			} +			process_man_line(line); +			continue; +		/* Inside a new-style .Sh NAME section (the .Nm part) */ +		case STATE_MDOCNAME: +			(void) trim_rhs(line); +			if (strncmp(line, ".Nm", 3) == 0) { +				process_mdoc_line(line); +				continue; +			} else { +				if (strcmp(line, ".") == 0) +					continue; +				sbuf_append(whatis_proto, "- ", 2); +				state = STATE_MDOCDESC; +			} +			/* FALLTHROUGH */ +		/* Inside a new-style .Sh NAME section (after the .Nm-s) */ +		case STATE_MDOCDESC: +			if (strncmp(line, ".Sh", 3) == 0) +				break; +			(void) trim_rhs(line); +			if (strcmp(line, ".") == 0) +				continue; +			process_mdoc_line(line); +			continue; +		} +		break; +	} +	(void) fclose(fp); +	sbuf_strip(whatis_proto, " \t.-"); +	line = sbuf_content(whatis_proto); +	/* +	 * Line now contains the appropriate data, but without the +	 * proper indentation or the section appended to each name. +	 */ +	descr = strstr(line, " - "); +	if (descr == NULL) { +		descr = strchr(line, ' '); +		if (descr == NULL) +			return; +		*descr++ = '\0'; +	} else { +		*descr = '\0'; +		descr += 3; +	} +	names = sl_init(); +	collect_names(names, line); +	sbuf_clear(whatis_final); +	if (!sl_find(names, page->name) && +	    no_page_exists(section_dir, names, page->suffix)) { +		/* +		 * Add the page name since that's the only +		 * thing that man(1) will find. +		 */ +		add_whatis_name(page->name, page->suffix); +	} +	for (i = 0; i < names->sl_cur; i++) +		add_whatis_name(names->sl_str[i], page->suffix); +	sl_free(names, 0); +	/* Remove last ", " */ +	sbuf_retract(whatis_final, 2); +	while (sbuf_length(whatis_final) < INDENT) +		sbuf_append(whatis_final, " ", 1); +	sbuf_append(whatis_final, " - ", 3); +	sbuf_append_str(whatis_final, skip_spaces(descr)); +	(void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); +} + +/* + * Sort pages first by inode number, then by name. + */ +static int +pagesort(const void *a, const void *b) +{ +	const struct page_info *p1 = *(struct page_info * const *) a; +	const struct page_info *p2 = *(struct page_info * const *) b; + +	if (p1->inode == p2->inode) +		return (strcmp(p1->name, p2->name)); + +	return (p1->inode - p2->inode); +} + +/* + * Process a single man section. + */ +static void +process_section(char *section_dir) +{ +	struct dirent	**entries; +	int		nentries; +	struct page_info **pages; +	int		npages = 0; +	int		i; +	ino_t		prev_inode = 0; + +	/* Scan the man section directory for pages */ +	nentries = scandir(section_dir, &entries, NULL, alphasort); + +	/* Collect information about man pages */ +	pages = (struct page_info **)calloc(nentries, +	    sizeof (struct page_info *)); +	for (i = 0; i < nentries; i++) { +		struct page_info *info = new_page_info(section_dir, entries[i]); +		if (info != NULL) +			pages[npages++] = info; +		free(entries[i]); +	} +	free(entries); +	qsort(pages, npages, sizeof (struct page_info *), pagesort); + +	/* Process each unique page */ +	for (i = 0; i < npages; i++) { +		struct page_info *page = pages[i]; +		if (page->inode != prev_inode) { +			prev_inode = page->inode; +			process_page(page, section_dir); +		} +		free_page_info(page); +	} +	free(pages); +} + +/* + * Return whether the directory entry is a man page section. + */ +static int +select_sections(const struct dirent *entry) +{ +	const char	*p = &entry->d_name[3]; + +	if (strncmp(entry->d_name, "man", 3) != 0) +		return (0); +	while (*p != '\0') { +		if (!isalnum(*p++)) +			return (0); +	} +	return (1); +} + +/* + * Process a single top-level man directory by finding all the + * sub-directories named man* and processing each one in turn. + */ +void +mwpath(char *path) +{ +	FILE		*fp = NULL; +	struct dirent	**entries; +	int		nsections; +	int		i; + +	(void) signal(SIGINT, trap_signal); +	(void) signal(SIGHUP, trap_signal); +	(void) signal(SIGQUIT, trap_signal); +	(void) signal(SIGTERM, trap_signal); + +	whatis_proto = new_sbuf(); +	whatis_final = new_sbuf(); + +	nsections = scandir(path, &entries, select_sections, alphasort); +	if ((fp = open_whatis(path)) == NULL) +		return; +	for (i = 0; i < nsections; i++) { +		char	section_dir[MAXPATHLEN]; + +		(void) snprintf(section_dir, MAXPATHLEN, "%s/%s", +		    path, entries[i]->d_name); +		process_section(section_dir); +		free(entries[i]); +	} +	free(entries); +	finish_whatis(fp, path); +} | 
