diff options
Diffstat (limited to 'src/pkg/go/doc/comment.go')
| -rw-r--r-- | src/pkg/go/doc/comment.go | 376 |
1 files changed, 236 insertions, 140 deletions
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index e1989226b..6f0edd4ba 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -7,114 +7,14 @@ package doc import ( - "go/ast" "io" "regexp" "strings" - "template" // for HTMLEscape + "text/template" // for HTMLEscape + "unicode" + "unicode/utf8" ) -func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } - -func stripTrailingWhitespace(s string) string { - i := len(s) - for i > 0 && isWhitespace(s[i-1]) { - i-- - } - return s[0:i] -} - -// CommentText returns the text of comment, -// with the comment markers - //, /*, and */ - removed. -func CommentText(comment *ast.CommentGroup) string { - if comment == nil { - return "" - } - comments := make([]string, len(comment.List)) - for i, c := range comment.List { - comments[i] = string(c.Text) - } - - lines := make([]string, 0, 10) // most comments are less than 10 lines - for _, c := range comments { - // Remove comment markers. - // The parser has given us exactly the comment text. - switch c[1] { - case '/': - //-style comment - c = c[2:] - // Remove leading space after //, if there is one. - // TODO(gri) This appears to be necessary in isolated - // cases (bignum.RatFromString) - why? - if len(c) > 0 && c[0] == ' ' { - c = c[1:] - } - case '*': - /*-style comment */ - c = c[2 : len(c)-2] - } - - // Split on newlines. - cl := strings.Split(c, "\n") - - // Walk lines, stripping trailing white space and adding to list. - for _, l := range cl { - lines = append(lines, stripTrailingWhitespace(l)) - } - } - - // Remove leading blank lines; convert runs of - // interior blank lines to a single blank line. - n := 0 - for _, line := range lines { - if line != "" || n > 0 && lines[n-1] != "" { - lines[n] = line - n++ - } - } - lines = lines[0:n] - - // Add final "" entry to get trailing newline from Join. - if n > 0 && lines[n-1] != "" { - lines = append(lines, "") - } - - return strings.Join(lines, "\n") -} - -// Split bytes into lines. -func split(text []byte) [][]byte { - // count lines - n := 0 - last := 0 - for i, c := range text { - if c == '\n' { - last = i + 1 - n++ - } - } - if last < len(text) { - n++ - } - - // split - out := make([][]byte, n) - last = 0 - n = 0 - for i, c := range text { - if c == '\n' { - out[n] = text[last : i+1] - last = i + 1 - n++ - } - } - if last < len(text) { - out[n] = text[last:] - } - - return out -} - var ( ldquo = []byte("“") rdquo = []byte("”") @@ -122,13 +22,13 @@ var ( // Escape comment text for HTML. If nice is set, // also turn `` into “ and '' into ”. -func commentEscape(w io.Writer, s []byte, nice bool) { +func commentEscape(w io.Writer, text string, nice bool) { last := 0 if nice { - for i := 0; i < len(s)-1; i++ { - ch := s[i] - if ch == s[i+1] && (ch == '`' || ch == '\'') { - template.HTMLEscape(w, s[last:i]) + for i := 0; i < len(text)-1; i++ { + ch := text[i] + if ch == text[i+1] && (ch == '`' || ch == '\'') { + template.HTMLEscape(w, []byte(text[last:i])) last = i + 2 switch ch { case '`': @@ -140,7 +40,7 @@ func commentEscape(w io.Writer, s []byte, nice bool) { } } } - template.HTMLEscape(w, s[last:]) + template.HTMLEscape(w, []byte(text[last:])) } const ( @@ -156,7 +56,7 @@ const ( filePart + `([:.,]` + filePart + `)*` ) -var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`) +var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) var ( html_a = []byte(`<a href="`) @@ -168,6 +68,9 @@ var ( html_endp = []byte("</p>\n") html_pre = []byte("<pre>") html_endpre = []byte("</pre>\n") + html_h = []byte(`<h3 id="`) + html_hq = []byte(`">`) + html_endh = []byte("</h3>\n") ) // Emphasize and escape a line of text for HTML. URLs are converted into links; @@ -178,13 +81,13 @@ var ( // and the word is converted into a link. If nice is set, the remaining text's // appearance is improved where it makes sense (e.g., `` is turned into “ // and '' into ”). -func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { +func emphasize(w io.Writer, line string, words map[string]string, nice bool) { for { - m := matchRx.FindSubmatchIndex(line) + m := matchRx.FindStringSubmatchIndex(line) if m == nil { break } - // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx) + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) // write text before match commentEscape(w, line[0:m[0]], nice) @@ -196,8 +99,8 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { if words != nil { url, italics = words[string(match)] } - if m[2] < 0 { - // didn't match against first parenthesized sub-regexp; must be match against urlRx + if m[2] >= 0 { + // match against first parenthesized sub-regexp; must be match against urlRx if !italics { // no alternative URL in words list, use match instead url = string(match) @@ -228,7 +131,7 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) { commentEscape(w, line, nice) } -func indentLen(s []byte) int { +func indentLen(s string) int { i := 0 for i < len(s) && (s[i] == ' ' || s[i] == '\t') { i++ @@ -236,9 +139,11 @@ func indentLen(s []byte) int { return i } -func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') } +func isBlank(s string) bool { + return len(s) == 0 || (len(s) == 1 && s[0] == '\n') +} -func commonPrefix(a, b []byte) []byte { +func commonPrefix(a, b string) string { i := 0 for i < len(a) && i < len(b) && a[i] == b[i] { i++ @@ -246,7 +151,7 @@ func commonPrefix(a, b []byte) []byte { return a[0:i] } -func unindent(block [][]byte) { +func unindent(block []string) { if len(block) == 0 { return } @@ -268,7 +173,66 @@ func unindent(block [][]byte) { } } -// Convert comment text to formatted HTML. +// heading returns the trimmed line if it passes as a section heading; +// otherwise it returns the empty string. +func heading(line string) string { + line = strings.TrimSpace(line) + if len(line) == 0 { + return "" + } + + // a heading must start with an uppercase letter + r, _ := utf8.DecodeRuneInString(line) + if !unicode.IsLetter(r) || !unicode.IsUpper(r) { + return "" + } + + // it must end in a letter or digit: + r, _ = utf8.DecodeLastRuneInString(line) + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return "" + } + + // exclude lines with illegal characters + if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 { + return "" + } + + // allow "'" for possessive "'s" only + for b := line; ; { + i := strings.IndexRune(b, '\'') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { + return "" // not followed by "s " + } + b = b[i+2:] + } + + return line +} + +type op int + +const ( + opPara op = iota + opHead + opPre +) + +type block struct { + op op + lines []string +} + +var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) + +func anchorID(line string) string { + return nonAlphaNumRx.ReplaceAllString(line, "_") +} + +// ToHTML converts comment text to formatted HTML. // The comment was prepared by DocReader, // so it is known not to have leading, trailing blank lines // nor to have trailing spaces at the end of lines. @@ -276,6 +240,7 @@ func unindent(block [][]byte) { // // Turn each run of multiple \n into </p><p>. // Turn each run of indented lines into a <pre> block without indent. +// Enclose headings with header tags. // // URLs in the comment text are converted into links; if the URL also appears // in the words map, the link is taken from the map (if the corresponding map @@ -284,23 +249,57 @@ func unindent(block [][]byte) { // Go identifiers that appear in the words map are italicized; if the corresponding // map value is not the empty string, it is considered a URL and the word is converted // into a link. -func ToHTML(w io.Writer, s []byte, words map[string]string) { - inpara := false - - close := func() { - if inpara { +func ToHTML(w io.Writer, text string, words map[string]string) { + for _, b := range blocks(text) { + switch b.op { + case opPara: + w.Write(html_p) + for _, line := range b.lines { + emphasize(w, line, words, true) + } w.Write(html_endp) - inpara = false + case opHead: + w.Write(html_h) + id := "" + for _, line := range b.lines { + if id == "" { + id = anchorID(line) + w.Write([]byte(id)) + w.Write(html_hq) + } + commentEscape(w, line, true) + } + if id == "" { + w.Write(html_hq) + } + w.Write(html_endh) + case opPre: + w.Write(html_pre) + for _, line := range b.lines { + emphasize(w, line, nil, false) + } + w.Write(html_endpre) } } - open := func() { - if !inpara { - w.Write(html_p) - inpara = true +} + +func blocks(text string) []block { + var ( + out []block + para []string + + lastWasBlank = false + lastWasHeading = false + ) + + close := func() { + if para != nil { + out = append(out, block{opPara, para}) + para = nil } } - lines := split(s) + lines := strings.SplitAfter(text, "\n") unindent(lines) for i := 0; i < len(lines); { line := lines[i] @@ -308,6 +307,7 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) { // close paragraph close() i++ + lastWasBlank = true continue } if indentLen(line) > 0 { @@ -323,23 +323,119 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) { for j > i && isBlank(lines[j-1]) { j-- } - block := lines[i:j] + pre := lines[i:j] i = j - unindent(block) + unindent(pre) // put those lines in a pre block - w.Write(html_pre) - for _, line := range block { - emphasize(w, line, nil, false) // no nice text formatting - } - w.Write(html_endpre) + out = append(out, block{opPre, pre}) + lastWasHeading = false continue } + + if lastWasBlank && !lastWasHeading && i+2 < len(lines) && + isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { + // current line is non-blank, surrounded by blank lines + // and the next non-blank line is not indented: this + // might be a heading. + if head := heading(line); head != "" { + close() + out = append(out, block{opHead, []string{head}}) + i += 2 + lastWasHeading = true + continue + } + } + // open paragraph - open() - emphasize(w, lines[i], words, true) // nice text formatting + lastWasBlank = false + lastWasHeading = false + para = append(para, lines[i]) i++ } close() + + return out +} + +// ToText prepares comment text for presentation in textual output. +// It wraps paragraphs of text to width or fewer Unicode code points +// and then prefixes each line with the indent. In preformatted sections +// (such as program text), it prefixes each non-blank line with preIndent. +func ToText(w io.Writer, text string, indent, preIndent string, width int) { + l := lineWrapper{ + out: w, + width: width, + indent: indent, + } + for _, b := range blocks(text) { + switch b.op { + case opPara: + // l.write will add leading newline if required + for _, line := range b.lines { + l.write(line) + } + l.flush() + case opHead: + w.Write(nl) + for _, line := range b.lines { + l.write(line + "\n") + } + l.flush() + case opPre: + w.Write(nl) + for _, line := range b.lines { + if !isBlank(line) { + w.Write([]byte(preIndent)) + w.Write([]byte(line)) + } + } + } + } +} + +type lineWrapper struct { + out io.Writer + printed bool + width int + indent string + n int + pendSpace int +} + +var nl = []byte("\n") +var space = []byte(" ") + +func (l *lineWrapper) write(text string) { + if l.n == 0 && l.printed { + l.out.Write(nl) // blank line before new paragraph + } + l.printed = true + + for _, f := range strings.Fields(text) { + w := utf8.RuneCountInString(f) + // wrap if line is too long + if l.n > 0 && l.n+l.pendSpace+w > l.width { + l.out.Write(nl) + l.n = 0 + l.pendSpace = 0 + } + if l.n == 0 { + l.out.Write([]byte(l.indent)) + } + l.out.Write(space[:l.pendSpace]) + l.out.Write([]byte(f)) + l.n += l.pendSpace + w + l.pendSpace = 1 + } +} + +func (l *lineWrapper) flush() { + if l.n == 0 { + return + } + l.out.Write(nl) + l.pendSpace = 0 + l.n = 0 } |
