summaryrefslogtreecommitdiff
path: root/src/pkg/go/doc/comment.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/go/doc/comment.go')
-rw-r--r--src/pkg/go/doc/comment.go376
1 files changed, 236 insertions, 140 deletions
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go
index e1989226b..6f0edd4ba 100644
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -7,114 +7,14 @@
package doc
import (
- "go/ast"
"io"
"regexp"
"strings"
- "template" // for HTMLEscape
+ "text/template" // for HTMLEscape
+ "unicode"
+ "unicode/utf8"
)
-func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
-
-func stripTrailingWhitespace(s string) string {
- i := len(s)
- for i > 0 && isWhitespace(s[i-1]) {
- i--
- }
- return s[0:i]
-}
-
-// CommentText returns the text of comment,
-// with the comment markers - //, /*, and */ - removed.
-func CommentText(comment *ast.CommentGroup) string {
- if comment == nil {
- return ""
- }
- comments := make([]string, len(comment.List))
- for i, c := range comment.List {
- comments[i] = string(c.Text)
- }
-
- lines := make([]string, 0, 10) // most comments are less than 10 lines
- for _, c := range comments {
- // Remove comment markers.
- // The parser has given us exactly the comment text.
- switch c[1] {
- case '/':
- //-style comment
- c = c[2:]
- // Remove leading space after //, if there is one.
- // TODO(gri) This appears to be necessary in isolated
- // cases (bignum.RatFromString) - why?
- if len(c) > 0 && c[0] == ' ' {
- c = c[1:]
- }
- case '*':
- /*-style comment */
- c = c[2 : len(c)-2]
- }
-
- // Split on newlines.
- cl := strings.Split(c, "\n")
-
- // Walk lines, stripping trailing white space and adding to list.
- for _, l := range cl {
- lines = append(lines, stripTrailingWhitespace(l))
- }
- }
-
- // Remove leading blank lines; convert runs of
- // interior blank lines to a single blank line.
- n := 0
- for _, line := range lines {
- if line != "" || n > 0 && lines[n-1] != "" {
- lines[n] = line
- n++
- }
- }
- lines = lines[0:n]
-
- // Add final "" entry to get trailing newline from Join.
- if n > 0 && lines[n-1] != "" {
- lines = append(lines, "")
- }
-
- return strings.Join(lines, "\n")
-}
-
-// Split bytes into lines.
-func split(text []byte) [][]byte {
- // count lines
- n := 0
- last := 0
- for i, c := range text {
- if c == '\n' {
- last = i + 1
- n++
- }
- }
- if last < len(text) {
- n++
- }
-
- // split
- out := make([][]byte, n)
- last = 0
- n = 0
- for i, c := range text {
- if c == '\n' {
- out[n] = text[last : i+1]
- last = i + 1
- n++
- }
- }
- if last < len(text) {
- out[n] = text[last:]
- }
-
- return out
-}
-
var (
ldquo = []byte("&ldquo;")
rdquo = []byte("&rdquo;")
@@ -122,13 +22,13 @@ var (
// Escape comment text for HTML. If nice is set,
// also turn `` into &ldquo; and '' into &rdquo;.
-func commentEscape(w io.Writer, s []byte, nice bool) {
+func commentEscape(w io.Writer, text string, nice bool) {
last := 0
if nice {
- for i := 0; i < len(s)-1; i++ {
- ch := s[i]
- if ch == s[i+1] && (ch == '`' || ch == '\'') {
- template.HTMLEscape(w, s[last:i])
+ for i := 0; i < len(text)-1; i++ {
+ ch := text[i]
+ if ch == text[i+1] && (ch == '`' || ch == '\'') {
+ template.HTMLEscape(w, []byte(text[last:i]))
last = i + 2
switch ch {
case '`':
@@ -140,7 +40,7 @@ func commentEscape(w io.Writer, s []byte, nice bool) {
}
}
}
- template.HTMLEscape(w, s[last:])
+ template.HTMLEscape(w, []byte(text[last:]))
}
const (
@@ -156,7 +56,7 @@ const (
filePart + `([:.,]` + filePart + `)*`
)
-var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
+var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
var (
html_a = []byte(`<a href="`)
@@ -168,6 +68,9 @@ var (
html_endp = []byte("</p>\n")
html_pre = []byte("<pre>")
html_endpre = []byte("</pre>\n")
+ html_h = []byte(`<h3 id="`)
+ html_hq = []byte(`">`)
+ html_endh = []byte("</h3>\n")
)
// Emphasize and escape a line of text for HTML. URLs are converted into links;
@@ -178,13 +81,13 @@ var (
// and the word is converted into a link. If nice is set, the remaining text's
// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
// and '' into &rdquo;).
-func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
+func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
for {
- m := matchRx.FindSubmatchIndex(line)
+ m := matchRx.FindStringSubmatchIndex(line)
if m == nil {
break
}
- // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
+ // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
// write text before match
commentEscape(w, line[0:m[0]], nice)
@@ -196,8 +99,8 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
if words != nil {
url, italics = words[string(match)]
}
- if m[2] < 0 {
- // didn't match against first parenthesized sub-regexp; must be match against urlRx
+ if m[2] >= 0 {
+ // match against first parenthesized sub-regexp; must be match against urlRx
if !italics {
// no alternative URL in words list, use match instead
url = string(match)
@@ -228,7 +131,7 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
commentEscape(w, line, nice)
}
-func indentLen(s []byte) int {
+func indentLen(s string) int {
i := 0
for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
i++
@@ -236,9 +139,11 @@ func indentLen(s []byte) int {
return i
}
-func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') }
+func isBlank(s string) bool {
+ return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
+}
-func commonPrefix(a, b []byte) []byte {
+func commonPrefix(a, b string) string {
i := 0
for i < len(a) && i < len(b) && a[i] == b[i] {
i++
@@ -246,7 +151,7 @@ func commonPrefix(a, b []byte) []byte {
return a[0:i]
}
-func unindent(block [][]byte) {
+func unindent(block []string) {
if len(block) == 0 {
return
}
@@ -268,7 +173,66 @@ func unindent(block [][]byte) {
}
}
-// Convert comment text to formatted HTML.
+// heading returns the trimmed line if it passes as a section heading;
+// otherwise it returns the empty string.
+func heading(line string) string {
+ line = strings.TrimSpace(line)
+ if len(line) == 0 {
+ return ""
+ }
+
+ // a heading must start with an uppercase letter
+ r, _ := utf8.DecodeRuneInString(line)
+ if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
+ return ""
+ }
+
+ // it must end in a letter or digit:
+ r, _ = utf8.DecodeLastRuneInString(line)
+ if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+ return ""
+ }
+
+ // exclude lines with illegal characters
+ if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
+ return ""
+ }
+
+ // allow "'" for possessive "'s" only
+ for b := line; ; {
+ i := strings.IndexRune(b, '\'')
+ if i < 0 {
+ break
+ }
+ if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
+ return "" // not followed by "s "
+ }
+ b = b[i+2:]
+ }
+
+ return line
+}
+
+type op int
+
+const (
+ opPara op = iota
+ opHead
+ opPre
+)
+
+type block struct {
+ op op
+ lines []string
+}
+
+var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
+
+func anchorID(line string) string {
+ return nonAlphaNumRx.ReplaceAllString(line, "_")
+}
+
+// ToHTML converts comment text to formatted HTML.
// The comment was prepared by DocReader,
// so it is known not to have leading, trailing blank lines
// nor to have trailing spaces at the end of lines.
@@ -276,6 +240,7 @@ func unindent(block [][]byte) {
//
// Turn each run of multiple \n into </p><p>.
// Turn each run of indented lines into a <pre> block without indent.
+// Enclose headings with header tags.
//
// URLs in the comment text are converted into links; if the URL also appears
// in the words map, the link is taken from the map (if the corresponding map
@@ -284,23 +249,57 @@ func unindent(block [][]byte) {
// Go identifiers that appear in the words map are italicized; if the corresponding
// map value is not the empty string, it is considered a URL and the word is converted
// into a link.
-func ToHTML(w io.Writer, s []byte, words map[string]string) {
- inpara := false
-
- close := func() {
- if inpara {
+func ToHTML(w io.Writer, text string, words map[string]string) {
+ for _, b := range blocks(text) {
+ switch b.op {
+ case opPara:
+ w.Write(html_p)
+ for _, line := range b.lines {
+ emphasize(w, line, words, true)
+ }
w.Write(html_endp)
- inpara = false
+ case opHead:
+ w.Write(html_h)
+ id := ""
+ for _, line := range b.lines {
+ if id == "" {
+ id = anchorID(line)
+ w.Write([]byte(id))
+ w.Write(html_hq)
+ }
+ commentEscape(w, line, true)
+ }
+ if id == "" {
+ w.Write(html_hq)
+ }
+ w.Write(html_endh)
+ case opPre:
+ w.Write(html_pre)
+ for _, line := range b.lines {
+ emphasize(w, line, nil, false)
+ }
+ w.Write(html_endpre)
}
}
- open := func() {
- if !inpara {
- w.Write(html_p)
- inpara = true
+}
+
+func blocks(text string) []block {
+ var (
+ out []block
+ para []string
+
+ lastWasBlank = false
+ lastWasHeading = false
+ )
+
+ close := func() {
+ if para != nil {
+ out = append(out, block{opPara, para})
+ para = nil
}
}
- lines := split(s)
+ lines := strings.SplitAfter(text, "\n")
unindent(lines)
for i := 0; i < len(lines); {
line := lines[i]
@@ -308,6 +307,7 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) {
// close paragraph
close()
i++
+ lastWasBlank = true
continue
}
if indentLen(line) > 0 {
@@ -323,23 +323,119 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) {
for j > i && isBlank(lines[j-1]) {
j--
}
- block := lines[i:j]
+ pre := lines[i:j]
i = j
- unindent(block)
+ unindent(pre)
// put those lines in a pre block
- w.Write(html_pre)
- for _, line := range block {
- emphasize(w, line, nil, false) // no nice text formatting
- }
- w.Write(html_endpre)
+ out = append(out, block{opPre, pre})
+ lastWasHeading = false
continue
}
+
+ if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
+ isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
+ // current line is non-blank, surrounded by blank lines
+ // and the next non-blank line is not indented: this
+ // might be a heading.
+ if head := heading(line); head != "" {
+ close()
+ out = append(out, block{opHead, []string{head}})
+ i += 2
+ lastWasHeading = true
+ continue
+ }
+ }
+
// open paragraph
- open()
- emphasize(w, lines[i], words, true) // nice text formatting
+ lastWasBlank = false
+ lastWasHeading = false
+ para = append(para, lines[i])
i++
}
close()
+
+ return out
+}
+
+// ToText prepares comment text for presentation in textual output.
+// It wraps paragraphs of text to width or fewer Unicode code points
+// and then prefixes each line with the indent. In preformatted sections
+// (such as program text), it prefixes each non-blank line with preIndent.
+func ToText(w io.Writer, text string, indent, preIndent string, width int) {
+ l := lineWrapper{
+ out: w,
+ width: width,
+ indent: indent,
+ }
+ for _, b := range blocks(text) {
+ switch b.op {
+ case opPara:
+ // l.write will add leading newline if required
+ for _, line := range b.lines {
+ l.write(line)
+ }
+ l.flush()
+ case opHead:
+ w.Write(nl)
+ for _, line := range b.lines {
+ l.write(line + "\n")
+ }
+ l.flush()
+ case opPre:
+ w.Write(nl)
+ for _, line := range b.lines {
+ if !isBlank(line) {
+ w.Write([]byte(preIndent))
+ w.Write([]byte(line))
+ }
+ }
+ }
+ }
+}
+
+type lineWrapper struct {
+ out io.Writer
+ printed bool
+ width int
+ indent string
+ n int
+ pendSpace int
+}
+
+var nl = []byte("\n")
+var space = []byte(" ")
+
+func (l *lineWrapper) write(text string) {
+ if l.n == 0 && l.printed {
+ l.out.Write(nl) // blank line before new paragraph
+ }
+ l.printed = true
+
+ for _, f := range strings.Fields(text) {
+ w := utf8.RuneCountInString(f)
+ // wrap if line is too long
+ if l.n > 0 && l.n+l.pendSpace+w > l.width {
+ l.out.Write(nl)
+ l.n = 0
+ l.pendSpace = 0
+ }
+ if l.n == 0 {
+ l.out.Write([]byte(l.indent))
+ }
+ l.out.Write(space[:l.pendSpace])
+ l.out.Write([]byte(f))
+ l.n += l.pendSpace + w
+ l.pendSpace = 1
+ }
+}
+
+func (l *lineWrapper) flush() {
+ if l.n == 0 {
+ return
+ }
+ l.out.Write(nl)
+ l.pendSpace = 0
+ l.n = 0
}