1 files changed, 236 insertions, 140 deletions
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go
index e1989226b..6f0edd4ba 100644
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -7,114 +7,14 @@
 package doc
 
 import (
-	"go/ast"
 	"io"
 	"regexp"
 	"strings"
-	"template" // for HTMLEscape
+	"text/template" // for HTMLEscape
+	"unicode"
+	"unicode/utf8"
 )
 
-func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
-
-func stripTrailingWhitespace(s string) string {
-	i := len(s)
-	for i > 0 && isWhitespace(s[i-1]) {
-		i--
-	}
-	return s[0:i]
-}
-
-// CommentText returns the text of comment,
-// with the comment markers - //, /*, and */ - removed.
-func CommentText(comment *ast.CommentGroup) string {
-	if comment == nil {
-		return ""
-	}
-	comments := make([]string, len(comment.List))
-	for i, c := range comment.List {
-		comments[i] = string(c.Text)
-	}
-
-	lines := make([]string, 0, 10) // most comments are less than 10 lines
-	for _, c := range comments {
-		// Remove comment markers.
-		// The parser has given us exactly the comment text.
-		switch c[1] {
-		case '/':
-			//-style comment
-			c = c[2:]
-			// Remove leading space after //, if there is one.
-			// TODO(gri) This appears to be necessary in isolated
-			//           cases (bignum.RatFromString) - why?
-			if len(c) > 0 && c[0] == ' ' {
-				c = c[1:]
-			}
-		case '*':
-			/*-style comment */
-			c = c[2 : len(c)-2]
-		}
-
-		// Split on newlines.
-		cl := strings.Split(c, "\n")
-
-		// Walk lines, stripping trailing white space and adding to list.
-		for _, l := range cl {
-			lines = append(lines, stripTrailingWhitespace(l))
-		}
-	}
-
-	// Remove leading blank lines; convert runs of
-	// interior blank lines to a single blank line.
-	n := 0
-	for _, line := range lines {
-		if line != "" || n > 0 && lines[n-1] != "" {
-			lines[n] = line
-			n++
-		}
-	}
-	lines = lines[0:n]
-
-	// Add final "" entry to get trailing newline from Join.
-	if n > 0 && lines[n-1] != "" {
-		lines = append(lines, "")
-	}
-
-	return strings.Join(lines, "\n")
-}
-
-// Split bytes into lines.
-func split(text []byte) [][]byte {
-	// count lines
-	n := 0
-	last := 0
-	for i, c := range text {
-		if c == '\n' {
-			last = i + 1
-			n++
-		}
-	}
-	if last < len(text) {
-		n++
-	}
-
-	// split
-	out := make([][]byte, n)
-	last = 0
-	n = 0
-	for i, c := range text {
-		if c == '\n' {
-			out[n] = text[last : i+1]
-			last = i + 1
-			n++
-		}
-	}
-	if last < len(text) {
-		out[n] = text[last:]
-	}
-
-	return out
-}
-
 var (
 	ldquo = []byte("&ldquo;")
 	rdquo = []byte("&rdquo;")
@@ -122,13 +22,13 @@ var (
 
 // Escape comment text for HTML. If nice is set,
 // also turn `` into &ldquo; and '' into &rdquo;.
-func commentEscape(w io.Writer, s []byte, nice bool) {
+func commentEscape(w io.Writer, text string, nice bool) {
 	last := 0
 	if nice {
-		for i := 0; i < len(s)-1; i++ {
-			ch := s[i]
-			if ch == s[i+1] && (ch == '`' || ch == '\'') {
-				template.HTMLEscape(w, s[last:i])
+		for i := 0; i < len(text)-1; i++ {
+			ch := text[i]
+			if ch == text[i+1] && (ch == '`' || ch == '\'') {
+				template.HTMLEscape(w, []byte(text[last:i]))
 				last = i + 2
 				switch ch {
 				case '`':
@@ -140,7 +40,7 @@ func commentEscape(w io.Writer, s []byte, nice bool) {
 			}
 		}
 	}
-	template.HTMLEscape(w, s[last:])
+	template.HTMLEscape(w, []byte(text[last:]))
 }
 
 const (
@@ -156,7 +56,7 @@ const (
 		filePart + `([:.,]` + filePart + `)*`
 )
 
-var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
+var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
 
 var (
 	html_a      = []byte(`<a href="`)
@@ -168,6 +68,9 @@ var (
 	html_endp   = []byte("</p>\n")
 	html_pre    = []byte("<pre>")
 	html_endpre = []byte("</pre>\n")
+	html_h      = []byte(`<h3 id="`)
+	html_hq     = []byte(`">`)
+	html_endh   = []byte("</h3>\n")
 )
 
 // Emphasize and escape a line of text for HTML. URLs are converted into links;
@@ -178,13 +81,13 @@ var (
 // and the word is converted into a link. If nice is set, the remaining text's
 // appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
 // and '' into &rdquo;).
-func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
+func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
 	for {
-		m := matchRx.FindSubmatchIndex(line)
+		m := matchRx.FindStringSubmatchIndex(line)
 		if m == nil {
 			break
 		}
-		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
+		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
 
 		// write text before match
 		commentEscape(w, line[0:m[0]], nice)
@@ -196,8 +99,8 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
 		if words != nil {
 			url, italics = words[string(match)]
 		}
-		if m[2] < 0 {
-			// didn't match against first parenthesized sub-regexp; must be match against urlRx
+		if m[2] >= 0 {
+			// match against first parenthesized sub-regexp; must be match against urlRx
 			if !italics {
 				// no alternative URL in words list, use match instead
 				url = string(match)
@@ -228,7 +131,7 @@ func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
 	commentEscape(w, line, nice)
 }
 
-func indentLen(s []byte) int {
+func indentLen(s string) int {
 	i := 0
 	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
 		i++
@@ -236,9 +139,11 @@ func indentLen(s []byte) int {
 	return i
 }
 
-func isBlank(s []byte) bool { return len(s) == 0 || (len(s) == 1 && s[0] == '\n') }
+func isBlank(s string) bool {
+	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
+}
 
-func commonPrefix(a, b []byte) []byte {
+func commonPrefix(a, b string) string {
 	i := 0
 	for i < len(a) && i < len(b) && a[i] == b[i] {
 		i++
@@ -246,7 +151,7 @@ func commonPrefix(a, b []byte) []byte {
 	return a[0:i]
 }
 
-func unindent(block [][]byte) {
+func unindent(block []string) {
 	if len(block) == 0 {
 		return
 	}
@@ -268,7 +173,66 @@ func unindent(block [][]byte) {
 	}
 }
 
-// Convert comment text to formatted HTML.
+// heading returns the trimmed line if it passes as a section heading;
+// otherwise it returns the empty string. 
+func heading(line string) string {
+	line = strings.TrimSpace(line)
+	if len(line) == 0 {
+		return ""
+	}
+
+	// a heading must start with an uppercase letter
+	r, _ := utf8.DecodeRuneInString(line)
+	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
+		return ""
+	}
+
+	// it must end in a letter or digit:
+	r, _ = utf8.DecodeLastRuneInString(line)
+	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+		return ""
+	}
+
+	// exclude lines with illegal characters
+	if strings.IndexAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") >= 0 {
+		return ""
+	}
+
+	// allow "'" for possessive "'s" only
+	for b := line; ; {
+		i := strings.IndexRune(b, '\'')
+		if i < 0 {
+			break
+		}
+		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
+			return "" // not followed by "s "
+		}
+		b = b[i+2:]
+	}
+
+	return line
+}
+
+type op int
+
+const (
+	opPara op = iota
+	opHead
+	opPre
+)
+
+type block struct {
+	op    op
+	lines []string
+}
+
+var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
+
+func anchorID(line string) string {
+	return nonAlphaNumRx.ReplaceAllString(line, "_")
+}
+
+// ToHTML converts comment text to formatted HTML.
 // The comment was prepared by DocReader,
 // so it is known not to have leading, trailing blank lines
 // nor to have trailing spaces at the end of lines.
@@ -276,6 +240,7 @@ func unindent(block [][]byte) {
 //
 // Turn each run of multiple \n into </p><p>.
 // Turn each run of indented lines into a <pre> block without indent.
+// Enclose headings with header tags.
 //
 // URLs in the comment text are converted into links; if the URL also appears
 // in the words map, the link is taken from the map (if the corresponding map
@@ -284,23 +249,57 @@ func unindent(block [][]byte) {
 // Go identifiers that appear in the words map are italicized; if the corresponding
 // map value is not the empty string, it is considered a URL and the word is converted
 // into a link.
-func ToHTML(w io.Writer, s []byte, words map[string]string) {
-	inpara := false
-
-	close := func() {
-		if inpara {
+func ToHTML(w io.Writer, text string, words map[string]string) {
+	for _, b := range blocks(text) {
+		switch b.op {
+		case opPara:
+			w.Write(html_p)
+			for _, line := range b.lines {
+				emphasize(w, line, words, true)
+			}
 			w.Write(html_endp)
-			inpara = false
+		case opHead:
+			w.Write(html_h)
+			id := ""
+			for _, line := range b.lines {
+				if id == "" {
+					id = anchorID(line)
+					w.Write([]byte(id))
+					w.Write(html_hq)
+				}
+				commentEscape(w, line, true)
+			}
+			if id == "" {
+				w.Write(html_hq)
+			}
+			w.Write(html_endh)
+		case opPre:
+			w.Write(html_pre)
+			for _, line := range b.lines {
+				emphasize(w, line, nil, false)
+			}
+			w.Write(html_endpre)
 		}
 	}
-	open := func() {
-		if !inpara {
-			w.Write(html_p)
-			inpara = true
+}
+
+func blocks(text string) []block {
+	var (
+		out  []block
+		para []string
+
+		lastWasBlank   = false
+		lastWasHeading = false
+	)
+
+	close := func() {
+		if para != nil {
+			out = append(out, block{opPara, para})
+			para = nil
 		}
 	}
 
-	lines := split(s)
+	lines := strings.SplitAfter(text, "\n")
 	unindent(lines)
 	for i := 0; i < len(lines); {
 		line := lines[i]
@@ -308,6 +307,7 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) {
 			// close paragraph
 			close()
 			i++
+			lastWasBlank = true
 			continue
 		}
 		if indentLen(line) > 0 {
@@ -323,23 +323,119 @@ func ToHTML(w io.Writer, s []byte, words map[string]string) {
 			for j > i && isBlank(lines[j-1]) {
 				j--
 			}
-			block := lines[i:j]
+			pre := lines[i:j]
 			i = j
 
-			unindent(block)
+			unindent(pre)
 
 			// put those lines in a pre block
-			w.Write(html_pre)
-			for _, line := range block {
-				emphasize(w, line, nil, false) // no nice text formatting
-			}
-			w.Write(html_endpre)
+			out = append(out, block{opPre, pre})
+			lastWasHeading = false
 			continue
 		}
+
+		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
+			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
+			// current line is non-blank, surrounded by blank lines
+			// and the next non-blank line is not indented: this
+			// might be a heading.
+			if head := heading(line); head != "" {
+				close()
+				out = append(out, block{opHead, []string{head}})
+				i += 2
+				lastWasHeading = true
+				continue
+			}
+		}
+
 		// open paragraph
-		open()
-		emphasize(w, lines[i], words, true) // nice text formatting
+		lastWasBlank = false
+		lastWasHeading = false
+		para = append(para, lines[i])
 		i++
 	}
 	close()
+
+	return out
+}
+
+// ToText prepares comment text for presentation in textual output.
+// It wraps paragraphs of text to width or fewer Unicode code points
+// and then prefixes each line with the indent.  In preformatted sections
+// (such as program text), it prefixes each non-blank line with preIndent.
+func ToText(w io.Writer, text string, indent, preIndent string, width int) {
+	l := lineWrapper{
+		out:    w,
+		width:  width,
+		indent: indent,
+	}
+	for _, b := range blocks(text) {
+		switch b.op {
+		case opPara:
+			// l.write will add leading newline if required
+			for _, line := range b.lines {
+				l.write(line)
+			}
+			l.flush()
+		case opHead:
+			w.Write(nl)
+			for _, line := range b.lines {
+				l.write(line + "\n")
+			}
+			l.flush()
+		case opPre:
+			w.Write(nl)
+			for _, line := range b.lines {
+				if !isBlank(line) {
+					w.Write([]byte(preIndent))
+					w.Write([]byte(line))
+				}
+			}
+		}
+	}
+}
+
+type lineWrapper struct {
+	out       io.Writer
+	printed   bool
+	width     int
+	indent    string
+	n         int
+	pendSpace int
+}
+
+var nl = []byte("\n")
+var space = []byte(" ")
+
+func (l *lineWrapper) write(text string) {
+	if l.n == 0 && l.printed {
+		l.out.Write(nl) // blank line before new paragraph
+	}
+	l.printed = true
+
+	for _, f := range strings.Fields(text) {
+		w := utf8.RuneCountInString(f)
+		// wrap if line is too long
+		if l.n > 0 && l.n+l.pendSpace+w > l.width {
+			l.out.Write(nl)
+			l.n = 0
+			l.pendSpace = 0
+		}
+		if l.n == 0 {
+			l.out.Write([]byte(l.indent))
+		}
+		l.out.Write(space[:l.pendSpace])
+		l.out.Write([]byte(f))
+		l.n += l.pendSpace + w
+		l.pendSpace = 1
+	}
+}
+
+func (l *lineWrapper) flush() {
+	if l.n == 0 {
+		return
+	}
+	l.out.Write(nl)
+	l.pendSpace = 0
+	l.n = 0
 }